Build branch unpack_genome with version unpack_genome (7c0705a)

Build pipeline: viash-hub.htrnaseq.unpack-genome-5zk5c

Source commit: 7c0705a514

Source message: Add ability to unpack input genome
This commit is contained in:
CI
2024-08-20 14:02:33 +00:00
commit c439638095
67 changed files with 43617 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
# Build output directory (the scripts in this repository reference target/executable and target/nextflow)
target
# Test data directory (referenced by the test invocations as testData/...)
testData
# Nextflow related files
.nextflow
.nextflow.log*
work

13
_viash.yaml Normal file
View File

@@ -0,0 +1,13 @@
# Project-level Viash configuration for the htrnaseq package.
name: htrnaseq
description: |
Demultiplexing pipeline [WIP]
license: MIT
keywords: [bioinformatics, sequence, high-throughput, mapping, counting, pipeline]
links:
issue_tracker: https://github.com/viash-hub/htrnaseq/issues
repository: https://github.com/viash-hub/htrnaseq
# Viash release this project is built with.
viash_version: 0.9.0-RC7
# Config mod: sets .requirements.commands to ['ps'].
config_mods: |
.requirements.commands := ['ps']

3
main.nf Normal file
View File

@@ -0,0 +1,3 @@
// Placeholder entry workflow: it only prints a notice telling the user to run
// the dedicated pipeline scripts instead.
workflow {
  def notice = "This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines."
  print(notice)
}

6
nextflow.config Normal file
View File

@@ -0,0 +1,6 @@
// Pipeline metadata read by Nextflow.
manifest {
name = "htrnaseq"
// Set to the name of the branch this build was made from.
version = "unpack_genome"
defaultBranch = "main"
// Minimum Nextflow version; per the Nextflow docs the leading '!' makes the
// run stop (instead of only warning) when the requirement is not met.
nextflowVersion = "!>=20.12.1-edge"
}

43
src/config/labels.config Normal file
View File

@@ -0,0 +1,43 @@
// Default process resources, retry policy and the resource labels that
// components can request through their 'label' directive.
process {
// Default resources for components that hardly do any processing
// (memory grows linearly with the attempt number)
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Optional upper bound consulted by get_memory(); a null/empty value disables the cap
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory labels scale with the attempt number and are passed through get_memory()
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
// Cap a requested amount of memory at process.maxMemory.
//
// to_compare: the requested memory (the callers in this file pass e.g. 4.GB * task.attempt).
// Returns:
//   - to_compare unchanged when process.maxMemory is not set,
//   - process.maxMemory on the last allowed attempt (task.attempt == process.maxRetries),
//   - process.maxMemory when the request exceeds it,
//   - to_compare otherwise.
// Any error while evaluating the limits is reported and terminates the JVM.
def get_memory(to_compare) {
  // No limit configured: nothing to cap.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // NOTE(review): 'task' is not a parameter of this function; this relies on it
    // being resolvable from the calling directive closure -- confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
      // The request exceeds the cap. The original returned the undefined
      // variable 'max_memory' here, which always threw and ended in the
      // catch block below.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

38
src/config/tests.config Normal file
View File

@@ -0,0 +1,38 @@
// Configuration used when running the tests.
// Container assigned to processes through the process scope.
process.container = 'nextflow/bash:latest'
profiles {
// detect tempdir
// (first non-empty of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR; falls back to /tmp)
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Profile: point the temp setting of each container engine at the detected tempDir
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Profile: disable publishing for every process (the selector '.*' matches all names)
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Profile: run with Docker and explicitly switch the other container engines off
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
}

View File

@@ -0,0 +1,101 @@
# Viash component: map the reads of several wells in one batch with STAR.
name: parallel_map
description: |
Map wells in batch, using STAR
Spliced Transcripts Alignment to a Reference (C) Alexander Dobin
https://github.com/alexdobin/STAR
argument_groups:
- name: Input arguments
arguments:
# input_r1, input_r2 and barcodes are parallel lists: script.sh requires them
# to have the same number of values, in matching order.
- name: "--input_r1"
type: file
required: true
multiple: true
- name: "--input_r2"
type: file
required: true
multiple: true
- name: "--genomeDir"
type: file
required: true
description: STAR reference directory
- name: "--barcodes"
type: string
multiple: true
required: true
description: The barcodes/wells to process
- name: Barcode arguments
arguments:
- name: "--wellBarcodesLength"
type: integer
required: true
description: The length of the well barcodes
- name: "--umiLength"
type: integer
required: true
description: The length of the UMIs
# Passed unchanged to STAR's --limitBAMsortRAM by script.sh.
# NOTE(review): listed under "Barcode arguments" although it is a STAR resource setting.
- name: "--limitBAMsortRAM"
type: string
default: "10000000000"
- name: Runtime arguments
arguments:
- name: "--runThreadN"
description: "Number of threads to use for a single STAR execution."
type: integer
default: 1
- name: Output arguments
arguments:
- name: "--output"
type: file
description: |
Location of the output folders, 1 folder per barcode. The value used
for this argument must contain a '*', which will be replaced with the
barcode to form the final output location for that barcode.
required: true
multiple: true
direction: output
default: './*'
# File handed to GNU parallel's --joblog option in script.sh.
- name: "--joblog"
type: file
description: Where to store the log file listing all the jobs.
required: false
direction: output
default: "execution_log.txt"
resources:
- type: bash_script
path: script.sh
test_resources:
- type: bash_script
path: test.sh
engines:
- type: docker
image: debian:stable-slim
setup:
# System packages; script.sh uses 'parallel', 'file' and zcat (gzip) directly.
- type: apt
packages:
- procps
- gzip
- bzip2
- parallel
- wget
- zlib1g-dev
- unzip
- xxd
- file
# Download the STAR release archive and install the static Linux binary into /usr/local/bin.
- type: docker
env:
- STAR_VERSION "2.7.11b"
- STAR_SOURCE "https://github.com/alexdobin/STAR/releases/download/$STAR_VERSION/STAR_$STAR_VERSION.zip"
- STAR_TARGET "/tmp/star.zip"
- STAR_BINARY "STAR"
run: |
wget -O $STAR_TARGET $STAR_SOURCE && \
unzip $STAR_TARGET -d /tmp && \
mv /tmp/STAR_$STAR_VERSION/Linux_x86_64_static/STAR /usr/local/bin/$STAR_BINARY && \
chmod +x /usr/local/bin/$STAR_BINARY && \
rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
runners:
- type: executable
- type: nextflow

287
src/parallel_map/script.sh Executable file
View File

@@ -0,0 +1,287 @@
#!/bin/bash
# parallel_map: validate the arguments, then map every well (barcode) with STAR
# using GNU parallel (see the rest of this script).
# The values between the VIASH START/END markers are example values for the
# par_*/meta_* variables this script reads.
## VIASH START
par_input_r1="work/2c/5b8b3a2dd4a988b8838e3f72d38a37/_viash_par/input_r1_1/two__ACACCGAATT.concat_text_r1.output.txt"
par_input_r2="work/2c/5b8b3a2dd4a988b8838e3f72d38a37/_viash_par/input_r2_1/two__ACACCGAATT.concat_text_r2.output.txt"
par_barcodes="ACACCGAATT;GGCTATTGAT"
par_output="./*"
par_genomeDir="star"
par_wellBarcodesLength=10
par_umiLength=10
par_limitBAMsortRAM="10000000000"
meta_cpus=2
par_runThreadN=1
## VIASH END
# Abort on the first failing command or failing pipeline stage.
set -eo pipefail
# Check if wildcard character is present in output folder template
printf "Checking if output folder template ($par_output) contains a single wildcard character '*'. "
# Remove every character that is not '*'; the length of the remainder is the number of wildcards.
output_glob_character="${par_output//[^\*]}"
if [[ "${#output_glob_character}" -ne "1" ]]; then
echo "The value for --output must contain exactly one '*' character. Exiting..."
exit 1
else
echo "Done, wildcard character found!"
fi
# Split the delimited strings into arrays
# (multi-value arguments arrive as a single ';'-separated string)
IFS=';' read -r -a barcodes <<< "$par_barcodes"
IFS=';' read -r -a input_r1 <<< "$par_input_r1"
IFS=';' read -r -a input_r2 <<< "$par_input_r2"
# Check that the number of values provided for the barcodes and the fastq files are the same.
num_barcodes="${#barcodes[@]}"
num_r1_inputs="${#input_r1[@]}"
num_r2_inputs="${#input_r2[@]}"
if [ ! "$num_barcodes" -eq "$num_r1_inputs" ] || [ ! "$num_r1_inputs" -eq "$num_r2_inputs" ]; then
echo "The number of values for arguments 'barcodes' ($num_barcodes), "\
"'input_r1' ($num_r1_inputs) and 'input_r2' ($num_r2_inputs) "\
"should be the same, and their order should match."
exit 1
else
echo "Checked if length of barcodes input ($num_barcodes) is "\
"the same as R1 reads ($num_r1_inputs) and R2 reads "\
"($num_r2_inputs). Seems OK!"
fi
# Function to test for unique values in array.
#
# Arguments:
#   $1  NAME of the array to inspect (passed by reference, not by value)
# Returns:
#   0 when every element occurs exactly once (or the array is empty),
#   1 when at least one value is duplicated.
function arrayContainsUniqueValues {
  # Pass the argument by reference
  local -n arr=$1
  # Use the keys of an associative array to collapse duplicates.
  local -A seen=()
  local item
  for item in "${arr[@]}"; do
    # The '_' prefix keeps the subscript valid for an empty element.
    seen["_$item"]=1
  done
  # Compare the number of distinct keys directly; expanding the keys into a
  # new array unquoted would word-split values that contain whitespace.
  [ "${#seen[@]}" -eq "${#arr[@]}" ]
}

# Test the function inside the 'if' condition: under 'set -e' a bare failing
# call would terminate the script before the message below is printed.
if ! arrayContainsUniqueValues barcodes; then
  echo "The provided barcodes should be unique!"
  echo "Values: $par_barcodes"
  exit 1
fi
# Define the function that will be used to run a single job.
#
# Maps the reads of one well (barcode) with STAR (soloType Droplet) and checks
# that STAR processed every input read.
#
# Positional arguments (in the order GNU parallel supplies them):
#   $1  length of the well barcode
#   $2  length of the UMI
#   $3  output directory template; '*' is replaced by the barcode
#   $4  STAR genome directory
#   $5  value passed to STAR's --limitBAMsortRAM
#   $6  number of threads for this STAR execution
#   $7  barcode of the well
#   $8  FASTQ file with the R1 reads (plain or gzipped)
#   $9  FASTQ file with the R2 reads (plain or gzipped)
# Returns:
#   non-zero when the two mates differ in number of lines, or when the number
#   of input reads reported by STAR differs from the number provided.
function _run() {
  local par_wellBarcodeLength="$1"
  local par_UMIlength="$2"
  local par_output="$3"
  local par_genomeDir="$4"
  local par_limitBAMsortRAM="$5"
  local par_runThreadN="$6"
  local barcode="$7"
  local input_R1="$8"
  local input_R2="$9"
  # The UMI directly follows the well barcode (positions are 1-based).
  local par_UMIstart=$(($par_wellBarcodeLength + 1))
  set -eo pipefail

  # The original used 'echo <<-EOF', which ignores the here-document and only
  # prints an empty line; print the intended banner explicitly.
  echo "Processing $barcode"
  echo "For the following inputs:"
  echo "  R1: $input_R1"
  echo "  R2: $input_R2"

  echo "Writing barcode '$barcode' to $barcode.txt and using it as input".
  # Note that there is no possible conflict between jobs here
  # because the barcodes are unique (and the barcode is part of the name
  # of the file).
  echo "$barcode" > "$barcode.txt"
  local dir="${par_output//\*/$barcode}/"
  echo "Setting output for barcode '$barcode' to '$dir'."
  mkdir -p "$dir"

  # Per-job scratch directory. Declaration and assignment are split so that a
  # failing mktemp is not masked by 'local'.
  # NOTE(review): this function runs in a shell spawned by GNU parallel;
  # meta_temp_dir is only visible there when it is exported, hence the /tmp
  # fallback -- confirm how the runner provides it.
  local TMPDIR
  TMPDIR=$(mktemp -d "${meta_temp_dir:-/tmp}/parallel_map-$barcode-XXXXXX")
  function clean_up {
    [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
  }
  trap clean_up RETURN

  # Decompress the input files when needed
  # NOTE: for some reason, using STAR's --readFilesCommand does not always work
  # This might be because STAR creates fifo files (see https://man7.org/linux/man-pages/man7/fifo.7.html)
  # and this requires a filesystem that supports this. Another cause might be that the input files
  # are symlinks. When testing this, using '--readFilesCommand "zcat"'
  # always produced empty BAM files, but also a successful exit code (0) so the problem is not reported.
  # However, the logs showed the following error: "gzip -: unexpected end of file".
  function is_gzipped {
    printf "Checking if input '$1' (barcode '$barcode') is gzipped... "
    # -b omits the file name from the output, so a path that merely contains
    # the word 'gzip' is not mistaken for a compressed file.
    if file -b "$1" | grep -q 'gzip'; then
      echo "Done, detected compressed file."
      return
    fi
    echo "Done, file does not need decompression."
    false
  }

  # Resolve symbolic links to actual file paths
  input_R1=$(realpath "$input_R1")
  input_R2=$(realpath "$input_R2")

  local uncompressed_file_r1 uncompressed_file_r2
  if is_gzipped "$input_R1"; then
    local compressed_file_name_r1
    compressed_file_name_r1="$(basename -- "$input_R1")"
    uncompressed_file_r1="$TMPDIR/${compressed_file_name_r1%.gz}"
    printf "Unpacking input to %s... " "$uncompressed_file_r1"
    zcat "$input_R1" > "$uncompressed_file_r1"
    echo "Decompression done."
  else
    uncompressed_file_r1="$input_R1"
  fi
  if is_gzipped "$input_R2"; then
    local compressed_file_name_r2
    compressed_file_name_r2="$(basename -- "$input_R2")"
    uncompressed_file_r2="$TMPDIR/${compressed_file_name_r2%.gz}"
    printf "Unpacking input to %s... " "$uncompressed_file_r2"
    zcat "$input_R2" > "$uncompressed_file_r2"
    echo "Decompression done."
  else
    uncompressed_file_r2="$input_R2"
  fi

  # Both mates must contain the same number of lines.
  local n_input_lines_r1=$(wc -l < "$uncompressed_file_r1")
  local n_input_lines_r2=$(wc -l < "$uncompressed_file_r2")
  printf "Checking if length of input file mates match. "
  if (( n_input_lines_r1 != n_input_lines_r2 )); then
    echo "The length of file $input_R1 ($n_input_lines_r1) does not match with $input_R2 ($n_input_lines_r2)"
    return 1
  else
    echo "Seems OK, $n_input_lines_r1 input lines."
  fi

  echo "Starting STAR for barcode '$barcode'"
  # soloType 'Droplet' is the same as 'CB_UMI_Simple': one UMI and one cell barcode of fixed length.
  # By default in this mode, STAR will look for the cell barcode and the UMI in the last file specified with --readFilesIn
  # So we need to specify R2 first and R1 second, because R1 contains the barcode and UMI.
  # Also, you might be tempted to use '--soloBarcodeMate 1' to alter this behavior, but this requires clipping
  # the barcode from this mate by specifying --clip5pNbases and/or --clip3pNbases, which we do not want to do.
  STAR \
    --readFilesIn "$uncompressed_file_r2" "$uncompressed_file_r1" \
    --soloType Droplet \
    --quantMode GeneCounts \
    --genomeLoad LoadAndKeep \
    --limitBAMsortRAM "$par_limitBAMsortRAM" \
    --runThreadN "$par_runThreadN" \
    --outFilterMultimapNmax 1 \
    --outSAMtype BAM SortedByCoordinate \
    --soloCBstart 1 \
    --readFilesType "Fastx" \
    --soloCBlen "$par_wellBarcodeLength" \
    --soloUMIstart "$par_UMIstart" \
    --soloUMIlen "$par_UMIlength" \
    --soloBarcodeReadLength 0 \
    --soloStrand Unstranded \
    --soloFeatures Gene \
    --genomeDir "$par_genomeDir" \
    --outReadsUnmapped Fastx \
    --outSAMunmapped Within \
    --outSAMattributes NH HI nM AS CR UR CB UB GX GN \
    --soloCBwhitelist "$barcode.txt" \
    --outFileNamePrefix "$dir" \
    --outTmpDir "$TMPDIR/STARtemp/"
  printf "Done running STAR. "

  # Check if the number of processed reads is equal to the number of input reads
  # (a FASTQ record spans 4 lines).
  local n_input_reads=$(($n_input_lines_r1 / 4))
  local nr_output_reads=$(grep -Po "Number\ of\ input\ reads \\|\W*\K\d+" "$dir/Log.final.out")
  if (( nr_output_reads != n_input_reads )); then
    echo "Not all input reads were processed for barcode $barcode."
    return 1
  else
    echo "Processed $nr_output_reads reads for barcode $barcode".
  fi
}
# Export the function - requires bash
export -f _run
# Load reference genome
# (the jobs run STAR with '--genomeLoad LoadAndKeep'; the genome is removed again after the jobs finish)
echo "Loading reference genome"
STAR --genomeLoad LoadAndExit --genomeDir "$par_genomeDir"
# Run the concurrent jobs using GNU parallel
# Make sure that parallel uses the correct shell
export PARALLEL_SHELL="/bin/bash"
# Some notes:
# --halt soon,fail=1: per the GNU parallel manual, once one job has failed no new jobs
# are started and the jobs that are already running are allowed to finish
# ('now' instead of 'soon' would kill the running jobs).
# NOTE(review): the GNU parallel manual states that --retry-failed only looks at the
# joblog and ignores the command line -- confirm this combination behaves as intended.
#
# ::: is a special syntax for GNU parallel to delineate inputs
# If multiple ::: are given, each group will be treated as an input source, and all combinations of input
# sources will be generated. E.g. ::: 1 2 ::: a b c will result in the combinations (1,a) (1,b) (1,c) (2,a) (2,b) (2,c)
# The delimiter :::+ (note the extra '+') links the argument to the previous argument, and one argument from each of the input
# sources will be read.
parallel_cmd=("parallel" "--jobs" "80%" "--verbose" "--memfree" "2G"
"--tmpdir" "$meta_temp_dir"
"--retry-failed" "--retries" "4" "--halt" "soon,fail=1"
"--joblog" "$par_joblog" "_run" "{}")
# Arguments for which there is one value, so these will not create extra jobs
parallel_cmd+=(":::" "$par_wellBarcodesLength" ":::" "$par_umiLength" ":::" "$par_output" ":::" "$par_genomeDir" ":::" "$par_limitBAMsortRAM" ":::" "$par_runThreadN")
# Argument which in fact will cause extra jobs to be spawned, per job one item from each argument will be selected
# Thus, these argument lists should have the same length.
parallel_cmd+=(":::" "${barcodes[@]}" ":::+" "${input_r1[@]}" ":::+" "${input_r2[@]}")
# Temporarily disable errexit so that the exit code can be captured and the
# reference genome is unloaded even when a job failed.
set +eo pipefail
"${parallel_cmd[@]}"
exit_code=$?
set -eo pipefail
echo "GNU parallel finished!"
# Unload reference
printf "Unloading reference genome. "
STAR --genomeLoad Remove --genomeDir "$par_genomeDir"
echo "Done!"
# Exit code from GNU parallel:
# If fail=1 is used, the exit status will be the exit status of the failing job.
echo "Checking exit code"
if ((exit_code>0)); then
# Note that the ending HERE must be indented with TAB characters (not spaces)
# in order to remove leading indentation
# The '%s' in the template is filled with the contents of the job log by printf below.
MESSAGE=$(
cat <<-HERE
==================================================================
!!! An error occurred for one of the jobs.
Exit code of the failing job: $exit_code.
%s
==================================================================
HERE
)
printf "$MESSAGE" "$(<$par_joblog)"
exit 1
else
cat <<-HERE
==================================================================
Mapping went fine (exit code '$exit_code'), zero errors occurred
==================================================================
HERE
fi

356
src/parallel_map/test.sh Executable file
View File

@@ -0,0 +1,356 @@
set -eo pipefail
## VIASH START
meta_executable="target/executable/parallel_map/parallel_map"
## VIASH END

# Some helper functions. Each one prints a message and exits the test script
# with status 1 when its assertion does not hold.

# $1: path that must be an existing directory.
assert_directory_exists() {
  # Report a missing *directory*; the original message said "File".
  [ -d "$1" ] || { echo "Directory '$1' does not exist" && exit 1; }
}

# $1: path that must be an existing regular file.
assert_file_exists() {
  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
}

# $1: file to search, $2: pattern (grep basic regular expression) that must occur.
assert_file_contains() {
  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
}

# $1: file to search, $2: extended regular expression that must match.
assert_file_contains_regex() {
  grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
}
# Create the test fixtures: two paired-end samples, a tiny genome with its
# annotation, and a STAR index built from them. Everything lives in a scratch
# directory that is removed when the script exits.
echo "> Prepare test data in $meta_temp_dir"
TMPDIR=$(mktemp -d --tmpdir="$meta_temp_dir")
function clean_up {
[[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
}
trap clean_up EXIT
# Sample 1, barcode ACAGTCACAG, UMI CTACGGATGA
# (R1 starts with the barcode followed by the UMI; both reads are identical apart from their quality strings)
cat > "$TMPDIR/sample1_R1.fastq" <<'EOF'
@SAMPLE_1_SEQ_ID1
ACAGTCACAGCTACGGATGAGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_1_SEQ_ID2
ACAGTCACAGCTACGGATGAGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAG
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
cat > "$TMPDIR/sample1_R2.fastq" <<'EOF'
@SAMPLE_1_SEQ_ID1
CTCACAGAGAGTCACAACATAGGCGCGGATGTGTGAGGCTTATGAGGC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_1_SEQ_ID2
CTCACAGAGAGTCACAACATAGGCGCGGATGTGTGAGGCTTATGAGGC
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
# Sample 2, barcode CGGGTTTACC, UMI GCTAGCTAGC
cat > "$TMPDIR/sample2_R1.fastq" << 'EOF'
@SAMPLE_2_SEQ_ID1
CGGGTTTACCGCTAGCTAGCCACCACTATGGTTGGCCGGTTAGTAGTGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_2_SEQ_ID2
CGGGTTTACCGCTAGCTAGCCACCACTATGGTTGGCCGGTTAGTAGTGT
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
cat > "$TMPDIR/sample2_R2.fastq" <<'EOF'
@SAMPLE_2_SEQ_ID1
ACACTACTAACCGGCCAACCATAGTGGTG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_2_SEQ_ID2
ACACTACTAACCGGCCAACCATAGTGGTG
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
# Note that there is a sjdbGTFchrPrefix argument for STAR:
# prefix for chromosome names in a GTF file (default: '-')
# Reference genome: a single sequence named '1'.
cat > "$TMPDIR/genome.fasta" <<'EOF'
>1
TGGCATGAGCCAACGAACGCTGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAGCGTTCGTGGG
GCTCGTCACCACTATGGTTGGCCGGTTAGTAGTGTGACTCCTGGTTTTCTGGAGCTTCTTTAAACCGTAGTCCAGTCAA
TGCGAATGGCACTTCACGACGGACTGTCCTTAGCTCAGGGGA
EOF
# Annotation with two genes on sequence '1'.
# NOTE(review): the 'gene' records have an unbalanced quote in gene_name ("GENE1;)
# and use 'gene_name:' / 'exon_id:' with a colon -- confirm this is intentional.
cat > "$TMPDIR/genes.gtf" <<'EOF'
1 example_source gene 0 72 . + . gene_id "gene1"; gene_name: "GENE1;
1 example_source exon 20 71 . + . gene_id "gene1"; gene_name: "GENE1"; exon_id: gene1_exon1;
1 example_source gene 80 160 . + . gene_id "gene2"; gene_name: "GENE2;
1 example_source exon 80 159 . + . gene_id "gene2"; gene_name: "GENE2"; exon_id: gene2_exon1;
EOF
# Build the STAR index used by all test runs; --runThreadN is only passed when meta_cpus is set.
echo "> Generate index"
STAR \
${meta_cpus:+--runThreadN $meta_cpus} \
--runMode genomeGenerate \
--genomeDir "$TMPDIR/index/" \
--genomeFastaFiles "$TMPDIR/genome.fasta" \
--sjdbGTFfile "$TMPDIR/genes.gtf" \
--genomeSAindexNbases 2 > /dev/null 2>&1
# Test 1: map two uncompressed samples and verify the output of BOTH samples.
echo "> Run test 1"
run_1_dir="$TMPDIR/run_1"
mkdir -p "$run_1_dir"
pushd "$run_1_dir" > /dev/null
"$meta_executable" \
  --input_r1 "$TMPDIR/sample1_R1.fastq;$TMPDIR/sample2_R1.fastq" \
  --input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_*" > /dev/null 2>&1
# Silence popd like every other pushd/popd in this script.
popd > /dev/null

echo ">> Check if output directories exists"
sample1_out="$TMPDIR/output_ACAGTCACAG"
sample2_out="$TMPDIR/output_CGGGTTTACC"
assert_directory_exists "$sample1_out"
assert_directory_exists "$sample2_out"

echo ">> Check if output files have been created"
for sample in "$sample1_out" "$sample2_out"; do
  assert_file_exists "$sample/Aligned.sortedByCoord.out.bam"
  assert_file_exists "$sample/Unmapped.out.mate1"
  assert_file_exists "$sample/Unmapped.out.mate2"
  assert_file_exists "$sample/Log.out"
  assert_file_exists "$sample/Log.final.out"
  assert_file_exists "$sample/ReadsPerGene.out.tab"
done

echo ">> Check if Solo output is present"
# Use the loop variable: the original checked "$sample1_out" in every
# iteration, so the Solo output of sample 2 was never verified.
for sample in "$sample1_out" "$sample2_out"; do
  assert_directory_exists "$sample/Solo.out"
  assert_directory_exists "$sample/Solo.out/Gene"
  assert_file_exists "$sample/Solo.out/Barcodes.stats"
  assert_file_exists "$sample/Solo.out/Gene/raw/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/matrix.mtx"
  assert_file_exists "$sample/Solo.out/Gene/filtered/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/matrix.mtx"
done

# Each expected file is fed to cmp on stdin and compared with the produced file.
echo ">> Check contents of output"
echo ">>> Sample 1"
assert_file_contains "$sample1_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample1_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample1_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
ACAGTCACAG
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
1 1 1
EOF
echo ">>> Sample 2"
assert_file_contains "$sample2_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample2_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample2_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
CGGGTTTACC
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
2 1 1
EOF
# Test 2: same as test 1 but with gzip-compressed inputs; verifies the output
# of BOTH samples.
echo "> Run test 2 (compressed input)"
gzip -c "$TMPDIR/sample1_R1.fastq" > "$TMPDIR/sample1_R1.fastq.gz"
gzip -c "$TMPDIR/sample2_R1.fastq" > "$TMPDIR/sample2_R1.fastq.gz"
gzip -c "$TMPDIR/sample1_R2.fastq" > "$TMPDIR/sample1_R2.fastq.gz"
gzip -c "$TMPDIR/sample2_R2.fastq" > "$TMPDIR/sample2_R2.fastq.gz"
run_2_dir="$TMPDIR/run_2"
mkdir -p "$run_2_dir"
pushd "$run_2_dir" > /dev/null
"$meta_executable" \
  --input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
  --input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_gz_*" > /dev/null 2>&1
popd > /dev/null

echo ">> Check if output directories exists"
sample1_out="$TMPDIR/output_gz_ACAGTCACAG"
sample2_out="$TMPDIR/output_gz_CGGGTTTACC"
assert_directory_exists "$sample1_out"
assert_directory_exists "$sample2_out"

echo ">> Check if output files have been created"
for sample in "$sample1_out" "$sample2_out"; do
  assert_file_exists "$sample/Aligned.sortedByCoord.out.bam"
  assert_file_exists "$sample/Unmapped.out.mate1"
  assert_file_exists "$sample/Unmapped.out.mate2"
  assert_file_exists "$sample/Log.out"
  assert_file_exists "$sample/Log.final.out"
  assert_file_exists "$sample/ReadsPerGene.out.tab"
done

echo ">> Check if Solo output is present"
# Use the loop variable: the original checked "$sample1_out" in every
# iteration, so the Solo output of sample 2 was never verified.
for sample in "$sample1_out" "$sample2_out"; do
  assert_directory_exists "$sample/Solo.out"
  assert_directory_exists "$sample/Solo.out/Gene"
  assert_file_exists "$sample/Solo.out/Barcodes.stats"
  assert_file_exists "$sample/Solo.out/Gene/raw/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/matrix.mtx"
  assert_file_exists "$sample/Solo.out/Gene/filtered/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/matrix.mtx"
done

# Each expected file is fed to cmp on stdin and compared with the produced file.
echo ">> Check contents of output"
echo ">>> Sample 1"
assert_file_contains "$sample1_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample1_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample1_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
ACAGTCACAG
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
1 1 1
EOF
echo ">>> Sample 2"
assert_file_contains "$sample2_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample2_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample2_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
CGGGTTTACC
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
2 1 1
EOF
# Negative tests: every invocation below must exit with a non-zero status.

# Run the component from a fresh working directory and abort the test script
# when it unexpectedly succeeds.
#   $1      working directory to create and run from
#   $2      message to print when the component exits with status 0
#   $3...   arguments passed to the component
expect_failure() {
  local work_dir="$1"
  local failure_message="$2"
  shift 2
  mkdir -p "$work_dir"
  pushd "$work_dir" > /dev/null
  # errexit is switched off so the expected failure does not end the script.
  set +eo pipefail
  "$meta_executable" "$@" > /dev/null 2>&1 && echo "$failure_message" && exit 1
  set -eo pipefail
  popd > /dev/null
}

echo "> Check that wrong number of barcodes are detected."
expect_failure "$TMPDIR/run_3" "Expected non-zero exit code " \
  --input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
  --input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_gz_*"

echo "> Check that missing wildcard character is detected."
expect_failure "$TMPDIR/run_4" "Expected non-zero exit code." \
  --input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
  --input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_run4"

echo "> Check that a mismatch in the length of the input mates is detected."
# An empty R1 file paired with a non-empty R2 file.
empty_input_file="$TMPDIR/empty.fastq"
touch "$empty_input_file"
expect_failure "$TMPDIR/run_5" "Expected non-zero exit code " \
  --input_r1 "$TMPDIR/sample1_R1.fastq;$empty_input_file" \
  --input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_run5_*"

echo "> Check that wrong number of input files is detected."
expect_failure "$TMPDIR/run_6" "Expected non-zero exit code " \
  --input_r1 "$TMPDIR/sample1_R1.fastq" \
  --input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_run_6_*"

View File

@@ -0,0 +1,79 @@
# Viash config of the top-level htrnaseq workflow (implemented in main.nf, entrypoint run_wf).
name: htrnaseq
namespace: workflows
argument_groups:
- name: Input arguments
arguments:
- name: --input_r1
description: R1
type: file
required: true
- name: --input_r2
description: R2
type: file
required: true
- name: --barcodesFasta
type: file
required: true
# The workflow passes this value through 'untar' first when its name ends in
# .tar.gz, .tar or .tgz (see main.nf).
- name: --genomeDir
type: file
required: true
- name: Output arguments
arguments:
- name: --fastq_output
description: List of demultiplexed fastq files
type: file
direction: output
multiple: true
required: true
default: "fastq/*_001.fastq"
# The first value of this argument is handed to parallel_map as its output
# template (see main.nf).
- name: --star_output
description: Output from mapping with STAR
type: file
direction: output
multiple: true
required: true
default: $id/star/*
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
# test_resources:
# - type: nextflow_script
# path: test.nf
# entrypoint: test_wf
# Components used by main.nf; 'repository' refers to an entry under 'repositories' below.
dependencies:
- name: workflows/well_demultiplex
repository: local
- name: untar
repository: cb
- name: parallel_map
repository: local
- name: workflows/utils/splitWells
repository: local
- name: workflows/utils/groupLanes
repository: local
- name: workflows/utils/groupPairs
repository: local
- name: workflows/utils/groupWells
repository: local
- name: concat_text
repository: cb
repositories:
- name: local
type: local
# NOTE(review): none of the dependencies listed above reference 'bb'.
- name: bb
type: vsh
repo: biobox
tag: v0.1.0
# NOTE(review): the tag 'concat_text' looks like a branch name rather than a release -- confirm.
- name: cb
type: vsh
repo: craftbox
tag: concat_text
runners:
- type: nextflow
engines:
- type: native

View File

@@ -0,0 +1,32 @@
#!/bin/bash
# Build the workflow components and run the htrnaseq workflow with the Docker
# test profile, publishing the results to ./output.

# Stop at the first failing step: without this, Nextflow would still be
# started after a failed 'git rev-parse', 'cd' or 'viash ns build'.
set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# Make sure the workflow is built
viash ns build -q 'workflows|parallel_map'

# Pin the Nextflow version used for this run.
export NXF_VER=24.04.3

nextflow run . \
  -main-script target/nextflow/workflows/htrnaseq/main.nf \
  -params-file params1.yaml \
  -config ./src/config/tests.config \
  -profile docker \
  --publish_dir output \
  -resume

# bin/nextflow run . \
# -main-script src/workflows/htrnaseq_wf/test.nf \
# -entry test_wf_NextSeq550 \
# -profile docker,local \
# -resume \
# --mappingReferenceRoot testData/genomeDir/subset \
# --barcodesReferenceRoot testData \
# --compoundAnnotationRoot testData \
# -ansi-log false \
# --publish_dir htrnaseq_test_results
#

View File

@@ -0,0 +1,184 @@
// htrnaseq workflow: optionally unpack the reference genome, demultiplex the
// reads per well, group them per pair/lane/well and map every well with STAR.
//
// take:
//   input_ch  channel of [id, state] tuples. The state entries read below are
//             input_r1, input_r2, barcodesFasta, genomeDir and star_output;
//             genomeDir must be the same for all events.
// emit:
//   output_ch channel of [id, state] tuples whose state is reduced to
//             star_output and fastq_output.
workflow run_wf {
  take:
    input_ch

  main:
    // Untar input genome if needed.
    // All events are collected into one list so the (single, shared) genome is
    // unpacked only once; the result is fanned out again to the original ids.
    untar_ch = input_ch
      | toSortedList()
      | map { ids_and_states ->
        def ids = ids_and_states.collect{ it[0] }
        def genomeDirs = ids_and_states.collect{ it[1].genomeDir }.unique()
        assert genomeDirs.size() == 1, "Only one value for 'genomeDirs' should have been defined across all inputs."
        // Exactly one element is left after the assertion above.
        def genomeDir = genomeDirs.first()
        ["unpack_genome", ["genomeDir": genomeDir, "original_ids": ids]]
      }
      | untar.run(
        // Only run untar when the genome looks like a (compressed) tarball.
        runIf: { id, state ->
          def genomeDirStr = state.genomeDir.toString()
          [".tar.gz", ".tar", ".tgz"].any { suffix -> genomeDirStr.endsWith(suffix) }
        },
        fromState: [
          "input": "genomeDir",
        ],
        toState: { id, result, state ->
          state + ["genomeDir": result.output]
        },
      )
      | flatMap { id, state ->
        // One [original id, genomeDir] tuple per input event.
        state.original_ids.collect{ [it, state.genomeDir] }
      }

    // Attach the (possibly unpacked) genome to every original event by id.
    output_ch = input_ch.join(untar_ch)
      | niceView()
      | map { id, state, genomeDir ->
        def newState = state + ["genomeDir": genomeDir]
        [id, newState]
      }
      | well_demultiplex.run(
        fromState: { id, state ->
          [
            input_r1: state.input_r1,
            input_r2: state.input_r2,
            barcodesFasta: state.barcodesFasta,
          ]
        },
        toState: { id, result, state ->
          state + [
            fastq_output: result.output,
            output: result.output,
          ]
        },
        directives: [label: ["midmem", "midcpu"]]
      )
      | splitWells.run(
        fromState: { id, state ->
          [
            input: state.output,
          ]
        },
        toState: { id, result, state ->
          state + result
        }
      )
      | setState(
        [
          "input": "barcode_path",
          "barcode": "barcode",
          "barcodesFasta": "barcodesFasta",
          "genomeDir": "genomeDir",
          "star_output": "star_output",
          "fastq_output": "fastq_output",
        ]
      )
      // TODO: Expand this into matching a whitelist/blacklist of barcodes
      // ... and turn into separate component
      | filter{ id, state -> state.barcode != "unknown" }
      | groupPairs.run(
        fromState: { id, state ->
          [
            input: state.input
          ]
        },
        toState: { id, result, state ->
          state + result
        }
      )
      // Does the sequencing platform use lanes?
      // Should those lanes be discriminated over?
      | groupLanes.run(
        fromState: { id, state ->
          [
            input_r1: state.r1,
            input_r2: state.r2
          ]
        },
        toState: { id, result, state ->
          state + result + [ multiple_lanes: result.output_r1.size() > 1]
        }
      )
      | setState(
        [
          "input_r1": "r1",
          "input_r2": "r2",
          "barcodesFasta": "barcodesFasta",
          "genomeDir": "genomeDir",
          "barcode": "barcode",
          "star_output": "star_output",
          "fastq_output": "fastq_output",
        ]
      )
      // Concatenate the R1 files and the R2 files (gzipped output), one run per mate.
      | concat_text.run(
        key: "concat_text_r1",
        fromState: { id, state ->
          [
            input: state.input_r1,
            gzip_output: true
          ]
        },
        toState: { id, result, state ->
          state + [ input_r1: result.output ]
        }
      )
      | concat_text.run(
        key: "concat_text_r2",
        fromState: { id, state ->
          [
            input: state.input_r2,
            gzip_output: true
          ]
        },
        toState: { id, result, state ->
          state + [ input_r2: result.output ]
        }
      )
      | groupWells.run(
        fromState: { id, state ->
          [
            input_r1: state.input_r1,
            input_r2: state.input_r2,
            well: state.barcode
          ]
        },
        toState: { id, result, state ->
          state + [ "wells": result.wells, "input_r1": result.output_r1, "input_r2": result.output_r2]
        }
      )
      | parallel_map.run(
        fromState: { id, state ->
          [
            input_r1: state.input_r1,
            input_r2: state.input_r2,
            genomeDir: state.genomeDir,
            barcodes: state.wells,
            // NOTE(review): barcode and UMI lengths are hard-coded to 10 here.
            wellBarcodesLength: 10,
            umiLength: 10,
            output: state.star_output[0],
          ]
        },
        toState: { id, result, state ->
          state + [
            star_output: result.output,
          ]
        },
        directives: [label: ["midmem", "midcpu"]]
      )
      | niceView()
      | setState(["star_output", "fastq_output"])
      //| niceView()
      //
      //| setState( [ "output": "out" ] )

  emit:
    output_ch
}

View File

@@ -0,0 +1,39 @@
name: groupLanes
namespace: workflows/utils
description: |
N/A
argument_groups:
- name: Inputs
arguments:
- name: "--input_r1"
type: file
description: Path to the input for R1
required: true
example: input.fastq.gz
- name: "--input_r2"
type: file
description: Path to the input for R2
required: true
example: input.fastq.gz
- name: Output
arguments:
- name: "--output_r1"
type: file
description: Path to the output for R1
multiple: true
required: true
direction: output
- name: "--output_r2"
type: file
description: Path to the output for R2
multiple: true
required: true
direction: output
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
runners:
- type: nextflow

View File

@@ -0,0 +1,24 @@
workflow run_wf {
  take: in_
  main:
    // Merge events whose ids only differ by a lane suffix ("_L00<digit>" or
    // "_lane<digit>") and gather their R1/R2 inputs into lists.
    out_ = in_
      | map { id, f ->
        def lane_free_id = id.replaceAll("_L00\\d", "").replaceAll("_lane\\d", "")
        def reads = [ input_r1: f.input_r1, input_r2: f.input_r2 ]
        [ lane_free_id, reads, id ]
      }
      | groupTuple(sort: "hash")
      | map { grouped_id, reads_per_lane, source_ids ->
        def grouped_state = [
          output_r1: reads_per_lane.collect { it.input_r1 },
          output_r2: reads_per_lane.collect { it.input_r2 },
          // One of the original ids is kept as join_id.
          _meta: [ join_id: source_ids[0] ]
        ]
        [ grouped_id, grouped_state ]
      }
  emit: out_
}

View File

@@ -0,0 +1,32 @@
name: groupPairs
namespace: workflows/utils
description: |
N/A
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
description: Path to an input fastq file (either R1 or R2)
required: true
example: input.fastq.gz
- name: Output
arguments:
- name: "--r1"
type: file
description: Path to the output for R1
required: true
direction: output
- name: "--r2"
type: file
description: Path to output for R2
required: true
direction: output
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
runners:
- type: nextflow

View File

@@ -0,0 +1,26 @@
workflow run_wf {
  take: in_
  main:
    // Bring the R1 and R2 fastq file of the same well together in one event.
    // Incoming ids carry a "__R1" / "__R2" suffix; stripping it yields the
    // grouping key.
    out_ = in_
      | map{ id, f -> [ id.replaceAll("__R[12]", ""), [ input: f.input ] , id ] }
      | groupTuple(sort: "hash")
      | map{ new_id, inputs, ids ->
        // Declare locals with `def`: undeclared variables end up in the script
        // binding and would be shared between closure invocations.
        def paths = inputs.collect{ it.input }
        // The mate is recognised by "_R1_" / "_R2_" in the file path; the
        // value is null when no such file is part of the group.
        def r1 = paths.find{ it =~ "_R1_" }
        def r2 = paths.find{ it =~ "_R2_" }
        [
          new_id,
          [
            r1: r1,
            r2: r2,
            _meta: [ join_id: ids[0] ]
          ]
        ]
      }
  emit: out_
}

View File

@@ -0,0 +1,48 @@
name: groupWells
namespace: workflows/utils
description: |
N/A
argument_groups:
- name: Inputs
arguments:
- name: "--well"
type: string
description: Barcode identifier for a well
required: true
example: ACAGCGATCGAC
- name: "--input_r1"
type: file
description: Path to the input for R1
required: true
example: input.fastq.gz
- name: "--input_r2"
type: file
description: Path to the input for R2
required: true
example: input.fastq.gz
- name: Output
arguments:
- name: "--wells"
type: string
description: List of grouped wells (by means of barcodes)
multiple: true
direction: output
example: ACAGCGATCGAC
- name: "--output_r1"
type: file
description: Path to the output for R1
multiple: true
direction: output
- name: "--output_r2"
type: file
description: Path to the output for R2
multiple: true
direction: output
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
runners:
- type: nextflow

View File

@@ -0,0 +1,25 @@
workflow run_wf {
  take: in_
  main:
    // Gather all wells of a pool: the "__<barcode>" part of the id is removed
    // and events sharing the remaining id are merged into one event.
    out_ = in_
      | map { id, f ->
        def pool_id = id.replaceAll("__[ACGT]+", "")
        def well_state = [ input_r1: f.input_r1, input_r2: f.input_r2, well: f.well ]
        [ pool_id, well_state, id ]
      }
      | groupTuple(sort: "hash")
      | map { grouped_id, well_states, source_ids ->
        // R1, R2 and well come from the same per-well map, so the three
        // lists share the same order.
        def grouped_state = [
          output_r1: well_states.collect { it.input_r1 },
          output_r2: well_states.collect { it.input_r2 },
          wells: well_states.collect { it.well },
          _meta: [ join_id: source_ids[0] ]
        ]
        [ grouped_id, grouped_state ]
      }
  emit: out_
}

View File

@@ -0,0 +1,39 @@
name: splitWells
namespace: workflows/utils
description: |
N/A
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
description: List of demultiplexed fastq files
required: true
multiple: true
example: ACAGCGATCGAC_R1_001.fastq
- name: Output
arguments:
- name: "--pool"
type: string
description: The original pool / sample name
direction: output
- name: "--barcode"
type: string
direction: output
- name: "--barcode_path"
type: file
direction: output
- name: "--lane"
type: string
direction: output
- name: "--pair_end"
type: string
direction: output
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
runners:
- type: nextflow

View File

@@ -0,0 +1,31 @@
workflow run_wf {
  take: in_
  main:
    // Turn one event holding a list of demultiplexed fastq files into one
    // event per file, with id "<pool id>__<barcode>__<R1|R2>".
    out_ = in_
      | flatMap{ id, state ->
        state.input.collect{ p ->
          // Declare locals with `def`: undeclared variables end up in the
          // script binding and would be shared between closure invocations.
          // Barcode (or "unknown") is the part of the file name before "_R".
          def barcode = (p =~ /.*\\/([ACTG]*|unknown)_R?.*/)[0][1]
          def pair_end = (p =~ /.*_(R[12])_.*/)[0][1]
          // The lane is parsed from the pool id, not from the file name.
          def lane_match = (id =~ /.*_(L\d+).*/)
          def lane = lane_match ? lane_match[0][1] : "no_lanes"
          [
            id + "__" + barcode + "__" + pair_end,
            [
              pool: id,
              barcode: barcode,
              barcode_path: p,
              lane: lane,
              pair_end: pair_end,
              _meta: [ join_id: id ]
            ]
          ]
        }
      }
  emit: out_
}

View File

@@ -0,0 +1,52 @@
name: well_demultiplex
namespace: workflows
description: Demultiplexing on well level
argument_groups:
- name: Input arguments
arguments:
- name: --input_r1
description: R1
type: file
required: true
- name: --input_r2
description: R2
type: file
required: true
- name: --barcodesFasta
type: file
required: true
- name: Output arguments
arguments:
- name: --output
description: List of demultiplexed fastq files
type: file
direction: output
multiple: true
required: true
default: "fastq/*_001.fastq"
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
# Test dataset: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM5357044
test_resources:
- type: nextflow_script
path: test.nf
entrypoint: test_wf
dependencies:
- name: cutadapt
repository: bb
repositories:
- name: bb
type: vsh
repo: biobox
tag: v0.1.0
runners:
- type: nextflow
engines:
- type: native

View File

@@ -0,0 +1,33 @@
workflow run_wf {
  take:
    input_ch

  main:
    // Well-level demultiplexing: cutadapt is given the well barcodes as a
    // fasta of 5' adapters. With action "none" matching reads are left
    // unchanged, and indels are not allowed when matching.
    output_ch = input_ch
      //| niceView()
      | cutadapt.run(
        // TODO: Remove hard-coded directives and replace with profiles
        directives: [ cpus: 4 ],
        fromState: { sample_id, sample_state ->
          def cutadapt_args = [
            input: sample_state.input_r1,
            input_r2: sample_state.input_r2,
            no_indels: true,
            action: "none",
            front_fasta: sample_state.barcodesFasta,
            output: sample_state.output
          ]
          cutadapt_args
        },
        // Only the cutadapt output is kept; the incoming state is dropped.
        toState: { sample_id, cutadapt_result, sample_state ->
          [ output: cutadapt_result.output ]
        }
      )
      //| niceView()

  emit:
    output_ch
}

View File

@@ -0,0 +1,9 @@
// Nextflow configuration used when running the well_demultiplex test workflow.
manifest {
nextflowVersion = '!>=20.12.1-edge'
}
params {
// Absolute, normalized path three directory levels above $projectDir;
// test.nf uses it to locate the built workflow under "target/".
rootDir = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString()
}

View File

@@ -0,0 +1,46 @@
include { well_demultiplex } from params.rootDir + "/target/nextflow/workflows/well_demultiplex/main.nf"
base = "gs://viash-hub-test-data/htrnaseq/v1/"
workflow test_wf {
  // Runs well_demultiplex on two samples with a 2-well barcode fasta and
  // checks that each sample yields 6 fastq files (R1/R2 for both wells and
  // for the reads that matched no barcode).
  output_ch = Channel.fromList([
      [
        id: "SRR14730301",
        input_r1: base + "100k/SRR14730301/VH02001612_S9_R1_001.fastq",
        input_r2: base + "100k/SRR14730301/VH02001612_S9_R2_001.fastq",
        barcodesFasta: base + "2-wells.fasta",
      ],
      [
        id: "SRR14730302",
        input_r1: base + "100k/SRR14730302/VH02001614_S8_R1_001.fastq",
        input_r2: base + "100k/SRR14730302/VH02001614_S8_R2_001.fastq",
        barcodesFasta: base + "2-wells.fasta",
      ],
    ])
    | map { state -> [ state.id, state ] }
    | well_demultiplex.run(
      fromState: { id, state ->
        [
          input_r1: state.input_r1,
          input_r2: state.input_r2,
          barcodesFasta: state.barcodesFasta,
        ]
      },
      toState: { id, output, state ->
        output }
    )
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      // size() instead of .size: property access on a list is spread over
      // its elements rather than returning the number of elements.
      assert output[1].output.size() == 6 : "6 fastq files should be generated: pairs for 2 wells and 1 unkonwn"
      "Output: $output"
    }
    | view { id, state ->
      assert state.output.size() == 6 : "6 fastq files should be generated: pairs for 2 wells and 1 unkonwn"
      "State: $state"
    }
    | toSortedList()
    | view { output ->
      assert output.size() == 2 : "2 samples in should result in 2 results out"
    }
}

0
target/.build.yaml Normal file
View File

View File

@@ -0,0 +1,733 @@
name: "cutadapt"
version: "v0.1.0"
argument_groups:
- name: "Specify Adapters for R1"
arguments:
- type: "string"
name: "--adapter"
alternatives:
- "-a"
description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\
\ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\
\ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\
\ the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--front"
alternatives:
- "-g"
description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\
\ first read). The adapter and any preceding bases\nare trimmed. Partial matches\
\ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\
\ the adapter is\nonly found if it is a prefix of the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--anywhere"
alternatives:
- "-b"
description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\
\ (paired data: of the first read). Both types of\nmatches as described under\
\ -a and -g are allowed. If the\nfirst base of the read is part of the match,\
\ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\
for rescuing failed library preparations - do not use if\nyou know which end\
\ your adapter was ligated to!\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Specify Adapters using Fasta files for R1"
arguments:
- type: "file"
name: "--adapter_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 3'\
\ end (paired data:\nof the first read). The adapter and subsequent bases are\n\
trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\
\ found if it is a suffix of the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--front_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 5'\
\ end (paired data:\nof the first read). The adapter and any preceding bases\n\
are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\
\ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\
\ the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--anywhere_fasta"
description: "Fasta file containing sequences of an adapter that may be ligated\
\ to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches\
\ as described under -a and -g are allowed. If the\nfirst base of the read is\
\ part of the match, the behavior\nis as with -g, otherwise as with -a. This\
\ option is mostly\nfor rescuing failed library preparations - do not use if\n\
you know which end your adapter was ligated to!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Specify Adapters for R2"
arguments:
- type: "string"
name: "--adapter_r2"
alternatives:
- "-A"
description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\
\ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\
\ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\
\ the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--front_r2"
alternatives:
- "-G"
description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\
\ first read). The adapter and any preceding bases\nare trimmed. Partial matches\
\ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\
\ the adapter is\nonly found if it is a prefix of the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--anywhere_r2"
alternatives:
- "-B"
description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\
\ (paired data: of the first read). Both types of\nmatches as described under\
\ -a and -g are allowed. If the\nfirst base of the read is part of the match,\
\ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\
for rescuing failed library preparations - do not use if\nyou know which end\
\ your adapter was ligated to!\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Specify Adapters using Fasta files for R2"
arguments:
- type: "file"
name: "--adapter_r2_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 3'\
\ end (paired data:\nof the first read). The adapter and subsequent bases are\n\
trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\
\ found if it is a suffix of the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--front_r2_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 5'\
\ end (paired data:\nof the first read). The adapter and any preceding bases\n\
are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\
\ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\
\ the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--anywhere_r2_fasta"
description: "Fasta file containing sequences of an adapter that may be ligated\
\ to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches\
\ as described under -a and -g are allowed. If the\nfirst base of the read is\
\ part of the match, the behavior\nis as with -g, otherwise as with -a. This\
\ option is mostly\nfor rescuing failed library preparations - do not use if\n\
you know which end your adapter was ligated to!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Paired-end options"
arguments:
- type: "boolean_true"
name: "--pair_adapters"
description: "Treat adapters given with -a/-A etc. as pairs. Either both\nor none\
\ are removed from each read pair.\n"
info: null
direction: "input"
- type: "string"
name: "--pair_filter"
description: "Which of the reads in a paired-end read have to match the\nfiltering\
\ criterion in order for the pair to be filtered.\n"
info: null
required: false
choices:
- "any"
- "both"
- "first"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--interleaved"
description: "Read and/or write interleaved paired-end reads.\n"
info: null
direction: "input"
- name: "Input parameters"
arguments:
- type: "file"
name: "--input"
description: "Input fastq file for single-end reads or R1 for paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Input fastq file for R2 in the case of paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--error_rate"
alternatives:
- "-E"
- "--errors"
description: "Maximum allowed error rate (if 0 <= E < 1), or absolute\nnumber\
\ of errors for full-length adapter match (if E is an\ninteger >= 1). Error\
\ rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_false"
name: "--no_indels"
description: "Allow only mismatches in alignments.\n"
info: null
direction: "input"
- type: "integer"
name: "--times"
alternatives:
- "-n"
description: "Remove up to COUNT adapters from each read. Default: 1.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--overlap"
alternatives:
- "-O"
description: "Require MINLENGTH overlap between read and adapter for an\nadapter\
\ to be found. The default is 3.\n"
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--match_read_wildcards"
description: "Interpret IUPAC wildcards in reads.\n"
info: null
direction: "input"
- type: "boolean_false"
name: "--no_match_adapter_wildcards"
description: "Do not interpret IUPAC wildcards in adapters.\n"
info: null
direction: "input"
- type: "string"
name: "--action"
description: "What to do if a match was found. trim: trim adapter and\nup- or\
\ downstream sequence; retain: trim, but retain\nadapter; mask: replace with\
\ 'N' characters; lowercase:\nconvert to lowercase; none: leave unchanged.\n\
The default is trim.\n"
info: null
example:
- "trim"
required: false
choices:
- "trim"
- "retain"
- "mask"
- "lowercase"
- "none"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--revcomp"
alternatives:
- "--rc"
description: "Check both the read and its reverse complement for adapter\nmatches.\
\ If match is on reverse-complemented version,\noutput that one.\n"
info: null
direction: "input"
- name: "Read modifications"
arguments:
- type: "integer"
name: "--cut"
alternatives:
- "-u"
description: "Remove LEN bases from each read (or R1 if paired; use --cut_r2\n\
option for R2). If LEN is positive, remove bases from the\nbeginning. If LEN\
\ is negative, remove bases from the end.\nCan be used twice if LENs have different\
\ signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "integer"
name: "--cut_r2"
description: "Remove LEN bases from each read (for R2). If LEN is positive, remove\
\ bases from the\nbeginning. If LEN is negative, remove bases from the end.\n\
Can be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--nextseq_trim"
description: "NextSeq-specific quality trimming (each read). Trims also\ndark\
\ cycles appearing as high-quality G bases.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff"
alternatives:
- "-q"
description: "Trim low-quality bases from 5' and/or 3' ends of each read\nbefore\
\ adapter removal. Applied to both reads if data is\npaired. If one value is\
\ given, only the 3' end is trimmed.\nIf two comma-separated cutoffs are given,\
\ the 5' end is\ntrimmed with the first cutoff, the 3' end with the second.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff_r2"
alternatives:
- "-Q"
description: "Quality-trimming cutoff for R2. Default: same as for R1\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--quality_base"
description: "Assume that quality values in FASTQ are encoded as\nascii(quality\
\ + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default\
\ is 33.\n"
info: null
example:
- 33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--poly_a"
description: "Trim poly-A tails"
info: null
direction: "input"
- type: "integer"
name: "--length"
alternatives:
- "-l"
description: "Shorten reads to LENGTH. Positive values remove bases at\nthe end\
\ while negative ones remove bases at the beginning.\nThis and the following\
\ modifications are applied after\nadapter trimming.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--trim_n"
description: "Trim N's on ends of reads."
info: null
direction: "input"
- type: "string"
name: "--length_tag"
description: "Search for TAG followed by a decimal number in the\ndescription\
\ field of the read. Replace the decimal number\nwith the correct length of\
\ the trimmed read. For example,\nuse --length-tag 'length=' to correct fields\
\ like\n'length=123'.\n"
info: null
example:
- "length="
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strip_suffix"
description: "Remove this suffix from read names if present. Can be\ngiven multiple\
\ times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--prefix"
alternatives:
- "-x"
description: "Add this prefix to read names. Use {name} to insert the\nname of\
\ the matching adapter.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--suffix"
alternatives:
- "-y"
description: "Add this suffix to read names; can also include {name}\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--rename"
description: "Rename reads using TEMPLATE containing variables such as\n{id},\
\ {adapter_name} etc. (see documentation)\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--zero_cap"
alternatives:
- "-z"
description: "Change negative quality values to zero."
info: null
direction: "input"
- name: "Filtering of processed reads"
description: "Filters are applied after above read modifications. Paired-end reads\
\ are\nalways discarded pairwise (see also --pair_filter).\n"
arguments:
- type: "string"
name: "--minimum_length"
alternatives:
- "-m"
description: "Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end\
\ reads, the minimum lengths for R1 and R2 can be specified separately by separating\
\ them with a colon (:).\nIf the colon syntax is not used, the same minimum\
\ length applies to both reads, as discussed above.\nAlso, one of the values\
\ can be omitted to impose no restrictions.\nFor example, with -m 17:, the length\
\ of R1 must be at least 17, but the length of R2 is ignored.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--maximum_length"
alternatives:
- "-M"
description: "Discard reads longer than LEN. Default: no limit.\nFor paired reads,\
\ see the remark for --minimum_length\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--max_n"
description: "Discard reads with more than COUNT 'N' bases. If COUNT is\na number\
\ between 0 and 1, it is interpreted as a fraction\nof the read length.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "long"
name: "--max_expected_errors"
alternatives:
- "--max_ee"
description: "Discard reads whose expected number of errors (computed\nfrom quality\
\ values) exceeds ERRORS.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "long"
name: "--max_average_error_rate"
alternatives:
- "--max_aer"
description: "as --max_expected_errors (see above), but divided by\nlength to\
\ account for reads of varying length.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--discard_trimmed"
alternatives:
- "--discard"
description: "Discard reads that contain an adapter. Use also -O to\navoid discarding\
\ too many randomly matching reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_untrimmed"
alternatives:
- "--trimmed_only"
description: "Discard reads that do not contain an adapter.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_casava"
description: "Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n"
info: null
direction: "input"
- name: "Output parameters"
arguments:
- type: "string"
name: "--report"
description: "Which type of report to print: 'full' (default) or 'minimal'.\n"
info: null
example:
- "full"
required: false
choices:
- "full"
- "minimal"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--json"
description: "Write report in JSON format to this file.\n"
info: null
direction: "input"
- type: "file"
name: "--output"
description: "Glob pattern for matching the expected output files.\nShould include\
\ `$output_dir`.\n"
info: null
example:
- "fastq/*_001.fast[a,q]"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--fasta"
description: "Output FASTA to standard output even on FASTQ input.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--info_file"
description: "Write information about each read and its adapter matches\ninto\
\ info.txt in the output directory.\nSee the documentation for the file format.\n"
info: null
direction: "input"
- name: "Debug"
arguments:
- type: "boolean_true"
name: "--debug"
description: "Print debug information"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "RNA-seq"
- "scRNA-seq"
- "high-throughput"
license: "MIT"
references:
doi:
- "10.14806/ej.17.1.200"
links:
repository: "https://github.com/marcelm/cutadapt"
homepage: "https://cutadapt.readthedocs.io"
documentation: "https://cutadapt.readthedocs.io"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12"
target_registry: "images.viash-hub.com"
target_tag: "v0.1.0"
namespace_separator: "/"
setup:
- type: "python"
user: false
pip:
- "cutadapt"
upgrade: true
- type: "docker"
run:
- "cutadapt --version | sed 's/\\(.*\\)/cutadapt: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cutadapt/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/cutadapt"
executable: "target/nextflow/cutadapt/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.1.0"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.1.0'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the generated cutadapt Nextflow module.
manifest {
name = 'cutadapt'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v0.1.0'
description = 'Cutadapt removes adapter sequences from high-throughput sequencing reads.\n'
}
// Container used by processes that do not set one themselves.
process.container = 'nextflow/bash:latest'
// detect tempdir
// The first non-empty value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR is used;
// '/tmp' is the fallback. The result is made absolute.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Execution profiles. Each container-engine profile enables exactly one
// engine and explicitly disables the others.
profiles {
// Disable publishDir for every process (the selector '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Point the container engines' temp setting at the detected tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource labels: a process carrying one of these labels gets the matching
// memory or cpu setting.
process{
// Memory labels in decimal units (1 GB = 10^9 bytes).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Memory labels in binary units (1 GiB = 2^30 bytes).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,749 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "cutadapt",
"description": "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n",
"type": "object",
"definitions": {
"specify adapters for r1" : {
"title": "Specify Adapters for R1",
"type": "object",
"description": "No description",
"properties": {
"adapter": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n"
}
,
"front": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n"
}
,
"anywhere": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n"
}
}
},
"specify adapters using fasta files for r1" : {
"title": "Specify Adapters using Fasta files for R1",
"type": "object",
"description": "No description",
"properties": {
"adapter_fasta": {
"type":
"string",
"description": "Type: List of `file`, multiple_sep: `\":\"`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)",
"help_text": "Type: List of `file`, multiple_sep: `\":\"`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n"
}
,
"front_fasta": {
"type":
"string",
"description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)",
"help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n"
}
,
"anywhere_fasta": {
"type":
"string",
"description": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)",
"help_text": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n"
}
}
},
"specify adapters for r2" : {
"title": "Specify Adapters for R2",
"type": "object",
"description": "No description",
"properties": {
"adapter_r2": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the second read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the second read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n"
}
,
"front_r2": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the second read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the second read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n"
}
,
"anywhere_r2": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the second read)",
"help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the second read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n"
}
}
},
"specify adapters using fasta files for r2" : {
"title": "Specify Adapters using Fasta files for R2",
"type": "object",
"description": "No description",
"properties": {
"adapter_r2_fasta": {
"type":
"string",
"description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the second read)",
"help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the second read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n"
}
,
"front_r2_fasta": {
"type":
"string",
"description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the second read)",
"help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the second read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n"
}
,
"anywhere_r2_fasta": {
"type":
"string",
"description": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the second read)",
"help_text": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the second read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n"
}
}
},
"paired-end options" : {
"title": "Paired-end options",
"type": "object",
"description": "No description",
"properties": {
"pair_adapters": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Treat adapters given with -a/-A etc. as pairs",
"help_text": "Type: `boolean_true`, default: `false`. Treat adapters given with -a/-A etc. as pairs. Either both\nor none are removed from each read pair.\n"
,
"default": "False"
}
,
"pair_filter": {
"type":
"string",
"description": "Type: `string`, choices: ``any`, `both`, `first``. Which of the reads in a paired-end read have to match the\nfiltering criterion in order for the pair to be filtered",
"help_text": "Type: `string`, choices: ``any`, `both`, `first``. Which of the reads in a paired-end read have to match the\nfiltering criterion in order for the pair to be filtered.\n",
"enum": ["any", "both", "first"]
}
,
"interleaved": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Read and/or write interleaved paired-end reads",
"help_text": "Type: `boolean_true`, default: `false`. Read and/or write interleaved paired-end reads.\n"
,
"default": "False"
}
}
},
"input parameters" : {
"title": "Input parameters",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required. Input fastq file for single-end reads or R1 for paired-end reads",
"help_text": "Type: `file`, required. Input fastq file for single-end reads or R1 for paired-end reads.\n"
}
,
"input_r2": {
"type":
"string",
"description": "Type: `file`. Input fastq file for R2 in the case of paired-end reads",
"help_text": "Type: `file`. Input fastq file for R2 in the case of paired-end reads.\n"
}
,
"error_rate": {
"type":
"number",
"description": "Type: `double`, example: `0.1`. Maximum allowed error rate (if 0 \u003c= E \u003c 1), or absolute\nnumber of errors for full-length adapter match (if E is an\ninteger \u003e= 1)",
"help_text": "Type: `double`, example: `0.1`. Maximum allowed error rate (if 0 \u003c= E \u003c 1), or absolute\nnumber of errors for full-length adapter match (if E is an\ninteger \u003e= 1). Error rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n"
}
,
"no_indels": {
"type":
"boolean",
"description": "Type: `boolean_false`, default: `true`. Allow only mismatches in alignments",
"help_text": "Type: `boolean_false`, default: `true`. Allow only mismatches in alignments.\n"
,
"default": "True"
}
,
"times": {
"type":
"integer",
"description": "Type: `integer`, example: `1`. Remove up to COUNT adapters from each read",
"help_text": "Type: `integer`, example: `1`. Remove up to COUNT adapters from each read. Default: 1.\n"
}
,
"overlap": {
"type":
"integer",
"description": "Type: `integer`, example: `3`. Require MINLENGTH overlap between read and adapter for an\nadapter to be found",
"help_text": "Type: `integer`, example: `3`. Require MINLENGTH overlap between read and adapter for an\nadapter to be found. The default is 3.\n"
}
,
"match_read_wildcards": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Interpret IUPAC wildcards in reads",
"help_text": "Type: `boolean_true`, default: `false`. Interpret IUPAC wildcards in reads.\n"
,
"default": "False"
}
,
"no_match_adapter_wildcards": {
"type":
"boolean",
"description": "Type: `boolean_false`, default: `true`. Do not interpret IUPAC wildcards in adapters",
"help_text": "Type: `boolean_false`, default: `true`. Do not interpret IUPAC wildcards in adapters.\n"
,
"default": "True"
}
,
"action": {
"type":
"string",
"description": "Type: `string`, example: `trim`, choices: ``trim`, `retain`, `mask`, `lowercase`, `none``. What to do if a match was found",
"help_text": "Type: `string`, example: `trim`, choices: ``trim`, `retain`, `mask`, `lowercase`, `none``. What to do if a match was found. trim: trim adapter and\nup- or downstream sequence; retain: trim, but retain\nadapter; mask: replace with \u0027N\u0027 characters; lowercase:\nconvert to lowercase; none: leave unchanged.\nThe default is trim.\n",
"enum": ["trim", "retain", "mask", "lowercase", "none"]
}
,
"revcomp": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Check both the read and its reverse complement for adapter\nmatches",
"help_text": "Type: `boolean_true`, default: `false`. Check both the read and its reverse complement for adapter\nmatches. If match is on reverse-complemented version,\noutput that one.\n"
,
"default": "False"
}
}
},
"read modifications" : {
"title": "Read modifications",
"type": "object",
"description": "No description",
"properties": {
"cut": {
"type":
"string",
"description": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (or R1 if paired; use --cut_r2\noption for R2)",
"help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (or R1 if paired; use --cut_r2\noption for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n"
}
,
"cut_r2": {
"type":
"string",
"description": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (for R2)",
"help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n"
}
,
"nextseq_trim": {
"type":
"string",
"description": "Type: `string`. NextSeq-specific quality trimming (each read)",
"help_text": "Type: `string`. NextSeq-specific quality trimming (each read). Trims also\ndark cycles appearing as high-quality G bases.\n"
}
,
"quality_cutoff": {
"type":
"string",
"description": "Type: `string`. Trim low-quality bases from 5\u0027 and/or 3\u0027 ends of each read\nbefore adapter removal",
"help_text": "Type: `string`. Trim low-quality bases from 5\u0027 and/or 3\u0027 ends of each read\nbefore adapter removal. Applied to both reads if data is\npaired. If one value is given, only the 3\u0027 end is trimmed.\nIf two comma-separated cutoffs are given, the 5\u0027 end is\ntrimmed with the first cutoff, the 3\u0027 end with the second.\n"
}
,
"quality_cutoff_r2": {
"type":
"string",
"description": "Type: `string`. Quality-trimming cutoff for R2",
"help_text": "Type: `string`. Quality-trimming cutoff for R2. Default: same as for R1\n"
}
,
"quality_base": {
"type":
"integer",
"description": "Type: `integer`, example: `33`. Assume that quality values in FASTQ are encoded as\nascii(quality + N)",
"help_text": "Type: `integer`, example: `33`. Assume that quality values in FASTQ are encoded as\nascii(quality + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default is 33.\n"
}
,
"poly_a": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Trim poly-A tails",
"help_text": "Type: `boolean_true`, default: `false`. Trim poly-A tails"
,
"default": "False"
}
,
"length": {
"type":
"integer",
"description": "Type: `integer`. Shorten reads to LENGTH",
"help_text": "Type: `integer`. Shorten reads to LENGTH. Positive values remove bases at\nthe end while negative ones remove bases at the beginning.\nThis and the following modifications are applied after\nadapter trimming.\n"
}
,
"trim_n": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Trim N\u0027s on ends of reads",
"help_text": "Type: `boolean_true`, default: `false`. Trim N\u0027s on ends of reads."
,
"default": "False"
}
,
"length_tag": {
"type":
"string",
"description": "Type: `string`, example: `length=`. Search for TAG followed by a decimal number in the\ndescription field of the read",
"help_text": "Type: `string`, example: `length=`. Search for TAG followed by a decimal number in the\ndescription field of the read. Replace the decimal number\nwith the correct length of the trimmed read. For example,\nuse --length-tag \u0027length=\u0027 to correct fields like\n\u0027length=123\u0027.\n"
}
,
"strip_suffix": {
"type":
"string",
"description": "Type: `string`. Remove this suffix from read names if present",
"help_text": "Type: `string`. Remove this suffix from read names if present. Can be\ngiven multiple times.\n"
}
,
"prefix": {
"type":
"string",
"description": "Type: `string`. Add this prefix to read names",
"help_text": "Type: `string`. Add this prefix to read names. Use {name} to insert the\nname of the matching adapter.\n"
}
,
"suffix": {
"type":
"string",
"description": "Type: `string`. Add this suffix to read names; can also include {name}\n",
"help_text": "Type: `string`. Add this suffix to read names; can also include {name}\n"
}
,
"rename": {
"type":
"string",
"description": "Type: `string`. Rename reads using TEMPLATE containing variables such as\n{id}, {adapter_name} etc. (see documentation)",
"help_text": "Type: `string`. Rename reads using TEMPLATE containing variables such as\n{id}, {adapter_name} etc. (see documentation)\n"
}
,
"zero_cap": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Change negative quality values to zero",
"help_text": "Type: `boolean_true`, default: `false`. Change negative quality values to zero."
,
"default": "False"
}
}
},
"filtering of processed reads" : {
"title": "Filtering of processed reads",
"type": "object",
"description": "Filters are applied after above read modifications. Paired-end reads are\nalways discarded pairwise (see also --pair_filter).\n",
"properties": {
"minimum_length": {
"type":
"string",
"description": "Type: `string`, example: `0`. Discard reads shorter than LEN",
"help_text": "Type: `string`, example: `0`. Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).\nIf the colon syntax is not used, the same minimum length applies to both reads, as discussed above.\nAlso, one of the values can be omitted to impose no restrictions.\nFor example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.\n"
}
,
"maximum_length": {
"type":
"string",
"description": "Type: `string`. Discard reads longer than LEN",
"help_text": "Type: `string`. Discard reads longer than LEN. Default: no limit.\nFor paired reads, see the remark for --minimum_length\n"
}
,
"max_n": {
"type":
"string",
"description": "Type: `string`. Discard reads with more than COUNT \u0027N\u0027 bases",
"help_text": "Type: `string`. Discard reads with more than COUNT \u0027N\u0027 bases. If COUNT is\na number between 0 and 1, it is interpreted as a fraction\nof the read length.\n"
}
,
"max_expected_errors": {
"type":
"string",
"description": "Type: `long`. Discard reads whose expected number of errors (computed\nfrom quality values) exceeds ERRORS",
"help_text": "Type: `long`. Discard reads whose expected number of errors (computed\nfrom quality values) exceeds ERRORS.\n"
}
,
"max_average_error_rate": {
"type":
"string",
"description": "Type: `long`. as --max_expected_errors (see above), but divided by\nlength to account for reads of varying length",
"help_text": "Type: `long`. as --max_expected_errors (see above), but divided by\nlength to account for reads of varying length.\n"
}
,
"discard_trimmed": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Discard reads that contain an adapter",
"help_text": "Type: `boolean_true`, default: `false`. Discard reads that contain an adapter. Use also -O to\navoid discarding too many randomly matching reads.\n"
,
"default": "False"
}
,
"discard_untrimmed": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Discard reads that do not contain an adapter",
"help_text": "Type: `boolean_true`, default: `false`. Discard reads that do not contain an adapter.\n"
,
"default": "False"
}
,
"discard_casava": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Discard reads that did not pass CASAVA filtering (header\nhas :Y:)",
"help_text": "Type: `boolean_true`, default: `false`. Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n"
,
"default": "False"
}
}
},
"output parameters" : {
"title": "Output parameters",
"type": "object",
"description": "No description",
"properties": {
"report": {
"type":
"string",
"description": "Type: `string`, example: `full`, choices: ``full`, `minimal``. Which type of report to print: \u0027full\u0027 (default) or \u0027minimal\u0027",
"help_text": "Type: `string`, example: `full`, choices: ``full`, `minimal``. Which type of report to print: \u0027full\u0027 (default) or \u0027minimal\u0027.\n",
"enum": ["full", "minimal"]
}
,
"json": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Write report in JSON format to this file",
"help_text": "Type: `boolean_true`, default: `false`. Write report in JSON format to this file.\n"
,
"default": "False"
}
,
"output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_*.fast[a,q]`, example: `fastq/*_001.fast[a,q]`, multiple_sep: `\":\"`. Glob pattern for matching the expected output files",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_*.fast[a,q]`, example: `fastq/*_001.fast[a,q]`, multiple_sep: `\":\"`. Glob pattern for matching the expected output files.\nShould include `$output_dir`.\n"
,
"default": "$id.$key.output_*.fast[a,q]"
}
,
"fasta": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Output FASTA to standard output even on FASTQ input",
"help_text": "Type: `boolean_true`, default: `false`. Output FASTA to standard output even on FASTQ input.\n"
,
"default": "False"
}
,
"info_file": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Write information about each read and its adapter matches\ninto info.txt in the output directory",
"help_text": "Type: `boolean_true`, default: `false`. Write information about each read and its adapter matches\ninto info.txt in the output directory.\nSee the documentation for the file format.\n"
,
"default": "False"
}
}
},
"debug" : {
"title": "Debug",
"type": "object",
"description": "No description",
"properties": {
"debug": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Print debug information",
"help_text": "Type: `boolean_true`, default: `false`. Print debug information"
,
"default": "False"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/specify adapters for r1"
},
{
"$ref": "#/definitions/specify adapters using fasta files for r1"
},
{
"$ref": "#/definitions/specify adapters for r2"
},
{
"$ref": "#/definitions/specify adapters using fasta files for r2"
},
{
"$ref": "#/definitions/paired-end options"
},
{
"$ref": "#/definitions/input parameters"
},
{
"$ref": "#/definitions/read modifications"
},
{
"$ref": "#/definitions/filtering of processed reads"
},
{
"$ref": "#/definitions/output parameters"
},
{
"$ref": "#/definitions/debug"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,197 @@
# Component identity. `version` holds the same literal value as `name`.
name: "concat_text"
version: "concat_text"
# Contributors, each listed with their roles, contact links and affiliation.
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
- name: "Dries Schaumont"
roles:
- "reviewer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
# Command-line interface of the component, split into input and output groups.
argument_groups:
- name: "Input arguments"
arguments:
# --input: required; accepts several files, separated by ";".
- type: "file"
name: "--input"
description: "A list of (gzipped) text files."
info: null
example:
- "input?.txt.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output arguments"
arguments:
# --gzip_output: boolean flag controlling whether the output is gzipped.
- type: "boolean_true"
name: "--gzip_output"
description: "Should the output be zipped?"
info: null
direction: "input"
# --output: single, optional output file.
# NOTE(review): must_exist/create_parent carry the same values as on --input;
# presumably generator defaults rather than deliberate settings - confirm.
- type: "file"
name: "--output"
description: "File to write the output to, optionally gzipped."
info: null
example:
- "output.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Implementation of the component: a single executable bash script.
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Concatenate a number of text files, handle gzipped text files gracefully\
\ and\noptionally gzip the output text file.\n\nThis component is useful for concatenating\
\ fastq files from different lanes, for instance.\n"
# Test entry point: an executable bash script.
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
# Free-form notes; `improvements` lists two known limitations of the component.
info:
improvements: "This component could be improved in 2 ways:\n 1. Allow for a mix\
\ of zipped and plain input files\n 2. Allow to specify a compression algorithm\
\ for the output\n"
status: "enabled"
# Commands the component requires; matches the `.requirements.commands := ['ps']`
# entry listed under config_mods further down in this file.
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/craftbox"
# Two runners are declared: a standalone executable and a Nextflow module.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Resource labels, each mapping a label name to a Nextflow directive assignment.
# mem<N>gb / mem<N>tb use decimal units (1 gb = 10^9 bytes);
# mem<N>gib / mem<N>tib use binary units (1 gib = 2^30 bytes).
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
# CPU labels: cpu<N> sets the `cpus` directive to N.
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines: a Docker image definition and a native (no container) engine.
engines:
# NOTE(review): the base image uses the moving `latest` tag, so rebuilt images
# may differ over time - consider pinning a specific tag in the source config.
- type: "docker"
id: "docker"
image: "alpine:latest"
target_registry: "images.viash-hub.com"
target_tag: "concat_text"
namespace_separator: "/"
# Packages installed into the image with apk.
setup:
- type: "apk"
packages:
- "bash"
- "procps"
- "file"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Provenance of this build: source config, runner/engine used, output location,
# the viash version and the git commit/remote of the originating repository.
build_info:
config: "src/concat_text/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/concat_text"
executable: "target/nextflow/concat_text/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "53228705a20c6764e3f0ae9ed2147e99009d7c34"
git_remote: "https://github.com/viash-hub/craftbox"
# Settings of the package (`craftbox`) this component was built from.
package_config:
name: "craftbox"
version: "concat_text"
description: "A collection of custom-tailored scripts and applied tools.\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
# Config modifications listed for this build; the values they set (required
# `ps` command, native engine, docker target registry and tag) also appear
# in the sections earlier in this file.
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'concat_text'"
keywords:
- "scripts"
- "custom"
- "implementations"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/craftbox"
issue_tracker: "https://github.com/viash-hub/craftbox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
// Manifest metadata for the `concat_text` module, written as dotted
// assignments into the `manifest` scope.
manifest.name = 'concat_text'
manifest.version = 'concat_text'
manifest.author = 'Toni Verbeiren, Dries Schaumont'
manifest.description = 'Concatenate a number of text files, handle gzipped text files gracefully and\noptionally gzip the output text file.\n\nThis component is useful for concatening fastq files from different lanes, for instance.\n'
// Entry script of the module.
manifest.mainScript = 'main.nf'
// The leading '!' makes Nextflow stop when the running version does not
// satisfy the constraint, instead of only warning.
manifest.nextflowVersion = '!>=20.12.1-edge'
// Container image assigned through the top-level `process` scope.
process.container = 'nextflow/bash:latest'
// detect tempdir
// The environment variables are consulted in the order written; the Elvis
// operator `?:` falls through when a variable is unset or empty, and '/tmp'
// is the final fallback. The chosen location is stored as an absolute
// java.nio.file.Path and is used by the `mount_temp` profile below.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Execution profiles. Each container-engine profile switches exactly one
// engine on and explicitly switches the other four off.
profiles {

  // Disable publishDir for every process ('.*' matches all process names).
  no_publish {
    process {
      withName: '.*' {
        publishDir = [enabled: false]
      }
    }
  }

  // Use the detected tempDir as the temp location of the container engines.
  mount_temp {
    charliecloud.temp = tempDir
    docker.temp = tempDir
    podman.temp = tempDir
  }

  // Run processes with Docker.
  docker {
    docker.enabled = true
    // docker.userEmulation = true
    charliecloud.enabled = false
    podman.enabled = false
    shifter.enabled = false
    singularity.enabled = false
  }

  // Run processes with Singularity, with automatic mounts switched on.
  singularity {
    singularity.enabled = true
    singularity.autoMounts = true
    charliecloud.enabled = false
    docker.enabled = false
    podman.enabled = false
    shifter.enabled = false
  }

  // Run processes with Podman.
  podman {
    podman.enabled = true
    charliecloud.enabled = false
    docker.enabled = false
    shifter.enabled = false
    singularity.enabled = false
  }

  // Run processes with Shifter.
  shifter {
    shifter.enabled = true
    charliecloud.enabled = false
    docker.enabled = false
    podman.enabled = false
    singularity.enabled = false
  }

  // Run processes with Charliecloud.
  charliecloud {
    charliecloud.enabled = true
    docker.enabled = false
    podman.enabled = false
    shifter.enabled = false
    singularity.enabled = false
  }
}
// Resource labels: each `withLabel` selector assigns a fixed memory or cpus
// value to the processes that carry that label.
process{
// Decimal memory labels (mem<N>gb / mem<N>tb): values are powers of ten,
// e.g. mem1gb = 10^9 bytes and mem1tb = 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels (mem<N>gib / mem<N>tib): values are powers of two,
// e.g. mem1gib = 2^30 bytes and mem1tib = 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels (cpu<N>): set cpus to N.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,106 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "concat_text",
"description": "Concatenate a number of text files, handle gzipped text files gracefully and\noptionally gzip the output text file.\n\nThis component is useful for concatenating fastq files from different lanes, for instance.\n",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `input?.txt.gz`, multiple_sep: `\":\"`. A list of (gzipped) text files",
"help_text": "Type: List of `file`, required, example: `input?.txt.gz`, multiple_sep: `\":\"`. A list of (gzipped) text files."
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"gzip_output": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Should the output be zipped?",
"help_text": "Type: `boolean_true`, default: `false`. Should the output be zipped?"
,
"default": false
}
,
"output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output.txt`, example: `output.txt`. File to write the output to, optionally gzipped",
"help_text": "Type: `file`, default: `$id.$key.output.txt`, example: `output.txt`. File to write the output to, optionally gzipped."
,
"default": "$id.$key.output.txt"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,173 @@
name: "untar"
version: "concat_text"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input"
description: "Tarball file to be unpacked."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
description: "Directory to write the contents of the .tar file to."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Other arguments"
arguments:
- type: "string"
name: "--exclude"
alternatives:
- "-e"
description: "Prevents any file or member whose name matches the shell wildcard\
\ (pattern) from being extracted."
info: null
example:
- "docs/figures"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Unpack a .tar file. When the contents of the .tar file is just a single\
\ directory,\nput the contents of the directory into the output folder instead of\
\ that directory.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/craftbox"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "concat_text"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/untar/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/untar"
executable: "target/nextflow/untar/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "53228705a20c6764e3f0ae9ed2147e99009d7c34"
git_remote: "https://github.com/viash-hub/craftbox"
package_config:
name: "craftbox"
version: "concat_text"
description: "A collection of custom-tailored scripts and applied tools.\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'concat_text'"
keywords:
- "scripts"
- "custom"
- "implementations"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/craftbox"
issue_tracker: "https://github.com/viash-hub/craftbox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the generated `untar` Nextflow module.
manifest {
name = 'untar'
// Entry script of the module.
mainScript = 'main.nf'
// Minimum Nextflow version. Per the Nextflow manifest documentation, the
// leading '!' turns the constraint into a hard requirement.
nextflowVersion = '!>=20.12.1-edge'
// NOTE(review): 'concat_text' looks like the name of the build/branch this
// component was generated from, not a version of `untar` itself - confirm.
version = 'concat_text'
description = 'Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n'
}
// Container image set at the process scope for this module.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Pick the temporary directory from the first of these environment variables
// that holds a non-empty value, in order: NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR.
// When none of them is set the literal '/tmp' is used. The chosen path is
// converted to an absolute path before being stored in `tempDir`.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles. One profile exists per container engine; each
// of those enables its own engine and explicitly disables the other four.
profiles {
// no_publish: turn off publishDir for every process (name pattern '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: point the `temp` setting of docker, podman and charliecloud
// at the `tempDir` value.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// docker: run with Docker only. The userEmulation setting is left commented out.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// singularity: run with Singularity only, with autoMounts switched on.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// podman: run with Podman only.
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// shifter: run with Shifter only.
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
// charliecloud: run with Charliecloud only.
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource labels: each `withLabel` selector assigns a fixed memory or cpus
// value to the processes that carry that label.
process{
// Decimal memory labels (mem<N>gb / mem<N>tb): values are powers of ten,
// e.g. mem1gb = 10^9 bytes and mem1tb = 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels (mem<N>gib / mem<N>tib): values are powers of two,
// e.g. mem1gib = 2^30 bytes and mem1tib = 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels (cpu<N>): set cpus to N.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,119 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "untar",
"description": "Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required. Tarball file to be unpacked",
"help_text": "Type: `file`, required. Tarball file to be unpacked."
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.output.output`. Directory to write the contents of the .tar file to",
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. Directory to write the contents of the .tar file to."
,
"default": "$id.$key.output.output"
}
}
},
"other arguments" : {
"title": "Other arguments",
"type": "object",
"description": "No description",
"properties": {
"exclude": {
"type":
"string",
"description": "Type: `string`, example: `docs/figures`. Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted",
"help_text": "Type: `string`, example: `docs/figures`. Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted."
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/other arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,260 @@
name: "parallel_map"
version: "unpack_genome"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--genomeDir"
description: "STAR reference directory"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcodes"
description: "The barcodes/wells to process"
info: null
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Barcode arguments"
arguments:
- type: "integer"
name: "--wellBarcodesLength"
description: "The length of the well barcodes"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--umiLength"
description: "The length of the UMIs"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--limitBAMsortRAM"
info: null
default:
- "10000000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Runtime arguments"
arguments:
- type: "integer"
name: "--runThreadN"
description: "Number of threads to use for a single STAR execution."
info: null
default:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
description: "Location of the output folders, 1 folder per barcode. The value\
\ used\nfor this argument must contain a '*', which will be replaced with the\n\
barcode to form the final output location for that barcode.\n"
info: null
default:
- "./*"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--joblog"
description: "Where to store the log file listing all the jobs."
info: null
default:
- "execution_log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "unpack_genome"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "gzip"
- "bzip2"
- "parallel"
- "wget"
- "zlib1g-dev"
- "unzip"
- "xxd"
- "file"
interactive: false
- type: "docker"
run:
- "wget -O $STAR_TARGET $STAR_SOURCE && \\\n unzip $STAR_TARGET -d /tmp && \\\
\n mv /tmp/STAR_$STAR_VERSION/Linux_x86_64_static/STAR /usr/local/bin/$STAR_BINARY\
\ && \\\n chmod +x /usr/local/bin/$STAR_BINARY && \\\n rm $STAR_TARGET &&\
\ rm -rf /tmp/STAR_$STAR_VERSION\n"
env:
- "STAR_VERSION \"2.7.11b\""
- "STAR_SOURCE \"https://github.com/alexdobin/STAR/releases/download/$STAR_VERSION/STAR_$STAR_VERSION.zip\""
- "STAR_TARGET \"/tmp/star.zip\""
- "STAR_BINARY \"STAR\""
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/parallel_map/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,260 @@
name: "parallel_map"
version: "unpack_genome"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--genomeDir"
description: "STAR reference directory"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcodes"
description: "The barcodes/wells to process"
info: null
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Barcode arguments"
arguments:
- type: "integer"
name: "--wellBarcodesLength"
description: "The length of the well barcodes"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--umiLength"
description: "The length of the UMIs"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--limitBAMsortRAM"
info: null
default:
- "10000000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Runtime arguments"
arguments:
- type: "integer"
name: "--runThreadN"
description: "Number of threads to use for a single STAR execution."
info: null
default:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
description: "Location of the output folders, 1 folder per barcode. The value\
\ used\nfor this argument must contain a '*', which will be replaced with the\n\
barcode to form the final output location for that barcode.\n"
info: null
default:
- "./*"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--joblog"
description: "Where to store the log file listing all the jobs."
info: null
default:
- "execution_log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "unpack_genome"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "gzip"
- "bzip2"
- "parallel"
- "wget"
- "zlib1g-dev"
- "unzip"
- "xxd"
- "file"
interactive: false
- type: "docker"
run:
- "wget -O $STAR_TARGET $STAR_SOURCE && \\\n unzip $STAR_TARGET -d /tmp && \\\
\n mv /tmp/STAR_$STAR_VERSION/Linux_x86_64_static/STAR /usr/local/bin/$STAR_BINARY\
\ && \\\n chmod +x /usr/local/bin/$STAR_BINARY && \\\n rm $STAR_TARGET &&\
\ rm -rf /tmp/STAR_$STAR_VERSION\n"
env:
- "STAR_VERSION \"2.7.11b\""
- "STAR_SOURCE \"https://github.com/alexdobin/STAR/releases/download/$STAR_VERSION/STAR_$STAR_VERSION.zip\""
- "STAR_TARGET \"/tmp/star.zip\""
- "STAR_BINARY \"STAR\""
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/parallel_map/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/parallel_map"
executable: "target/nextflow/parallel_map/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the generated `parallel_map` Nextflow module.
manifest {
name = 'parallel_map'
// Entry script of the module.
mainScript = 'main.nf'
// Minimum Nextflow version. Per the Nextflow manifest documentation, the
// leading '!' turns the constraint into a hard requirement.
nextflowVersion = '!>=20.12.1-edge'
// NOTE(review): 'unpack_genome' looks like the name of the build/branch this
// component was generated from rather than a semantic version - confirm.
version = 'unpack_genome'
description = 'Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n'
}
// Container image set at the process scope for this module.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Pick the temporary directory from the first of these environment variables
// that holds a non-empty value, in order: NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR.
// When none of them is set the literal '/tmp' is used. The chosen path is
// converted to an absolute path before being stored in `tempDir`.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Optional configuration profiles.
profiles {
// no_publish: disables publishDir for every process ('.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: sets the `temp` option of docker, podman and charliecloud to the
// tempDir value computed earlier in this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Container-engine profiles: each one enables exactly one engine and sets the
// `enabled` flag of the other four engines to false.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// singularity additionally switches on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource presets applied to processes that carry the matching label.
process{
// Memory, decimal units: memNgb = N * 10^9 bytes, memNtb = N * 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Memory, binary units: memNgib = N * 2^30 bytes, memNtib = N * 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU count: cpuN sets cpus to N.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,206 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "parallel_map",
"description": "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input_r1": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\":\"`. ",
"help_text": "Type: List of `file`, required, multiple_sep: `\":\"`. "
}
,
"input_r2": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\":\"`. ",
"help_text": "Type: List of `file`, required, multiple_sep: `\":\"`. "
}
,
"genomeDir": {
"type":
"string",
"description": "Type: `file`, required. STAR reference directory",
"help_text": "Type: `file`, required. STAR reference directory"
}
,
"barcodes": {
"type":
"string",
"description": "Type: List of `string`, required, multiple_sep: `\":\"`. The barcodes/wells to process",
"help_text": "Type: List of `string`, required, multiple_sep: `\":\"`. The barcodes/wells to process"
}
}
},
"barcode arguments" : {
"title": "Barcode arguments",
"type": "object",
"description": "No description",
"properties": {
"wellBarcodesLength": {
"type":
"integer",
"description": "Type: `integer`, required. The length of the well barcodes",
"help_text": "Type: `integer`, required. The length of the well barcodes"
}
,
"umiLength": {
"type":
"integer",
"description": "Type: `integer`, required. The length of the UMIs",
"help_text": "Type: `integer`, required. The length of the UMIs"
}
,
"limitBAMsortRAM": {
"type":
"string",
"description": "Type: `string`, default: `10000000000`. ",
"help_text": "Type: `string`, default: `10000000000`. "
,
"default": "10000000000"
}
}
},
"runtime arguments" : {
"title": "Runtime arguments",
"type": "object",
"description": "No description",
"properties": {
"runThreadN": {
"type":
"integer",
"description": "Type: `integer`, default: `1`. Number of threads to use for a single STAR execution",
"help_text": "Type: `integer`, default: `1`. Number of threads to use for a single STAR execution."
,
"default": "1"
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_*./*`, multiple_sep: `\":\"`. Location of the output folders, 1 folder per barcode",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_*./*`, multiple_sep: `\":\"`. Location of the output folders, 1 folder per barcode. The value used\nfor this argument must contain a \u0027*\u0027, which will be replaced with the\nbarcode to form the final output location for that barcode.\n"
,
"default": "$id.$key.output_*./*"
}
,
"joblog": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs",
"help_text": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs."
,
"default": "$id.$key.joblog.txt"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/barcode arguments"
},
{
"$ref": "#/definitions/runtime arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,233 @@
# Built Viash config for the `htrnaseq` workflow component (namespace `workflows`).
name: "htrnaseq"
namespace: "workflows"
version: "unpack_genome"
# Command-line arguments, grouped for display.
argument_groups:
# Four required, single-valued input files.
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
description: "R1"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "R2"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# NOTE(review): no description is set for --barcodesFasta; consider adding one.
- type: "file"
name: "--barcodesFasta"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# NOTE(review): no description is set for --genomeDir. Given the `untar`
# dependency declared further down in this file, this may accept an archive as
# well as a directory; confirm against main.nf and document it here.
- type: "file"
name: "--genomeDir"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# Two required, multi-valued outputs whose defaults are glob patterns.
- name: "Output arguments"
arguments:
- type: "file"
name: "--fastq_output"
description: "List of demultiplexed fastq files"
info: null
default:
- "fastq/*_001.fastq"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--star_output"
description: "Output from mapping with STAR"
info: null
default:
- "$id/star/*"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
# Workflow implementation: entrypoint `run_wf` in main.nf.
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
info: null
status: "enabled"
# Required commands; matches the `.requirements.commands := ['ps']` entry listed
# under package_config.config_mods in this file.
requirements:
commands:
- "ps"
# Components used by this workflow. Entries with repository type "local" carry
# no repo/tag; entries with type "vsh" name a repo and a tag.
dependencies:
- name: "workflows/well_demultiplex"
repository:
type: "local"
# NOTE(review): `untar` (and `concat_text` below) are pinned to craftbox tag
# "concat_text", which reads like a branch name rather than a release; confirm
# this is the intended pin.
- name: "untar"
repository:
type: "vsh"
repo: "craftbox"
tag: "concat_text"
- name: "parallel_map"
repository:
type: "local"
- name: "workflows/utils/splitWells"
repository:
type: "local"
- name: "workflows/utils/groupLanes"
repository:
type: "local"
- name: "workflows/utils/groupPairs"
repository:
type: "local"
- name: "workflows/utils/groupWells"
repository:
type: "local"
- name: "concat_text"
repository:
type: "vsh"
repo: "craftbox"
tag: "concat_text"
# Named repositories. NOTE(review): the dependencies above spell out their
# repositories inline, so the "bb" and "cb" aliases look unused in this built
# config; verify against the source config.
repositories:
- type: "local"
name: "local"
- type: "vsh"
name: "bb"
repo: "biobox"
tag: "v0.1.0"
- type: "vsh"
name: "cb"
repo: "craftbox"
tag: "concat_text"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
# Nextflow runner settings for this component.
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Label name -> process setting. Memory labels come as decimal byte counts
# (gb/tb, powers of 10) and binary byte counts (gib/tib, powers of 2); cpuN
# sets cpus to N.
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# NOTE(review): the "native" engine is listed twice with the same id. The second
# entry presumably comes from the package-level config_mod
# `.engines += { type: "native" }` being applied to a config that already
# declared a native engine; consider de-duplicating at the source.
engines:
- type: "native"
id: "native"
- type: "native"
id: "native"
# Provenance recorded for this build.
build_info:
config: "src/workflows/htrnaseq/config.vsh.yaml"
runner: "nextflow"
# Repeats "native" because the engines list above contains it twice.
engine: "native|native"
output: "target/nextflow/workflows/htrnaseq"
executable: "target/nextflow/workflows/htrnaseq/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
# Build locations of the entries under the top-level `dependencies` key, in the
# same order.
dependencies:
- "target/nextflow/workflows/well_demultiplex"
- "target/dependencies/vsh/vsh/craftbox/concat_text/nextflow/untar"
- "target/nextflow/parallel_map"
- "target/nextflow/workflows/utils/splitWells"
- "target/nextflow/workflows/utils/groupLanes"
- "target/nextflow/workflows/utils/groupPairs"
- "target/nextflow/workflows/utils/groupWells"
- "target/dependencies/vsh/vsh/craftbox/concat_text/nextflow/concat_text"
# Package-level settings as resolved for this build.
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
# NOTE(review): the repository's _viash.yaml declares viash_version 0.9.0-RC7,
# while this build records 0.9.0-RC6; confirm which version actually built it.
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
# Only the first entry appears in the repository's _viash.yaml; the other three
# are presumably added by the build pipeline (TODO confirm).
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
// Pipeline metadata for the generated `workflows/htrnaseq` Nextflow module.
manifest {
name = 'workflows/htrnaseq'
// Script Nextflow runs for this module.
mainScript = 'main.nf'
// The leading '!' makes this a hard requirement: Nextflow stops instead of
// only warning when the running version does not satisfy '>=20.12.1-edge'.
nextflowVersion = '!>=20.12.1-edge'
version = 'unpack_genome'
}
// Process-scope default container image.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first environment variable that is set and non-empty, checked in
// the order NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, and falls back to '/tmp'.
// The value is stored as an absolute java.nio.file.Path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Optional configuration profiles.
profiles {
// no_publish: disables publishDir for every process ('.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: sets the `temp` option of docker, podman and charliecloud to the
// tempDir value computed earlier in this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Container-engine profiles: each one enables exactly one engine and sets the
// `enabled` flag of the other four engines to false.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// singularity additionally switches on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource presets applied to processes that carry the matching label.
process{
// Memory, decimal units: memNgb = N * 10^9 bytes, memNtb = N * 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Memory, binary units: memNgib = N * 2^30 bytes, memNtib = N * 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU count: cpuN sets cpus to N.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,136 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "htrnaseq",
"description": "No description",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input_r1": {
"type":
"string",
"description": "Type: `file`, required. R1",
"help_text": "Type: `file`, required. R1"
}
,
"input_r2": {
"type":
"string",
"description": "Type: `file`, required. R2",
"help_text": "Type: `file`, required. R2"
}
,
"barcodesFasta": {
"type":
"string",
"description": "Type: `file`, required. ",
"help_text": "Type: `file`, required. "
}
,
"genomeDir": {
"type":
"string",
"description": "Type: `file`, required. ",
"help_text": "Type: `file`, required. "
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"fastq_output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files",
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files"
,
"default": "$id.$key.fastq_output_*.fastq"
}
,
"star_output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.star_output_*.star_output_*`, multiple_sep: `\":\"`. Output from mapping with STAR",
"help_text": "Type: List of `file`, required, default: `$id.$key.star_output_*.star_output_*`, multiple_sep: `\":\"`. Output from mapping with STAR"
,
"default": "$id.$key.star_output_*.star_output_*"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,165 @@
# Built Viash config for the `groupLanes` utility workflow (namespace `workflows/utils`).
name: "groupLanes"
namespace: "workflows/utils"
version: "unpack_genome"
# Command-line arguments, grouped for display.
argument_groups:
# One required R1 file and one required R2 file per invocation.
- name: "Inputs"
arguments:
- type: "file"
name: "--input_r1"
description: "Path to the input for R1"
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Path to the input for R2"
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# Both outputs are required and multi-valued (multiple: true).
- name: "Output"
arguments:
- type: "file"
name: "--output_r1"
# Fixed copy-paste error: this argument is the R1 output, not R2.
description: "Path to the output for R1"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_r2"
description: "Path to the output for R2"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
# Workflow implementation: entrypoint `run_wf` in main.nf.
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
# NOTE(review): placeholder description; replace with a real summary of what
# groupLanes does.
description: "N/A\n"
info: null
status: "enabled"
# Required commands; matches the `.requirements.commands := ['ps']` entry listed
# under package_config.config_mods in this file.
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
# Nextflow runner settings for this component.
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Label name -> process setting. Memory labels come as decimal byte counts
# (gb/tb, powers of 10) and binary byte counts (gib/tib, powers of 2); cpuN
# sets cpus to N.
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Single native engine.
engines:
- type: "native"
id: "native"
# Provenance recorded for this build.
build_info:
config: "src/workflows/utils/groupLanes/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/nextflow/workflows/utils/groupLanes"
executable: "target/nextflow/workflows/utils/groupLanes/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
# Package-level settings as resolved for this build.
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
# NOTE(review): the repository's _viash.yaml declares viash_version 0.9.0-RC7,
# while this build records 0.9.0-RC6; confirm which version actually built it.
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
# Only the first entry appears in the repository's _viash.yaml; the other three
# are presumably added by the build pipeline (TODO confirm).
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the generated `workflows/utils/groupLanes` Nextflow module.
manifest {
name = 'workflows/utils/groupLanes'
// Script Nextflow runs for this module.
mainScript = 'main.nf'
// The leading '!' makes this a hard requirement: Nextflow stops instead of
// only warning when the running version does not satisfy '>=20.12.1-edge'.
nextflowVersion = '!>=20.12.1-edge'
version = 'unpack_genome'
// NOTE(review): placeholder description carried over from the component config.
description = 'N/A\n'
}
// Process-scope default container image.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first environment variable that is set and non-empty, checked in
// the order NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, and falls back to '/tmp'.
// The value is stored as an absolute java.nio.file.Path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Optional configuration profiles.
profiles {
// no_publish: disables publishDir for every process ('.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: sets the `temp` option of docker, podman and charliecloud to the
// tempDir value computed earlier in this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Container-engine profiles: each one enables exactly one engine and sets the
// `enabled` flag of the other four engines to false.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// singularity additionally switches on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource presets applied to processes that carry the matching label.
process{
// Memory, decimal units: memNgb = N * 10^9 bytes, memNtb = N * 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Memory, binary units: memNgib = N * 2^30 bytes, memNtib = N * 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU count: cpuN sets cpus to N.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,116 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "groupLanes",
"description": "N/A\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input_r1": {
"type":
"string",
"description": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1",
"help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1"
}
,
"input_r2": {
"type":
"string",
"description": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R2",
"help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R2"
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"output_r1": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\":\"`. Path to output for R2",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\":\"`. Path to output for R2"
,
"default": "$id.$key.output_r1_*.output_r1_*"
}
,
"output_r2": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\":\"`. Path to the output for R2",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\":\"`. Path to the output for R2"
,
"default": "$id.$key.output_r2_*.output_r2_*"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,153 @@
name: "groupPairs"
namespace: "workflows/utils"
version: "unpack_genome"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Path to the input for R1"
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--r1"
description: "Path to output for R1"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--r2"
description: "Path to output for R2"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "N/A\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/utils/groupPairs/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/nextflow/workflows/utils/groupPairs"
executable: "target/nextflow/workflows/utils/groupPairs/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
manifest {
name = 'workflows/utils/groupPairs'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'unpack_genome'
description = 'N/A\n'
}
process.container = 'nextflow/bash:latest'
// Detect the temporary directory.
// The first of the environment variables NXF_TEMP, VIASH_TEMP, TEMPDIR and
// TMPDIR that holds a non-empty value wins (the Elvis operator `?:` falls
// through on null and on empty strings); if none is set, '/tmp' is used.
// The chosen location is converted to an absolute path.
// The result is what the `mount_temp` profile assigns to docker.temp,
// podman.temp and charliecloud.temp.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
profiles {
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,106 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "groupPairs",
"description": "N/A\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1",
"help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1"
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"r1": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.r1.r1`. Path to output for R2",
"help_text": "Type: `file`, required, default: `$id.$key.r1.r1`. Path to output for R2"
,
"default": "$id.$key.r1.r1"
}
,
"r2": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.r2.r2`. Path to output for R2",
"help_text": "Type: `file`, required, default: `$id.$key.r2.r2`. Path to output for R2"
,
"default": "$id.$key.r2.r2"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,185 @@
name: "groupWells"
namespace: "workflows/utils"
version: "unpack_genome"
argument_groups:
- name: "Inputs"
arguments:
- type: "string"
name: "--well"
description: "Barcode identifier for a well"
info: null
example:
- "input.fastq.gz"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r1"
description: "Path to the input for R1"
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Path to the input for R2"
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "string"
name: "--wells"
description: "List of grouped wells (by means of barcodes)"
info: null
example:
- "input.fastq.gz"
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_r1"
description: "Path to the output for R1"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_r2"
description: "Path to the output for R2"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "N/A\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/utils/groupWells/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/nextflow/workflows/utils/groupWells"
executable: "target/nextflow/workflows/utils/groupWells/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
manifest {
name = 'workflows/utils/groupWells'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'unpack_genome'
description = 'N/A\n'
}
process.container = 'nextflow/bash:latest'
// Detect the temporary directory.
// The first of the environment variables NXF_TEMP, VIASH_TEMP, TEMPDIR and
// TMPDIR that holds a non-empty value wins (the Elvis operator `?:` falls
// through on null and on empty strings); if none is set, '/tmp' is used.
// The chosen location is converted to an absolute path.
// The result is what the `mount_temp` profile assigns to docker.temp,
// podman.temp and charliecloud.temp.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
profiles {
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,136 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "groupWells",
"description": "N/A\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"well": {
"type":
"string",
"description": "Type: `string`, required, example: `input.fastq.gz`. Barcode identifier for a well",
"help_text": "Type: `string`, required, example: `input.fastq.gz`. Barcode identifier for a well"
}
,
"input_r1": {
"type":
"string",
"description": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1",
"help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1"
}
,
"input_r2": {
"type":
"string",
"description": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1",
"help_text": "Type: `file`, required, example: `input.fastq.gz`. Path to the input for R1"
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"wells": {
"type":
"string",
"description": "Type: List of `string`, example: `input.fastq.gz`, multiple_sep: `\":\"`. List of grouped wells (by means of barcodes)",
"help_text": "Type: List of `string`, example: `input.fastq.gz`, multiple_sep: `\":\"`. List of grouped wells (by means of barcodes)"
}
,
"output_r1": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\":\"`. Path to output for R2",
"help_text": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\":\"`. Path to output for R2"
,
"default": "$id.$key.output_r1_*.output_r1_*"
}
,
"output_r2": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\":\"`. Path to the output for R2",
"help_text": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\":\"`. Path to the output for R2"
,
"default": "$id.$key.output_r2_*.output_r2_*"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,171 @@
name: "splitWells"
namespace: "workflows/utils"
version: "unpack_genome"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "List of demultiplexed fastq files"
info: null
example:
- "ACAGCGATCGAC_R1_001.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output"
arguments:
- type: "string"
name: "--pool"
description: "The original pool / sample name"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--barcode_path"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--lane"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--pair_end"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "N/A\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/utils/splitWells/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/nextflow/workflows/utils/splitWells"
executable: "target/nextflow/workflows/utils/splitWells/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "unpack_genome"
description: "Demultiplexing pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC6"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
manifest {
name = 'workflows/utils/splitWells'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'unpack_genome'
description = 'N/A\n'
}
process.container = 'nextflow/bash:latest'
// Detect the temporary directory.
// The first of the environment variables NXF_TEMP, VIASH_TEMP, TEMPDIR and
// TMPDIR that holds a non-empty value wins (the Elvis operator `?:` falls
// through on null and on empty strings); if none is set, '/tmp' is used.
// The chosen location is converted to an absolute path.
// The result is what the `mount_temp` profile assigns to docker.temp,
// podman.temp and charliecloud.temp.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
profiles {
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,135 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "splitWells",
"description": "N/A\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `ACAGCGATCGAC_R1_001.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files",
"help_text": "Type: List of `file`, required, example: `ACAGCGATCGAC_R1_001.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files"
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"pool": {
"type":
"string",
"description": "Type: `string`. The original pool / sample name",
"help_text": "Type: `string`. The original pool / sample name"
}
,
"barcode": {
"type":
"string",
"description": "Type: `string`. ",
"help_text": "Type: `string`. "
}
,
"barcode_path": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.barcode_path.barcode_path`. ",
"help_text": "Type: `file`, default: `$id.$key.barcode_path.barcode_path`. "
,
"default": "$id.$key.barcode_path.barcode_path"
}
,
"lane": {
"type":
"string",
"description": "Type: `string`. ",
"help_text": "Type: `string`. "
}
,
"pair_end": {
"type":
"string",
"description": "Type: `string`. ",
"help_text": "Type: `string`. "
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,182 @@
# Viash component configuration for the `well_demultiplex` workflow.
# The trailing `build_info` and `package_config` sections record how and from
# which package this file was built.

# --- Component identity -------------------------------------------------------
name: "well_demultiplex"
namespace: "workflows"
version: "unpack_genome"

# --- Arguments ----------------------------------------------------------------
argument_groups:
  - name: "Input arguments"
    arguments:
      # First read file of the pair (required input file).
      - type: "file"
        name: "--input_r1"
        description: "R1"
        info: null
        must_exist: true
        create_parent: true
        required: true
        direction: "input"
        multiple: false
        multiple_sep: ";"
      # Second read file of the pair (required input file).
      - type: "file"
        name: "--input_r2"
        description: "R2"
        info: null
        must_exist: true
        create_parent: true
        required: true
        direction: "input"
        multiple: false
        multiple_sep: ";"
      # Required input file; no description is given in the config.
      # Presumably the FASTA of well barcodes -- TODO confirm against main.nf.
      - type: "file"
        name: "--barcodesFasta"
        info: null
        must_exist: true
        create_parent: true
        required: true
        direction: "input"
        multiple: false
        multiple_sep: ";"
  - name: "Output arguments"
    arguments:
      # Multi-valued output; values are separated by ";" and default to a glob.
      - type: "file"
        name: "--output"
        description: "List of demultiplexed fastq files"
        info: null
        default:
          - "fastq/*_001.fastq"
        must_exist: true
        create_parent: true
        required: true
        direction: "output"
        multiple: true
        multiple_sep: ";"

# --- Implementation and tests -------------------------------------------------
resources:
  - type: "nextflow_script"
    path: "main.nf"
    is_executable: true
    entrypoint: "run_wf"
description: "Demultiplexing on well level"
test_resources:
  - type: "nextflow_script"
    path: "test.nf"
    is_executable: true
    entrypoint: "test_wf"
info: null
status: "enabled"

# --- Requirements and dependencies --------------------------------------------
requirements:
  commands:
    - "ps"
dependencies:
  - name: "cutadapt"
    repository:
      type: "vsh"
      repo: "biobox"
      tag: "v0.1.0"
repositories:
  - type: "vsh"
    name: "bb"
    repo: "biobox"
    tag: "v0.1.0"
license: "MIT"
links:
  repository: "https://github.com/viash-hub/htrnaseq"

# --- Runners ------------------------------------------------------------------
runners:
  - type: "nextflow"
    id: "nextflow"
    directives:
      tag: "$id"
    auto:
      simplifyInput: true
      simplifyOutput: false
      transcript: false
      publish: false
    config:
      # Each label maps to a Nextflow directive assignment, given as a string.
      labels:
        # Decimal memory sizes in bytes (1 gb = 10^9 bytes).
        mem1gb: "memory = 1000000000.B"
        mem2gb: "memory = 2000000000.B"
        mem5gb: "memory = 5000000000.B"
        mem10gb: "memory = 10000000000.B"
        mem20gb: "memory = 20000000000.B"
        mem50gb: "memory = 50000000000.B"
        mem100gb: "memory = 100000000000.B"
        mem200gb: "memory = 200000000000.B"
        mem500gb: "memory = 500000000000.B"
        mem1tb: "memory = 1000000000000.B"
        mem2tb: "memory = 2000000000000.B"
        mem5tb: "memory = 5000000000000.B"
        mem10tb: "memory = 10000000000000.B"
        mem20tb: "memory = 20000000000000.B"
        mem50tb: "memory = 50000000000000.B"
        mem100tb: "memory = 100000000000000.B"
        mem200tb: "memory = 200000000000000.B"
        mem500tb: "memory = 500000000000000.B"
        # Binary memory sizes in bytes (1 gib = 2^30 bytes).
        mem1gib: "memory = 1073741824.B"
        mem2gib: "memory = 2147483648.B"
        mem4gib: "memory = 4294967296.B"
        mem8gib: "memory = 8589934592.B"
        mem16gib: "memory = 17179869184.B"
        mem32gib: "memory = 34359738368.B"
        mem64gib: "memory = 68719476736.B"
        mem128gib: "memory = 137438953472.B"
        mem256gib: "memory = 274877906944.B"
        mem512gib: "memory = 549755813888.B"
        mem1tib: "memory = 1099511627776.B"
        mem2tib: "memory = 2199023255552.B"
        mem4tib: "memory = 4398046511104.B"
        mem8tib: "memory = 8796093022208.B"
        mem16tib: "memory = 17592186044416.B"
        mem32tib: "memory = 35184372088832.B"
        mem64tib: "memory = 70368744177664.B"
        mem128tib: "memory = 140737488355328.B"
        mem256tib: "memory = 281474976710656.B"
        mem512tib: "memory = 562949953421312.B"
        # CPU counts.
        cpu1: "cpus = 1"
        cpu2: "cpus = 2"
        cpu5: "cpus = 5"
        cpu10: "cpus = 10"
        cpu20: "cpus = 20"
        cpu50: "cpus = 50"
        cpu100: "cpus = 100"
        cpu200: "cpus = 200"
        cpu500: "cpus = 500"
        cpu1000: "cpus = 1000"
    debug: false
    container: "docker"

# --- Engines ------------------------------------------------------------------
# NOTE(review): both entries share the id "native", and build_info.engine below
# reads "native|native". This looks like the result of the
# `.engines += { type: "native" }` config mod listed under package_config --
# confirm whether the duplicate is intended.
engines:
  - type: "native"
    id: "native"
  - type: "native"
    id: "native"

# --- Build provenance ---------------------------------------------------------
build_info:
  config: "src/workflows/well_demultiplex/config.vsh.yaml"
  runner: "nextflow"
  engine: "native|native"
  output: "target/nextflow/workflows/well_demultiplex"
  executable: "target/nextflow/workflows/well_demultiplex/main.nf"
  viash_version: "0.9.0-RC6"
  git_commit: "7c0705a514b86c8d335f308abf7fb1b62d16aa2f"
  git_remote: "https://github.com/viash-hub/htrnaseq"
  dependencies:
    - "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"

# --- Package-level settings ---------------------------------------------------
package_config:
  name: "htrnaseq"
  version: "unpack_genome"
  description: "Demultiplexing pipeline [WIP]\n"
  info: null
  viash_version: "0.9.0-RC6"
  source: "src"
  target: "target"
  config_mods:
    - ".requirements.commands := ['ps']\n"
    - ".engines += { type: \"native\" }"
    - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
    - ".engines[.type == 'docker'].target_tag := 'unpack_genome'"
  keywords:
    - "bioinformatics"
    - "sequence"
    - "high-throughput"
    - "mapping"
    - "counting"
    - "pipeline"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/htrnaseq"
    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline manifest: name, entry script, required Nextflow version, version
// and description of this workflow.
manifest.name = 'workflows/well_demultiplex'
manifest.mainScript = 'main.nf'
manifest.nextflowVersion = '!>=20.12.1-edge'
manifest.version = 'unpack_genome'
manifest.description = 'Demultiplexing on well level'
// Container image set at process scope.
process.container = 'nextflow/bash:latest'

// Detect the temporary directory. The environment variables are tried in the
// order NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR; the first one holding a
// non-empty value is used, and '/tmp' is the fallback. The result is stored
// as an absolute path.
tempDir = java.nio.file.Paths.get(
    System.getenv('NXF_TEMP')
        ?: System.getenv('VIASH_TEMP')
        ?: System.getenv('TEMPDIR')
        ?: System.getenv('TMPDIR')
        ?: '/tmp'
).toAbsolutePath()
// Selectable configuration profiles.
profiles {

    // Turn off publishing for every process (the selector matches any name).
    no_publish {
        process {
            withName: '.*' {
                publishDir = [enabled: false]
            }
        }
    }

    // Point the container runtimes at the detected temporary directory.
    mount_temp {
        docker.temp       = tempDir
        podman.temp       = tempDir
        charliecloud.temp = tempDir
    }

    // One profile per container runtime: each enables its own runtime and
    // explicitly disables the other four.
    docker {
        docker.enabled       = true
        // docker.userEmulation = true
        singularity.enabled  = false
        podman.enabled       = false
        shifter.enabled      = false
        charliecloud.enabled = false
    }

    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
    }

    podman {
        podman.enabled       = true
        docker.enabled       = false
        singularity.enabled  = false
        shifter.enabled      = false
        charliecloud.enabled = false
    }

    shifter {
        shifter.enabled      = true
        docker.enabled       = false
        singularity.enabled  = false
        podman.enabled       = false
        charliecloud.enabled = false
    }

    charliecloud {
        charliecloud.enabled = true
        docker.enabled       = false
        singularity.enabled  = false
        podman.enabled       = false
        shifter.enabled      = false
    }
}
// Resource labels: a process carrying one of these labels receives the
// corresponding memory or cpus setting.
process {

    // Decimal memory sizes, in bytes (1 gb = 10^9 bytes, 1 tb = 10^12 bytes).
    withLabel: mem1gb   { memory = 1000000000.B }
    withLabel: mem2gb   { memory = 2000000000.B }
    withLabel: mem5gb   { memory = 5000000000.B }
    withLabel: mem10gb  { memory = 10000000000.B }
    withLabel: mem20gb  { memory = 20000000000.B }
    withLabel: mem50gb  { memory = 50000000000.B }
    withLabel: mem100gb { memory = 100000000000.B }
    withLabel: mem200gb { memory = 200000000000.B }
    withLabel: mem500gb { memory = 500000000000.B }
    withLabel: mem1tb   { memory = 1000000000000.B }
    withLabel: mem2tb   { memory = 2000000000000.B }
    withLabel: mem5tb   { memory = 5000000000000.B }
    withLabel: mem10tb  { memory = 10000000000000.B }
    withLabel: mem20tb  { memory = 20000000000000.B }
    withLabel: mem50tb  { memory = 50000000000000.B }
    withLabel: mem100tb { memory = 100000000000000.B }
    withLabel: mem200tb { memory = 200000000000000.B }
    withLabel: mem500tb { memory = 500000000000000.B }

    // Binary memory sizes, in bytes (1 gib = 2^30 bytes, 1 tib = 2^40 bytes).
    withLabel: mem1gib   { memory = 1073741824.B }
    withLabel: mem2gib   { memory = 2147483648.B }
    withLabel: mem4gib   { memory = 4294967296.B }
    withLabel: mem8gib   { memory = 8589934592.B }
    withLabel: mem16gib  { memory = 17179869184.B }
    withLabel: mem32gib  { memory = 34359738368.B }
    withLabel: mem64gib  { memory = 68719476736.B }
    withLabel: mem128gib { memory = 137438953472.B }
    withLabel: mem256gib { memory = 274877906944.B }
    withLabel: mem512gib { memory = 549755813888.B }
    withLabel: mem1tib   { memory = 1099511627776.B }
    withLabel: mem2tib   { memory = 2199023255552.B }
    withLabel: mem4tib   { memory = 4398046511104.B }
    withLabel: mem8tib   { memory = 8796093022208.B }
    withLabel: mem16tib  { memory = 17592186044416.B }
    withLabel: mem32tib  { memory = 35184372088832.B }
    withLabel: mem64tib  { memory = 70368744177664.B }
    withLabel: mem128tib { memory = 140737488355328.B }
    withLabel: mem256tib { memory = 281474976710656.B }
    withLabel: mem512tib { memory = 562949953421312.B }

    // CPU counts.
    withLabel: cpu1    { cpus = 1 }
    withLabel: cpu2    { cpus = 2 }
    withLabel: cpu5    { cpus = 5 }
    withLabel: cpu10   { cpus = 10 }
    withLabel: cpu20   { cpus = 20 }
    withLabel: cpu50   { cpus = 50 }
    withLabel: cpu100  { cpus = 100 }
    withLabel: cpu200  { cpus = 200 }
    withLabel: cpu500  { cpus = 500 }
    withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,115 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "well_demultiplex",
"description": "Demultiplexing on well level",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input_r1": {
"type":
"string",
"description": "Type: `file`, required. R1",
"help_text": "Type: `file`, required. R1"
}
,
"input_r2": {
"type":
"string",
"description": "Type: `file`, required. R2",
"help_text": "Type: `file`, required. R2"
}
,
"barcodesFasta": {
"type":
"string",
"description": "Type: `file`, required. ",
"help_text": "Type: `file`, required. "
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_*.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_*.fastq`, multiple_sep: `\":\"`. List of demultiplexed fastq files"
,
"default": "$id.$key.output_*.fastq"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}