Build branch main with version main (1e1ffb3)
Build pipeline: vsh-ci-dev-jsbwk
Source commit: 1e1ffb315f
Source message: Merge pull request #17 from viash-hub/add_biobox_modules
- Migrate a number of components to biobox
- Fix tests
- Reduce size of test resources
- Prepare for Viash Hub
This commit is contained in:
277
target/executable/dupradar/.config.vsh.yaml
Normal file
277
target/executable/dupradar/.config.vsh.yaml
Normal file
@@ -0,0 +1,277 @@
|
||||
name: "dupradar"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Input"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--id"
|
||||
description: "Sample ID"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
description: "path to input alignment file in BAM format"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--gtf_annotation"
|
||||
description: "path to GTF annotation file."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--paired"
|
||||
description: "add flag if input alignment file consists of paired reads"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--strandedness"
|
||||
description: "strandedness of input bam file reads (forward, reverse or unstranded\
|
||||
\ (default, applicable to paired reads))"
|
||||
info: null
|
||||
required: false
|
||||
choices:
|
||||
- "forward"
|
||||
- "reverse"
|
||||
- "unstranded"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output_dupmatrix"
|
||||
description: "path to output file (txt) of duplicate tag counts"
|
||||
info: null
|
||||
default:
|
||||
- "$id.dup_matrix.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_dup_intercept_mqc"
|
||||
description: "path to output file (txt) of multiqc intercept value DupRadar"
|
||||
info: null
|
||||
default:
|
||||
- "$id.dup_intercept_mqc.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_duprate_exp_boxplot"
|
||||
description: "path to output file (pdf) of distribution of expression box plot"
|
||||
info: null
|
||||
default:
|
||||
- "$id.duprate_exp_boxplot.pdf"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_duprate_exp_densplot"
|
||||
description: "path to output file (pdf) of 2D density scatter plot of duplicate\
|
||||
\ tag counts"
|
||||
info: null
|
||||
default:
|
||||
- "$id.duprate_exp_densityplot.pdf"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_duprate_exp_denscurve_mqc"
|
||||
description: "path to output file (pdf) of density curve of gene duplication multiqc"
|
||||
info: null
|
||||
default:
|
||||
- "$id.duprate_exp_density_curve_mqc.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_expression_histogram"
|
||||
description: "path to output file (pdf) of distribution of RPK values per gene\
|
||||
\ histogram"
|
||||
info: null
|
||||
default:
|
||||
- "$id.expression_hist.pdf"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_intercept_slope"
|
||||
description: "output file (txt) with progression of duplication rate value"
|
||||
info: null
|
||||
default:
|
||||
- "$id.intercept_slope.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "bash_script"
|
||||
path: "script.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "dupradar.r"
|
||||
description: "Assessment of duplication rates in RNA-Seq datasets\n"
|
||||
test_resources:
|
||||
- type: "bash_script"
|
||||
path: "test.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
|
||||
- type: "file"
|
||||
path: "genes.gtf"
|
||||
info:
|
||||
migration_info:
|
||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
||||
paths:
|
||||
- "modules/local/dupradar.nf"
|
||||
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "ubuntu:22.04"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "r-base"
|
||||
interactive: false
|
||||
- type: "r"
|
||||
bioc:
|
||||
- "dupRadar"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/dupradar/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/dupradar"
|
||||
executable: "target/executable/dupradar/dupradar"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
|
||||
git_remote: "https://github.com/viash-hub/rnaseq"
|
||||
package_config:
|
||||
version: "main"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
||||
\ := '$id'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
organization: "vsh"
|
||||
1418
target/executable/dupradar/dupradar
Executable file
1418
target/executable/dupradar/dupradar
Executable file
File diff suppressed because it is too large
Load Diff
154
target/executable/dupradar/dupradar.r
Normal file
154
target/executable/dupradar/dupradar.r
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env Rscript
|
||||
|
||||
# Command line argument processing
|
||||
args = commandArgs(trailingOnly=TRUE)
|
||||
if (length(args) < 5) {
|
||||
stop("Usage: dupRadar.r <input.bam> <sample_id> <annotation.gtf> <strandDirection:0=unstranded/1=forward/2=reverse> <paired/single> <nbThreads> <R-package-location (optional)>", call.=FALSE)
|
||||
}
|
||||
|
||||
message("paired_end is", args[5])
|
||||
message("the type is is", class(args[5]))
|
||||
|
||||
input_bam <- args[1]
|
||||
output_prefix <- args[2]
|
||||
annotation_gtf <- args[3]
|
||||
stranded <- as.numeric(args[4])
|
||||
paired_end <- ifelse(args[5] == "true", TRUE, FALSE)
|
||||
threads <- as.numeric(args[6])
|
||||
|
||||
bamRegex <- "(.+)\\.bam$"
|
||||
|
||||
if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '<input.bam>' must be an existing file (not a directory) with '.bam' extension...")
|
||||
if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '<annotation.gtf>' must be an existing file (and not a directory)...")
|
||||
if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument <strandDirection> must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...")
|
||||
if(is.na(threads) || (threads<=0)) stop("Fifth argument <nbThreads> must be a strictly positive numeric value...")
|
||||
|
||||
# Debug messages (stderr)
|
||||
message("Input bam (Arg 1): ", input_bam)
|
||||
message("Output basename(Arg 2): ", output_prefix)
|
||||
message("Input gtf (Arg 3): ", annotation_gtf)
|
||||
message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1])
|
||||
message("paired_end (Arg 5): ", paired_end)
|
||||
message("Nb threads (Arg 6): ", threads)
|
||||
message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified"))
|
||||
|
||||
|
||||
# Load / install packages
|
||||
if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) }
|
||||
if (!require("dupRadar")){
|
||||
source("http://bioconductor.org/biocLite.R")
|
||||
biocLite("dupRadar", suppressUpdates=TRUE)
|
||||
library("dupRadar")
|
||||
}
|
||||
if (!require("parallel")) {
|
||||
install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/')
|
||||
library("parallel")
|
||||
}
|
||||
|
||||
# Duplicate stats
|
||||
dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads)
|
||||
write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t")
|
||||
|
||||
# 2D density scatter plot
|
||||
pdf(paste0(output_prefix, "_duprateExpDens.pdf"))
|
||||
duprateExpDensPlot(DupMat=dm)
|
||||
title("Density scatter plot")
|
||||
mtext(output_prefix, side=3)
|
||||
dev.off()
|
||||
fit <- duprateExpFit(DupMat=dm)
|
||||
cat(
|
||||
paste("- dupRadar Int (duprate at low read counts):", fit$intercept),
|
||||
paste("- dupRadar Sl (progression of the duplication rate):", fit$slope),
|
||||
fill=TRUE, labels=output_prefix,
|
||||
file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE
|
||||
)
|
||||
|
||||
# Create a multiqc file dupInt
|
||||
sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix)
|
||||
line="#id: DupInt
|
||||
#plot_type: 'generalstats'
|
||||
#pconfig:
|
||||
# dupRadar_intercept:
|
||||
# title: 'dupInt'
|
||||
# namespace: 'DupRadar'
|
||||
# description: 'Intercept value from DupRadar'
|
||||
# max: 100
|
||||
# min: 0
|
||||
# scale: 'RdYlGn-rev'
|
||||
# format: '{:.2f}%'
|
||||
Sample dupRadar_intercept"
|
||||
|
||||
write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE)
|
||||
write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE)
|
||||
|
||||
# Get numbers from dupRadar GLM
|
||||
curve_x <- sort(log10(dm$RPK))
|
||||
curve_y = 100*predict(fit$glm, data.frame(x=curve_x), type="response")
|
||||
# Remove all of the infinite values
|
||||
infs = which(curve_x %in% c(-Inf,Inf))
|
||||
curve_x = curve_x[-infs]
|
||||
curve_y = curve_y[-infs]
|
||||
# Reduce number of data points
|
||||
curve_x <- curve_x[seq(1, length(curve_x), 10)]
|
||||
curve_y <- curve_y[seq(1, length(curve_y), 10)]
|
||||
# Convert x values back to real counts
|
||||
curve_x = 10^curve_x
|
||||
# Write to file
|
||||
line="#id: dupradar
|
||||
#section_name: 'DupRadar'
|
||||
#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html'
|
||||
#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication.
|
||||
# This plot shows the general linear models - a summary of the gene duplication distributions. \"
|
||||
#pconfig:
|
||||
# title: 'DupRadar General Linear Model'
|
||||
# xLog: True
|
||||
# xlab: 'expression (reads/kbp)'
|
||||
# ylab: '% duplicate reads'
|
||||
# ymax: 100
|
||||
# ymin: 0
|
||||
# tt_label: '<b>{point.x:.1f} reads/kbp</b>: {point.y:,.2f}% duplicates'
|
||||
# xPlotLines:
|
||||
# - color: 'green'
|
||||
# dashStyle: 'LongDash'
|
||||
# label:
|
||||
# style: {color: 'green'}
|
||||
# text: '0.5 RPKM'
|
||||
# verticalAlign: 'bottom'
|
||||
# y: -65
|
||||
# value: 0.5
|
||||
# width: 1
|
||||
# - color: 'red'
|
||||
# dashStyle: 'LongDash'
|
||||
# label:
|
||||
# style: {color: 'red'}
|
||||
# text: '1 read/bp'
|
||||
# verticalAlign: 'bottom'
|
||||
# y: -65
|
||||
# value: 1000
|
||||
# width: 1"
|
||||
|
||||
write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE)
|
||||
write.table(
|
||||
cbind(curve_x, curve_y),
|
||||
file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),
|
||||
quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE,
|
||||
)
|
||||
|
||||
# Distribution of expression box plot
|
||||
pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf"))
|
||||
duprateExpBoxplot(DupMat=dm)
|
||||
title("Percent Duplication by Expression")
|
||||
mtext(output_prefix, side=3)
|
||||
dev.off()
|
||||
|
||||
# Distribution of RPK values per gene
|
||||
pdf(paste0(output_prefix, "_expressionHist.pdf"))
|
||||
expressionHist(DupMat=dm)
|
||||
title("Distribution of RPK values per gene")
|
||||
mtext(output_prefix, side=3)
|
||||
dev.off()
|
||||
|
||||
# Print sessioninfo to standard out
|
||||
print(output_prefix)
|
||||
citation("dupRadar")
|
||||
sessionInfo()
|
||||
Reference in New Issue
Block a user