Build branch main with version main (82647a4)
Build pipeline: viash-hub.htrnaseq.main-8kbhw
Source commit: 82647a421d
Source message: Assert that the Well ID matches the required format (#22)
This commit is contained in:
235
target/executable/report/create_report/.config.vsh.yaml
Normal file
235
target/executable/report/create_report/.config.vsh.yaml
Normal file
@@ -0,0 +1,235 @@
|
||||
name: "create_report"
|
||||
namespace: "report"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--eset"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_report"
|
||||
info: null
|
||||
example:
|
||||
- "report.html"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "r_script"
|
||||
path: "script.R"
|
||||
is_executable: true
|
||||
- type: "r_script"
|
||||
path: "template.Rmd"
|
||||
is_executable: true
|
||||
- type: "r_script"
|
||||
path: "plateLayouts.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "OutputSTARsolo.png"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Create a basic QC report in HTML format based on a number of esets.\n"
|
||||
test_resources:
|
||||
- type: "r_script"
|
||||
path: "test.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "rocker/r2u:24.04"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
- "pandoc"
|
||||
interactive: false
|
||||
- type: "r"
|
||||
cran:
|
||||
- "ggplot2"
|
||||
- "knitr"
|
||||
- "gridExtra"
|
||||
- "RColorBrewer"
|
||||
- "processx"
|
||||
- "whisker"
|
||||
- "rmarkdown"
|
||||
- "bookdown"
|
||||
- "data.table"
|
||||
- "platetools"
|
||||
- "htmltools"
|
||||
- "DT"
|
||||
- "logger"
|
||||
- "bit64"
|
||||
bioc:
|
||||
- "Biobase"
|
||||
- "ComplexHeatmap"
|
||||
script:
|
||||
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
|
||||
, getOption(\"repos\")))"
|
||||
bioc_force_install: false
|
||||
test_setup:
|
||||
- type: "r"
|
||||
packages:
|
||||
- "testthat"
|
||||
- "R.utils"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/report/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/report/create_report"
|
||||
executable: "target/executable/report/create_report/create_report"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
BIN
target/executable/report/create_report/OutputSTARsolo.png
Normal file
BIN
target/executable/report/create_report/OutputSTARsolo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 77 KiB |
1162
target/executable/report/create_report/create_report
Executable file
1162
target/executable/report/create_report/create_report
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/report/create_report/nextflow_labels.config
Normal file
108
target/executable/report/create_report/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
424
target/executable/report/create_report/plateLayouts.R
Executable file
424
target/executable/report/create_report/plateLayouts.R
Executable file
@@ -0,0 +1,424 @@
|
||||
|
||||
#' Displays the annotation of the wells in a plateLayout
|
||||
#' @param plateData a data.table object containing the information
|
||||
#' of the plate. This must contain a "WellID".
|
||||
#' @param plateName The plate name
|
||||
#' @param valueVariable The name of the variable in 'plateData' to
|
||||
#' be visualized in a plate layout.
|
||||
#' @param textVariable The name of the variable in 'plateData' to be
|
||||
#' shown in the wells of the plate layout. If NULL, the valueVariable
|
||||
#' is shown.
|
||||
#' @param colours A named character vector containing the colours
|
||||
#' for the different levels of the valuevariable. The names should
|
||||
#' correspond to the dose levels. if not specified, a scheme of blues
|
||||
#' will be provided.
|
||||
#' @param breaks Numeric vector indicating breaks for plot coloring.
|
||||
#' @param colourWellText Colour to display the text in the wells.
|
||||
#' @param layout Integer vector of length two with number of rows and
|
||||
#' colums in a plate, e.g. \code{c(16,24)}
|
||||
#' @param legend.title A title for the legend
|
||||
#' @param plot.title A title for the plot, will be contracted
|
||||
#' with the plate name
|
||||
#' @param ... additional arguments for \code{plateLayout.default} function
|
||||
#' @import data.table
|
||||
#' @importFrom platetools fill_plate
|
||||
#' @export
|
||||
plateLayout.annotation <- function(
|
||||
plateData,
|
||||
plateName = character(),
|
||||
valueVariable = "Dose",
|
||||
textVariable = NULL,
|
||||
breaks = NULL, colours = NULL,
|
||||
colourWellText = "black",
|
||||
layout = c(16, 24),
|
||||
legend.title = "Dose",
|
||||
plot.title = "Plate Annotation - ",
|
||||
textFontSize = 9, ...
|
||||
) {
|
||||
WellID <- Label <- NULL
|
||||
|
||||
if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
|
||||
stop(" 'WellID' and 'SampleName' column required in plateData object")
|
||||
}
|
||||
|
||||
|
||||
plateData[, WellID := paste0(
|
||||
sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
sprintf(
|
||||
"%02d", as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
)
|
||||
)]
|
||||
|
||||
plateData <- platetools::fill_plate(plateData, "WellID", plate = 384)
|
||||
|
||||
plateData$column <- factor(
|
||||
sprintf(
|
||||
"%02d",
|
||||
as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
),
|
||||
levels = sprintf("%02d", seq(1, layout[2]))
|
||||
)
|
||||
plateData$row <- factor(sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
levels = LETTERS[seq(1, layout[1])])
|
||||
|
||||
if (!is.null(valueVariable)){
|
||||
plateData[, values := as.character(plateData[, ..valueVariable][[1]])]
|
||||
valueVar <- "values"
|
||||
}else{
|
||||
plateData[, values := "grey"]
|
||||
valueVar <- "values"
|
||||
colours <- setNames("grey", "grey")
|
||||
}
|
||||
|
||||
|
||||
if (is.null(colours)) {
|
||||
|
||||
blues <- colorRampPalette(c("#d6e0ff", "#2171B5"))
|
||||
greens <- colorRampPalette(c("light green", "dark green"))
|
||||
|
||||
numLevels <- sort(as.numeric(as.character(unique(plateData[, values])[
|
||||
grepl(
|
||||
"^[[:digit:]]+([.][[:digit:]]+)?$",
|
||||
trimws(unique(plateData[, values]))
|
||||
)
|
||||
])))
|
||||
otherLevels <- sort(as.character(unique(plateData[, values])[
|
||||
!grepl(
|
||||
"^[[:digit:]]+([.][[:digit:]]+)?$",
|
||||
trimws(unique(plateData[,values]))
|
||||
)
|
||||
]))
|
||||
|
||||
colours <- c(blues(length(numLevels)), greens(length(otherLevels)), "red")
|
||||
names(colours) <- c(numLevels, otherLevels, "failed")
|
||||
}
|
||||
|
||||
if (!is.null(textVariable)) {
|
||||
plateData[,
|
||||
Label := do.call(paste, c(.SD, sep = "\n ")),
|
||||
.SDcols = textVariable
|
||||
]
|
||||
plateData[, Label := gsub("-", "-\n", Label)]
|
||||
plateData[, Label := gsub("_", "_\n", Label)]
|
||||
textVar <- "Label"
|
||||
} else {
|
||||
textVar <- NULL
|
||||
}
|
||||
|
||||
|
||||
if (is.null(breaks)){
|
||||
breaks <- seq_len(length(colours))
|
||||
}
|
||||
|
||||
plateLayout(
|
||||
plateData = plateData, valueVariable = valueVar,
|
||||
textVariable = textVar, plateName = plateName,
|
||||
breaks = breaks, colourWellText = colourWellText,
|
||||
legend.title = legend.title, layout = layout,
|
||||
colours = colours, plot.title = plot.title,
|
||||
textFontSize = textFontSize, ...
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#' Create a heatmap of values in a plateLayout view. The values can be
|
||||
#' library sizes, number of genes, qcScore (0/1) or a factor.
|
||||
#' @param plateData A data.table of the values to be visualized with
|
||||
#' at least the column of interest (specified in 'varOfInterest')
|
||||
#' and a 'WellID' column indicating the wells in the plate. The WellID
|
||||
#' is a combination of a letter (row in the plate) and an integer
|
||||
#' (column in the plate).
|
||||
#' @param valueVariable The name of the variable in 'plateData'
|
||||
#' to be visualized in a plate layout
|
||||
#' @param textVariable The name of the variable in 'plateData'
|
||||
#' to be shown in the wells of the plate layout. Defaults to the
|
||||
#' valueVariable and if NULL, no text will be displayed.
|
||||
#' @param breaks Numeric vector indicating breaks for plot coloring.
|
||||
#' @param colours Colours to be used for levels specified by
|
||||
#' the breaks. If NULL, a colour scheme of purples is shown.
|
||||
#' @param colourWellText Colour to display the text in the wells.
|
||||
#' @param layout Integer vector of length two with number of rows
|
||||
#' and colums in a plate, e.g. \code{c(16,24)}
|
||||
#' @param makeContourColours Logical, whether or not the plate
|
||||
#' layout will contain a contour colours for the wells based on the
|
||||
#' parameters in 'contourColours' and 'categories'
|
||||
#' @param contourVariable The variable used for the contour colouring
|
||||
#' @param contourColours Character vector specifying a colour for
|
||||
#' each range in 'categories'
|
||||
#' @param labelsCategories Character vector specifying the names
|
||||
#' (labels) for each range in 'categories'
|
||||
#' @param categories if contour Variable is not a factor, a numeric
|
||||
#' vector specifying the categories to divide the 'varOfInterest',
|
||||
#' including the lower and upper limits.
|
||||
#' @param plateName The plate name
|
||||
#' @param plot.title A title for the plot, will be contracted with
|
||||
#' the plate name
|
||||
#' @param legend.title A title for the legend
|
||||
#' @param displayHeatmap Logical, whether to display the plateLayout heatmap
|
||||
#' @param saveHeatmap Logical, whether to save the plateLayout heatmap
|
||||
#' @param outputDir The directory where the plateLayout heatmap should be saved
|
||||
#' @param prefix The prefix to the file name of the saved plateLayout heatmap
|
||||
#' @param ... additional arguments for \code{ComplexHeatmap::Heatmap} function
|
||||
#' @importFrom platetools fill_plate
|
||||
#' @importFrom RColorBrewer brewer.pal
|
||||
#' @importFrom ComplexHeatmap Heatmap
|
||||
#' @importFrom circlize colorRamp2
|
||||
#' @importFrom grid grid.text grid.rect gpar legendGrob gpar
|
||||
#' @importFrom grDevices dev.off png
|
||||
#' @importFrom graphics title
|
||||
#' @export
|
||||
plateLayout <- function(
|
||||
plateData, valueVariable, textVariable = valueVariable,
|
||||
breaks = NULL, colours = NULL, colourWellText = "white", textFontSize = 6,
|
||||
layout = c(16, 24), makeContourColours = FALSE, contourVariable = character(),
|
||||
contourColours = c("red", "orange", "seagreen3"),lwdContours = c(1, 1, 1),
|
||||
labelsCategories = c('1', '2', '3'), categories = NULL, plateName = character(),
|
||||
plot.title = character(), legend.title = NULL, legendFontSize = 15,
|
||||
row_split = rep("A", 16), col_split = rep("A", 24), legendFontSizeTitle = 15,
|
||||
displayHeatmap = TRUE, saveHeatmap = FALSE, outputDir = ".", prefix = ""
|
||||
) {
|
||||
WellID <- NULL
|
||||
if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
|
||||
stop(" 'WellID' and 'SampleName' column required in plateData object")
|
||||
}
|
||||
|
||||
|
||||
plateData[, WellID := paste0(
|
||||
sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
sprintf(
|
||||
"%02d",
|
||||
as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
)
|
||||
)]
|
||||
|
||||
plateData <- platetools::fill_plate(plateData, "WellID", plate = 384)
|
||||
|
||||
plateData$column <- factor(
|
||||
sprintf("%02d", as.numeric(
|
||||
sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID)
|
||||
)),
|
||||
levels = sprintf("%02d", seq(1, layout[2]))
|
||||
)
|
||||
plateData$row <- factor(sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
levels = LETTERS[seq(1, layout[1])])
|
||||
|
||||
|
||||
plateValues <- plateLayoutFormat(
|
||||
plateData,
|
||||
varOfInterest = valueVariable,
|
||||
rows = layout[1],
|
||||
cols = layout[2]
|
||||
)
|
||||
if (!is.null(textVariable)) {
|
||||
plateText <- plateLayoutFormat(
|
||||
plateData, varOfInterest = textVariable,
|
||||
rows = layout[1],
|
||||
cols = layout[2]
|
||||
)
|
||||
}
|
||||
plot.title <- gsub(
|
||||
"^([a-z])", "\\U\\1",
|
||||
gsub("([A-Z])", " \\1",
|
||||
plot.title, perl = TRUE), perl = TRUE
|
||||
)
|
||||
mainTitle <- paste0(plot.title, plateName)
|
||||
plateContourColours <- matrix("", nrow = layout[1], ncol = layout[2])
|
||||
|
||||
if (makeContourColours) {
|
||||
contourData <- plateData[WellType %in% c("nonEmpty", "Treated Wells"), ]
|
||||
|
||||
if (is.numeric(contourData[, ..contourVariable][[1]])) {
|
||||
contourData$contours <- cut(
|
||||
contourData[, ..contourVariable][[1]],
|
||||
categories, left = TRUE,
|
||||
right = TRUE,
|
||||
labels = labelsCategories)
|
||||
}
|
||||
else {
|
||||
contourData$contours <- contourData[, ..contourVariable][[1]]
|
||||
}
|
||||
names(contourColours) <- labelsCategories
|
||||
names(lwdContours) <- labelsCategories
|
||||
for (i in seq_len(layout[1])) {
|
||||
for (j in seq_len(layout[2])) {
|
||||
tryCatch({
|
||||
sampleHit <- which(
|
||||
as.character(contourData$WellID) == paste0(
|
||||
LETTERS[i], sprintf("%02d", j)
|
||||
)
|
||||
)
|
||||
if (length(sampleHit) == 1) {
|
||||
plateContourColours[i, j] <- as.character(
|
||||
contourData[sampleHit,'contours'][[1]]
|
||||
)
|
||||
}
|
||||
},
|
||||
error = function(e) {
|
||||
print(paste0(LETTERS[i], sprintf("%02d", j), " is missing."))
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plateValues$contours <- plateContourColours
|
||||
colnames(plateValues$values) <- seq_len(ncol(plateValues$values))
|
||||
|
||||
if (is.null(breaks)) {
|
||||
breakValues <- plateValues$values
|
||||
breakValues[which(is.na(breakValues))] <- 0
|
||||
if (all(breakValues >= 0)) {
|
||||
breaks <- computeBreaks(7, max(plateValues$values, na.rm = TRUE))
|
||||
} else {
|
||||
breaks <- quantile(plateValues$values, probs = seq(0, 1, 0.125))
|
||||
}
|
||||
}
|
||||
|
||||
if (is.null(colours)) {
|
||||
colours <- tryCatch({
|
||||
colorRamp2(
|
||||
breaks = breaks,
|
||||
colors = brewer.pal(length(breaks), "Purples")
|
||||
)
|
||||
},
|
||||
error = function(cond) {
|
||||
return(c("#9370DB", "white"))
|
||||
})
|
||||
}
|
||||
ht <- Heatmap(
|
||||
plateValues$values,
|
||||
column_title = mainTitle, column_title_side = "top",
|
||||
rect_gp = gpar(lwd = 0.4),
|
||||
cluster_rows = FALSE, cluster_columns = FALSE,
|
||||
col = colours, row_title = NULL,
|
||||
row_split = row_split, column_split = col_split,
|
||||
row_names_side = "left",
|
||||
cluster_row_slices = FALSE,
|
||||
cluster_column_slices = FALSE,
|
||||
show_heatmap_legend = TRUE,
|
||||
heatmap_legend_param = list(
|
||||
title = ifelse(
|
||||
is.null(legend.title),
|
||||
paste0(valueVariable, "\n"),
|
||||
paste0(legend.title, "\n")
|
||||
),
|
||||
grid_height = unit(9, "mm"), border = "black",
|
||||
labels_gp = gpar(fontsize = legendFontSize),
|
||||
title_gp = gpar(fontsize = legendFontSizeTitle)
|
||||
),
|
||||
cell_fun = function(j, i, x, y, width, height, fill) {
|
||||
if (is.na(plateValues$values[i, j])) {
|
||||
grid.rect(
|
||||
x, y, width, height,
|
||||
gp = gpar(fill = "white", alpha = 0.7, lwd = 0.7, col = "white")
|
||||
)
|
||||
}
|
||||
else if (!is.null(textVariable)) {
|
||||
grid.text(
|
||||
plateText$values[i, j], x, y,
|
||||
just = "centre",
|
||||
gp = gpar(fontsize = textFontSize, col = colourWellText)
|
||||
)
|
||||
}
|
||||
if (makeContourColours) {
|
||||
if (!is.na(plateValues$contours[i, j])) {
|
||||
grid.rect(
|
||||
x, y, width, height,
|
||||
gp = gpar(
|
||||
col = contourColours[as.character(plateValues$contours[i, j])],
|
||||
fill = NA,
|
||||
lwd = lwdContours[as.character(plateValues$contours[i, j])]
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
if (displayHeatmap) {
|
||||
print(ht)
|
||||
}
|
||||
if (saveHeatmap) {
|
||||
png(
|
||||
file.path(
|
||||
outputDir,
|
||||
paste0(prefix,gsub(" |-", "",plot.title), "_", plateName, ".png")
|
||||
),
|
||||
width = 30, height = 10, units = "cm", res = 1200
|
||||
)
|
||||
print(ht)
|
||||
dev.off()
|
||||
}
|
||||
|
||||
return(ht)
|
||||
}
|
||||
|
||||
|
||||
#' Return numerical matrix with number of reads that corresponds to the
|
||||
#' plate layout
|
||||
#' @param data A data.frame of the values to be visualized with at least
|
||||
#' the columnof interest (specified in 'varOfInterest') and a 'WellID' column
|
||||
#' indicating the wells in the plate. The WellID is a combination of a
|
||||
#' letter (row in the plate) and an integer (column in the plate).
|
||||
#' @param varOfInterest The name of the variable in 'data' to be visualized
|
||||
#' in a plate layout
|
||||
#' @param rows number of rows in a plate layout
|
||||
#' @param cols number of columns in a plate layout
|
||||
#' @param verbose if \code{TRUE}, samples missing from the plate
|
||||
#' will be reported
|
||||
#' @export
|
||||
plateLayoutFormat <- function(
|
||||
data, varOfInterest,
|
||||
rows = 16, cols = 24,
|
||||
verbose = FALSE
|
||||
) {
|
||||
plateValues <- matrix(NA, nrow = rows, ncol = cols)
|
||||
for (i in seq_len(rows)) {
|
||||
for (j in seq_len(cols)) {
|
||||
tryCatch({
|
||||
sampleHit <- which(
|
||||
as.character(data$WellID) == paste0(LETTERS[i], sprintf("%02d", j))
|
||||
)
|
||||
if(length(sampleHit) == 1){
|
||||
plateValues[i, j] <- data[sampleHit, ..varOfInterest][[1]]
|
||||
}
|
||||
},
|
||||
error = function(e) {
|
||||
if (verbose == TRUE) {
|
||||
print(paste0(LETTERS[i], sprintf("%02d", j), " is missing."))
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
row.names(plateValues) <- LETTERS[1:rows]
|
||||
return(list("values" = plateValues))
|
||||
}
|
||||
|
||||
|
||||
|
||||
#' Helper function to automate break selection for raw count data
|
||||
#'
|
||||
#' This function creates an exponentially increasing vector for given number
|
||||
#' breaks between zero and some element of choice. It is particularly useful for
|
||||
#' raw counts or raw counts per million.
|
||||
#'
|
||||
#' @param nBreaks Number of breaks to be generated
|
||||
#' @param maxElement Maximum value of data entries
|
||||
#' @export
|
||||
computeBreaks <- function(nBreaks, variable) {
|
||||
|
||||
maxElement <- max(variable, na.rm = TRUE)
|
||||
if (length(unique(variable)) == 1) {
|
||||
breaks <- c(0, 0.5, ifelse(maxElement < 1, 1, maxElement))
|
||||
} else {
|
||||
coefSystem <- solve(
|
||||
rbind(c(1, 1), c(1, (nBreaks - 1)))) %*% c(0, log(maxElement)
|
||||
)
|
||||
coefExp <- c(exp(coefSystem[1]), coefSystem[2])
|
||||
breaks <- coefExp[1] * exp((1:(nBreaks - 1)) * coefExp[2])
|
||||
}
|
||||
return(c(0, breaks))
|
||||
}
|
||||
977
target/executable/report/create_report/template.Rmd
Executable file
977
target/executable/report/create_report/template.Rmd
Executable file
@@ -0,0 +1,977 @@
|
||||
---
|
||||
title: "Exploratory Data Report"
|
||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||
editor_options:
|
||||
chunk_output_type: console
|
||||
output:
|
||||
oaStyle::html_report
|
||||
# parameters which are overwritten by the script
|
||||
params:
|
||||
outputDir: 'output/'
|
||||
esets:
|
||||
- sample1.rds
|
||||
- sample2.rds
|
||||
---
|
||||
|
||||
<!---
|
||||
Copy this template in your working directory (where you want to run the report).
|
||||
This template can be used as a starting document to run a preliminary DRUGseq report
|
||||
-->
|
||||
|
||||
<!---
|
||||
Use full page width
|
||||
-->
|
||||
|
||||
<style type="text/css">
|
||||
div.main-container {
|
||||
max-width: 1600px !important;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
|
||||
```{r params, eval = TRUE, include = FALSE}
|
||||
outputDir <- params$outputDir
|
||||
esets <- params$esets
|
||||
```
|
||||
|
||||
|
||||
```{r outputDir, echo = FALSE}
|
||||
## Required: ABSOLUTE outputDir
|
||||
outputDir <- file.path(outputDir)
|
||||
|
||||
# When working on a windows computer it should be
|
||||
# "/Users/..." instead of "C:/Users/..."
|
||||
if (.Platform$OS.type == "windows") {
|
||||
outputDir <- paste0(
|
||||
"/",
|
||||
paste(
|
||||
unlist(strsplit(outputDir, split = "/"))[-1], collapse = "/"
|
||||
),
|
||||
"/"
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```{r optionsChunkDoNotModify, echo = FALSE, message = FALSE, warning=FALSE}
|
||||
|
||||
## Chunk with options for knitr. This chunk should not be modified.
|
||||
knitr::opts_chunk$set(
|
||||
eval = TRUE,
|
||||
echo = FALSE,
|
||||
message = FALSE,
|
||||
cache = FALSE,
|
||||
warning = FALSE,
|
||||
error = FALSE,
|
||||
comment = NA, #"#",
|
||||
tidy = FALSE,
|
||||
collapse = TRUE,
|
||||
out.width = "100%",
|
||||
fig.width = 20,
|
||||
fig.height = 10,
|
||||
results = "asis")
|
||||
|
||||
knitr::opts_knit$set(root.dir = getwd())
|
||||
|
||||
options(warn = 1, width = 200)
|
||||
|
||||
```
|
||||
|
||||
```{r libraries_and_functions}
|
||||
source("plateLayouts.R")
|
||||
library(ComplexHeatmap)
|
||||
library(data.table)
|
||||
library(ggplot2)
|
||||
library(knitr)
|
||||
library(Biobase)
|
||||
library(gridExtra)
|
||||
library(RColorBrewer)
|
||||
```
|
||||
|
||||
|
||||
```{r dataImport}
|
||||
|
||||
# Create esetList
|
||||
esetList <- sapply(
|
||||
esets, simplify = FALSE,
|
||||
USE.NAMES = TRUE,
|
||||
function(eset_raw) {
|
||||
if (!file.exists(eset_raw)) {
|
||||
stop(paste0("Provided path '", eset_raw, "' is not a file."))
|
||||
}
|
||||
eset <- readRDS(eset_raw)
|
||||
}
|
||||
)
|
||||
pools <- sapply(esetList, function(eset) {
|
||||
unique(eset$PoolName)
|
||||
})
|
||||
names(esetList) <- unlist(pools)
|
||||
|
||||
# Create qcData
|
||||
pDataList <- lapply(esetList, function(eset) data.table(pData(eset)))
|
||||
qcData <- rbindlist(pDataList, fill = TRUE)
|
||||
|
||||
textVars <- "SampleName"
|
||||
annotationVar <- "PoolName"
|
||||
|
||||
if (!"SampleName" %in% names(qcData)) {
|
||||
qcData[, SampleName := paste0(PoolName, "_", WellBC)]
|
||||
}
|
||||
qcData[, log10LibSize := round(log10(NumberOfInputReads))]
|
||||
qcData[, (annotationVar) := lapply(.SD, as.factor), .SDcols = annotationVar]
|
||||
|
||||
|
||||
colourList <- list()
|
||||
Design_levels <- sort(
|
||||
as.character(unique(qcData[, ..annotationVar][[1]])),
|
||||
decreasing = TRUE
|
||||
)
|
||||
|
||||
if (length(Design_levels) == 1) {
|
||||
colours <- c("#d6e0ff", "lightgrey")
|
||||
names(colours) <- c(Design_levels, "Empty")
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
}else if (length(Design_levels) == 2) {
|
||||
colours <- c("#d6e0ff", "#FF9999")
|
||||
|
||||
names(colours) <- c(Design_levels)
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
} else if (length(Design_levels) <= 20) {
|
||||
|
||||
if (length(Design_levels) > 12) {
|
||||
colours <- c(
|
||||
brewer.pal(12, "Set3"),
|
||||
brewer.pal((length(Design_levels) - 12),
|
||||
"Pastel2")
|
||||
)
|
||||
} else {
|
||||
colours <- c(brewer.pal(length(Design_levels), "Set3"))
|
||||
}
|
||||
|
||||
names(colours) <- c(Design_levels)
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
} else {
|
||||
colours <- c("#d6e0ff")
|
||||
names(colours) <- c("nonEmpty")
|
||||
colourList[[annotVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotVar,
|
||||
"text" = annotVar
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
# Pool Description
|
||||
|
||||
Per pool within this study, there are several pool layout plots shown, based on the
|
||||
|
||||
* number of STAR input reads (= library size)
|
||||
|
||||
* log10 transformed number of STAR input reads
|
||||
|
||||
* number of detected UMIs
|
||||
|
||||
* number of detected genes
|
||||
|
||||
* number of chromosomal reads
|
||||
|
||||
* percentage of ERCC
|
||||
|
||||
* percentage of mitochondria
|
||||
|
||||
|
||||
> The values for the different samples within each pool is expected to be comparable if the content of the different pools is equally diverse.
|
||||
|
||||
```{r plateAnnotation, out.width = "100%",fig.width = 20, fig.height= 10}
|
||||
|
||||
plateVars <- c("NumberOfInputReads", "log10LibSize", "NumberOfMappedReads",
|
||||
"NumberOfChromReads", "NumberOfUMIs", "NumberOfGenes",
|
||||
"pctMT", "pctERCC")
|
||||
|
||||
breaksVars <- lapply(
|
||||
plateVars,
|
||||
function(var) {
|
||||
computeBreaks(7, qcData[, ..var])
|
||||
}
|
||||
)
|
||||
names(breaksVars) <- plateVars
|
||||
|
||||
for (pool in pools){
|
||||
cat("\n\n")
|
||||
cat(paste0("## ", pool, " {.tabset} \n\n"))
|
||||
poolData <- qcData[PoolName == pool]
|
||||
lapply(plateVars, function(plateVar) {
|
||||
cat("\n\n")
|
||||
cat(sprintf("### %s {.unnumbered}", plateVar))
|
||||
cat("\n\n")
|
||||
plateLayout(
|
||||
poolData, valueVariable = plateVar,
|
||||
textFontSize = 10, legendFontSize = 12,
|
||||
plateName = pool, plot.title = "libSize - ",
|
||||
legend.title = "libSize", breaks = breaksVars[[plateVar]]
|
||||
)
|
||||
cat("\n\n")
|
||||
})
|
||||
cat("\n\n")
|
||||
}
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
|
||||
# Data Distributions
|
||||
|
||||
|
||||
## Reads Distributions {.tabset}
|
||||
|
||||
The 4 box plots below represent the distributions per pool of the different samples based on:
|
||||
|
||||
* the number of STAR input reads
|
||||
|
||||
* the number of STAR mapped reads
|
||||
|
||||
* the percentage of STAR mapped reads
|
||||
|
||||
* the number of detected genes
|
||||
|
||||
> The distributions contribute to the QC metrics mentioned in Par 3. The higher these values, the better.
|
||||
> The data range for the different plates is expected to be comparable if the content of the different plates is equally diverse.
|
||||
|
||||
|
||||
### Number of Input Reads {.tabset .unnumbered}
|
||||
|
||||
```{r settings_1}
|
||||
|
||||
nColPlots = 1
|
||||
figHeight = 7
|
||||
|
||||
```
|
||||
|
||||
#### Distribution {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r boxplots_input_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(
|
||||
x = PoolName,
|
||||
y = NumberOfInputReads, colour = PoolName
|
||||
)
|
||||
) + geom_boxplot() + ylab("Number of Input Reads") +
|
||||
ggtitle("Number of Input Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
### Number of Mapped Reads {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_mapped_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
#### pct Mapped Reads {.unnumbered}
|
||||
|
||||
```{r boxplots_pctMapped_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = PctMappedReads, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
ylab("pct Mapped Reads") +
|
||||
ggtitle("pct Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Number of Chromosomal Reads {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_chrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfChromReads, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of Chromosomal Reads") +
|
||||
ggtitle("Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
#### pct Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r boxplots_pctChrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = pctChrom, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("pct Chromosomal Reads") +
|
||||
ggtitle("pct Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Number of UMIs {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r boxplots_umi_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfUMIs, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of UMIs") +
|
||||
ggtitle('Number of UMIs') +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
#### Density distribution {.unnumbered}
|
||||
|
||||
```{r density_numberOfUMIs}
|
||||
|
||||
## Pre-filtering data exploration
|
||||
dt_plot <- melt(
|
||||
qcData,
|
||||
id.vars = c("SampleName", "PoolName", "WellID"),
|
||||
measure.vars = c("NumberOfInputReads", "NumberOfMappedReads", "NumberOfUMIs")
|
||||
)
|
||||
|
||||
readsDensity_plot <- ggplot(dt_plot, aes(value))
|
||||
readsDensity_plot <- readsDensity_plot +
|
||||
geom_density(aes(fill = variable), alpha=0.8) +
|
||||
facet_grid(~ PoolName, scales = "free_x", space = "fixed", drop = TRUE) +
|
||||
geom_vline(
|
||||
xintercept = 5e5,
|
||||
linetype = "dashed",
|
||||
color = "steelblue3", size = 2
|
||||
) +
|
||||
annotate(
|
||||
"text",
|
||||
x = 3.5e5, y = 2e-6, label = "500k",
|
||||
angle = 90, color = "steelblue3", size = 10
|
||||
) +
|
||||
geom_vline(
|
||||
xintercept = 1.5e6, linetype = "dashed",
|
||||
color = "forestgreen", size = 2
|
||||
) +
|
||||
annotate(
|
||||
"text", x = 1.35e6, y = 2e-6, label = "1.5M",
|
||||
angle = 90, color = "forestgreen", size = 10
|
||||
) +
|
||||
labs(
|
||||
title = "Density plot",
|
||||
subtitle = paste0(
|
||||
"# Samples with NumberOfMappedReads > 1.5M: ",
|
||||
length(which(qcData$NumberOfMappedReads > 1.5e6)),
|
||||
"\n# Samples with NumberOfUMIs > 500k: ",
|
||||
length(which(qcData$NumberOfUMIs > 5e5))
|
||||
),
|
||||
caption = paste0("# Total samples (after removing empty): ", nrow(qcData)),
|
||||
x = "Count",
|
||||
fill = "Variable"
|
||||
) +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 5),
|
||||
axis.text.x = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
plot.subtitle = element_text(size = 17),
|
||||
plot.caption = element_text(size = 15),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.y = element_blank(),
|
||||
axis.ticks.y = element_blank(),
|
||||
axis.title.y = element_blank()
|
||||
)
|
||||
readsDensity_plot
|
||||
|
||||
```
|
||||
|
||||
### Number of Genes {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_genes_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfGenes, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() + ylab("Number of Genes") +
|
||||
ggtitle("Number of Genes") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
## {.tabset .toc-ignore .unnumbered}
|
||||
|
||||
|
||||
In addition, several plots are shown visualizing the efficiency of the reads-to-genes translation:
|
||||
|
||||
* the number of input reads vs the number of mapped reads
|
||||
|
||||
* the number of chromosomal reads vs the number of mapped reads
|
||||
|
||||
* the number of mapped reads per UMI vs the number of mapped reads
|
||||
|
||||
* the number of UNI vs the number of mapped reads
|
||||
|
||||
* the number of mapped reads vs the number of genes
|
||||
|
||||
* the number of chromosomal reads vs the number of genes
|
||||
|
||||
* the number of mapped reads per UMI vs the number of genes
|
||||
|
||||
### Mapping Efficiency {.tabset .unnumbered}
|
||||
|
||||
#### Number of Input Reads {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_1_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfInputReads, y = NumberOfMappedReads, colour = PoolName)
|
||||
) +
|
||||
geom_point() +
|
||||
xlab("Number of Input Reads") +
|
||||
ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Mapped Reads vs Number of Input Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
#### Number of Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_2_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_point() +
|
||||
xlab("Number of Chromosomal Reads") + ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Chromosomal Reads vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
#### Number of UMI {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_4_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x =NumberOfUMIs, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Mapped Reads") + xlab("Number of UMIs ") +
|
||||
ggtitle("Number of UMIs vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
### Counting Efficiency {.tabset .unnumbered}
|
||||
|
||||
#### Number of Mapped Reads {.unnumbered}
|
||||
|
||||
```{r gene_efficiency_1_plate, fig.height = 7}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfMappedReads, y = NumberOfGenes, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Genes") + xlab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Genes vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
#### Number of Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r gene_efficiency_2_plate, fig.height = 7}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = NumberOfGenes, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Genes") + xlab("Number of Chromosomal Reads") +
|
||||
ggtitle("Number of Genes vs Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Sequencing Saturation {.tabset}
|
||||
|
||||
The barplots below represent the sequencing saturation per sample as determined by STAR, split per pool.
|
||||
The HT-RNAseq platform aims for shallow sequencing resulting in relatively low sequencing saturations of 10-20%.
|
||||
In addition, the sequencing saturation vs the number of input reads is shown.
|
||||
|
||||
### Sequencing Saturation {.unnumbered}
|
||||
|
||||
|
||||
|
||||
```{r sequencingSaturation, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = WellID, y = SequencingSaturation, fill = PoolName)
|
||||
) + geom_bar(stat = "identity", position = "dodge") +
|
||||
xlab("Samples") + ggtitle("Sequencing Saturation per Sample") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(1, "lines"),
|
||||
text = element_text(size = 10),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.text.y = element_text(size = 15),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Sequencing Saturation - Input Reads {.unnumbered}
|
||||
|
||||
|
||||
```{r sequencingSaturation_inputReads, fig.height = figHeight}
|
||||
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfInputReads, y = SequencingSaturation, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ggtitle("Sequencing Saturation vs Number of Input Reads") +
|
||||
theme(strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
### Sequencing Saturation - Mapped Reads {.unnumbered}
|
||||
|
||||
```{r sequencingSaturation_mappedReads, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = SequencingSaturation, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ggtitle("Sequencing Saturation vs Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size=10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size=18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
## Genomic Origin {.tabset}
|
||||
|
||||
The 3 boxplots below represent, per pool, the distributions of the percentage of reads mapping to:
|
||||
|
||||
* chromosomal regions
|
||||
|
||||
* mitochondrial regions
|
||||
|
||||
* ERCC spike-ins
|
||||
|
||||
The 4th plot summarises the above results across samples per pool.
|
||||
|
||||
The 5th plot shows the percentage of reads mapped to the transcriptome (as counted by STAR). This measurement serves as a proxy for the percentage of reads mapped to exons.
|
||||
|
||||
> The percentage ERCC contributes to the QC metrics mentioned in Par 3. This value is ideally as low as possible (but non-zero to ensure the they have been spiked in) and comparable for the different pools.
|
||||
|
||||
|
||||
|
||||
|
||||
### pctChrom {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r genomicOrigin_chrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData, aes(x = PoolName, y = pctChrom, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
ggtitle("pctChrom") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
### pctMT {.tabset .unnumbered}
|
||||
|
||||
```{r genomicOrigin_mt_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = pctMT, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() + ggtitle("pctMT") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### pctERCC {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r genomicOrigin_ercc_plate, fig.height = figHeight}
|
||||
ggplot(qcData, aes(x = PoolName, y = pctERCC, colour = PoolName)) +
|
||||
geom_boxplot() +
|
||||
ggtitle("pctERCC") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
### Genomic Summary {.tabset .unnumbered}
|
||||
|
||||
|
||||
|
||||
```{r genomicOrigin_summary_plate}
|
||||
meanPctChromMTData <- qcData[, .(
|
||||
"pctChrom" = median(pctChrom),
|
||||
"pctMT" = median(pctMT),
|
||||
"pctERCC" = median(pctERCC)
|
||||
), by = PoolName]
|
||||
meanPctChromMTDataLong <- melt(
|
||||
meanPctChromMTData,
|
||||
id.vars = "PoolName",
|
||||
measure.vars = c("pctChrom", "pctMT", "pctERCC"),
|
||||
variable.name = "Origin", value.name = "pct"
|
||||
)
|
||||
ggplot(
|
||||
meanPctChromMTDataLong,
|
||||
aes(fill = Origin, y = pct, x = PoolName)) +
|
||||
geom_bar(position = "stack", stat = "identity") +
|
||||
ggtitle("Genomic Origin") +
|
||||
theme(
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Depletion {.tabset}
|
||||
|
||||
<div align="center">
|
||||
```{r depletion}
|
||||
|
||||
|
||||
for (eset_name in pools) {
|
||||
cat("\n\n")
|
||||
cat(paste0("## ", eset_name, " {.unnumbered}"))
|
||||
cat("\n\n")
|
||||
|
||||
eset <- esetList[[eset_name]]
|
||||
average_reads <- sort(apply(exprs(eset), 1, mean), decreasing = TRUE)
|
||||
plotData <- data.table(
|
||||
ENSGID = names(average_reads),
|
||||
av_count = average_reads
|
||||
)
|
||||
|
||||
gen_descript <- data.table(
|
||||
ENSGID = eset@featureData@data$gene_id,
|
||||
Description = eset@featureData@data$GENENAME
|
||||
)
|
||||
order_gen_descript <- gen_descript[
|
||||
match(plotData$ENSGID, gen_descript$ENSGID),
|
||||
]
|
||||
|
||||
g <- ggplot(
|
||||
plotData[c(1:100)],
|
||||
aes(x = reorder(ENSGID, -av_count), y = av_count)
|
||||
) + geom_bar(stat = "identity") +
|
||||
theme(
|
||||
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),
|
||||
axis.text.y = element_text(size = 12),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 15),
|
||||
axis.title = element_text(size = 18),
|
||||
plot.title = element_text(size = 20)
|
||||
) + ylab("Average Counts") + xlab("Genes")
|
||||
|
||||
print(g)
|
||||
|
||||
cat("\n\n")
|
||||
cat("<br>")
|
||||
cat("<br>")
|
||||
|
||||
print(htmltools::tagList((DT::datatable(order_gen_descript[1:100, ]))))
|
||||
}
|
||||
```
|
||||
</div>
|
||||
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
# Glossary {.unnumbered}
|
||||
|
||||
|
||||
## Read {.unlisted .unnumbered}
|
||||
|
||||
A read is a oligonucleotide (a short RNA fragment) that has been sequenced. It consists of a fixed number of base pairs (bp) and therefore has a specific read length.
|
||||
|
||||
|
||||
|
||||
## Input Read {.unlisted .unnumbered}
|
||||
|
||||
Each read of the fastq file used as input to the STAR aligner is considered an input read.
|
||||
|
||||
|
||||
|
||||
## Read With Valid Barcode {.unlisted .unnumbered}
|
||||
|
||||
A read with a valid barcode is a read for which the barcode matches the white list of barcodes under the given restriction of the number of allowed mismatches. The number of reads with a valid barcode is lower or equal to the number of input reads.
|
||||
|
||||
|
||||
|
||||
## Mapped Read {.unlisted .unnumbered}
|
||||
|
||||
A read that has been aligned against the reference genome and for which one or more suitable matching locations have been found is a mapped read. Depending on the number of allowed mismatches this might or might not be be an exact match. The number of mapped reads is lower or equal to the number of reads with a valid barcode.
|
||||
|
||||
|
||||
|
||||
## Uniquely Mapped Read {.unlisted .unnumbered}
|
||||
|
||||
A read for which one and only one suitable matching location in the reference genome was found is an uniquely mapped read. The number of uniquely mapped reads is lower or equal to the number of mapped reads.
|
||||
|
||||
|
||||
|
||||
## Counted Read {.unlisted .unnumbered}
|
||||
|
||||
A mapped read will only be counted if it overlaps (1 nucleotide or more) with one and only one gene. The number of counted reads is lower or equal to the number of (uniquely) mapped reads.
|
||||
|
||||
|
||||
|
||||
## UMIs {.unlisted .unnumbered}
|
||||
|
||||
Unique molecular identifiers (UMI) are short sequences in order to uniquely tag each molecule in a sample library. Sequencing with UMIs allows bioinformatics software to filter out duplicate reads and PCR errors with a high level of accuracy and report unique reads.
|
||||
|
||||
The reported UMIs is the number of UMIs among the set of reads that map to an unique gene, i.e the number of reads is deduplicated.
|
||||
|
||||
|
||||
|
||||
## pctERCC {.unlisted .unnumbered}
|
||||
|
||||
The percentage of reads mapping to the ERCC genes among the total number of **mapped** reads.
|
||||
|
||||
|
||||
|
||||
## pctMT {.unlisted .unnumbered}
|
||||
|
||||
The percentage of reads mapping to the MT genes among the total number of **mapped** reads.
|
||||
|
||||
|
||||
|
||||
## Sequencing Saturation {.unlisted .unnumbered}
|
||||
|
||||
The sequencing saturation is a measure of the fraction of library complexity. The inverse of one minus the sequencing saturation can be interpreted as the number of additional reads it would take to detect a new transcript. Consequently, a low sequencing saturation indicates a shallow sequencing in which a new transcript could be discovered with a few reads.
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
<center>
|
||||

|
||||
</center>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
Reference in New Issue
Block a user