Build pipeline: viash-hub.rnaseq.main-p26ls
Source commit: d63250ae70
Source message: add checks
802 lines
44 KiB
Plaintext
802 lines
44 KiB
Plaintext
workflow run_wf {
|
|
|
|
take:
|
|
input_ch
|
|
|
|
main:
|
|
|
|
qc_ch = input_ch
|
|
|
|
// temporary fix to force assignment when alignment in skipped
|
|
| map {it}
|
|
|
|
// Feature biotype QC using featureCounts
|
|
| map { id, state ->
|
|
def biotype_in_gtf = biotypeInGtf(state.gtf, state.biotype)
|
|
def attribute_type = state.gencode ? "gene_type" : state.featurecounts_group_type
|
|
def strand = (state.strandedness == "forward") ? 1 : ((state.strandedness == "reverse") ? 2 : 0)
|
|
[ id, state + [biotype_in_gtf: biotype_in_gtf, attribute_type: attribute_type, strand: strand] ]
|
|
}
|
|
|
|
| featurecounts.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.biotype_in_gtf && !state.skip_align },
|
|
fromState: [
|
|
"paired": "paired",
|
|
"strand": "strand",
|
|
"annotation": "gtf",
|
|
"input": "genome_bam",
|
|
"attribute_type": "attribute_type",
|
|
"feature_type": "featurecounts_feature_type",
|
|
"count_read_pairs": "paired"
|
|
],
|
|
toState: [
|
|
"featurecounts": "counts",
|
|
"featurecounts_summary": "summary"
|
|
],
|
|
args: [
|
|
both_aligned: true,
|
|
same_strand: true
|
|
]
|
|
)
|
|
|
|
| multiqc_custom_biotype.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align },
|
|
fromState: [
|
|
"id": "id",
|
|
"biocounts": "featurecounts",
|
|
"biotypes_header": "biotypes_header"
|
|
],
|
|
toState: [
|
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
|
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc"
|
|
]
|
|
)
|
|
|
|
| preseq_lcextrap.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align },
|
|
fromState: [
|
|
"paired": "paired",
|
|
"input": "genome_bam",
|
|
"extra_preseq_args": "extra_preseq_args"
|
|
],
|
|
toState: [ "preseq_output": "output" ]
|
|
)
|
|
|
|
| rseqc_bamstat.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input_file": "genome_bam",
|
|
"mapq": "map_qual"
|
|
],
|
|
toState: [ "bamstat_output": "output" ]
|
|
)
|
|
| rseqc_inferexperiment.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input_file": "genome_bam",
|
|
"refgene": "gene_bed",
|
|
"sample_size": "sample_size",
|
|
"mapq": "map_qual"
|
|
],
|
|
toState: [ "strandedness_output": "output" ]
|
|
)
|
|
// Get predicted strandedness from the RSeQC infer_experiment.py output
|
|
| map { id, state ->
|
|
def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30)
|
|
def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true
|
|
[ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ]
|
|
}
|
|
| rseqc_inner_distance.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align },
|
|
key: "inner_distance",
|
|
fromState: [
|
|
"input_file": "genome_bam",
|
|
"refgene": "gene_bed",
|
|
"sample_size": "sample_size",
|
|
"mapq": "map_qual",
|
|
"lower_bound": "lower_bound_size",
|
|
"upper_bound": "upper_bound_size",
|
|
"step": "step_size"
|
|
],
|
|
toState: [
|
|
"inner_dist_output_stats": "output_stats",
|
|
"inner_dist_output_dist": "output_dist",
|
|
"inner_dist_output_freq": "output_freq",
|
|
"inner_dist_output_plot": "output_plot",
|
|
"inner_dist_output_plot_r": "output_plot_r"
|
|
]
|
|
)
|
|
| rseqc_junctionannotation.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input": "genome_bam",
|
|
"refgene": "gene_bed",
|
|
"map_qual": "map_qual",
|
|
"min_intron": "min_intron"
|
|
],
|
|
toState: [
|
|
"junction_annotation_output_log": "output_log",
|
|
"junction_annotation_output_plot_r": "output_plot_r",
|
|
"junction_annotation_output_junction_bed": "output_junction_bed",
|
|
"junction_annotation_output_junction_interact": "output_junction_interact",
|
|
"junction_annotation_output_junction_sheet": "output_junction_sheet",
|
|
"junction_annotation_output_splice_events_plot": "output_splice_events_plot",
|
|
"junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot"
|
|
]
|
|
)
|
|
| rseqc_junctionsaturation.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input": "genome_bam",
|
|
"refgene": "gene_bed",
|
|
"sampling_percentile_lower_bound": "sampling_percentile_lower_bound",
|
|
"sampling_percentile_upper_bound": "sampling_percentile_upper_bound",
|
|
"sampling_percentile_step": "sampling_percentile_step",
|
|
"min_intron": "min_intron",
|
|
"min_splice_read": "min_splice_read",
|
|
"map_qual": "map_qual"
|
|
],
|
|
toState: [
|
|
"junction_saturation_output_plot_r": "output_plot_r",
|
|
"junction_saturation_output_plot": "output_plot"
|
|
]
|
|
)
|
|
| rseqc_readdistribution.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input": "genome_bam",
|
|
"refgene": "gene_bed",
|
|
],
|
|
toState: [ "read_distribution_output": "output" ]
|
|
)
|
|
| rseqc_readduplication.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"input": "genome_bam",
|
|
"read_count_upper_limit": "read_count_upper_limit",
|
|
"map_qual": "map_qual"
|
|
],
|
|
toState: [
|
|
"read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r",
|
|
"read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot",
|
|
"read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping",
|
|
"read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence"
|
|
]
|
|
)
|
|
| rseqc_tin.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align },
|
|
fromState: [
|
|
"bam_input": "genome_bam",
|
|
"bai_input": "genome_bam_index",
|
|
"refgene": "gene_bed",
|
|
"minimum_coverage": "minimum_coverage",
|
|
"sample_size": "tin_sample_size",
|
|
"subtract_background": "subtract_background"
|
|
],
|
|
toState: [
|
|
"tin_output_summary": "output_tin_summary",
|
|
"tin_output_metrics": "output_tin"
|
|
]
|
|
)
|
|
|
|
| dupradar.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align },
|
|
fromState: [
|
|
"id": "id",
|
|
"input": "genome_bam",
|
|
"gtf_annotation": "gtf",
|
|
"paired": "paired",
|
|
"strandedness": "strandedness"
|
|
],
|
|
toState: [
|
|
"dupradar_output_dupmatrix": "output_dupmatrix",
|
|
"dupradar_output_dup_intercept_mqc": "output_dup_intercept_mqc",
|
|
"dupradar_output_duprate_exp_boxplot": "output_duprate_exp_boxplot",
|
|
"dupradar_output_duprate_exp_densplot": "output_duprate_exp_densplot",
|
|
"dupradar_output_duprate_exp_denscurve_mqc": "output_duprate_exp_denscurve_mqc",
|
|
"dupradar_output_expression_histogram": "output_expression_histogram",
|
|
"dupradar_output_intercept_slope": "output_intercept_slope"
|
|
]
|
|
)
|
|
|
|
// TODO: Add outdir as an output argument to the qualimap module on biobox.
|
|
// Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost.
|
|
| qualimap_rnaseq.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align },
|
|
fromState: [
|
|
"bam": "genome_bam",
|
|
"gtf": "gtf",
|
|
"num_pr_bases": "pr_bases",
|
|
"num_tr_bias": "tr_bias",
|
|
"algorithm": "algorithm",
|
|
"sequencing_protocol": "sequencing_protocol",
|
|
"sorted": "sorted",
|
|
"java_memory_size": "java_memory_size",
|
|
],
|
|
toState: [
|
|
"qualimap_report": "report",
|
|
"qualimap_qc_report": "qc_report",
|
|
"qualimap_counts": "counts"
|
|
]
|
|
)
|
|
|
|
merged_ch = qc_ch
|
|
| toSortedList
|
|
| map { list ->
|
|
def ids = list.collect { id, state -> id }
|
|
def strandedness = list.collect { id, state -> state.strandedness }
|
|
def num_trimmed_reads = list.collect { id, state -> state.num_trimmed_reads }
|
|
def passed_trimmed_reads = list.collect { id, state -> state.passed_trimmed_reads }
|
|
def passed_mapping = list.collect { id, state -> state.passed_mapping }
|
|
def percent_mapped = list.collect { id, state -> state.percent_mapped }
|
|
def inferred_strand = list.collect { id, state -> state.inferred_strand }
|
|
def passed_strand_check = list.collect { id, state -> state.passed_strand_check }
|
|
def gtf = list.collect { id, state -> state.gtf }.unique()[0]
|
|
def gtf_extra_attributes = list.collect { id, state -> state.gtf_extra_attributes }.unique()[0]
|
|
def gtf_group_features = list.collect { id, state -> state.gtf_group_features }.unique()[0]
|
|
def pca_header_multiqc = list.collect { id, state -> state.pca_header_multiqc }.unique()[0]
|
|
def clustering_header_multiqc = list.collect { id, state -> state.clustering_header_multiqc }.unique()[0]
|
|
def aligner = list.collect { id, state -> state.aligner }.unique()[0]
|
|
def pseudo_aligner = list.collect { id, state -> state.pseudo_aligner }.unique()[0]
|
|
def deseq2_vst = list.collect { id, state -> state.deseq2_vst }.unique()[0]
|
|
def extra_deseq2_args = list.collect { id, state -> state.extra_deseq2_args }.unique()[0]
|
|
def extra_deseq2_args2 = list.collect { id, state -> state.extra_deseq2_args2 }.unique()[0]
|
|
def skip_deseq2_qc = list.collect { id, state -> state.skip_deseq2_qc }.unique()[0]
|
|
def skip_qc = list.collect { id, state -> state.skip_qc }.unique()[0]
|
|
def skip_align = list.collect { id, state -> state.skip_align }.unique()[0]
|
|
def skip_pseudo_align = list.collect { id, state -> state.skip_pseudo_align }.unique()[0]
|
|
def quant_results = list.collect { id, state ->
|
|
(state.quant_results_file instanceof java.nio.file.Path && state.quant_results_file.exists()) ?
|
|
state.quant_results_file :
|
|
null }
|
|
def rsem_counts_gene = list.collect { id, state ->
|
|
(state.rsem_counts_gene instanceof java.nio.file.Path && state.rsem_counts_gene.exists()) ?
|
|
state.rsem_counts_gene :
|
|
null }
|
|
def rsem_counts_transcripts = list.collect { id, state ->
|
|
(state.rsem_counts_transcripts instanceof java.nio.file.Path && state.rsem_counts_transcripts.exists()) ?
|
|
state.rsem_counts_transcripts :
|
|
null }
|
|
def pseudo_quant_out_dir = list.collect { id, state ->
|
|
(state.pseudo_quant_out_dir instanceof java.nio.file.Path && state.pseudo_quant_out_dir.exists()) ?
|
|
state.pseudo_quant_out_dir :
|
|
null }
|
|
def pseudo_salmon_quant_results = list.collect { id, state ->
|
|
(state.pseudo_salmon_quant_results_file instanceof java.nio.file.Path && state.pseudo_salmon_quant_results_file.exists()) ?
|
|
state.pseudo_salmon_quant_results_file :
|
|
null }
|
|
def pseudo_kallisto_quant_results = list.collect { id, state ->
|
|
(state.pseudo_kallisto_quant_results_file instanceof java.nio.file.Path && state.pseudo_kallisto_quant_results_file.exists()) ?
|
|
state.pseudo_kallisto_quant_results_file :
|
|
null }
|
|
def fastqc_zip_1 = list.collect { id, state ->
|
|
(state.fastqc_zip_1 instanceof java.nio.file.Path && state.fastqc_zip_1.exists()) ?
|
|
state.fastqc_zip_1 :
|
|
null }
|
|
def fastqc_zip_2 = list.collect { id, state ->
|
|
(state.fastqc_zip_2 instanceof java.nio.file.Path && state.fastqc_zip_2.exists()) ?
|
|
state.fastqc_zip_2 :
|
|
null }
|
|
def trim_zip_1 = list.collect { id, state ->
|
|
(state.trim_zip_1 instanceof java.nio.file.Path && state.trim_zip_1.exists()) ?
|
|
state.trim_zip_1 :
|
|
null }
|
|
def trim_zip_2 = list.collect { id, state ->
|
|
(state.trim_zip_2 instanceof java.nio.file.Path && state.trim_zip_2.exists()) ?
|
|
state.trim_zip_2 :
|
|
null }
|
|
def trim_log_1 = list.collect { id, state ->
|
|
(state.trim_log_1 instanceof java.nio.file.Path && state.trim_log_1.exists()) ?
|
|
state.trim_log_1 :
|
|
null }
|
|
def trim_log_2 = list.collect { id, state ->
|
|
(state.trim_log_2 instanceof java.nio.file.Path && state.trim_log_2.exists()) ?
|
|
state.trim_log_2 :
|
|
null }
|
|
def sortmerna_multiqc = list.collect { id, state ->
|
|
(state.sortmerna_multiqc instanceof java.nio.file.Path && state.sortmerna_multiqc.exists()) ?
|
|
state.sortmerna_multiqc :
|
|
null }
|
|
def star_multiqc = list.collect { id, state ->
|
|
(state.star_multiqc instanceof java.nio.file.Path && state.star_multiqc.exists()) ?
|
|
state.star_multiqc :
|
|
null }
|
|
def genome_bam_stats = list.collect { id, state ->
|
|
(state.genome_bam_stats instanceof java.nio.file.Path && state.genome_bam_stats.exists()) ?
|
|
state.genome_bam_stats :
|
|
null }
|
|
def genome_bam_flagstat = list.collect { id, state ->
|
|
(state.genome_bam_flagstat instanceof java.nio.file.Path && state.genome_bam_flagstat.exists()) ?
|
|
state.genome_bam_flagstat :
|
|
null }
|
|
def genome_bam_idxstats = list.collect { id, state ->
|
|
(state.genome_bam_idxstats instanceof java.nio.file.Path && state.genome_bam_idxstats.exists()) ?
|
|
state.genome_bam_idxstats :
|
|
null }
|
|
def markduplicates_multiqc = list.collect { id, state ->
|
|
(state.markduplicates_multiqc instanceof java.nio.file.Path && state.markduplicates_multiqc.exists()) ?
|
|
state.markduplicates_multiqc :
|
|
null }
|
|
def salmon_multiqc = list.collect { id, state ->
|
|
(state.salmon_multiqc instanceof java.nio.file.Path && state.salmon_multiqc.exists()) ?
|
|
state.salmon_multiqc :
|
|
null }
|
|
def rsem_multiqc = list.collect { id, state ->
|
|
(state.rsem_multiqc instanceof java.nio.file.Path && state.rsem_multiqc.exists()) ?
|
|
state.rsem_multiqc :
|
|
null }
|
|
def pseudo_multiqc = list.collect { id, state ->
|
|
(state.pseudo_multiqc instanceof java.nio.file.Path && state.pseudo_multiqc.exists()) ?
|
|
state.pseudo_multiqc :
|
|
null }
|
|
def featurecounts_multiqc = list.collect { id, state ->
|
|
(state.featurecounts_multiqc instanceof java.nio.file.Path && state.featurecounts_multiqc.exists()) ?
|
|
state.featurecounts_multiqc :
|
|
null }
|
|
def featurecounts_rrna_multiqc = list.collect { id, state ->
|
|
(state.featurecounts_rrna_multiqc instanceof java.nio.file.Path && state.featurecounts_rrna_multiqc.exists()) ?
|
|
state.featurecounts_rrna_multiqc :
|
|
null }
|
|
def preseq_output = list.collect { id, state ->
|
|
(state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ?
|
|
state.preseq_output :
|
|
null }
|
|
// def qualimap_output_dir = list.collect { id, state ->
|
|
// (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ?
|
|
// state.qualimap_output_dir :
|
|
// null }
|
|
def dupradar_output_dup_intercept_mqc = list.collect { id, state ->
|
|
(state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ?
|
|
state.dupradar_output_dup_intercept_mqc :
|
|
null }
|
|
def dupradar_output_duprate_exp_denscurve_mqc = list.collect { id, state ->
|
|
(state.dupradar_output_duprate_exp_denscurve_mqc instanceof java.nio.file.Path && state.dupradar_output_duprate_exp_denscurve_mqc.exists()) ?
|
|
state.dupradar_output_duprate_exp_denscurve_mqc :
|
|
null }
|
|
def bamstat_output = list.collect { id, state ->
|
|
(state.bamstat_output instanceof java.nio.file.Path && state.bamstat_output.exists()) ?
|
|
state.bamstat_output :
|
|
null }
|
|
def inferexperiment_multiqc = list.collect { id, state ->
|
|
(state.strandedness_output instanceof java.nio.file.Path && state.strandedness_output.exists()) ?
|
|
state.strandedness_output :
|
|
null }
|
|
def inner_dist_output_freq = list.collect { id, state ->
|
|
(state.inner_dist_output_freq instanceof java.nio.file.Path && state.inner_dist_output_freq.exists()) ?
|
|
state.inner_dist_output_freq :
|
|
null }
|
|
def junction_annotation_output_log = list.collect { id, state ->
|
|
(state.junction_annotation_output_log instanceof java.nio.file.Path && state.junction_annotation_output_log.exists()) ?
|
|
state.junction_annotation_output_log :
|
|
null }
|
|
def junction_saturation_output_plot_r = list.collect { id, state ->
|
|
(state.junction_saturation_output_plot_r instanceof java.nio.file.Path && state.junction_saturation_output_plot_r.exists()) ?
|
|
state.junction_saturation_output_plot_r :
|
|
null }
|
|
def read_distribution_output = list.collect { id, state ->
|
|
(state.read_distribution_output instanceof java.nio.file.Path && state.read_distribution_output.exists()) ?
|
|
state.read_distribution_output :
|
|
null }
|
|
def read_duplication_output_duplication_rate_mapping = list.collect { id, state ->
|
|
(state.read_duplication_output_duplication_rate_mapping instanceof java.nio.file.Path && state.read_duplication_output_duplication_rate_mapping.exists()) ?
|
|
state.read_duplication_output_duplication_rate_mapping :
|
|
null }
|
|
def tin_output_summary = list.collect { id, state ->
|
|
(state.tin_output_summary instanceof java.nio.file.Path && state.tin_output_summary.exists()) ?
|
|
state.tin_output_summary :
|
|
null }
|
|
def multiqc_custom_config = list.collect { id, state -> state.multiqc_custom_config }.unique()[0]
|
|
["merged", [
|
|
ids: ids,
|
|
strandedness: strandedness,
|
|
num_trimmed_reads: num_trimmed_reads,
|
|
passed_trimmed_reads: passed_trimmed_reads,
|
|
passed_mapping: passed_mapping,
|
|
percent_mapped: percent_mapped,
|
|
inferred_strand: inferred_strand,
|
|
passed_strand_check: passed_strand_check,
|
|
skip_align: skip_align,
|
|
skip_pseudo_align: skip_pseudo_align,
|
|
quant_results: quant_results.findAll { it != null },
|
|
rsem_counts_gene: rsem_counts_gene.findAll { it != null },
|
|
rsem_counts_transcripts: rsem_counts_transcripts.findAll { it != null },
|
|
pseudo_quant_out_dir: pseudo_quant_out_dir.findAll { it != null },
|
|
pseudo_salmon_quant_results: pseudo_salmon_quant_results.findAll { it != null },
|
|
pseudo_kallisto_quant_results: pseudo_kallisto_quant_results.findAll { it != null },
|
|
gtf: gtf,
|
|
gtf_extra_attributes: gtf_extra_attributes,
|
|
gtf_group_features: gtf_group_features,
|
|
pca_header_multiqc: pca_header_multiqc,
|
|
clustering_header_multiqc: clustering_header_multiqc,
|
|
aligner: aligner,
|
|
pseudo_aligner: pseudo_aligner,
|
|
deseq2_vst: deseq2_vst,
|
|
extra_deseq2_args: extra_deseq2_args,
|
|
extra_deseq2_args2: extra_deseq2_args2,
|
|
skip_deseq2_qc: skip_deseq2_qc,
|
|
fastqc_zip: fastqc_zip_1 + fastqc_zip_2,
|
|
trim_zip: trim_zip_1 + trim_zip_2,
|
|
trim_log: trim_log_1 + trim_log_2,
|
|
sortmerna_multiqc: sortmerna_multiqc,
|
|
star_multiqc: star_multiqc,
|
|
genome_bam_stats: genome_bam_stats,
|
|
genome_bam_flagstat: genome_bam_flagstat,
|
|
genome_bam_idxstats: genome_bam_idxstats,
|
|
markduplicates_multiqc: markduplicates_multiqc,
|
|
salmon_multiqc: salmon_multiqc,
|
|
rsem_multiqc: rsem_multiqc,
|
|
pseudo_multiqc: pseudo_multiqc,
|
|
featurecounts_multiqc: featurecounts_multiqc,
|
|
featurecounts_rrna_multiqc: featurecounts_rrna_multiqc,
|
|
preseq_output: preseq_output,
|
|
// qualimap_output_dir: qualimap_output_dir,
|
|
dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc,
|
|
dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc,
|
|
bamstat_output: bamstat_output,
|
|
inner_dist_output_freq: inner_dist_output_freq,
|
|
inferexperiment_multiqc: inferexperiment_multiqc,
|
|
junction_annotation_output_log: junction_annotation_output_log,
|
|
junction_saturation_output_plot_r: junction_saturation_output_plot_r,
|
|
read_distribution_output: read_distribution_output,
|
|
read_duplication_output_duplication_rate_mapping: read_duplication_output_duplication_rate_mapping,
|
|
tin_output_summary: tin_output_summary,
|
|
multiqc_custom_config: multiqc_custom_config
|
|
] ]
|
|
}
|
|
|
|
// Merge quantification results of alignment
|
|
| merge_quant_results.run (
|
|
runIf: { id, state -> !state.skip_align && state.aligner == 'star_salmon' },
|
|
fromState: [
|
|
"salmon_quant_results": "quant_results",
|
|
"gtf": "gtf",
|
|
"gtf_extra_attributes": "gtf_extra_attributes",
|
|
"gtf_group_features": "gtf_group_features"
|
|
],
|
|
args: [ quant_type: "salmon"],
|
|
toState: [
|
|
"tpm_gene": "tpm_gene",
|
|
"counts_gene": "counts_gene",
|
|
"counts_gene_length_scaled": "counts_gene_length_scaled",
|
|
"counts_gene_scaled": "counts_gene_scaled",
|
|
"tpm_transcript": "tpm_transcript",
|
|
"counts_transcript": "counts_transcript",
|
|
"lengths_gene": "lengths_gene",
|
|
"lengths_transcript": "lengths_transcript",
|
|
"quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment"
|
|
],
|
|
key: "merge_quant_results"
|
|
)
|
|
|
|
| rsem_merge_counts.run (
|
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
|
fromState: [
|
|
"counts_gene": "rsem_counts_gene",
|
|
"counts_transcripts": "rsem_counts_transcripts"
|
|
],
|
|
toState: [
|
|
"tpm_gene": "merged_gene_tpm",
|
|
"counts_gene": "merged_gene_counts",
|
|
"tpm_transcript": "merged_transcript_tpm",
|
|
"counts_transcript": "merged_transcript_counts"
|
|
]
|
|
)
|
|
|
|
| deseq2_qc.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_align },
|
|
fromState: { id, state ->
|
|
def counts = (state.aligner == "star_rsem") ? state.counts_gene : state.counts_gene_length_scaled
|
|
[
|
|
counts: counts,
|
|
vst: state.deseq2_vst,
|
|
label: state.aligner
|
|
]
|
|
},
|
|
args: [count_col: 3, id_col: 1, outprefix: "deseq2"],
|
|
toState: [
|
|
"deseq2_output": "outdir",
|
|
"deseq2_pca_multiqc": "pca_multiqc",
|
|
"deseq2_dists_multiqc": "dists_multiqc"
|
|
],
|
|
key: "deseq2_qc_align_quant"
|
|
)
|
|
|
|
// Merge quantification results of pseudo alignment
|
|
| merge_quant_results.run (
|
|
runIf: { id, state -> !state.skip_pseudo_align },
|
|
fromState: [
|
|
"salmon_quant_results": "pseudo_salmon_quant_results",
|
|
"kallisto_quant_results": "pseudo_kallisto_quant_results",
|
|
"gtf": "gtf",
|
|
"gtf_extra_attributes": "gtf_extra_attributes",
|
|
"gtf_group_features": "gtf_group_features",
|
|
"quant_type": "pseudo_aligner"
|
|
],
|
|
toState: [
|
|
"pseudo_tpm_gene": "tpm_gene",
|
|
"pseudo_counts_gene": "counts_gene",
|
|
"pseudo_counts_gene_length_scaled": "counts_gene_length_scaled",
|
|
"pseudo_counts_gene_scaled": "counts_gene_scaled",
|
|
"pseudo_tpm_transcript": "tpm_transcript",
|
|
"pseudo_counts_transcript": "counts_transcript",
|
|
"pseudo_lengths_gene": "lengths_gene",
|
|
"pseudo_lengths_transcript": "lengths_transcript",
|
|
"pseudo_quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment"
|
|
],
|
|
key: "merge_pseudo_quant_results"
|
|
)
|
|
|
|
| deseq2_qc.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_deseq2_qc && !state.skip_pseudo_align },
|
|
fromState: [
|
|
"counts": "pseudo_counts_gene_length_scaled",
|
|
"vst": "deseq2_vst",
|
|
"label": "pseudo_aligner"
|
|
],
|
|
args: [count_col: 3, id_col: 1, outprefix: "deseq2"],
|
|
toState: [
|
|
"deseq2_output_pseudo": "outdir",
|
|
"deseq2_pca_multiqc_pseudo": "pca_multiqc",
|
|
"deseq2_dists_multiqc_pseudo": "dists_multiqc"
|
|
],
|
|
key: "deseq2_qc_pseuso_align_quant"
|
|
)
|
|
|
|
// Get list of samples that failed trimming, mapping, and strand check for MultiQC report
|
|
| map { id, state ->
|
|
def fail_trimming_header = ["Sample", "Reads after trimming"]
|
|
def fail_trimming_multiqc = ""
|
|
def star_mapping_header = ["Sample", "STAR uniquely mapped reads (%)"]
|
|
def fail_mapping_multiqc = ""
|
|
def strand_check_header = ["Sample", "Provided strandedness", "Inferred strandedness", "Sense (%)", "Antisense (%)", "Undetermined (%)"]
|
|
def fail_strand_multiqc = ""
|
|
if (state.ids.size() > 0) {
|
|
fail_trimming_multiqc += "${fail_trimming_header.join('\t')}\n"
|
|
fail_mapping_multiqc += "${star_mapping_header.join('\t')}\n"
|
|
fail_strand_multiqc += "${strand_check_header.join('\t')}\n"
|
|
for (i=0; i<state.ids.size(); i++) {
|
|
if (!state.passed_trimmed_reads[i]) {
|
|
tsv_data = [state.ids[i], state.num_trimmed_reads[i]].join('\t')
|
|
fail_trimming_multiqc += tsv_data.join('\n')
|
|
}
|
|
if (!state.passed_mapping[i]) {
|
|
tsv_data = [state.ids[i], state.percent_mapped[i]].join('\t')
|
|
fail_mapping_multiqc += tsv_data.join('\n')
|
|
}
|
|
if (!state.passed_strand_check[i]) {
|
|
tsv_data = ([state.ids[i], state.strandedness[i]] + state.inferred_strand[i]).join('\t')
|
|
fail_strand_multiqc += tsv_data.join('\n')
|
|
}
|
|
}
|
|
}
|
|
|
|
[ id, state + [fail_trimming_multiqc: fail_trimming_multiqc, fail_mapping_multiqc: fail_mapping_multiqc, fail_strand_multiqc: fail_strand_multiqc] ]
|
|
}
|
|
|
|
| map { id, state ->
|
|
state.each { key, value ->
|
|
if (value instanceof ArrayList) {
|
|
value.removeAll { it == null }
|
|
}
|
|
}
|
|
mod_state = state.findAll { key, value -> value != null }
|
|
[ id, mod_state ]
|
|
}
|
|
|
|
| prepare_multiqc_input.run(
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_multiqc },
|
|
fromState: [
|
|
"fail_trimming_multiqc": "fail_trimming_multiqc",
|
|
"fail_mapping_multiqc": "fail_mapping_multiqc",
|
|
"fail_strand_multiqc": "fail_strand_multiqc",
|
|
"fastqc_raw_multiqc": "fastqc_zip",
|
|
"fastqc_trim_multiqc": "trim_zip",
|
|
"trim_log_multiqc": "trim_log",
|
|
"sortmerna_multiqc": "sortmerna_multiqc",
|
|
"star_multiqc": "star_multiqc",
|
|
"salmon_multiqc": "salmon_multiqc",
|
|
"rsem_multiqc": "rsem_multiqc",
|
|
"pseudo_multiqc": "pseudo_multiqc",
|
|
"samtools_stats": "genome_bam_stats",
|
|
"samtools_flagstat": "genome_bam_flagstat",
|
|
"samtools_idxstats": "genome_bam_idxstats",
|
|
"markduplicates_multiqc": "markduplicates_multiqc",
|
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
|
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc",
|
|
"aligner_pca_multiqc": "deseq2_pca_multiqc",
|
|
"aligner_clustering_multiqc": "deseq2_dists_multiqc",
|
|
"pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo",
|
|
"pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo",
|
|
"preseq_multiqc": "preseq_output",
|
|
// "qualimap_multiqc": "qualimap_output_dir",
|
|
"dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc",
|
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
|
"bamstat_multiqc": "bamstat_output",
|
|
"inferexperiment_multiqc": "inferexperiment_multiqc",
|
|
"innerdistance_multiqc": "inner_dist_output_freq",
|
|
"junctionannotation_multiqc": "junction_annotation_output_log",
|
|
"junctionsaturation_multiqc": "junction_saturation_output_plot_r",
|
|
"readdistribution_multiqc": "read_distribution_output",
|
|
"readduplication_multiqc": "read_duplication_output_duplication_rate_mapping",
|
|
"tin_multiqc": "tin_output_summary",
|
|
"multiqc_config": "multiqc_custom_config"
|
|
],
|
|
toState: [ "multiqc_input": "output" ]
|
|
)
|
|
|
|
| multiqc.run (
|
|
runIf: { id, state -> !state.skip_qc && !state.skip_multiqc },
|
|
fromState: [
|
|
"title": "multiqc_title",
|
|
"input": "multiqc_input",
|
|
],
|
|
args: [exclude_modules: "general_stats"],
|
|
toState: [
|
|
"multiqc_report": "output_report",
|
|
"multiqc_data": "output_data",
|
|
"multiqc_plots": "output_plots"
|
|
]
|
|
)
|
|
|
|
| map { id, state ->
|
|
[ id, [
|
|
tpm_gene: state.tpm_gene,
|
|
counts_gene: state.counts_gene,
|
|
counts_gene_length_scaled: state.counts_gene_length_scaled,
|
|
counts_gene_scaled: state.counts_gene_scaled,
|
|
tpm_transcript: state.tpm_transcript,
|
|
counts_transcript: state.counts_transcript,
|
|
quant_merged_summarizedexperiment: state.quant_merged_summarizedexperiment,
|
|
deseq2_output: state.deseq2_output,
|
|
pseudo_tpm_gene: state.pseudo_tpm_gene,
|
|
pseudo_counts_gene: state.pseudo_counts_gene,
|
|
pseudo_counts_gene_length_scaled: state.pseudo_counts_gene_length_scaled,
|
|
pseudo_counts_gene_scaled: state.pseudo_counts_gene_scaled,
|
|
pseudo_tpm_transcript: state.pseudo_tpm_transcript,
|
|
pseudo_counts_transcript: state.pseudo_counts_transcript,
|
|
pseudo_quant_merged_summarizedexperiment: state.pseudo_quant_merged_summarizedexperiment,
|
|
deseq2_output_pseudo: state.deseq2_output_pseudo,
|
|
multiqc_report: state.multiqc_report,
|
|
multiqc_data: state.multiqc_data,
|
|
multiqc_plots: state.multiqc_plots
|
|
] ]
|
|
}
|
|
|
|
| map { list -> list[1]}
|
|
|
|
output_ch = qc_ch
|
|
|
|
| combine(merged_ch)
|
|
|
|
| map { list -> [list[0], list[1] + list[2]] }
|
|
|
|
| map { id, state ->
|
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
|
[ id, mod_state ]
|
|
}
|
|
|
|
| setState (
|
|
[
|
|
"preseq_output": "preseq_output",
|
|
"bamstat_output": "bamstat_output",
|
|
"strandedness_output": "strandedness_output",
|
|
"inner_dist_output_stats": "inner_dist_output_stats",
|
|
"inner_dist_output_dist": "inner_dist_output_dist",
|
|
"inner_dist_output_freq": "inner_dist_output_freq",
|
|
"inner_dist_output_plot": "inner_dist_output_plot",
|
|
"inner_dist_output_plot_r": "inner_dist_output_plot_r",
|
|
"junction_annotation_output_log": "junction_annotation_output_log",
|
|
"junction_annotation_output_plot_r": "junction_annotation_output_plot_r",
|
|
"junction_annotation_output_junction_bed": "junction_annotation_output_junction_bed",
|
|
"junction_annotation_output_junction_interact": "junction_annotation_output_junction_interact",
|
|
"junction_annotation_output_junction_sheet": "junction_annotation_output_junction_sheet",
|
|
"junction_annotation_output_splice_events_plot": "junction_annotation_output_splice_events_plot",
|
|
"junction_annotation_output_splice_junctions_plot": "junction_annotation_output_splice_junctions_plot",
|
|
"junction_saturation_output_plot_r": "junction_saturation_output_plot_r",
|
|
"junction_saturation_output_plot": "junction_saturation_output_plot",
|
|
"read_distribution_output": "read_distribution_output",
|
|
"read_duplication_output_duplication_rate_plot_r": "read_duplication_output_duplication_rate_plot_r",
|
|
"read_duplication_output_duplication_rate_plot": "read_duplication_output_duplication_rate_plot",
|
|
"read_duplication_output_duplication_rate_mapping": "read_duplication_output_duplication_rate_mapping",
|
|
"read_duplication_output_duplication_rate_sequence": "read_duplication_output_duplication_rate_sequence",
|
|
"tin_output_summary": "tin_output_summary",
|
|
"tin_output_metrics": "tin_output_metrics",
|
|
"dupradar_output_dupmatrix": "dupradar_output_dupmatrix",
|
|
"dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc",
|
|
"dupradar_output_duprate_exp_boxplot": "dupradar_output_duprate_exp_boxplot",
|
|
"dupradar_output_duprate_exp_densplot": "dupradar_output_duprate_exp_densplot",
|
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
|
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
|
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
|
"qualimap_report": "qualimap_report",
|
|
"qualimap_qc_report": "qualimap_qc_report",
|
|
"qualimap_counts": "qualimap_counts",
|
|
"featurecounts": "featurecounts",
|
|
"featurecounts_summary": "featurecounts_summary",
|
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
|
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc",
|
|
"tpm_gene": "tpm_gene",
|
|
"counts_gene": "counts_gene",
|
|
"counts_gene_length_scaled": "counts_gene_length_scaled",
|
|
"counts_gene_scaled": "counts_gene_scaled",
|
|
"tpm_transcript": "tpm_transcript",
|
|
"counts_transcript": "counts_transcript",
|
|
"lengths_gene": "lengths_gene",
|
|
"lengths_transcript": "lengths_transcript",
|
|
"quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment",
|
|
"deseq2_output": "deseq2_output",
|
|
"pseudo_tpm_gene": "pseudo_tpm_gene",
|
|
"pseudo_counts_gene": "pseudo_counts_gene",
|
|
"pseudo_counts_gene_length_scaled": "pseudo_counts_gene_length_scaled",
|
|
"pseudo_counts_gene_scaled": "pseudo_counts_gene_scaled",
|
|
"pseudo_tpm_transcript": "pseudo_tpm_transcript",
|
|
"pseudo_counts_transcript": "pseudo_counts_transcript",
|
|
"pseudo_lengths_gene": "pseudo_lengths_gene",
|
|
"pseudo_lengths_transcript": "pseudo_lengths_transcript",
|
|
"pseudo_quant_merged_summarizedexperiment": "pseudo_quant_merged_summarizedexperiment",
|
|
"deseq2_output_pseudo": "deseq2_output_pseudo",
|
|
"multiqc_report": "multiqc_report",
|
|
"multiqc_data": "multiqc_data",
|
|
"multiqc_plots": "multiqc_plots"
|
|
]
|
|
)
|
|
|
|
emit:
|
|
output_ch
|
|
}
|
|
|
|
//
|
|
// Function to check whether biotype field exists in GTF file
|
|
//
|
|
def biotypeInGtf(gtf_file, biotype) {
|
|
def hits = 0
|
|
gtf_file.eachLine { line ->
|
|
def attributes = line.split('\t')[-1].split()
|
|
if (attributes.contains(biotype)) {
|
|
hits += 1
|
|
}
|
|
}
|
|
if (hits) {
|
|
return true
|
|
} else {
|
|
log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
|
|
" Biotype attribute '${biotype}' not found in the last column of the GTF file!\n\n" +
|
|
" Biotype QC will be skipped to circumvent the issue below:\n" +
|
|
" https://github.com/nf-core/rnaseq/issues/460\n\n" +
|
|
" Amend '--featurecounts_group_type' to change this behaviour.\n" +
|
|
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
|
|
return false
|
|
}
|
|
}
|
|
|
|
//
|
|
// Function that parses and returns the predicted strandedness from the RSeQC infer_experiment.py output
|
|
//
|
|
def getInferexperimentStrandedness(inferexperiment_file, cutoff=30) {
|
|
def sense = 0
|
|
def antisense = 0
|
|
def undetermined = 0
|
|
inferexperiment_file.eachLine { line ->
|
|
def undetermined_matcher = line =~ /Fraction of reads failed to determine:\s([\d\.]+)/
|
|
def se_sense_matcher = line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/
|
|
def se_antisense_matcher = line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/
|
|
def pe_sense_matcher = line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/
|
|
def pe_antisense_matcher = line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/
|
|
if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100
|
|
if (se_sense_matcher) sense = se_sense_matcher[0][1].toFloat() * 100
|
|
if (se_antisense_matcher) antisense = se_antisense_matcher[0][1].toFloat() * 100
|
|
if (pe_sense_matcher) sense = pe_sense_matcher[0][1].toFloat() * 100
|
|
if (pe_antisense_matcher) antisense = pe_antisense_matcher[0][1].toFloat() * 100
|
|
}
|
|
def strandedness = 'unstranded'
|
|
if (sense >= 100-cutoff) {
|
|
strandedness = 'forward'
|
|
} else if (antisense >= 100-cutoff) {
|
|
strandedness = 'reverse'
|
|
}
|
|
return [ strandedness, sense, antisense, undetermined ]
|
|
}
|
|
|
|
|
|
|