# rnaseq/assets/multiqc_config.yml

# Introductory text for the report; contains an HTML link to the pipeline.
# The anchor tag must be well-formed (closed opening tag plus link text),
# otherwise the link is not rendered.
report_comment: >
  This report has been generated by the
  <a href="https://github.com/data-intuitive/rnaseq.vsh" target="_blank">rnaseq.vsh</a>
  analysis pipeline.
# Placement of the pipeline-generated sections. All three use large negative
# `order` values, each one lower than the previous.
# NOTE(review): presumably this pushes them to the end of the report in the
# sequence methods description -> software versions -> summary; confirm.
report_section_order:
  'rnaseq.vsh-methods-description':
    order: -1000
  software_versions:
    order: -1001
  'rnaseq.vsh-summary':
    order: -1002

# Export the report's plots as standalone files in addition to the HTML
# report (MultiQC `export_plots` option).
export_plots: true

# Run only these modules; any MultiQC module not listed here is not run.
# hisat2 is deliberately commented out (its search pattern under `sp` is
# commented out as well).
run_modules:
  - custom_content
  - fastqc
  - cutadapt
  - fastp
  - sortmerna
  - star
  # - hisat2
  - rsem
  - salmon
  - kallisto
  - samtools
  - picard
  - preseq
  - rseqc
  - qualimap

# Sections to place at the top of the report, in this order.
# The `fail_*` entries are the custom-content tables defined under
# `custom_data`; the remaining ids are not defined in this file.
# NOTE(review): presumably those are custom-content sections produced by the
# pipeline (DESeq2 QC, biotype counts, dupRadar); confirm.
top_modules:
  - fail_trimming
  - fail_mapping
  - fail_strand
  - star_rsem_deseq2_pca
  - star_rsem_deseq2_clustering
  - star_salmon_deseq2_pca
  - star_salmon_deseq2_clustering
  - salmon_deseq2_pca
  - salmon_deseq2_clustering
  - kallisto_deseq2_pca
  - kallisto_deseq2_clustering
  - biotype_counts
  - dupradar

# FastQC is run twice so that raw and trimmed reads get separate report
# sections, with the trimming tools (cutadapt, fastp) shown in between.
module_order:
  - fastqc:
      name: "FastQC (raw)"
      info: "This section of the report shows FastQC results before adapter trimming."
      path_filters:
        - "*.read_*.fastqc.zip"
      # The glob above also matches "*.trimgalore.read_*.fastqc.zip", so the
      # trimmed reports must be excluded explicitly or they would show up in
      # the raw section as well.
      path_filters_exclude:
        - "*.trimgalore.read_*.fastqc.zip"
  - cutadapt
  - fastp
  - fastqc:
      name: "FastQC (trimmed)"
      info: "This section of the report shows FastQC results after adapter trimming."
      path_filters:
        - "*.trimgalore.read_*.fastqc.zip"

# Don't show % Dups in the General Stats table (we have this from Picard)
table_columns_visible:
  fastqc:
    # Canonical lowercase boolean; parses the same as the former `False`.
    percent_duplicates: false

# Additional pipeline-specific filename suffixes to clean from sample names,
# on top of the MultiQC defaults.
extra_fn_clean_exts:
  - '.salmon_quant'
  - '.mapping_quality'
  - '.genome_sorted'
  - '.MarkDuplicates'
  - '.MarkDuplicates_flagstat'
  - '.MarkDuplicates_stats'
  - '.genome_sorted_MarkDuplicates'
  - '.star_aligned'
  - '.read_1'
  - '.read_2'

# Custom-content tables listing samples that failed a pipeline QC check.
# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml
custom_data:
  # Samples below the minimum trimmed-reads threshold.
  fail_trimming:
    section_name: 'WARNING: Fail Trimming Check'
    description: >-
      List of samples that failed the minimum trimmed reads threshold
      specified via the '--min_trimmed_reads' parameter, and hence were
      ignored for the downstream processing steps.
    plot_type: 'table'
    pconfig:
      id: 'fail_trimmed_samples_table'
      table_title: 'Samples failed trimming threshold'
      namespace: 'Samples failed trimming threshold'
      format: '{:.0f}'
  # Samples below the STAR minimum mapped-reads threshold.
  fail_mapping:
    section_name: 'WARNING: Fail Alignment Check'
    description: >-
      List of samples that failed the STAR minimum mapped reads threshold
      specified via the '--min_mapped_reads' parameter, and hence were
      ignored for the downstream processing steps.
    plot_type: 'table'
    pconfig:
      id: 'fail_mapped_samples_table'
      table_title: 'Samples failed mapping threshold'
      namespace: 'Samples failed mapping threshold'
      format: '{:.2f}'
  # Samples whose declared strandedness disagrees with the RSeQC inference.
  fail_strand:
    section_name: 'WARNING: Fail Strand Check'
    description: >-
      List of samples that failed the strandedness check between that
      provided in the samplesheet and calculated by the
      <a href='http://rseqc.sourceforge.net/#infer-experiment-py'>RSeQC
      infer_experiment.py</a> tool.
    plot_type: 'table'
    pconfig:
      id: 'fail_strand_check_table'
      table_title: 'Samples failed strandedness check'
      namespace: 'Samples failed strandedness check'
      format: '{:.2f}'

# Module search patterns, customised to speed up execution:
#  - sub-tools we are not interested in are skipped
#  - files are matched by filename pattern instead of by content
#  - nothing identical to the MultiQC default is repeated here
# See https://multiqc.info/docs/#optimise-file-search-patterns for details
sp:
  # Read QC and trimming
  fastqc/zip:
    fn: '*.fastqc.zip'
  cutadapt:
    fn: '*.trimming_report.txt'
  fastp:
    fn: '*.fastp.json'
  sortmerna:
    fn: '*sortmerna*.log'

  # Alignment and quantification
  star:
    fn: '*.star_aligned.log.final.out'
  # hisat2:
  #   fn: "*.hisat2.summary.log"
  salmon/meta:
    fn: '*meta_info.json'

  # Library complexity
  preseq:
    fn: '*.lc_extrap.txt'

  # samtools
  samtools/stats:
    fn: '*.stats'
  samtools/flagstat:
    fn: '*.flagstat'
  samtools/idxstats:
    fn: '*.idxstats*'

  # RSeQC
  rseqc/bam_stat:
    fn: '*.mapping_quality.txt'
  rseqc/junction_saturation:
    fn: '*.junction_saturation_plot.r'
  rseqc/junction_annotation:
    fn: '*.junction_annotation.log'
  rseqc/read_duplication_pos:
    fn: '*.duplication_rate_mapping.xls'
  rseqc/read_distribution:
    fn: '*.read_distribution.txt'
  rseqc/infer_experiment:
    fn: '*.strandedness.txt'
  rseqc/inner_distance:
    fn: '*.inner_distance_freq.txt'
  rseqc/tin:
    fn: '*.tin_summary.txt'

  # Picard
  picard/markdups:
    fn: '*.MarkDuplicates.metrics.txt'

# NOTE(review): presumably disables MultiQC's built-in software-versions
# section; confirm against the MultiQC version in use.
skip_versions_section: true