Build pipeline: viash-hub.rnaseq.add-labels-n4fx6
Source commit: e244fcf554
Source message: Move inline TODO items to Github issue
312 lines
10 KiB
Plaintext
312 lines
10 KiB
Plaintext
workflow run_wf {
|
|
|
|
take:
|
|
input_ch
|
|
|
|
main:
|
|
output_ch = input_ch
|
|
|
|
// Uncompress fasta
|
|
| gunzip.run (
|
|
fromState: [ "input": "fasta" ],
|
|
toState: [ "fasta": "output" ],
|
|
key: "gunzip_fasta",
|
|
args: [ output: "reference_genome.fasta" ],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// uncompress gtf
|
|
| gunzip.run (
|
|
runIf: {id, state -> state.gtf},
|
|
fromState: [ "input": "gtf" ],
|
|
toState: [ "gtf": "output" ],
|
|
key: "gunzip_gtf",
|
|
args: [output: "gene_annotation.gtf"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// uncompress gff
|
|
| gunzip.run (
|
|
runIf: {id, state -> !state.gtf && state.gff},
|
|
fromState: [ "input": "gff" ],
|
|
toState: [ "gff": "output" ],
|
|
key: "gunzip_gff",
|
|
args: [output: "gene_annotation.gff"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// gff to gtf
|
|
| gffread.run (
|
|
runIf: {id, state -> !state.gtf && state.gff},
|
|
fromState: [
|
|
"input": "gff",
|
|
"genome": "fasta"
|
|
],
|
|
toState: [ "gtf": "outfile" ],
|
|
args: [
|
|
outfile: "gene_annotation.gtf",
|
|
gtf_output: true,
|
|
keep_attrs: true,
|
|
keep_exon_attrs: true
|
|
],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| gtf_filter.run(
|
|
runIf: {id, state -> state.gtf && state.filter_gtf},
|
|
fromState: [
|
|
"fasta": "fasta",
|
|
"gtf": "gtf"
|
|
],
|
|
toState: [ "gtf": "filtered_gtf" ],
|
|
args: [filtered_gtf: "gene_annotation.gtf"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// uncompress additional fasta
|
|
| gunzip.run (
|
|
runIf: {id, state -> state.additional_fasta},
|
|
fromState: [ "input": "additional_fasta" ],
|
|
toState: [ "additional_fasta": "output" ],
|
|
key: "gunzip_additional_fasta",
|
|
args: [output: "additional.fasta"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// concatenate additional fasta
|
|
| cat_additional_fasta.run (
|
|
runIf: {id, state -> state.additional_fasta},
|
|
fromState: [
|
|
"fasta": "fasta",
|
|
"gtf": "gtf",
|
|
"additional_fasta": "additional_fasta",
|
|
"biotype": "biotype"
|
|
],
|
|
toState: [
|
|
"fasta": "fasta_output",
|
|
"gtf": "gtf_output"
|
|
],
|
|
args: [
|
|
fasta_output: "genome_additional.fasta",
|
|
gtf_output: "genome_additional.gtf"
|
|
],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// uncompress bed file
|
|
| gunzip.run (
|
|
runIf: {id, state -> state.gene_bed},
|
|
fromState: [ "input": "gene_bed" ],
|
|
toState: [ "gene_bed": "output" ],
|
|
key: "gunzip_gene_bed",
|
|
args: [output: "genome_additional.bed"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// gtf to bed
|
|
| gtf2bed.run (
|
|
runIf: { id, state -> !state.gene_bed},
|
|
fromState: [ "gtf": "gtf" ],
|
|
toState: [ "gene_bed": "bed_output" ],
|
|
args: [bed_output: "genome_additional.bed"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// uncompress transcript fasta
|
|
| gunzip.run (
|
|
runIf: {id, state -> state.transcript_fasta},
|
|
fromState: [ "input": "transcript_fasta" ],
|
|
toState: [ "transcript_fasta": "output" ],
|
|
key: "transcript_fasta",
|
|
args: [output: "transcriptome.fasta"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// preprocess transcripts fasta if gtf is in gencode format
|
|
| preprocess_transcripts_fasta.run (
|
|
runIf: {id, state -> state.transcript_fasta && state.gencode},
|
|
fromState: [ "transcript_fasta": "transcript_fasta" ],
|
|
toState: [ "transcript_fasta": "output" ],
|
|
args: [output: "transcriptome.fasta"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// make transcript FASTA if not provided
|
|
| rsem_prepare_reference.run (
|
|
runIf: {id, state -> !state.transcript_fasta},
|
|
fromState: [
|
|
"reference_fasta_files": "fasta",
|
|
"gtf": "gtf"
|
|
],
|
|
toState: [ "make_transcript_fasta_output": "output" ],
|
|
key: "make_transcript_fasta",
|
|
args: [reference_name: "genome"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
| map { id, state ->
|
|
def transcript_fasta = (!state.transcript_fasta) ?
|
|
state.make_transcript_fasta_output.listFiles().find{it.name == "genome.transcripts.fa"} :
|
|
state.transcript_fasta
|
|
[ id, state + [transcript_fasta: transcript_fasta] ]
|
|
}
|
|
|
|
// chromosome size and fai index
|
|
| getchromsizes.run (
|
|
fromState: [ "fasta": "fasta" ],
|
|
toState: [
|
|
"fai": "fai",
|
|
"sizes": "sizes"
|
|
],
|
|
key: "chromsizes",
|
|
args: [
|
|
fai: "genome_additional.fasta.fai",
|
|
sizes: "genome_additional.fasta.sizes"
|
|
],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// untar bbsplit index, if available
|
|
| untar.run (
|
|
runIf: {id, state -> state.bbsplit_index},
|
|
fromState: [ "input": "bbsplit_index" ],
|
|
toState: [ "bbsplit_index": "output" ],
|
|
key: "untar_bbsplit_index",
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| map { id, state ->
|
|
// Check if bbsplit_fasta_list is defined
|
|
def ref = (state.bbsplit_fasta_list) ?
|
|
[state.fasta] + state.bbsplit_fasta_list :
|
|
[state.fasta]
|
|
[id, state + [bbsplit_ref: ref] ]
|
|
}
|
|
|
|
// create bbsplit index, if not already available
|
|
| bbmap_bbsplit.run (
|
|
runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index},
|
|
fromState: ["ref": "bbsplit_ref"],
|
|
toState: [ "bbsplit_index": "index" ],
|
|
args: [
|
|
only_build_index: true,
|
|
index: "BBSplit_index"
|
|
],
|
|
key: "generate_bbsplit_index"
|
|
)
|
|
|
|
// Uncompress STAR index or generate from scratch if required
|
|
| untar.run (
|
|
runIf: {id, state -> state.star_index},
|
|
fromState: [ "input": "star_index" ],
|
|
toState: [ "star_index": "output" ],
|
|
key: "untar_star_index",
|
|
args: [output: "STAR_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| star_genome_generate.run (
|
|
runIf: {id, state -> !state.star_index && !state.skip_alignment},
|
|
fromState: [
|
|
"genome_fasta_files": "fasta",
|
|
"sjdb_gtf_file": "gtf",
|
|
"sjdb_gtf_feature_exon": "star_sjdb_gtf_feature_exon"
|
|
],
|
|
toState: [ "star_index": "index" ],
|
|
key: "generate_star_index",
|
|
args: [index: "STAR_index"],
|
|
directives: [ label: [ "highmem", "highcpu" ] ]
|
|
)
|
|
|
|
// Uncompress RSEM index or generate from scratch if required
|
|
| untar.run (
|
|
runIf: {id, state -> state.rsem_index},
|
|
fromState: [ "input": "rsem_index" ],
|
|
toState: [ "rsem_index": "output" ],
|
|
key: "untar_rsem_index",
|
|
args: [output: "RSEM_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| rsem_prepare_reference.run (
|
|
runIf: {id, state -> !state.rsem_index && state.aligner == 'star_rsem'},
|
|
fromState: [
|
|
"reference_fasta_files": "fasta",
|
|
"gtf": "gtf"
|
|
],
|
|
toState: [ "rsem_index": "output" ],
|
|
key: "generate_rsem_index",
|
|
args: [reference_name: "genome"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// TODO: Uncompress HISAT2 index or generate from scratch if required
|
|
|
|
// Uncompress Salmon index or generate from scratch if required
|
|
| untar.run (
|
|
runIf: {id, state -> state.salmon_index},
|
|
fromState: [ "input": "salmon_index" ],
|
|
toState: [ "salmon_index": "output" ],
|
|
key: "untar_salmon_index",
|
|
args: [output: "Salmon_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| salmon_index.run (
|
|
runIf: {id, state -> (state.aligner == 'star_salmon' || state.pseudo_aligner == "salmon") && !state.salmon_index},
|
|
fromState: [
|
|
"genome": "fasta",
|
|
"transcripts": "transcript_fasta",
|
|
"kmer_len": "pseudo_aligner_kmer_size",
|
|
"gencode": "gencode"
|
|
],
|
|
toState: [ "salmon_index": "index" ],
|
|
key: "generate_salmon_index",
|
|
args: [index: "Salmon_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
// Uncompress Kallisto index or generate from scratch if required
|
|
| untar.run (
|
|
runIf: {id, state -> state.kallisto_index},
|
|
fromState: [ "input": "kallisto_index" ],
|
|
toState: [ "kallisto_index": "output" ],
|
|
key: "untar_kallisto_index",
|
|
args: [output: "Kallisto_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| kallisto_index.run(
|
|
runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index},
|
|
fromState: [
|
|
"input": "transcript_fasta",
|
|
"kmer_size": "pseudo_aligner_kmer_size"
|
|
],
|
|
toState: [ "kallisto_index": "index" ],
|
|
key: "generate_kallisto_index",
|
|
args: [index: "Kallisto_index"],
|
|
directives: [ label: [ "lowmem", "midcpu" ] ]
|
|
)
|
|
|
|
| map { id, state ->
|
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
|
[ id, mod_state ]
|
|
}
|
|
|
|
| setState (
|
|
"fasta_uncompressed": "fasta",
|
|
"gtf_uncompressed": "gtf",
|
|
"transcript_fasta_uncompressed": "transcript_fasta",
|
|
"gene_bed_uncompressed": "gene_bed",
|
|
"star_index_uncompressed": "star_index",
|
|
"salmon_index_uncompressed": "salmon_index",
|
|
"kallisto_index_uncompressed": "kallisto_index",
|
|
"bbsplit_index_uncompressed": "bbsplit_index",
|
|
"rsem_index_uncompressed": "rsem_index",
|
|
"chrom_sizes": "sizes",
|
|
"fai": "fai"
|
|
)
|
|
|
|
emit:
|
|
output_ch
|
|
}
|