Files
openpipeline/src/reference/make_reference/config.vsh.yaml

63 lines
1.8 KiB
YAML
Raw Normal View History

# Identity of the component: its name and the namespace it is grouped under.
name: make_reference
namespace: reference
# Literal block scalar (`|`) so the line breaks of the list below are kept.
# The body has to be indented deeper than the `description` key itself.
description: |
  Preprocess and build a transcriptome reference.
  Example input files are:
  - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz
  - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz
  - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip
# Contributors. Each entry pairs a `__merge__` reference to a shared author
# file with the roles that person holds for this component.
# NOTE(review): `__merge__` presumably inlines the referenced YAML file;
# confirm against the Viash documentation.
authors:
  - __merge__: /src/authors/angela_pisco.yaml
    roles: [author]
  - __merge__: /src/authors/robrecht_cannoodt.yaml
    roles: [author, maintainer]
# Arguments accepted by the component. Entries without `direction: output`
# are inputs or options; the last two entries are the produced files.
arguments:
  # inputs
  - type: file
    name: --genome_fasta
    required: true
    description: Reference genome fasta.
    example: genome_fasta.fa.gz
  - type: file
    name: --transcriptome_gtf
    required: true
    description: Reference transcriptome annotation.
    example: transcriptome.gtf.gz
  # `required` is not set for this input.
  - type: file
    name: --ercc
    description: ERCC sequence and annotation file.
    example: ercc.zip
  # options
  - type: string
    name: --subset_regex
    description: Will subset the reference chromosomes using the given regex.
    # Quoted so the parentheses and `|` are unambiguously literal text.
    example: '(ERCC-00002|chr1)'
  # outputs
  - type: file
    name: --output_fasta
    direction: output
    required: true
    description: Output genome sequence fasta.
    example: genome_sequence.fa.gz
  - type: file
    name: --output_gtf
    direction: output
    required: true
    description: Output transcriptome annotation gtf.
    example: transcriptome_annotation.gtf.gz
# Files bundled with the component: a single bash script.
# NOTE(review): presumably the script that implements the component; confirm.
resources:
  - type: bash_script
    path: script.sh
# Files used when testing the component: a single bash test script.
test_resources:
  - type: bash_script
    path: test.sh
# Execution environment: a Docker image based on ubuntu:22.04, extended with
# the apt packages listed under `setup`.
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [pigz, seqkit, curl, wget, unzip, file]
# Ways to run the component: as a plain executable, or through Nextflow.
runners:
  - type: executable
  - type: nextflow
    # Labels attached to the Nextflow runner.
    # NOTE(review): presumably mapped to resource settings elsewhere in the
    # pipeline configuration; confirm.
    directives:
      label: [highmem, highcpu]