Build pipeline: vsh-ci-dev-jsbwk
Source commit: 1e1ffb315f
Source message: Merge pull request #17 from viash-hub/add_biobox_modules
- Migrate a number of components to biobox
- Fix tests
- Reduce size of test resources
- Prepare for Viash Hub
413 lines
16 KiB
JSON
413 lines
16 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "prepare_genome",
|
|
"description": "A subworkflow for preparing all the required genome references\n",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"input" : {
|
|
"title": "Input",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"fasta": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required. Path to FASTA genome file",
|
|
"help_text": "Type: `file`, required. Path to FASTA genome file."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"gtf": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to GTF annotation file",
|
|
"help_text": "Type: `file`. Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"gff": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to GFF3 annotation file",
|
|
"help_text": "Type: `file`. Path to GFF3 annotation file. Required if \"--gtf\" is not specified."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"additional_fasta": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. FASTA file to concatenate to genome FASTA file, e.g. containing spike-in sequences",
|
|
"help_text": "Type: `file`. FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"transcript_fasta": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to FASTA transcriptome file",
|
|
"help_text": "Type: `file`. Path to FASTA transcriptome file."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"gene_bed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to BED file containing gene intervals",
|
|
"help_text": "Type: `file`. Path to BED file containing gene intervals. This will be created from the GTF file if not specified."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"splicesites": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Splice sites file required for HISAT2",
|
|
"help_text": "Type: `file`. Splice sites file required for HISAT2."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"skip_bbsplit": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads",
|
|
"help_text": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"bbsplit_fasta_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit",
|
|
"help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must be explicitly set to \"false\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"star_index": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to directory or tar.gz archive for pre-built STAR index",
|
|
"help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built STAR index."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"rsem_index": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to directory or tar.gz archive for pre-built RSEM index",
|
|
"help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built RSEM index."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"extra_rsem_prepare_reference_args": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline",
|
|
"help_text": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"salmon_index": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to directory or tar.gz archive for pre-built Salmon index",
|
|
"help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Salmon index."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"kallisto_index": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to directory or tar.gz archive for pre-built Kallisto index",
|
|
"help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Kallisto index."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"bbsplit_index": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Path to directory or tar.gz archive for pre-built BBSplit index",
|
|
"help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built BBSplit index."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"pseudo_aligner_kmer_size": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners",
|
|
"help_text": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners."
|
|
,
|
|
"default": 31
|
|
}
|
|
|
|
|
|
,
|
|
"gencode": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format",
|
|
"help_text": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"biotype": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided",
|
|
"help_text": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"filter_gtf": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean`. Whether to filter the GTF or not?",
|
|
"help_text": "Type: `boolean`. Whether to filter the GTF or not?"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"aligner": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `star_salmon`, choices: `star_salmon`, `star_rsem`, `hisat2`. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027",
|
|
"help_text": "Type: `string`, default: `star_salmon`, choices: `star_salmon`, `star_rsem`, `hisat2`. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027.",
|
|
"enum": ["star_salmon", "star_rsem", "hisat2"]
|
|
|
|
,
|
|
"default": "star_salmon"
|
|
}
|
|
|
|
|
|
,
|
|
"pseudo_aligner": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `salmon`, choices: `salmon`, `kallisto`. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027 and \u0027kallisto\u0027",
|
|
"help_text": "Type: `string`, default: `salmon`, choices: `salmon`, `kallisto`. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027 and \u0027kallisto\u0027. Runs in addition to \u0027--aligner\u0027.",
|
|
"enum": ["salmon", "kallisto"]
|
|
|
|
,
|
|
"default": "salmon"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output" : {
|
|
"title": "Output",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"fasta_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. ",
|
|
"help_text": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. "
|
|
,
|
|
"default": "$id.$key.fasta_uncompressed.fasta"
|
|
}
|
|
|
|
|
|
,
|
|
"gtf_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. ",
|
|
"help_text": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. "
|
|
,
|
|
"default": "$id.$key.gtf_uncompressed.gtf"
|
|
}
|
|
|
|
|
|
,
|
|
"transcript_fasta_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. ",
|
|
"help_text": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. "
|
|
,
|
|
"default": "$id.$key.transcript_fasta_uncompressed.fasta"
|
|
}
|
|
|
|
|
|
,
|
|
"gene_bed_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. ",
|
|
"help_text": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. "
|
|
,
|
|
"default": "$id.$key.gene_bed_uncompressed.bed"
|
|
}
|
|
|
|
|
|
,
|
|
"star_index_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. Path to STAR index",
|
|
"help_text": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. Path to STAR index."
|
|
,
|
|
"default": "$id.$key.star_index_uncompressed.star_index_uncompressed"
|
|
}
|
|
|
|
|
|
,
|
|
"rsem_index_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar.gz archive for pre-built RSEM index",
|
|
"help_text": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar.gz archive for pre-built RSEM index."
|
|
,
|
|
"default": "$id.$key.rsem_index_uncompressed.rsem_index_uncompressed"
|
|
}
|
|
|
|
|
|
,
|
|
"salmon_index_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index",
|
|
"help_text": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index."
|
|
,
|
|
"default": "$id.$key.salmon_index_uncompressed.salmon_index_uncompressed"
|
|
}
|
|
|
|
|
|
,
|
|
"kallisto_index_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index",
|
|
"help_text": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index."
|
|
,
|
|
"default": "$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed"
|
|
}
|
|
|
|
|
|
,
|
|
"bbsplit_index_uncompressed": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index",
|
|
"help_text": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index."
|
|
,
|
|
"default": "$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed"
|
|
}
|
|
|
|
|
|
,
|
|
"chrom_sizes": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths",
|
|
"help_text": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths"
|
|
,
|
|
"default": "$id.$key.chrom_sizes.sizes"
|
|
}
|
|
|
|
|
|
,
|
|
"fai": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file",
|
|
"help_text": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file"
|
|
,
|
|
"default": "$id.$key.fai.fai"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/input"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|