rnaseq/target/nextflow/workflows/prepare_genome/nextflow_schema.json

{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "prepare_genome",
"description": "A subworkflow for preparing all the required genome references\n",
"type": "object",
"definitions": {


    "input" : {
    "title": "Input",
    "type": "object",
    "description": "No description",
    "properties": {


                "fasta": {
                "type":
                "string",
                "description": "Type: `file`, required. Path to FASTA genome file",
                "help_text": "Type: `file`, required. Path to FASTA genome file."

            }


        ,
                "gtf": {
                "type":
                "string",
                "description": "Type: `file`. Path to GTF annotation file",
                "help_text": "Type: `file`. Path to GTF annotation file. This parameter is *mandatory* if --genome is not specified."

            }


        ,
                "gff": {
                "type":
                "string",
                "description": "Type: `file`. Path to GFF3 annotation file",
                "help_text": "Type: `file`. Path to GFF3 annotation file. Required if \"--gtf\" is not specified."

            }


        ,
                "additional_fasta": {
                "type":
                "string",
                "description": "Type: `file`. FASTA file to concatenate to genome FASTA file e",
                "help_text": "Type: `file`. FASTA file to concatenate to genome FASTA file e.g. containing spike-in sequences."

            }


        ,
                "transcript_fasta": {
                "type":
                "string",
                "description": "Type: `file`. Path to FASTA transcriptome file",
                "help_text": "Type: `file`. Path to FASTA transcriptome file."

            }


        ,
                "gene_bed": {
                "type":
                "string",
                "description": "Type: `file`. Path to BED file containing gene intervals",
                "help_text": "Type: `file`. Path to BED file containing gene intervals. This will be created from the GTF file if not specified."

            }


        ,
                "splicesites": {
                "type":
                "string",
                "description": "Type: `file`. Splice sites file required for HISAT2",
                "help_text": "Type: `file`. Splice sites file required for HISAT2."

            }


        ,
                "skip_bbsplit": {
                "type":
                "boolean",
                "description": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads",
                "help_text": "Type: `boolean`. Skip BBSplit for removal of non-reference genome reads."

            }


        ,
                "bbsplit_fasta_list": {
                "type":
                "string",
                "description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit",
                "help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must be explicitly set to \"false\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)"

            }


        ,
                "star_index": {
                "type":
                "string",
                "description": "Type: `file`. Path to directory or tar",
                "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built STAR index."

            }


        ,
                "rsem_index": {
                "type":
                "string",
                "description": "Type: `file`. Path to directory or tar",
                "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built RSEM index."

            }


        ,
                "extra_rsem_prepare_reference_args": {
                "type":
                "string",
                "description": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline",
                "help_text": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline."

            }


        ,
                "salmon_index": {
                "type":
                "string",
                "description": "Type: `file`. Path to directory or tar",
                "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Salmon index."

            }


        ,
                "kallisto_index": {
                "type":
                "string",
                "description": "Type: `file`. Path to directory or tar",
                "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built Kallisto index."

            }


        ,
                "bbsplit_index": {
                "type":
                "string",
                "description": "Type: `file`. Path to directory or tar",
                "help_text": "Type: `file`. Path to directory or tar.gz archive for pre-built BBSplit index."

            }


        ,
                "pseudo_aligner_kmer_size": {
                "type":
                "integer",
                "description": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners",
                "help_text": "Type: `integer`, default: `31`. Kmer length passed to indexing step of pseudoaligners."
            ,
                "default": "31"
            }


        ,
                "gencode": {
                "type":
                "boolean",
                "description": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format",
                "help_text": "Type: `boolean`. Specify if the GTF annotation is in GENCODE format."

            }


        ,
                "biotype": {
                "type":
                "string",
                "description": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided",
                "help_text": "Type: `string`. Biotype value to use while appending entries to GTF file when additional fasta file is provided."

            }


        ,
                "filter_gtf": {
                "type":
                "boolean",
                "description": "Type: `boolean`. Whether to filter the GTF or not?",
                "help_text": "Type: `boolean`. Whether to filter the GTF or not?"

            }


        ,
                "aligner": {
                "type":
                "string",
                "description": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027",
                "help_text": "Type: `string`, default: `star_salmon`, choices: ``star_salmon`, `star_rsem`, `hisat2``. Specifies the alignment algorithm to use - available options are \u0027star_salmon\u0027, \u0027star_rsem\u0027 and \u0027hisat2\u0027.",
                "enum": ["star_salmon", "star_rsem", "hisat2"]

            ,
                "default": "star_salmon"
            }


        ,
                "pseudo_aligner": {
                "type":
                "string",
                "description": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027",
                "help_text": "Type: `string`, default: `salmon`, choices: ``salmon`, `kallisto``. Specifies the pseudo aligner to use - available options are \u0027salmon\u0027. Runs in addition to \u0027--aligner\u0027.",
                "enum": ["salmon", "kallisto"]

            ,
                "default": "salmon"
            }


}
},


    "output" : {
    "title": "Output",
    "type": "object",
    "description": "No description",
    "properties": {


                "fasta_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. ",
                "help_text": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. "
            ,
                "default": "$id.$key.fasta_uncompressed.fasta"
            }


        ,
                "gtf_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. ",
                "help_text": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. "
            ,
                "default": "$id.$key.gtf_uncompressed.gtf"
            }


        ,
                "transcript_fasta_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. ",
                "help_text": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. "
            ,
                "default": "$id.$key.transcript_fasta_uncompressed.fasta"
            }


        ,
                "gene_bed_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. ",
                "help_text": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. "
            ,
                "default": "$id.$key.gene_bed_uncompressed.bed"
            }


        ,
                "star_index_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. Path to STAR index",
                "help_text": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. Path to STAR index."
            ,
                "default": "$id.$key.star_index_uncompressed.star_index_uncompressed"
            }


        ,
                "rsem_index_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar",
                "help_text": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar.gz archive for pre-built RSEM index."
            ,
                "default": "$id.$key.rsem_index_uncompressed.rsem_index_uncompressed"
            }


        ,
                "salmon_index_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index",
                "help_text": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index."
            ,
                "default": "$id.$key.salmon_index_uncompressed.salmon_index_uncompressed"
            }


        ,
                "kallisto_index_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index",
                "help_text": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index."
            ,
                "default": "$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed"
            }


        ,
                "bbsplit_index_uncompressed": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index",
                "help_text": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index."
            ,
                "default": "$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed"
            }


        ,
                "chrom_sizes": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths",
                "help_text": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths"
            ,
                "default": "$id.$key.chrom_sizes.sizes"
            }


        ,
                "fai": {
                "type":
                "string",
                "description": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file",
                "help_text": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file"
            ,
                "default": "$id.$key.fai.fai"
            }


}
},


    "nextflow input-output arguments" : {
    "title": "Nextflow input-output arguments",
    "type": "object",
    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
    "properties": {


                "publish_dir": {
                "type":
                "string",
                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."

            }


        ,
                "param_list": {
                "type":
                "string",
                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
                "hidden": true

            }


}
}
},
"allOf": [

    {
    "$ref": "#/definitions/input"
    },

    {
    "$ref": "#/definitions/output"
    },

    {
    "$ref": "#/definitions/nextflow input-output arguments"
    }
]
}