Files
biobox/target/nextflow/bcftools/bcftools_norm/nextflow_schema.json
CI 2a330fe450 Build branch main with version main (dc7b33d)
Build pipeline: viash-hub.biobox.main-62lvg

Source commit: dc7b33d51f

Source message: Bcftools Norm (#144)

* Initial Commit

* config and help.txt

* script.sh

* test template

* More tests and debugging

* test 5 and 6

* test 7, 8, 9

* Update test.sh

* fixing bug on config

* Changelog

* Update config.vsh.yaml

* Requested changes

* Bug fixing

---------

Co-authored-by: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
2024-09-10 14:36:08 +00:00

329 lines
14 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "bcftools_norm",
"description": "Left-align and normalize indels, check if REF alleles match the reference, split multiallelic sites into multiple rows; \nrecover multiallelics from multiple rows. \n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required. Input VCF/BCF file",
"help_text": "Type: `file`, required. Input VCF/BCF file."
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.output.output`. Output normalized VCF/BCF file",
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output normalized VCF/BCF file."
,
"default": "$id.$key.output.output"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"atomize": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Decompose complex variants (e.g., MNVs become consecutive SNVs)",
"help_text": "Type: `boolean_true`, default: `false`. Decompose complex variants (e.g., MNVs become consecutive SNVs).\n"
,
"default": false
}
,
"atom_overlaps": {
"type":
"string",
"description": "Type: `string`, choices: ``.`, `*``. Use the star allele (*) for overlapping alleles or set to missing (.)",
"help_text": "Type: `string`, choices: ``.`, `*``. Use the star allele (*) for overlapping alleles or set to missing (.).\n",
"enum": [".", "*"]
}
,
"check_ref": {
"type":
"string",
"description": "Type: `string`, choices: ``e`, `w`, `x`, `s``. Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites",
"help_text": "Type: `string`, choices: ``e`, `w`, `x`, `s``. Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites.\n",
"enum": ["e", "w", "x", "s"]
}
,
"remove_duplicates": {
"type":
"string",
"description": "Type: `string`, choices: ``snps`, `indels`, `both`, `all`, `exact`, `none``. Remove duplicate snps, indels, both, all, exact matches, or none (old -D option)",
"help_text": "Type: `string`, choices: ``snps`, `indels`, `both`, `all`, `exact`, `none``. Remove duplicate snps, indels, both, all, exact matches, or none (old -D option).",
"enum": ["snps", "indels", "both", "all", "exact", "none"]
}
,
"fasta_ref": {
"type":
"string",
"description": "Type: `file`. Reference fasta sequence file",
"help_text": "Type: `file`. Reference fasta sequence file."
}
,
"force": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Try to proceed even if malformed tags are encountered",
"help_text": "Type: `boolean_true`, default: `false`. Try to proceed even if malformed tags are encountered. \nExperimental, use at your own risk.\n"
,
"default": false
}
,
"keep_sum": {
"type":
"string",
"description": "Type: `string`. Keep vector sum constant when splitting multiallelics (see github issue #360)",
"help_text": "Type: `string`. Keep vector sum constant when splitting multiallelics (see github issue #360).\n"
}
,
"multiallelics": {
"type":
"string",
"description": "Type: `string`, choices: ``+snps`, `+indels`, `+both`, `+any`, `-snps`, `-indels`, `-both`, `-any``. Split multiallelics (-) or join biallelics (+), type: snps, indels, both, any [default: both]",
"help_text": "Type: `string`, choices: ``+snps`, `+indels`, `+both`, `+any`, `-snps`, `-indels`, `-both`, `-any``. Split multiallelics (-) or join biallelics (+), type: snps, indels, both, any [default: both].\n",
"enum": ["+snps", "+indels", "+both", "+any", "-snps", "-indels", "-both", "-any"]
}
,
"no_version": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Do not append version and command line information to the header",
"help_text": "Type: `boolean_true`, default: `false`. Do not append version and command line information to the header."
,
"default": false
}
,
"do_not_normalize": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Do not normalize indels (with -m or -c s)",
"help_text": "Type: `boolean_true`, default: `false`. Do not normalize indels (with -m or -c s)."
,
"default": false
}
,
"output_type": {
"type":
"string",
"description": "Type: `string`, choices: ``u`, `z`, `b`, `v``. Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed BCF\n v: uncompressed VCF\n",
"help_text": "Type: `string`, choices: ``u`, `z`, `b`, `v``. Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed BCF\n v: uncompressed VCF\n",
"enum": ["u", "z", "b", "v"]
}
,
"old_rec_tag": {
"type":
"string",
"description": "Type: `string`. Annotate modified records with INFO/STR indicating the original variant",
"help_text": "Type: `string`. Annotate modified records with INFO/STR indicating the original variant."
}
,
"regions": {
"type":
"string",
"description": "Type: `string`, example: `20:1000000-2000000`. Restrict to comma-separated list of regions",
"help_text": "Type: `string`, example: `20:1000000-2000000`. Restrict to comma-separated list of regions. \nFollowing formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,\u2026].\n"
}
,
"regions_file": {
"type":
"string",
"description": "Type: `file`. Restrict to regions listed in a file",
"help_text": "Type: `file`. Restrict to regions listed in a file. \nRegions can be specified either on a VCF, BED, or tab-delimited file (the default). \nFor more information check manual.\n"
}
,
"regions_overlap": {
"type":
"string",
"description": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. This option controls how overlapping records are determined: \nset to \u0027pos\u0027 or \u00270\u0027 if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); \nset to \u0027record\u0027 or \u00271\u0027 if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, \nand includes indels with POS at the end of a region, which are technically outside the region); \nor set to \u0027variant\u0027 or \u00272\u0027 to include only true overlapping variation (compare the full VCF representation \"TA\u003eT-\" vs the true sequence variation \"A\u003e-\")",
"help_text": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. This option controls how overlapping records are determined: \nset to \u0027pos\u0027 or \u00270\u0027 if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); \nset to \u0027record\u0027 or \u00271\u0027 if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, \nand includes indels with POS at the end of a region, which are technically outside the region); \nor set to \u0027variant\u0027 or \u00272\u0027 to include only true overlapping variation (compare the full VCF representation \"TA\u003eT-\" vs the true sequence variation \"A\u003e-\").\n",
"enum": ["pos", "record", "variant", "0", "1", "2"]
}
,
"site_win": {
"type":
"integer",
"description": "Type: `integer`. Buffer for sorting lines that changed position during realignment",
"help_text": "Type: `integer`. Buffer for sorting lines that changed position during realignment.\n"
}
,
"strict_filter": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. When merging (-m+), merged site is PASS only if all sites being merged PASS",
"help_text": "Type: `boolean_true`, default: `false`. When merging (-m+), merged site is PASS only if all sites being merged PASS."
,
"default": false
}
,
"targets": {
"type":
"string",
"description": "Type: `string`, example: `20:1000000-2000000`. Similar to --regions but streams rather than index-jumps",
"help_text": "Type: `string`, example: `20:1000000-2000000`. Similar to --regions but streams rather than index-jumps."
}
,
"targets_file": {
"type":
"string",
"description": "Type: `file`. Similar to --regions_file but streams rather than index-jumps",
"help_text": "Type: `file`. Similar to --regions_file but streams rather than index-jumps."
}
,
"targets_overlap": {
"type":
"string",
"description": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. Include if POS in the region (0), record overlaps (1), variant overlaps (2)",
"help_text": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. Include if POS in the region (0), record overlaps (1), variant overlaps (2).\nSimilar to --regions_overlap.\n",
"enum": ["pos", "record", "variant", "0", "1", "2"]
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}