Build pipeline: viash-hub.biobox.main-62lvg
Source commit: dc7b33d51f
Source message: Bcftools Norm (#144)
* Initial Commit
* config and help.txt
* script.sh
* test template
* More tests and debugging
* test 5 and 6
* test 7, 8, 9
* Update test.sh
* fixing bug on config
* Changelog
* Update config.vsh.yaml
* Requested changes
* Bug fixing
---------
Co-authored-by: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
329 lines
14 KiB
JSON
329 lines
14 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "bcftools_norm",
|
|
"description": "Left-align and normalize indels, check if REF alleles match the reference, split multiallelic sites into multiple rows; \nrecover multiallelics from multiple rows. \n",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"inputs" : {
|
|
"title": "Inputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"input": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required. Input VCF/BCF file",
|
|
"help_text": "Type: `file`, required. Input VCF/BCF file."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"outputs" : {
|
|
"title": "Outputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"output": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, default: `$id.$key.output.output`. Output normalized VCF/BCF file",
|
|
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output normalized VCF/BCF file."
|
|
,
|
|
"default": "$id.$key.output.output"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"options" : {
|
|
"title": "Options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"atomize": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Decompose complex variants (e.g., MNVs become consecutive SNVs)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Decompose complex variants (e.g., MNVs become consecutive SNVs).\n"
|
|
,
|
|
"default": false
|
|
}
|
|
|
|
|
|
,
|
|
"atom_overlaps": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``.`, `*``. Use the star allele (*) for overlapping alleles or set to missing (.)",
|
|
"help_text": "Type: `string`, choices: ``.`, `*``. Use the star allele (*) for overlapping alleles or set to missing (.).\n",
|
|
"enum": [".", "*"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"check_ref": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``e`, `w`, `x`, `s``. Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites",
|
|
"help_text": "Type: `string`, choices: ``e`, `w`, `x`, `s``. Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites.\n",
|
|
"enum": ["e", "w", "x", "s"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"remove_duplicates": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``snps`, `indels`, `both`, `all`, `exact`, `none``. Remove duplicate snps, indels, both, all, exact matches, or none (old -D option)",
|
|
"help_text": "Type: `string`, choices: ``snps`, `indels`, `both`, `all`, `exact`, `none``. Remove duplicate snps, indels, both, all, exact matches, or none (old -D option).",
|
|
"enum": ["snps", "indels", "both", "all", "exact", "none"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"fasta_ref": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Reference fasta sequence file",
|
|
"help_text": "Type: `file`. Reference fasta sequence file."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"force": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Try to proceed even if malformed tags are encountered",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Try to proceed even if malformed tags are encountered. \nExperimental, use at your own risk.\n"
|
|
,
|
|
"default": false
|
|
}
|
|
|
|
|
|
,
|
|
"keep_sum": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Keep vector sum constant when splitting multiallelics (see github issue #360)",
|
|
"help_text": "Type: `string`. Keep vector sum constant when splitting multiallelics (see github issue #360).\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"multiallelics": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``+snps`, `+indels`, `+both`, `+any`, `-snps`, `-indels`, `-both`, `-any``. Split multiallelics (-) or join biallelics (+), type: snps, indels, both, any [default: both]",
|
|
"help_text": "Type: `string`, choices: ``+snps`, `+indels`, `+both`, `+any`, `-snps`, `-indels`, `-both`, `-any``. Split multiallelics (-) or join biallelics (+), type: snps, indels, both, any [default: both].\n",
|
|
"enum": ["+snps", "+indels", "+both", "+any", "-snps", "-indels", "-both", "-any"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"no_version": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Do not append version and command line information to the header",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Do not append version and command line information to the header."
|
|
,
|
|
"default": false
|
|
}
|
|
|
|
|
|
,
|
|
"do_not_normalize": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Do not normalize indels (with -m or -c s)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Do not normalize indels (with -m or -c s)."
|
|
,
|
|
"default": false
|
|
}
|
|
|
|
|
|
,
|
|
"output_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``u`, `z`, `b`, `v``. Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed BCF\n v: uncompressed VCF\n",
|
|
"help_text": "Type: `string`, choices: ``u`, `z`, `b`, `v``. Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed BCF\n v: uncompressed VCF\n",
|
|
"enum": ["u", "z", "b", "v"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"old_rec_tag": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Annotate modified records with INFO/STR indicating the original variant",
|
|
"help_text": "Type: `string`. Annotate modified records with INFO/STR indicating the original variant."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"regions": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `20:1000000-2000000`. Restrict to comma-separated list of regions",
|
|
"help_text": "Type: `string`, example: `20:1000000-2000000`. Restrict to comma-separated list of regions. \nFollowing formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,\u2026\u200b].\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"regions_file": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Restrict to regions listed in a file",
|
|
"help_text": "Type: `file`. Restrict to regions listed in a file. \nRegions can be specified either on a VCF, BED, or tab-delimited file (the default). \nFor more information check manual.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"regions_overlap": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. This option controls how overlapping records are determined: \nset to \u0027pos\u0027 or \u00270\u0027 if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); \nset to \u0027record\u0027 or \u00271\u0027 if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, \nand includes indels with POS at the end of a region, which are technically outside the region); \nor set to \u0027variant\u0027 or \u00272\u0027 to include only true overlapping variation (compare the full VCF representation \"TA\u003eT-\" vs the true sequence variation \"A\u003e-\")",
|
|
"help_text": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. This option controls how overlapping records are determined: \nset to \u0027pos\u0027 or \u00270\u0027 if the VCF record has to have POS inside a region (this corresponds to the default behavior of -t/-T); \nset to \u0027record\u0027 or \u00271\u0027 if also overlapping records with POS outside a region should be included (this is the default behavior of -r/-R, \nand includes indels with POS at the end of a region, which are technically outside the region); \nor set to \u0027variant\u0027 or \u00272\u0027 to include only true overlapping variation (compare the full VCF representation \"TA\u003eT-\" vs the true sequence variation \"A\u003e-\").\n",
|
|
"enum": ["pos", "record", "variant", "0", "1", "2"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"site_win": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Buffer for sorting lines that changed position during realignment",
|
|
"help_text": "Type: `integer`. Buffer for sorting lines that changed position during realignment.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"strict_filter": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. When merging (-m+), merged site is PASS only if all sites being merged PASS",
|
|
"help_text": "Type: `boolean_true`, default: `false`. When merging (-m+), merged site is PASS only if all sites being merged PASS."
|
|
,
|
|
"default": false
|
|
}
|
|
|
|
|
|
,
|
|
"targets": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `20:1000000-2000000`. Similar to --regions but streams rather than index-jumps",
|
|
"help_text": "Type: `string`, example: `20:1000000-2000000`. Similar to --regions but streams rather than index-jumps."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"targets_file": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Similar to --regions_file but streams rather than index-jumps",
|
|
"help_text": "Type: `file`. Similar to --regions_file but streams rather than index-jumps."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"targets_overlap": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. Include if POS in the region (0), record overlaps (1), variant overlaps (2)",
|
|
"help_text": "Type: `string`, choices: ``pos`, `record`, `variant`, `0`, `1`, `2``. Include if POS in the region (0), record overlaps (1), variant overlaps (2).\nSimilar to --regions_overlap.\n",
|
|
"enum": ["pos", "record", "variant", "0", "1", "2"]
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/inputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/outputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/options"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|