Build pipeline: viash-hub.biobox.main-7dwhr
Source commit: da414e72c6
Source message: Add star solo component (#62)
* add star solo component
* change arguments from camelCase to snake_case
* get rid of multiple_sep
* drop star_solo component and just add arguments to star_align_reads
* Update src/star/star_align_reads/script.py
Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
---------
Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
443 lines
17 KiB
JSON
443 lines
17 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "umi_tools_extract",
|
|
"description": "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed and appended to the read name. Any other barcode, for example a library barcode,\nis left on the read. Can also filter reads by quality or against a whitelist.\n",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"input" : {
|
|
"title": "Input",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"input": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, example: `sample.fastq`. File containing the input data",
|
|
"help_text": "Type: `file`, required, example: `sample.fastq`. File containing the input data."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read2_in": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, example: `sample_R2.fastq`. File containing the input data for the R2 reads (if paired)",
|
|
"help_text": "Type: `file`, example: `sample_R2.fastq`. File containing the input data for the R2 reads (if paired). If provided, a \u003clist of other required arguments\u003e need to be provided."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"bc_pattern": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. The UMI barcode pattern to use, e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides of the read are from the UMI",
|
|
"help_text": "Type: `string`. The UMI barcode pattern to use e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides \nof the read are from the UMI.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"bc_pattern2": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. The UMI barcode pattern to use for read 2",
|
|
"help_text": "Type: `string`. The UMI barcode pattern to use for read 2."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output" : {
|
|
"title": "Output",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"output": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, default: `$id.$key.output.output`. Output file for read 1",
|
|
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output file for read 1."
|
|
,
|
|
"default": "$id.$key.output.output"
|
|
}
|
|
|
|
|
|
,
|
|
"read2_out": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.read2_out.read2_out`. Output file for read 2",
|
|
"help_text": "Type: `file`, default: `$id.$key.read2_out.read2_out`. Output file for read 2."
|
|
,
|
|
"default": "$id.$key.read2_out.read2_out"
|
|
}
|
|
|
|
|
|
,
|
|
"filtered_out": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Write out reads not matching regex pattern or cell barcode whitelist to this file",
|
|
"help_text": "Type: `file`. Write out reads not matching regex pattern or cell barcode whitelist to this file.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"filtered_out2": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Write out read pairs not matching regex pattern or cell barcode whitelist to this file",
|
|
"help_text": "Type: `file`. Write out read pairs not matching regex pattern or cell barcode whitelist to this file.\n"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"extract options" : {
|
|
"title": "Extract Options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"extract_method": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `string`, choices: ``string`, `regex``. UMI pattern to use",
|
|
"help_text": "Type: `string`, example: `string`, choices: ``string`, `regex``. UMI pattern to use. Default: `string`.\n",
|
|
"enum": ["string", "regex"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"error_correct_cell": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Error correct cell barcodes to the whitelist",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Error correct cell barcodes to the whitelist."
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"whitelist": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Whitelist of accepted cell barcodes in tab-separated format, where column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should \nbe corrected to the barcode in column 1",
|
|
"help_text": "Type: `file`. Whitelist of accepted cell barcodes in tab-separated format, where column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should \nbe corrected to the barcode in column 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"blacklist": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Blacklist of cell barcodes to discard",
|
|
"help_text": "Type: `file`. Blacklist of cell barcodes to discard."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"subset_reads": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Only parse the first N reads",
|
|
"help_text": "Type: `integer`. Only parse the first N reads."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"quality_filter_threshold": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Remove reads where any UMI base quality score falls below this threshold",
|
|
"help_text": "Type: `integer`. Remove reads where any UMI base quality score falls below this threshold."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"quality_filter_mask": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. If a UMI base has a quality below this threshold, replace the base with \u0027N\u0027",
|
|
"help_text": "Type: `string`. If a UMI base has a quality below this threshold, replace the base with \u0027N\u0027.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"quality_encoding": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, choices: ``phred33`, `phred64`, `solexa``. Quality score encoding",
|
|
"help_text": "Type: `string`, choices: ``phred33`, `phred64`, `solexa``. Quality score encoding. Choose from:\n * phred33 [33-77]\n * phred64 [64-106]\n * solexa [59-106]\n",
|
|
"enum": ["phred33", "phred64", "solexa"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"reconcile_pairs": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Allow read 2 infile to contain reads not in read 1 infile",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Allow read 2 infile to contain reads not in read 1 infile. This enables support for upstream protocols\nwhere read one contains cell barcodes, and the read pairs have been filtered and corrected without regard\nto the read2.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"three_prime": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. By default the barcode is assumed to be on the 5\u0027 end of the read, but use this option to specify that it is\non the 3\u0027 end instead",
|
|
"help_text": "Type: `boolean_true`, default: `false`. By default the barcode is assumed to be on the 5\u0027 end of the read, but use this option to specify that it is\non the 3\u0027 end instead. This option only works with --extract_method=string since 3\u0027 encoding can be specified\nexplicitly with a regex, e.g. `.*(?P\u003cumi_1\u003e.{5})$`.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"ignore_read_pair_suffixes": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Ignore \"/1\" and \"/2\" read name suffixes",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Ignore \"/1\" and \"/2\" read name suffixes. Note that this option is required if the suffixes are not whitespace\nseparated from the rest of the read name.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"umi_separator": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `_`. The character that separates the UMI in the read name",
|
|
"help_text": "Type: `string`, example: `_`. The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with\nUMI-tools and used other software. Default: `_`\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"grouping_method": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs",
|
|
"help_text": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying\nthe reads with the same mapping position, but treat similar yet nonidentical UMIs differently. Default: `directional`\n",
|
|
"enum": ["unique", "percentile", "cluster", "adjacency", "directional"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"umi_discard_read": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively",
|
|
"help_text": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. Default: `0`\n",
|
|
"enum": [0, 1, 2]
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"common options" : {
|
|
"title": "Common Options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"log": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.log.log`. File with logging information",
|
|
"help_text": "Type: `file`, default: `$id.$key.log.log`. File with logging information."
|
|
,
|
|
"default": "$id.$key.log.log"
|
|
}
|
|
|
|
|
|
,
|
|
"log2stderr": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Send logging information to stderr",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr."
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"verbose": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Log level",
|
|
"help_text": "Type: `integer`. Log level. The higher, the more output."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"error": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.error.error`. File with error information",
|
|
"help_text": "Type: `file`, default: `$id.$key.error.error`. File with error information."
|
|
,
|
|
"default": "$id.$key.error.error"
|
|
}
|
|
|
|
|
|
,
|
|
"temp_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Directory for temporary files",
|
|
"help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"compresslevel": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `6`. Level of Gzip compression to use",
|
|
"help_text": "Type: `integer`, example: `6`. Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9).\nDefault `6`.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"timeit": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.timeit.timeit`. Store timing information in file",
|
|
"help_text": "Type: `file`, default: `$id.$key.timeit.timeit`. Store timing information in file."
|
|
,
|
|
"default": "$id.$key.timeit.timeit"
|
|
}
|
|
|
|
|
|
,
|
|
"timeit_name": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `all`. Name in timing file for this class of jobs",
|
|
"help_text": "Type: `string`, default: `all`. Name in timing file for this class of jobs."
|
|
,
|
|
"default": "all"
|
|
}
|
|
|
|
|
|
,
|
|
"timeit_header": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Add header for timing information",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Add header for timing information."
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"random_seed": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Random seed to initialize number generator with",
|
|
"help_text": "Type: `integer`. Random seed to initialize number generator with."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/input"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/extract options"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/common options"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|