Files
openpipeline/target/nextflow/annotate/svm_annotation/nextflow_schema.json
CI 4caaaf68ef Build branch openpipeline/v4.0 with version v4.0.0 to openpipeline on branch v4.0 (de02293c)
Build pipeline: openpipelines-bio.openpipeline.v4.0.0-kd9qj

Source commit: de02293c9e

Source message: Bump version to v4.0.0
2026-01-26 11:23:20 +00:00

189 lines
7.1 KiB
JSON

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "svm_annotation",
"description": "Automated cell type annotation tool for scRNA-seq datasets on the basis of SVMs.",
"type": "object",
"$defs": {
"inputs": {
"title": "Inputs",
"type": "object",
"description": "Input dataset (query) arguments",
"properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "The input (query) data to be labeled",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
},
"modality": {
"type": "string",
"description": "Which modality to process.",
"help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
"default": "rna"
},
"input_layer": {
"type": "string",
"description": "The layer in the input data to be used for cell type annotation if .X is not to be used.",
"help_text": "Type: `string`, multiple: `False`. "
},
"input_var_gene_names": {
"type": "string",
"description": "The name of the adata var column in the input data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"input_reference_gene_overlap": {
"type": "integer",
"description": "The minimum number of genes present in both the reference and query datasets.\n",
"help_text": "Type: `integer`, multiple: `False`, default: `100`. ",
"default": 100
},
"sanitize_ensembl_ids": {
"type": "boolean",
"description": "Whether to sanitize ensembl ids by removing version numbers.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
}
}
},
"outputs": {
"title": "Outputs",
"type": "object",
"description": "Output arguments.",
"properties": {
"output": {
"type": "string",
"format": "path",
"description": "Output h5mu file.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
"default": "$id.$key.output.h5mu"
},
"output_obs_prediction": {
"type": "string",
"description": "In which `.obs` slots to store the predicted information.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"svm_pred\"`. ",
"default": "svm_pred"
},
"output_obs_probability": {
"type": "string",
"description": "In which `.obs` slots to store the probability of the predictions.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"svm_probability\"`. ",
"default": "svm_probability"
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",
"lzf"
]
}
}
},
"reference": {
"title": "Reference",
"type": "object",
"description": "Arguments related to the reference dataset.",
"properties": {
"reference": {
"type": "string",
"format": "path",
"description": "The reference data to train the CellTypist classifiers on",
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"reference.h5mu\"`. "
},
"reference_layer": {
"type": "string",
"description": "The layer in the reference data to be used for cell type annotation if .X is not to be used",
"help_text": "Type: `string`, multiple: `False`. "
},
"reference_obs_target": {
"type": "string",
"description": "Key in .obs attribute of reference modality with cell-type information.\n",
"help_text": "Type: `string`, multiple: `False`, required. "
},
"reference_var_gene_names": {
"type": "string",
"description": "The name of the adata var column in the reference data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"reference_var_input": {
"type": "string",
"description": ".var column containing highly variable genes",
"help_text": "Type: `string`, multiple: `False`. "
}
}
},
"model arguments": {
"title": "Model arguments",
"type": "object",
"description": "Model arguments.",
"properties": {
"model": {
"type": "string",
"format": "path",
"description": "Pretrained model in pkl format",
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"pretrained_model.pkl\"`. "
},
"feature_selection": {
"type": "boolean",
"description": "Whether to perform feature selection.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
},
"max_iter": {
"type": "integer",
"description": "Maximum number of iterations for the SVM.",
"help_text": "Type: `integer`, multiple: `False`, default: `5000`. ",
"default": 5000
},
"c_reg": {
"type": "number",
"description": "Regularization parameter for the SVM.",
"help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
"default": 1.0
},
"class_weight": {
"type": "string",
"description": "\"Class weights for the SVM",
"help_text": "Type: `string`, multiple: `False`, default: `\"balanced\"`, choices: ``balanced`, `uniform``. ",
"enum": [
"balanced",
"uniform"
],
"default": "balanced"
}
}
},
"nextflow input-output arguments": {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type": "string",
"description": "Path to an output directory.",
"help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
}
}
}
},
"allOf": [
{
"$ref": "#/$defs/inputs"
},
{
"$ref": "#/$defs/outputs"
},
{
"$ref": "#/$defs/reference"
},
{
"$ref": "#/$defs/model arguments"
},
{
"$ref": "#/$defs/nextflow input-output arguments"
}
]
}