svm_annotation/nextflow_schema.json

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "svm_annotation",
  "description": "Automated cell type annotation tool for scRNA-seq datasets on the basis of SVMs.",
  "type": "object",
  "$defs": {
    "inputs": {
      "title": "Inputs",
      "type": "object",
      "description": "Input dataset (query) arguments",
      "properties": {
        "input": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "The input (query) data to be labeled",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
        },
        "modality": {
          "type": "string",
          "description": "Which modality to process.",
          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
          "default": "rna"
        },
        "input_layer": {
          "type": "string",
          "description": "The layer in the input data to be used for cell type annotation if .X is not to be used.",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "input_var_gene_names": {
          "type": "string",
          "description": "The name of the adata var column in the input data containing gene names; when this argument is not provided, the var index will be used.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "input_reference_gene_overlap": {
          "type": "integer",
          "description": "The minimum number of genes present in both the reference and query datasets.\n",
          "help_text": "Type: `integer`, multiple: `False`, default: `100`. ",
          "default": 100
        },
        "sanitize_ensembl_ids": {
          "type": "boolean",
          "description": "Whether to sanitize ensembl ids by removing version numbers.",
          "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
          "default": true
        }
      }
    },
    "outputs": {
      "title": "Outputs",
      "type": "object",
      "description": "Output arguments.",
      "properties": {
        "output": {
          "type": "string",
          "format": "path",
          "description": "Output h5mu file.",
          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
          "default": "$id.$key.output.h5mu"
        },
        "output_obs_prediction": {
          "type": "string",
          "description": "In which `.obs` slots to store the predicted information.\n",
          "help_text": "Type: `string`, multiple: `False`, default: `\"svm_pred\"`. ",
          "default": "svm_pred"
        },
        "output_obs_probability": {
          "type": "string",
          "description": "In which `.obs` slots to store the probability of the predictions.\n",
          "help_text": "Type: `string`, multiple: `False`, default: `\"svm_probability\"`. ",
          "default": "svm_probability"
        },
        "output_compression": {
          "type": "string",
          "description": "Compression format to use for the output AnnData and/or MuData objects.\nBy default no compression is applied.\n",
          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
          "enum": [
            "gzip",
            "lzf"
          ]
        }
      }
    },
    "reference": {
      "title": "Reference",
      "type": "object",
      "description": "Arguments related to the reference dataset.",
      "properties": {
        "reference": {
          "type": "string",
          "format": "path",
          "description": "The reference data to train the SVM classifier on.",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"reference.h5mu\"`. "
        },
        "reference_layer": {
          "type": "string",
          "description": "The layer in the reference data to be used for cell type annotation if .X is not to be used",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "reference_obs_target": {
          "type": "string",
          "description": "Key in .obs attribute of reference modality with cell-type information.\n",
          "help_text": "Type: `string`, multiple: `False`, required. "
        },
        "reference_var_gene_names": {
          "type": "string",
          "description": "The name of the adata var column in the reference data containing gene names; when this argument is not provided, the var index will be used.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "reference_var_input": {
          "type": "string",
          "description": ".var column containing highly variable genes",
          "help_text": "Type: `string`, multiple: `False`. "
        }
      }
    },
    "model arguments": {
      "title": "Model arguments",
      "type": "object",
      "description": "Model arguments.",
      "properties": {
        "model": {
          "type": "string",
          "format": "path",
          "description": "Pretrained model in pkl format",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"pretrained_model.pkl\"`. "
        },
        "feature_selection": {
          "type": "boolean",
          "description": "Whether to perform feature selection.",
          "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
          "default": true
        },
        "max_iter": {
          "type": "integer",
          "description": "Maximum number of iterations for the SVM.",
          "help_text": "Type: `integer`, multiple: `False`, default: `5000`. ",
          "default": 5000
        },
        "c_reg": {
          "type": "number",
          "description": "Regularization parameter for the SVM.",
          "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
          "default": 1.0
        },
        "class_weight": {
          "type": "string",
          "description": "Class weights for the SVM.",
          "help_text": "Type: `string`, multiple: `False`, default: `\"balanced\"`, choices: ``balanced`, `uniform``. ",
          "enum": [
            "balanced",
            "uniform"
          ],
          "default": "balanced"
        }
      }
    },
    "nextflow input-output arguments": {
      "title": "Nextflow input-output arguments",
      "type": "object",
      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
      "properties": {
        "publish_dir": {
          "type": "string",
          "description": "Path to an output directory.",
          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
        }
      }
    }
  },
  "allOf": [
    {
      "$ref": "#/$defs/inputs"
    },
    {
      "$ref": "#/$defs/outputs"
    },
    {
      "$ref": "#/$defs/reference"
    },
    {
      "$ref": "#/$defs/model arguments"
    },
    {
      "$ref": "#/$defs/nextflow input-output arguments"
    }
  ]
}