onclass/nextflow_schema.json

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "onclass",
  "description": "OnClass is a python package for single-cell cell type annotation. It uses the Cell Ontology to capture the cell type similarity. \nThese similarities enable OnClass to annotate cell types that are never seen in the training data.\n",
  "type": "object",
  "$defs": {
    "inputs": {
      "title": "Inputs",
      "type": "object",
      "description": "Input dataset (query) arguments",
      "properties": {
        "input": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "The input (query) data to be labeled",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
        },
        "modality": {
          "type": "string",
          "description": "Which modality to process.",
          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
          "default": "rna"
        },
        "input_layer": {
          "type": "string",
          "description": "The layer in the input data to be used for cell type annotation if .X is not to be used.",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "input_var_gene_names": {
          "type": "string",
          "description": "The name of the adata var column in the input data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "input_reference_gene_overlap": {
          "type": "integer",
          "description": "The minimum number of genes present in both the reference and query datasets.\n",
          "help_text": "Type: `integer`, multiple: `False`, default: `100`. ",
          "default": 100
        },
        "sanitize_ensembl_ids": {
          "type": "boolean",
          "description": "Whether to sanitize ensembl ids by removing version numbers.",
          "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
          "default": true
        }
      }
    },
    "outputs": {
      "title": "Outputs",
      "type": "object",
      "description": "Output arguments.",
      "properties": {
        "output": {
          "type": "string",
          "format": "path",
          "description": "Output h5mu file.",
          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
          "default": "$id.$key.output.h5mu"
        },
        "output_obs_predictions": {
          "type": "string",
          "description": "In which `.obs` slots to store the predicted information.\n",
          "help_text": "Type: `string`, multiple: `False`, default: `\"onclass_pred\"`. ",
          "default": "onclass_pred"
        },
        "output_obs_probability": {
          "type": "string",
          "description": "In which `.obs` slots to store the probability of the predictions.\n",
          "help_text": "Type: `string`, multiple: `False`, default: `\"onclass_prob\"`. ",
          "default": "onclass_prob"
        },
        "output_compression": {
          "type": "string",
          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
          "enum": [
            "gzip",
            "lzf"
          ]
        }
      }
    },
    "ontology": {
      "title": "Ontology",
      "type": "object",
      "description": "Ontology input files",
      "properties": {
        "cl_nlp_emb_file": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "The .nlp.emb file with the cell type embeddings.",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
        },
        "cl_ontology_file": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "The .ontology file with the cell type ontology.",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
        },
        "cl_obo_file": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "The .obo file with the cell type ontology.",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
        }
      }
    },
    "reference": {
      "title": "Reference",
      "type": "object",
      "description": "Arguments related to the reference dataset.",
      "properties": {
        "reference": {
          "type": "string",
          "format": "path",
          "description": "The reference data to train the CellTypist classifiers on",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"reference.h5mu\"`. "
        },
        "reference_layer": {
          "type": "string",
          "description": "The layer in the reference data to be used for cell type annotation if .X is not to be used.",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "reference_obs_target": {
          "type": "string",
          "description": "The name of the adata obs column in the reference data containing cell type annotations.",
          "help_text": "Type: `string`, multiple: `False`, required, example: `\"cell_ontology_class\"`. "
        },
        "reference_var_gene_names": {
          "type": "string",
          "description": "The name of the adata var column in the reference data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "reference_var_input": {
          "type": "string",
          "description": ".var column containing highly variable genes",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "unknown_celltype": {
          "type": "string",
          "description": "Label for unknown cell types.\n",
          "help_text": "Type: `string`, multiple: `False`, default: `\"Unknown\"`. ",
          "default": "Unknown"
        }
      }
    },
    "model arguments": {
      "title": "Model arguments",
      "type": "object",
      "description": "Model arguments",
      "properties": {
        "model": {
          "type": "string",
          "description": "\"Pretrained model path without a file extension",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "max_iter": {
          "type": "integer",
          "description": "Maximum number of iterations for training the model.",
          "help_text": "Type: `integer`, multiple: `False`, default: `30`. ",
          "default": 30
        }
      }
    },
    "nextflow input-output arguments": {
      "title": "Nextflow input-output arguments",
      "type": "object",
      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
      "properties": {
        "publish_dir": {
          "type": "string",
          "description": "Path to an output directory.",
          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
        }
      }
    }
  },
  "allOf": [
    {
      "$ref": "#/$defs/inputs"
    },
    {
      "$ref": "#/$defs/outputs"
    },
    {
      "$ref": "#/$defs/ontology"
    },
    {
      "$ref": "#/$defs/reference"
    },
    {
      "$ref": "#/$defs/model arguments"
    },
    {
      "$ref": "#/$defs/nextflow input-output arguments"
    }
  ]
}