YAML Schema

The following schema is used to validate the YAML configuration with Cerberus:

  # Cerberus validation schema for the operator's YAML configuration.
  # Each field lists its Cerberus rules (`type`, `required`, `allowed`,
  # `default`) together with a `meta.description` holding the help text.

  # Service selector.
  # NOTE(review): the description mentions `job`, but `allowed` below only
  # permits `operator` -- confirm which of the two is intended.
  kind:
    allowed:
      - operator
    required: true
    type: string
    default: operator
    meta:
      description: "Which service are you trying to use? Common kinds: `operator`, `job`"

  # Schema version of the configuration file; quoted so it is always read
  # as a string.
  version:
    allowed:
      - "v1"
    required: true
    type: string
    default: "v1"
    meta:
      description: "Operators may change yaml file schemas from version to version, as well as implementation details. Double check the version to ensure compatibility."

  # Operator type. No `allowed` list is declared here; the description is
  # the only place that states the expected value.
  type:
    required: true
    type: string
    default: pii
    meta:
      description: "Type should always be `pii` when using a pii operator"

  # Operator-specific settings.
  spec:
    required: true
    type: dict
    schema:
      # Location of the data to process.
      input_data:
        required: true
        type: dict
        meta:
          description: "This should be indexed by target column."
        schema:
          url:
            required: true
            type: string
            default: data.csv
            meta:
              description: "The url can be local, or remote. For example: `oci://<bucket>@<namespace>/data.csv`"

      # Destination for the results: a directory `url` plus an optional
      # output file `name`.
      output_directory:
        required: true
        type: dict
        schema:
          url:
            required: true
            type: string
            default: result/
            meta:
              description: "The url can be local, or remote. For example: `oci://<bucket>@<namespace>/`"
          name:
            required: false
            type: string
            default: data-out.csv

      # Optional report settings.
      report:
        required: false
        type: dict
        schema:
          report_filename:
            required: true
            type: string
            default: report.html
            meta:
              description: "Placed into `output_directory` location. Defaults to `report.html`"
          # NOTE(review): the description states a default of `10`, but no
          # `default` rule is declared for this field -- confirm where that
          # default is applied.
          show_rows:
            required: false
            type: number
            meta:
              description: "The number of rows that shows in the report. Defaults to `10`"
          show_sensitive_content:
            required: true
            default: false
            type: boolean
            meta:
              description: "Whether to show sensitive content in the report. Defaults to `False`"

      # Name of the column to process.
      target_column:
        type: string
        required: true
        default: target
        meta:
          description: "Column with user data."

      # List of detectors; every item is a dict with a `name` and the
      # `action` to apply to what it detects.
      detectors:
        type: list
        required: true
        schema:
          type: dict
          schema:
            name:
              required: true
              type: string
              meta:
                description: "The name of the detector. The format is `<type>.<entity>`."
            action:
              required: true
              type: string
              default: mask
              allowed:
                - anonymize
                - mask
                - remove
              meta:
                description: "The way to process the detected entity. Defaults to `mask`."