#!/usr/bin/env python# -*- coding: utf-8; -*-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/fromdataclassesimportdataclass,fieldfromtypingimportList
class WrongEntityFormatLabelNotString(ValueError):
    """Error signalling that an entity label is not a string."""

    def __init__(self) -> None:
        message = "Wrong entity format. Label is not a String."
        super().__init__(message)
class WrongEntityFormatOffsetNotInteger(ValueError):
    """Error signalling that an entity offset is not an integer."""

    def __init__(self) -> None:
        message = "Wrong entity format. Offset is not an Integer."
        super().__init__(message)
class WrongEntityFormatLengthNotInteger(ValueError):
    """Error signalling that an entity length is not an integer."""

    def __init__(self) -> None:
        message = "Wrong entity format. Length is not an Integer."
        super().__init__(message)
class WrongEntityFormatOffsetIsNegative(ValueError):
    """Error signalling that an entity offset is negative."""

    def __init__(self) -> None:
        message = "Wrong entity format. Offset is not a Nonnegative."
        super().__init__(message)
class WrongEntityFormatLengthIsNegative(ValueError):
    """Error signalling that an entity length is negative."""

    def __init__(self) -> None:
        message = "Wrong entity format. Length is not a Nonnegative."
        super().__init__(message)
class WrongEntityFormatLabelIsEmpty(ValueError):
    """Error signalling that an entity label is the empty string."""

    def __init__(self) -> None:
        message = "Wrong entity format. Label is empty."
        super().__init__(message)
@dataclass
class NERItem:
    """NERItem class which is a representation of a token span.

    The instance is validated once, right after construction
    (see ``__post_init__``); later attribute assignments are not re-checked.

    Attributes
    ----------
    label: str
        Entity name. Must be a non-empty string.
    offset: int
        The token span's entity start index position in the text.
        Must be nonnegative; a float holding a whole number is accepted.
    length: int
        Length of the token span.
        Must be nonnegative; a float holding a whole number is accepted.
    """

    label: str = ""
    offset: int = 0
    length: int = 0

    @staticmethod
    def _is_integral(value) -> bool:
        """Return True for an ``int`` or a ``float`` holding a whole number."""
        return isinstance(value, int) or (
            isinstance(value, float) and value.is_integer()
        )

    @staticmethod
    def _as_index(value):
        """Convert a whole-number ``float`` to ``int``; return anything else as is."""
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    def _validate(self):
        """Validates the instance.

        The checks run in a fixed order, so the first failing one decides
        which exception is raised.

        Raises
        ------
        WrongEntityFormatLabelNotString
            If the label is not a string.
        WrongEntityFormatOffsetNotInteger
            If the offset is neither an int nor a whole-number float.
        WrongEntityFormatLengthNotInteger
            If the length is neither an int nor a whole-number float.
        WrongEntityFormatOffsetIsNegative
            If the offset is negative.
        WrongEntityFormatLengthIsNegative
            If the length is negative.
        WrongEntityFormatLabelIsEmpty
            If the label is an empty string.
        """
        if not isinstance(self.label, str):
            raise WrongEntityFormatLabelNotString()
        if not self._is_integral(self.offset):
            raise WrongEntityFormatOffsetNotInteger()
        if not self._is_integral(self.length):
            raise WrongEntityFormatLengthNotInteger()
        if self.offset < 0:
            raise WrongEntityFormatOffsetIsNegative()
        if self.length < 0:
            raise WrongEntityFormatLengthIsNegative()
        if self.label == "":
            raise WrongEntityFormatLabelIsEmpty()

    def __post_init__(self):
        """Validate the freshly initialised instance."""
        self._validate()

    def to_spacy(self) -> tuple:
        """Converts one NERItem to the spacy format.

        Whole-number floats accepted by validation (e.g. ``1.0``) are
        converted to ``int`` so that the resulting offsets are real integers.

        Returns
        -------
        Tuple
            NERItem in the spacy format: ``(start, end, label)`` where
            ``end`` is ``offset + length``.
        """
        start = self._as_index(self.offset)
        end = start + self._as_index(self.length)
        return (start, end, self.label)
@dataclass
class NERItems:
    """Container wrapping a list of NERItem objects.

    Attributes
    ----------
    items: List[NERItem]
        The wrapped NERItem objects; defaults to a new empty list.
    """

    items: List[NERItem] = field(default_factory=list)

    def __getitem__(self, index: int) -> NERItem:
        """Return the result of indexing the underlying ``items`` list."""
        selected = self.items[index]
        return selected

    def to_spacy(self) -> List[tuple]:
        """Converts NERItems to the spacy format.

        Returns
        -------
        List[tuple]
            One tuple per item, in the same order as ``items``, each
            produced by that item's ``to_spacy`` method.
        """
        converted = []
        for entry in self.items:
            converted.append(entry.to_spacy())
        return converted