Spaces:
Build error
Build error
| import logging | |
| from typing import Optional, List, Tuple, Set | |
| from presidio_analyzer import ( | |
| RecognizerResult, | |
| LocalRecognizer, | |
| AnalysisExplanation, | |
| ) | |
| from presidio_analyzer.nlp_engine import NlpArtifacts | |
| from presidio_analyzer.predefined_recognizers.spacy_recognizer import SpacyRecognizer | |
| logger = logging.getLogger("presidio-analyzer") | |
class CustomSpacyRecognizer(LocalRecognizer):
    """
    Recognize PII entities from the NER spans of an NLP pipeline.

    This recognizer does not run a model itself: ``analyze`` reads the entity
    spans carried by the ``nlp_artifacts`` it receives and reports every span
    whose label is mapped to a requested entity type by
    ``check_label_groups``. Every result gets the same fixed score.

    :param supported_language: Language this recognizer supports
    :param supported_entities: Entity types to report; ``ENTITIES`` when
        omitted or empty
    :param check_label_groups: List of ``(entity types, model labels)`` set
        pairs; ``CHECK_LABEL_GROUPS`` when omitted or empty
    :param context: Context words, forwarded to the base recognizer
    :param ner_strength: Score assigned to every reported result
    """

    # Entity types reported when the caller does not restrict them.
    ENTITIES = [
        "LOCATION",
        "PERSON",
        "NRP",
        "ORGANIZATION",
        "DATE_TIME",
    ]

    # Template for the textual explanation; "{}" receives the span's label.
    DEFAULT_EXPLANATION = "Identified as {} by Spacy's Named Entity Recognition (Privy-trained)"

    # Each pair is (requested entity types, model labels accepted for them).
    CHECK_LABEL_GROUPS = [
        ({"LOCATION"}, {"LOC", "LOCATION", "STREET_ADDRESS", "COORDINATE"}),
        ({"PERSON"}, {"PER", "PERSON"}),
        ({"NRP"}, {"NORP", "NRP"}),
        ({"ORGANIZATION"}, {"ORG"}),
        ({"DATE_TIME"}, {"DATE_TIME"}),
    ]

    # Model identifier per language code.
    MODEL_LANGUAGES = {
        "en": "beki/en_spacy_pii_distilbert",
    }

    # Model label -> Presidio entity type.
    PRESIDIO_EQUIVALENCES = {
        "PER": "PERSON",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        # "NORP" is the label used in CHECK_LABEL_GROUPS above.
        "NORP": "NRP",
        # Misspelled key from the original table, kept so that any existing
        # lookup by this spelling keeps working.
        "NROP": "NRP",
        "DATE_TIME": "DATE_TIME",
    }

    def __init__(
        self,
        supported_language: str = "en",
        supported_entities: Optional[List[str]] = None,
        check_label_groups: Optional[List[Tuple[Set, Set]]] = None,
        context: Optional[List[str]] = None,
        ner_strength: float = 0.85,
    ):
        self.ner_strength = ner_strength
        # An empty or missing argument falls back to the class-level default.
        self.check_label_groups = (
            check_label_groups if check_label_groups else self.CHECK_LABEL_GROUPS
        )
        supported_entities = supported_entities if supported_entities else self.ENTITIES
        super().__init__(
            supported_entities=supported_entities,
            supported_language=supported_language,
            # Forward the context words; previously they were accepted by
            # this constructor and then silently discarded.
            context=context,
        )

    def load(self) -> None:
        """Do nothing; ``analyze`` works on NLP artifacts supplied by the caller."""
        pass

    def get_supported_entities(self) -> List[str]:
        """
        Return supported entities by this model.

        :return: List of the supported entities.
        """
        return self.supported_entities

    def build_spacy_explanation(
        self, original_score: float, explanation: str
    ) -> AnalysisExplanation:
        """
        Create explanation for why this result was detected.

        :param original_score: Score given by this recognizer
        :param explanation: Explanation string
        :return: Explanation object naming this recognizer class, the score
            and the textual explanation
        """
        return AnalysisExplanation(
            recognizer=self.__class__.__name__,
            original_score=original_score,
            textual_explanation=explanation,
        )

    def analyze(
        self,
        text: str,
        entities: List[str],
        nlp_artifacts: Optional[NlpArtifacts] = None,
    ) -> List[RecognizerResult]:
        """
        Report the NER spans in ``nlp_artifacts`` that match ``entities``.

        :param text: Analyzed text; not read here, span offsets come from
            the artifacts
        :param entities: Entity types requested by the caller; types not in
            ``supported_entities`` are ignored
        :param nlp_artifacts: NLP output whose ``entities`` spans are scanned
        :return: One result per (requested entity, matching span) pair, each
            scored with ``ner_strength``; empty when no artifacts are given
        """
        results = []
        if not nlp_artifacts:
            logger.warning("Skipping SpaCy, nlp artifacts not provided...")
            return results

        ner_entities = nlp_artifacts.entities

        for entity in entities:
            if entity not in self.supported_entities:
                continue
            for ent in ner_entities:
                if not self.__check_label(entity, ent.label_, self.check_label_groups):
                    continue
                textual_explanation = self.DEFAULT_EXPLANATION.format(ent.label_)
                explanation = self.build_spacy_explanation(
                    self.ner_strength, textual_explanation
                )
                results.append(
                    RecognizerResult(
                        entity_type=entity,
                        start=ent.start_char,
                        end=ent.end_char,
                        score=self.ner_strength,
                        analysis_explanation=explanation,
                        recognition_metadata={
                            RecognizerResult.RECOGNIZER_NAME_KEY: self.name
                        },
                    )
                )
        return results

    # Must be a staticmethod: it takes no ``self`` but is invoked through the
    # instance, which would otherwise pass the instance as an extra argument.
    @staticmethod
    def __check_label(
        entity: str, label: str, check_label_groups: List[Tuple[Set, Set]]
    ) -> bool:
        """
        Tell whether a model label counts as the given entity type.

        :param entity: Requested entity type
        :param label: Label attached to the span by the model
        :param check_label_groups: ``(entity types, model labels)`` set pairs
        :return: True if some pair contains both ``entity`` and ``label``
        """
        return any(
            entity in egrp and label in lgrp for egrp, lgrp in check_label_groups
        )