Spaces:
Sleeping
Sleeping
| import unicodedata | |
| from collections import OrderedDict | |
| from functools import total_ordering | |
def get_match_ordered_dict():
    """Return the ordered mapping of match field names to their types.

    The insertion order of the keys is the canonical field order, and each
    value is the callable (``str``, ``int`` or ``list``) associated with
    that field. A new ``OrderedDict`` is created on every call, so callers
    may modify the result without affecting later calls.
    """
    field_types = OrderedDict()
    field_types['ruleId'] = str
    field_types['message'] = str
    field_types['replacements'] = list
    field_types['offsetInContext'] = int
    field_types['context'] = str
    field_types['offset'] = int
    field_types['errorLength'] = int
    field_types['category'] = str
    field_types['ruleIssueType'] = str
    field_types['sentence'] = str
    return field_types
| """ Sample match JSON: | |
| { | |
| 'message': 'Possible spelling mistake found.', | |
| 'shortMessage': 'Spelling mistake', | |
| 'replacements': [{'value': 'newt'}, {'value': 'not'}, {'value': 'new', 'shortDescription': 'having just been made'}, {'value': 'news'}, {'value': 'foot', 'shortDescription': 'singular'}, {'value': 'root', 'shortDescription': 'underground organ of a plant'}, {'value': 'boot'}, {'value': 'noon'}, {'value': 'loot', 'shortDescription': 'plunder'}, {'value': 'moot'}, {'value': 'Root'}, {'value': 'soot', 'shortDescription': 'carbon black'}, {'value': 'newts'}, {'value': 'nook'}, {'value': 'Lieut'}, {'value': 'coot'}, {'value': 'hoot'}, {'value': 'toot'}, {'value': 'snoot'}, {'value': 'neut'}, {'value': 'nowt'}, {'value': 'Noor'}, {'value': 'noob'}], | |
| 'offset': 8, | |
| 'length': 4, | |
| 'context': {'text': 'This is noot okay. ', 'offset': 8, 'length': 4}, 'sentence': 'This is noot okay.', | |
| 'type': {'typeName': 'Other'}, | |
| 'rule': {'id': 'MORFOLOGIK_RULE_EN_US', 'description': 'Possible spelling mistake', 'issueType': 'misspelling', 'category': {'id': 'TYPOS', 'name': 'Possible Typo'}}, | |
| 'ignoreForIncompleteSentence': False, | |
| 'contextForSureMatch': 0 | |
| } | |
| """ | |
def auto_type(obj):
    """Convert *obj* to ``int`` or ``float`` when possible, else return it.

    ``int`` is tried first, then ``float``. If neither conversion works the
    original object is returned unchanged.

    Objects that cannot be converted at all (``None``, lists, ...) raise
    ``TypeError`` rather than ``ValueError``, and ``int(float('inf'))``
    raises ``OverflowError``; all three are treated as "not convertible"
    so the function never raises for an unconvertible input.

    Note: because ``int`` is tried first, a float argument is truncated
    (``auto_type(3.7)`` returns ``3``).

    Args:
        obj: Any object, typically a string read from external data.

    Returns:
        An ``int``, a ``float``, or *obj* itself.
    """
    for convert in (int, float):
        try:
            return convert(obj)
        except (TypeError, ValueError, OverflowError):
            # Not representable with this type; try the next one.
            continue
    return obj
@total_ordering
class Match:
    """Hold information about where a rule matches text.

    An instance is built from one raw match dictionary. Only the fields
    named by ``get_match_ordered_dict()`` are stored; each value is coerced
    with the type listed there, and assignments to any other attribute
    name are silently ignored.

    Iterating an instance yields its field values in that fixed order, and
    equality / ordering compare those value lists. ``total_ordering``
    derives ``<=``, ``>`` and ``>=`` from ``__eq__`` and ``__lt__``.
    """

    # Defining __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None

    def __init__(self, attrib):
        """Flatten a raw match dictionary into attributes on ``self``.

        Args:
            attrib: Mapping with at least the keys ``rule`` (containing
                ``id``, ``issueType`` and ``category['id']``), ``context``
                (containing ``text`` and ``offset``), ``replacements`` (a
                list of dicts each having ``value``), ``length`` and
                ``message``. The mapping itself is not modified.

        Raises:
            KeyError: If one of the keys listed above is missing.
        """
        # Work on a shallow copy: only top-level keys are rebound or
        # removed below, so the caller's dictionary stays untouched.
        attrib = dict(attrib)
        # Process rule.
        rule = attrib.pop('rule')
        attrib['category'] = rule['category']['id']
        attrib['ruleId'] = rule['id']
        attrib['ruleIssueType'] = rule['issueType']
        # Process context.
        context = attrib['context']
        attrib['offsetInContext'] = context['offset']
        attrib['context'] = context['text']
        # Process replacements: keep only the suggested strings.
        attrib['replacements'] = [r['value'] for r in attrib['replacements']]
        # Rename error length.
        attrib['errorLength'] = attrib['length']
        # Normalize unicode.
        attrib['message'] = unicodedata.normalize("NFKC", attrib['message'])
        # Store objects on self; __setattr__ drops keys that are not
        # known fields (for example 'length' and 'type').
        for key, value in attrib.items():
            setattr(self, key, value)

    def __repr__(self):
        """Return ``Match({...})`` listing the stored fields in field order."""
        names = list(get_match_ordered_dict())
        # Append any extra names found in __dict__ that are not known fields.
        names += list(set(self.__dict__).difference(names))
        shown = [
            name for name in names
            if name in self.__dict__ and not name.startswith('_')
        ]
        body = ', '.join(
            '{!r}: {!r}'.format(name, getattr(self, name)) for name in shown
        )
        return '{}({{{}}})'.format(self.__class__.__name__, body)

    def __str__(self):
        """Return a multi-line, human-readable description of the match.

        The last two lines are the context string and a row of ``^``
        characters placed under the matched span.
        """
        s = 'Offset {}, length {}, Rule ID: {}'.format(
            self.offset, self.errorLength, self.ruleId)
        if self.message:
            s += '\nMessage: {}'.format(self.message)
        if self.replacements:
            s += '\nSuggestion: {}'.format('; '.join(self.replacements))
        s += '\n{}\n{}'.format(
            self.context, ' ' * self.offsetInContext + '^' * self.errorLength
        )
        return s

    def matchedText(self):
        """Return the text that garnered the error, without its context."""
        start = self.offsetInContext
        return self.context[start:start + self.errorLength]

    def __eq__(self, other):
        """Compare field values, in field order, with those of *other*.

        *other* may be any iterable. A non-iterable operand yields
        ``NotImplemented`` so that ``match == None`` evaluates to ``False``
        instead of raising ``TypeError``.
        """
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) == other_values

    def __lt__(self, other):
        """Order by field values, in field order, against those of *other*.

        A non-iterable operand yields ``NotImplemented``.
        """
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) < other_values

    def __iter__(self):
        """Iterate over the field values in the canonical field order.

        Fields that were never set yield ``None`` (see ``__getattr__``).
        """
        return iter(getattr(self, attr) for attr in get_match_ordered_dict())

    def __setattr__(self, key, value):
        """Store *value* under *key* after coercing it to the field's type.

        Names that are not known fields are ignored without error.
        """
        coerce = get_match_ordered_dict().get(key)
        if coerce is None:
            return
        super().__setattr__(key, coerce(value))

    def __getattr__(self, name):
        """Fallback lookup, called only when normal attribute access fails.

        Returns:
            ``None`` for a known field that has not been set.

        Raises:
            AttributeError: If *name* is not a known field.
        """
        if name not in get_match_ordered_dict():
            raise AttributeError('{!r} object has no attribute {!r}'
                                 .format(self.__class__.__name__, name))
        return None