Spaces:
Runtime error
Runtime error
| """ | |
| Pruthi2019: Combating Adversarial Misspellings with Robust Word Recognition | |
| =========================================================================== | |
| """ | |
| from textattack import Attack | |
| from textattack.constraints.overlap import MaxWordsPerturbed | |
| from textattack.constraints.pre_transformation import ( | |
| MinWordLength, | |
| RepeatModification, | |
| StopwordModification, | |
| ) | |
| from textattack.goal_functions import UntargetedClassification | |
| from textattack.search_methods import GreedySearch | |
| from textattack.transformations import ( | |
| CompositeTransformation, | |
| WordSwapNeighboringCharacterSwap, | |
| WordSwapQWERTY, | |
| WordSwapRandomCharacterDeletion, | |
| WordSwapRandomCharacterInsertion, | |
| ) | |
| from .attack_recipe import AttackRecipe | |
class Pruthi2019(AttackRecipe):
    """An implementation of the attack used in "Combating Adversarial
    Misspellings with Robust Word Recognition", Pruthi et al., 2019.

    This attack focuses on a small number of character-level changes that
    simulate common typos. It combines:

        - Swapping neighboring characters
        - Deleting characters
        - Inserting characters
        - Swapping characters for adjacent keys on a QWERTY keyboard.

    https://arxiv.org/abs/1905.11268

    :param model_wrapper: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """

    # ``build`` takes no ``self``/``cls``; marking it static makes calls on
    # an instance behave the same as calls on the class.
    @staticmethod
    def build(model_wrapper, max_num_word_swaps=1):
        """Assemble the attack from its four components.

        :param model_wrapper: Model to attack; passed to the goal function.
        :param max_num_word_swaps: Maximum number of words that may be
            perturbed (default ``1``).
        :return: An ``Attack`` built from the goal function, constraints,
            transformation and search method configured below.
        """
        # a combination of 4 different character-based transforms
        # ignore the first and last letter of each word, as in the paper
        char_swap_options = {
            "random_one": False,
            "skip_first_char": True,
            "skip_last_char": True,
        }
        transformation = CompositeTransformation(
            [
                WordSwapNeighboringCharacterSwap(**char_swap_options),
                WordSwapRandomCharacterDeletion(**char_swap_options),
                WordSwapRandomCharacterInsertion(**char_swap_options),
                WordSwapQWERTY(**char_swap_options),
            ]
        )

        # only edit words of length >= 4, edit max_num_word_swaps words.
        # note that we also are not editing the same word twice, so
        # max_num_word_swaps is really the max number of character
        # changes that can be made. The paper looks at 1 and 2 char attacks.
        constraints = [
            MinWordLength(min_length=4),
            StopwordModification(),
            MaxWordsPerturbed(max_num_words=max_num_word_swaps),
            RepeatModification(),
        ]

        # untargeted attack
        goal_function = UntargetedClassification(model_wrapper)
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation, search_method)