1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

Add CheckList Recipe

This commit is contained in:
Hanyu Liu
2020-09-01 10:35:25 -04:00
parent 0ece7ca4e9
commit 96c5feac64
10 changed files with 25425 additions and 0 deletions

View File

@@ -22,3 +22,5 @@ tokenizers==0.8.1-rc1
tqdm
visdom
wandb
word2number
num2words

View File

@@ -16,3 +16,4 @@ from .pwws_ren_2019 import PWWSRen2019
from .iga_wang_2019 import IGAWang2019
from .pruthi_2019 import Pruthi2019
from .pso_zang_2020 import PSOZang2020
from .checklist_ribeiro_2020 import Checklist2020

View File

@@ -0,0 +1,50 @@
from textattack.constraints.pre_transformation import RepeatModification
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.shared.attack import Attack
from textattack.transformations import (
CompositeTransformation,
WordSwapContract,
WordSwapExtend,
WordSwapChangeNumber,
WordSwapChangeLocation,
WordSwapChangeName,
)
from .attack_recipe import AttackRecipe
class Checklist2020(AttackRecipe):
    """Attack recipe based on "Beyond Accuracy: Behavioral Testing of NLP
    models with CheckList" (Ribeiro et al., 2020).

    https://arxiv.org/abs/2005.04118

    The recipe draws on the Invariance Testing method of the paper and
    combines the following perturbations:

        - Contraction
        - Extension
        - Changing names, numbers and locations
        - possibly negation (not yet implemented)

    The idea is to alter elements of a sentence without changing what the
    sentence means.
    """

    @staticmethod
    def build(model):
        """Assemble the attack for ``model``.

        :param model: Model to attack.
        :return: An ``Attack`` wired with an untargeted classification goal,
            a greedy search and the composite CheckList transformation.
        """
        # Every word-level perturbation the recipe may apply, as one
        # composite transformation.
        word_swaps = [
            WordSwapExtend(),
            WordSwapContract(),
            WordSwapChangeName(),
            WordSwapChangeNumber(),
            WordSwapChangeLocation(),
        ]
        transformation = CompositeTransformation(word_swaps)

        # Without this constraint, extension and contraction could keep
        # undoing each other's edits and loop forever.
        constraints = [RepeatModification()]

        # Untargeted goal driven by a greedy search.
        return Attack(
            UntargetedClassification(model),
            constraints,
            transformation,
            GreedySearch(),
        )

View File

@@ -15,6 +15,7 @@ ATTACK_RECIPE_NAMES = {
"iga": "textattack.attack_recipes.IGAWang2019",
"pruthi": "textattack.attack_recipes.Pruthi2019",
"pso": "textattack.attack_recipes.PSOZang2020",
"checklist": "textattack.attack_recipes.Checklist2020",
}
#

View File

@@ -17,6 +17,11 @@ from .word_swap_masked_lm import WordSwapMaskedLM
from .word_swap_random_word import RandomSwap
from .random_synonym_insertion import RandomSynonymInsertion
from .word_swap_qwerty import WordSwapQWERTY
from .word_swap_contract import WordSwapContract
from .word_swap_extend import WordSwapExtend
from .word_swap_change_number import WordSwapChangeNumber
from .word_swap_change_location import WordSwapChangeLocation
from .word_swap_change_name import WordSwapChangeName
# White-box transformations
from .word_swap_gradient_based import WordSwapGradientBased

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,160 @@
import numpy as np
from word2number import w2n
from num2words import num2words
from textattack.transformations import Transformation
def idx_to_words(ls, words):
    """Pair every index cluster with the phrase it covers.

    :param ls: List of index clusters, as produced by ``cluster_idx``.
    :param words: Sequence of words that the indices point into.
    :return: A list of ``[cluster, phrase]`` pairs, where ``phrase`` is the
        words of the cluster joined by single spaces.
    """

    def _phrase(cluster):
        # The first word is looked up on its own so that a single-word
        # cluster yields that word unchanged.
        first = words[cluster[0]]
        rest = [words[position] for position in cluster[1:]]
        return " ".join([first, *rest]) if rest else first

    return [[cluster, _phrase(cluster)] for cluster in ls]
def cluster_idx(idx_ls):
    """Split a list of indices into runs of adjacent indices.

    A new run starts whenever an index is not exactly one greater than the
    index before it, e.g. ``[1, 2, 3, 5, 7, 8]`` becomes
    ``[[1, 2, 3], [5], [7, 8]]``.

    :param idx_ls: List of indices.
    :return: List of sub-lists, each holding one run of adjacent indices.
    """
    clusters = []
    for position, current in enumerate(idx_ls):
        if position and current - 1 == idx_ls[position - 1]:
            # Directly follows the previous index: extend the open run.
            clusters[-1].append(current)
        else:
            clusters.append([current])
    return clusters
class WordSwapChangeNumber(Transformation):
    """Transformation that recognizes numbers in a sentence and returns
    sentences with altered numbers.

    Numbers written with digits are replaced by digits; numbers that are
    spelled out are replaced by spelled-out numbers.
    """

    def __init__(self, max_change=1, n=3, **kwargs):
        """Store the sampling parameters.

        :param max_change: Maximum percent of change (1 being 100%)
        :param n: Numbers of new numbers to generate
        """
        super().__init__(**kwargs)
        self.max_change = max_change
        self.n = n

    def _get_transformations(self, current_text, indices_to_modify):
        """Return one transformed text per (number, replacement) pair.

        :param current_text: Text to transform; must expose ``words``,
            ``replace_word_at_index`` and ``delete_word_at_index``.
        :param indices_to_modify: Word indices that may be changed.
        :return: List of transformed texts.
        """
        words = current_text.words
        number_vocab = set(STR_NUM)
        connectors = {"point", "and"}

        spelled_idx = []
        num_words = []
        # Walk the indices in text order: the connector check and the
        # clustering below both rely on seeing neighbours consecutively.
        for idx in sorted(indices_to_modify):
            word = words[idx].lower()
            if word.isdigit():
                num_words.append([[idx], word])
            elif word in connectors:
                # "point"/"and" only belong to a number when they directly
                # follow a word already recognised as part of one.
                if spelled_idx and spelled_idx[-1] == idx - 1:
                    spelled_idx.append(idx)
            elif all(part in number_vocab for part in word.split("-")):
                # Whole-word match (hyphenated forms such as "twenty-one"
                # included) so that words merely containing a number word,
                # e.g. "someone" or "often", are not picked up.
                spelled_idx.append(idx)

        # Cluster adjacent indexes to get whole numbers.
        clusters = cluster_idx(spelled_idx)
        for cluster in clusters:
            # A connector at the end of a cluster ("two and he ...") is
            # ordinary text, not part of the number.
            while len(cluster) > 1 and words[cluster[-1]].lower() in connectors:
                cluster.pop()
        num_words += idx_to_words(clusters, words)

        # Replace original numbers with new numbers.
        transformed_texts = []
        for span, word in num_words:
            candidates = [
                str(r) for r in self._get_new_number(word) if str(r) != word
            ]
            if not candidates:
                continue
            # Drop the trailing words of a multi-word number first, from the
            # last index backwards, so every index used is still valid and
            # a multi-word replacement cannot shift the words to delete.
            base_text = current_text
            for extra in reversed(span[1:]):
                base_text = base_text.delete_word_at_index(extra)
            for candidate in candidates:
                transformed_texts.append(
                    base_text.replace_word_at_index(span[0], candidate)
                )
        return transformed_texts

    def _get_new_number(self, word):
        """Return alternative values for ``word`` if it is a number.

        Digits in give digits out (as ints); a spelled-out number gives
        spelled-out numbers. Anything that cannot be parsed as a number
        yields an empty list.
        """
        if word.isdigit():
            try:
                num = int(word)
            except ValueError:
                # str.isdigit() accepts characters (e.g. superscript
                # digits) that int() cannot parse.
                return []
            return self._alter_number(num)
        try:
            num = w2n.word_to_num(word)
        except ValueError:
            return []
        return [num2words(value) for value in self._alter_number(num)]

    def _alter_number(self, num):
        """Helper of ``_get_new_number``: sample up to ``self.n`` distinct
        integers around ``num``, within ``self.max_change`` of it.

        The sign of ``num`` is preserved (positive numbers stay >= 1,
        negative numbers stay < 0) and ``num`` itself is never returned.
        """
        # NOTE(review): 0, 2 and 4 are deliberately left untouched; the
        # reason is not documented (presumably "2"/"4" often stand for
        # "to"/"for" in informal text) - confirm.
        if num in (0, 2, 4):
            return []
        # Use the magnitude so the window is also valid for negative values.
        change = int(abs(num) * self.max_change) + 1
        if num >= 0:
            low, high = max(num - change, 1), num + change
        else:
            low, high = num - change, min(0, num + change)
        # Explicit ints: num may be a float (e.g. "two point five").
        low, high = int(low), int(high)
        if low >= high:
            return []
        try:
            sampled = np.random.randint(low, high, self.n, dtype=np.int64)
        except (ValueError, OverflowError):
            # Bounds that do not fit in int64 cannot be sampled.
            return []
        altered = []
        for value in sampled:
            value = int(value)
            if value != num and value not in altered:
                altered.append(value)
        return altered
# Vocabulary of English number words that WordSwapChangeNumber looks for
# when locating spelled-out numbers in a text.
# "point" and "and" are connectors: they only count as part of a number
# when the preceding word was already recognised as one.
STR_NUM = [
"zero",
"one",
"two",
"three",
"four",
"five",
"six",
"seven",
"eight",
"nine",
"ten",
"eleven",
"twelve",
"thirteen",
"fourteen",
"fifteen",
"sixteen",
"seventeen",
"eighteen",
"nineteen",
"twenty",
"thirty",
"forty",
"fifty",
"sixty",
"seventy",
"eighty",
"ninety",
"hundred",
"thousand",
"million",
"billion",
"point",
"and",
]

View File

@@ -0,0 +1,94 @@
from textattack.transformations import Transformation
class WordSwapContract(Transformation):
    """Transforms an input by performing contraction on recognized
    combinations."""

    # Maps an expanded form (one or two words) to its contraction.
    reverse_contraction_map = {
        "is not": "isn't",
        "are not": "aren't",
        "cannot": "can't",
        "could not": "couldn't",
        "did not": "didn't",
        "does not": "doesn't",
        "do not": "don't",
        "had not": "hadn't",
        "has not": "hasn't",
        "have not": "haven't",
        "he is": "he's",
        "how did": "how'd",
        "how is": "how's",
        "I would": "I'd",
        "I will": "I'll",
        "I am": "I'm",
        "i would": "i'd",
        "i will": "i'll",
        "i am": "i'm",
        "it would": "it'd",
        "it will": "it'll",
        "it is": "it's",
        "might not": "mightn't",
        "must not": "mustn't",
        "need not": "needn't",
        "ought not": "oughtn't",
        "shall not": "shan't",
        "she would": "she'd",
        "she will": "she'll",
        "she is": "she's",
        "should not": "shouldn't",
        "that would": "that'd",
        "that is": "that's",
        "there would": "there'd",
        "there is": "there's",
        "they would": "they'd",
        "they will": "they'll",
        "they are": "they're",
        "was not": "wasn't",
        "we would": "we'd",
        "we will": "we'll",
        "we are": "we're",
        "were not": "weren't",
        "what are": "what're",
        "what is": "what's",
        "when is": "when's",
        "where did": "where'd",
        "where is": "where's",
        "who will": "who'll",
        "who is": "who's",
        "who have": "who've",
        "why is": "why's",
        "will not": "won't",
        "would not": "wouldn't",
        "you would": "you'd",
        "you will": "you'll",
        "you are": "you're",
    }

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one
        contraction.

        :param current_text: Text to transform; must expose ``words``,
            ``replace_word_at_index`` and ``delete_word_at_index``.
        :param indices_to_modify: Word indices that may be changed.
        :return: List of transformed texts.
        """
        transformed_texts = []
        words = current_text.words
        contractions = self.reverse_contraction_map
        modifiable = set(indices_to_modify)

        # Text order keeps the output deterministic whatever container the
        # indices arrive in.
        for idx in sorted(modifiable):
            word = words[idx]

            # Single-word entries (e.g. "cannot") need no neighbour.
            if word in contractions:
                transformed_texts.append(
                    current_text.replace_word_at_index(idx, contractions[word])
                )

            # A two-word contraction is only valid when the second word
            # directly follows the first in the text and may be modified too.
            next_idx = idx + 1
            if next_idx not in modifiable:
                continue
            key = " ".join([word, words[next_idx]])
            if key in contractions:
                transformed_text = current_text.replace_word_at_index(
                    idx, contractions[key]
                )
                transformed_text = transformed_text.delete_word_at_index(next_idx)
                transformed_texts.append(transformed_text)
        return transformed_texts

View File

@@ -0,0 +1,104 @@
from textattack.transformations import Transformation
class WordSwapExtend(Transformation):
    """Transforms an input by performing extension on recognized
    combinations."""

    # Maps a contraction to its expanded form (one to three words).
    contraction_map = {
        "ain't": "isn't",
        "aren't": "are not",
        "can't": "cannot",
        "can't've": "cannot have",
        "could've": "could have",
        "couldn't": "could not",
        "didn't": "did not",
        "doesn't": "does not",
        "don't": "do not",
        "hadn't": "had not",
        "hasn't": "has not",
        "haven't": "have not",
        "he'd": "he would",
        "he'd've": "he would have",
        "he'll": "he will",
        "he's": "he is",
        "how'd": "how did",
        "how'd'y": "how do you",
        "how'll": "how will",
        "how's": "how is",
        "I'd": "I would",
        "I'll": "I will",
        "I'm": "I am",
        "I've": "I have",
        "i'd": "i would",
        "i'll": "i will",
        "i'm": "i am",
        "i've": "i have",
        "isn't": "is not",
        "it'd": "it would",
        "it'll": "it will",
        "it's": "it is",
        "ma'am": "madam",
        "might've": "might have",
        "mightn't": "might not",
        "must've": "must have",
        "mustn't": "must not",
        "needn't": "need not",
        "oughtn't": "ought not",
        "shan't": "shall not",
        "she'd": "she would",
        "she'll": "she will",
        "she's": "she is",
        "should've": "should have",
        "shouldn't": "should not",
        "that'd": "that would",
        "that's": "that is",
        "there'd": "there would",
        "there's": "there is",
        "they'd": "they would",
        "they'll": "they will",
        "they're": "they are",
        "they've": "they have",
        "wasn't": "was not",
        "we'd": "we would",
        "we'll": "we will",
        "we're": "we are",
        "we've": "we have",
        "weren't": "were not",
        "what're": "what are",
        "what's": "what is",
        "when's": "when is",
        "where'd": "where did",
        "where's": "where is",
        "where've": "where have",
        "who'll": "who will",
        "who's": "who is",
        "who've": "who have",
        "why's": "why is",
        "won't": "will not",
        "would've": "would have",
        "wouldn't": "would not",
        "you'd": "you would",
        "you'd've": "you would have",
        "you'll": "you will",
        "you're": "you are",
        "you've": "you have",
    }

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one
        extension.

        :param current_text: Text to transform; must expose ``words``,
            ``replace_word_at_index`` and ``insert_text_after_word_index``.
        :param indices_to_modify: Word indices that may be changed.
        :return: List of transformed texts.
        """
        transformed_texts = []
        words = current_text.words
        # Text order keeps the output deterministic whatever container the
        # indices arrive in.
        for idx in sorted(indices_to_modify):
            expansion = self.contraction_map.get(words[idx])
            if expansion is None:
                continue
            # Expansions are one to three words long: the first word replaces
            # the contraction, whatever is left is inserted right after it.
            first, *rest = expansion.split()
            transformed_text = current_text.replace_word_at_index(idx, first)
            if rest:
                transformed_text = transformed_text.insert_text_after_word_index(
                    idx, " ".join(rest)
                )
            transformed_texts.append(transformed_text)
        return transformed_texts