Mirror of https://github.com/QData/TextAttack.git, synced 2021-10-13 00:05:06 +03:00
Add CheckList Recipe
@@ -22,3 +22,5 @@ tokenizers==0.8.1-rc1
 tqdm
 visdom
 wandb
+word2number
+num2words
@@ -16,3 +16,4 @@ from .pwws_ren_2019 import PWWSRen2019
 from .iga_wang_2019 import IGAWang2019
 from .pruthi_2019 import Pruthi2019
 from .pso_zang_2020 import PSOZang2020
+from .checklist_ribeiro_2020 import Checklist2020
textattack/attack_recipes/checklist_ribeiro_2020.py (new file, 50 lines)
@@ -0,0 +1,50 @@
from textattack.constraints.pre_transformation import RepeatModification
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.shared.attack import Attack
from textattack.transformations import (
    CompositeTransformation,
    WordSwapContract,
    WordSwapExtend,
    WordSwapChangeNumber,
    WordSwapChangeLocation,
    WordSwapChangeName,
)

from .attack_recipe import AttackRecipe


class Checklist2020(AttackRecipe):
    """An implementation of the attack used in "Beyond Accuracy: Behavioral
    Testing of NLP Models with CheckList" (Ribeiro et al., 2020).

    This recipe applies a number of the perturbations used in the paper's
    Invariance Testing method:

        - Contraction
        - Extension
        - Changing names, numbers, and locations
        - possibly negation (not yet implemented)

    The idea is to alter surface elements of the sentence without changing its
    semantics.

    https://arxiv.org/abs/2005.04118

    :param model: Model to attack.
    :param max_num_word_swaps: Maximum number of modifications to allow.
    """

    @staticmethod
    def build(model):
        transformation = CompositeTransformation(
            [
                WordSwapExtend(),
                WordSwapContract(),
                WordSwapChangeName(),
                WordSwapChangeNumber(),
                WordSwapChangeLocation(),
            ]
        )

        # This constraint is needed to prevent WordSwapExtend and WordSwapContract
        # from undoing each other's changes and forming an infinite loop.
        constraints = [RepeatModification()]

        # Untargeted attack & greedy search
        goal_function = UntargetedClassification(model)
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation, search_method)
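For orientation, a minimal usage sketch follows; it is not part of this diff. It builds the recipe against a wrapped sentiment classifier and prints the assembled attack. The HuggingFaceModelWrapper class and the "textattack/bert-base-uncased-SST-2" checkpoint are assumptions about the surrounding TextAttack and transformers APIs, not something this commit introduces.

# Usage sketch only; the model wrapper and checkpoint below are assumed, not part of this diff.
import transformers
from textattack.attack_recipes import Checklist2020
from textattack.models.wrappers import HuggingFaceModelWrapper

tokenizer = transformers.AutoTokenizer.from_pretrained("textattack/bert-base-uncased-SST-2")
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-SST-2"
)
model_wrapper = HuggingFaceModelWrapper(model, tokenizer)

# build() wires up the untargeted goal function, the RepeatModification constraint,
# the composite CheckList transformation, and greedy search.
attack = Checklist2020.build(model_wrapper)
print(attack)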
@@ -15,6 +15,7 @@ ATTACK_RECIPE_NAMES = {
    "iga": "textattack.attack_recipes.IGAWang2019",
    "pruthi": "textattack.attack_recipes.Pruthi2019",
    "pso": "textattack.attack_recipes.PSOZang2020",
+   "checklist": "textattack.attack_recipes.Checklist2020",
 }

 #
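With this entry registered, the recipe is also reachable from TextAttack's command-line interface. Assuming the standard CLI flags and one of the library's pretrained model shorthands (both are assumptions, not part of this diff), an invocation would look like: textattack attack --recipe checklist --model bert-base-uncased-mr --num-examples 10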
@@ -17,6 +17,11 @@ from .word_swap_masked_lm import WordSwapMaskedLM
 from .word_swap_random_word import RandomSwap
 from .random_synonym_insertion import RandomSynonymInsertion
 from .word_swap_qwerty import WordSwapQWERTY
+from .word_swap_contract import WordSwapContract
+from .word_swap_extend import WordSwapExtend
+from .word_swap_change_number import WordSwapChangeNumber
+from .word_swap_change_location import WordSwapChangeLocation
+from .word_swap_change_name import WordSwapChangeName

 # White-box transformations
 from .word_swap_gradient_based import WordSwapGradientBased
textattack/transformations/word_swap_change_location.py (new file, 1550 lines)
File diff suppressed because it is too large

textattack/transformations/word_swap_change_name.py (new file, 23458 lines)
File diff suppressed because it is too large
textattack/transformations/word_swap_change_number.py (new file, 160 lines)
@@ -0,0 +1,160 @@
import numpy as np
from word2number import w2n
from num2words import num2words

from textattack.transformations import Transformation


def idx_to_words(ls, words):
    """Given a list generated by cluster_idx, return a list of sub-lists, each
    containing the cluster of indices and the string of words corresponding to
    those indices."""

    output = []
    for cluster in ls:
        word = words[cluster[0]]
        for idx in cluster[1:]:
            word = " ".join([word, words[idx]])
        output.append([cluster, word])
    return output


def cluster_idx(idx_ls):
    """Given a list of indices, return a list of sub-lists of adjacent
    indices."""

    if len(idx_ls) < 2:
        return [[i] for i in idx_ls]
    else:
        output = [[idx_ls[0]]]
        prev = idx_ls[0]
        list_pos = 0

        for idx in idx_ls[1:]:
            if idx - 1 == prev:
                output[list_pos].append(idx)
            else:
                output.append([idx])
                list_pos += 1
            prev = idx
        return output


class WordSwapChangeNumber(Transformation):
    def __init__(self, max_change=1, n=3, **kwargs):
        """A transformation that recognizes numbers in a sentence and returns
        sentences with those numbers altered.

        :param max_change: Maximum relative change (1 being 100%)
        :param n: Number of new numbers to generate
        """
        super().__init__(**kwargs)
        self.max_change = max_change
        self.n = n

    def _get_transformations(self, current_text, indices_to_modify):
        words = current_text.words
        num_idx = []
        num_words = []

        # find the indices of spelled-out number words
        for idx in indices_to_modify:
            word = words[idx].lower()
            for number in STR_NUM:
                if number in word:
                    # "point" and "and" only count when they continue a number
                    if word in ["point", "and"]:
                        if 0 < idx and (idx - 1) in num_idx:
                            num_idx.append(idx)
                    else:
                        num_idx.append(idx)
                    break

            if word.isdigit():
                num_words.append([[idx], word])

        # cluster adjacent indices to recover whole multi-word numbers
        num_idx = cluster_idx(num_idx)
        num_words += idx_to_words(num_idx, words)

        # replace original numbers with new numbers
        transformed_texts = []
        for num_word in num_words:
            idx = num_word[0]
            word = num_word[1]
            replacement_words = self._get_new_number(word)
            for r in replacement_words:
                if r == word:
                    continue
                text = current_text.replace_word_at_index(idx[0], str(r))
                if len(idx) > 1:
                    # delete the remaining words of a multi-word number; deleting at
                    # the same position works because later words shift left
                    index = idx[1]
                    for i in idx[1:]:
                        text = text.delete_word_at_index(index)
                transformed_texts.append(text)

        return transformed_texts

    def _get_new_number(self, word):
        """Given a word, try altering its value if the word is a number.
        Return digits if the word was given in digits, and the spelled-out form
        if the word was spelled out."""

        if word.isdigit():
            num = float(word)
            return self._alter_number(num)
        else:
            try:
                num = w2n.word_to_num(word)
                num_list = self._alter_number(num)
                return [num2words(n) for n in num_list]
            except ValueError:
                return []

    def _alter_number(self, num):
        """Helper for _get_new_number: perturb a number based on
        self.max_change."""
        if num not in [0, 2, 4]:
            change = int(num * self.max_change) + 1
            if num >= 0:
                num_list = np.random.randint(max(num - change, 1), num + change, self.n)
            else:
                num_list = np.random.randint(num - change, min(0, num + change), self.n)
            return num_list
        return []


STR_NUM = [
    "zero",
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
    "ten",
    "eleven",
    "twelve",
    "thirteen",
    "fourteen",
    "fifteen",
    "sixteen",
    "seventeen",
    "eighteen",
    "nineteen",
    "twenty",
    "thirty",
    "forty",
    "fifty",
    "sixty",
    "seventy",
    "eighty",
    "ninety",
    "hundred",
    "thousand",
    "million",
    "billion",
    "point",
    "and",
]
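A quick illustration of how the two module-level helpers cooperate before any replacement happens. This example is not in the diff; it only exercises the functions defined above on hand-picked indices.

# Illustration only: cluster adjacent number-word indices, then map them back to text.
words = ["the", "meeting", "ran", "two", "hundred", "minutes", "over", "ten", "times"]
clusters = cluster_idx([3, 4, 7])        # -> [[3, 4], [7]]
print(idx_to_words(clusters, words))     # -> [[[3, 4], 'two hundred'], [[7], 'ten']]
# WordSwapChangeNumber then parses each string with word2number, perturbs the value
# by up to max_change, and writes the result back with num2words.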
textattack/transformations/word_swap_contract.py (new file, 94 lines)
@@ -0,0 +1,94 @@
from textattack.transformations import Transformation


class WordSwapContract(Transformation):
    """Transforms an input by performing contraction on recognized word
    combinations."""

    reverse_contraction_map = {
        "is not": "isn't",
        "are not": "aren't",
        "cannot": "can't",
        "could not": "couldn't",
        "did not": "didn't",
        "does not": "doesn't",
        "do not": "don't",
        "had not": "hadn't",
        "has not": "hasn't",
        "have not": "haven't",
        "he is": "he's",
        "how did": "how'd",
        "how is": "how's",
        "I would": "I'd",
        "I will": "I'll",
        "I am": "I'm",
        "i would": "i'd",
        "i will": "i'll",
        "i am": "i'm",
        "it would": "it'd",
        "it will": "it'll",
        "it is": "it's",
        "might not": "mightn't",
        "must not": "mustn't",
        "need not": "needn't",
        "ought not": "oughtn't",
        "shall not": "shan't",
        "she would": "she'd",
        "she will": "she'll",
        "she is": "she's",
        "should not": "shouldn't",
        "that would": "that'd",
        "that is": "that's",
        "there would": "there'd",
        "there is": "there's",
        "they would": "they'd",
        "they will": "they'll",
        "they are": "they're",
        "was not": "wasn't",
        "we would": "we'd",
        "we will": "we'll",
        "we are": "we're",
        "were not": "weren't",
        "what are": "what're",
        "what is": "what's",
        "when is": "when's",
        "where did": "where'd",
        "where is": "where's",
        "who will": "who'll",
        "who is": "who's",
        "who have": "who've",
        "why is": "why's",
        "will not": "won't",
        "would not": "wouldn't",
        "you would": "you'd",
        "you will": "you'll",
        "you are": "you're",
    }

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one
        contraction applied."""
        transformed_texts = []

        words = current_text.words
        indices_to_modify = list(indices_to_modify)

        # search for every two-word combination in reverse_contraction_map
        for idx in indices_to_modify[:-1]:
            word = words[idx]

            next_idx = indices_to_modify[indices_to_modify.index(idx) + 1]
            next_word = words[next_idx]

            # build the two-word key to look up
            key = " ".join([word, next_word])

            # when a possible contraction is found in the map, contract the current text
            if key in self.reverse_contraction_map:
                transformed_text = current_text.replace_word_at_index(
                    idx, self.reverse_contraction_map[key]
                )
                transformed_text = transformed_text.delete_word_at_index(next_idx)
                transformed_texts.append(transformed_text)

        return transformed_texts
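A small hedged usage sketch, not part of the diff: applying the transformation directly to a single text. It assumes textattack.shared.AttackedText and that Transformation instances are callable on one text, as elsewhere in the library.

# Sketch only; assumes the AttackedText API and callable transformations.
from textattack.shared import AttackedText
from textattack.transformations import WordSwapContract

text = AttackedText("she is sure that they are wrong")
for candidate in WordSwapContract()(text):
    print(candidate.text)
# expected candidates: "she's sure that they are wrong"
#                      "she is sure that they're wrong"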
textattack/transformations/word_swap_extend.py (new file, 104 lines)
@@ -0,0 +1,104 @@
from textattack.transformations import Transformation


class WordSwapExtend(Transformation):
    """Transforms an input by expanding recognized contractions."""

    contraction_map = {
        "ain't": "isn't",
        "aren't": "are not",
        "can't": "cannot",
        "can't've": "cannot have",
        "could've": "could have",
        "couldn't": "could not",
        "didn't": "did not",
        "doesn't": "does not",
        "don't": "do not",
        "hadn't": "had not",
        "hasn't": "has not",
        "haven't": "have not",
        "he'd": "he would",
        "he'd've": "he would have",
        "he'll": "he will",
        "he's": "he is",
        "how'd": "how did",
        "how'd'y": "how do you",
        "how'll": "how will",
        "how's": "how is",
        "I'd": "I would",
        "I'll": "I will",
        "I'm": "I am",
        "I've": "I have",
        "i'd": "i would",
        "i'll": "i will",
        "i'm": "i am",
        "i've": "i have",
        "isn't": "is not",
        "it'd": "it would",
        "it'll": "it will",
        "it's": "it is",
        "ma'am": "madam",
        "might've": "might have",
        "mightn't": "might not",
        "must've": "must have",
        "mustn't": "must not",
        "needn't": "need not",
        "oughtn't": "ought not",
        "shan't": "shall not",
        "she'd": "she would",
        "she'll": "she will",
        "she's": "she is",
        "should've": "should have",
        "shouldn't": "should not",
        "that'd": "that would",
        "that's": "that is",
        "there'd": "there would",
        "there's": "there is",
        "they'd": "they would",
        "they'll": "they will",
        "they're": "they are",
        "they've": "they have",
        "wasn't": "was not",
        "we'd": "we would",
        "we'll": "we will",
        "we're": "we are",
        "we've": "we have",
        "weren't": "were not",
        "what're": "what are",
        "what's": "what is",
        "when's": "when is",
        "where'd": "where did",
        "where's": "where is",
        "where've": "where have",
        "who'll": "who will",
        "who's": "who is",
        "who've": "who have",
        "why's": "why is",
        "won't": "will not",
        "would've": "would have",
        "wouldn't": "would not",
        "you'd": "you would",
        "you'd've": "you would have",
        "you'll": "you will",
        "you're": "you are",
        "you've": "you have",
    }

    def _get_transformations(self, current_text, indices_to_modify):
        """Return all possible transformed sentences, each with one
        contraction expanded."""
        transformed_texts = []
        words = current_text.words
        for idx in indices_to_modify:
            word = words[idx]
            # expand when the word appears in the contraction map
            if word in self.contraction_map:
                expanded = self.contraction_map[word].split()
                transformed_text = current_text.replace_word_at_index(idx, expanded[0])
                if len(expanded) > 1:
                    # guard against single-word expansions such as "ain't" -> "isn't",
                    # which would otherwise raise an IndexError on expanded[1]
                    transformed_text = transformed_text.insert_text_after_word_index(
                        idx, expanded[1]
                    )
                transformed_texts.append(transformed_text)

        return transformed_texts
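And the mirror-image sketch for the extension transformation, again not part of the diff and under the same AttackedText assumption. It also shows why the Checklist2020 recipe pairs WordSwapExtend and WordSwapContract with a RepeatModification constraint: each would otherwise undo the other's edits indefinitely.

# Sketch only; same assumptions as the WordSwapContract example above.
from textattack.shared import AttackedText
from textattack.transformations import WordSwapExtend

for candidate in WordSwapExtend()(AttackedText("maybe they're wrong and we are not")):
    print(candidate.text)
# expected candidate: "maybe they are wrong and we are not"
# (WordSwapContract would turn "they are" straight back into "they're",
#  hence the RepeatModification constraint in the recipe.)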