1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00
Files
textattack-nlp-transformer/textattack/constraints/semantics/stopword_modification.py

36 lines
1.3 KiB
Python

""" Abstract classes represent constraints on text adversarial examples.
"""
from textattack.shared.utils import default_class_repr
from textattack.constraints import PreTransformationConstraint
from textattack.shared.validators import transformation_consists_of_word_swaps
import nltk
class StopwordModification(PreTransformationConstraint):
    """
    A constraint disallowing the modification of stopwords.

    Args:
        stopwords: Optional iterable of words to treat as stopwords. When
            omitted (``None``), the NLTK stopword list for ``language`` is
            used instead.
        language: Name of the NLTK stopword list to load when ``stopwords``
            is not supplied. Defaults to ``'english'``.
    """

    def __init__(self, stopwords=None, language='english'):
        if stopwords is not None:
            # Store as a set so membership checks do not scan a list.
            self.stopwords = set(stopwords)
        else:
            self.stopwords = set(nltk.corpus.stopwords.words(language))

    def _get_modifiable_indices(self, tokenized_text):
        """
        Returns the word indices in ``tokenized_text`` which are able to be
        modified, i.e. the positions whose word is not a stopword.

        Words are compared against the stopword set exactly as they appear
        in ``tokenized_text.words``; no case normalization is applied here.

        Args:
            tokenized_text: Object exposing a ``words`` iterable of strings.

        Returns:
            A set of integer positions into ``tokenized_text.words``.
        """
        return {
            index
            for index, word in enumerate(tokenized_text.words)
            if word not in self.stopwords
        }

    def check_compatibility(self, transformation):
        """
        The stopword constraint is only concerned with word swaps, since
        paraphrasing phrases containing stopwords is OK.

        Args:
            transformation: The transformation to check compatibility with.

        Returns:
            The result of ``transformation_consists_of_word_swaps`` for
            ``transformation`` (presumably a bool -- confirm against the
            validator).
        """
        return transformation_consists_of_word_swaps(transformation)