mirror of
https://github.com/QData/TextAttack.git
synced 2021-10-13 00:05:06 +03:00
add thought vectors
This commit is contained in:
66
textattack/constraints/semantics/thought_vector.py
Normal file
66
textattack/constraints/semantics/thought_vector.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import functools
|
||||
import torch
|
||||
|
||||
from textattack.shared import utils
|
||||
from textattack.constraints import Constraint
|
||||
from textattack.shared import WordEmbedding
|
||||
from textattack.shared import TokenizedText
|
||||
|
||||
class ThoughtVector(Constraint):
    """A constraint on the distance between two sentences' thought vectors.

    A "thought vector" here is simply the sum of the word embeddings of
    every word in the text.

    Args:
        embedding_type (str): The word embedding to use
            (default ``'paragramcf'``).
        max_mse_dist (float, optional): The maximum squared Euclidean
            distance allowed between thought vectors.
        min_cos_sim (float, optional): The minimum cosine similarity
            required between thought vectors.

    Raises:
        ValueError: If neither ``max_mse_dist`` nor ``min_cos_sim`` is set.
    """
    def __init__(self, embedding_type='paragramcf', max_mse_dist=None, min_cos_sim=None):
        # BUG FIX: the original signature placed non-default parameters
        # after a default one (a SyntaxError); both thresholds now default
        # to None, and the explicit check below enforces that at least one
        # is provided.
        super().__init__()
        # Stored so that extra_repr_keys() can report it — the original
        # never saved embedding_type on the instance, which would break repr.
        self.embedding_type = embedding_type
        self.word_embedding = WordEmbedding(embedding_type)

        # BUG FIX: the original test `(max_mse_dist or min_cos_sim) is None`
        # only raised when max_mse_dist was falsy AND min_cos_sim was None;
        # require at least one threshold explicitly.
        if max_mse_dist is None and min_cos_sim is None:
            raise ValueError('Must set max_mse_dist or min_cos_sim')

        self.max_mse_dist = max_mse_dist
        self.min_cos_sim = min_cos_sim

    # NOTE(review): lru_cache on an instance method keys on `self` and keeps
    # every ThoughtVector instance (and its cached TokenizedTexts) alive for
    # the cache's lifetime; acceptable if constraints are long-lived
    # singletons — confirm against usage.
    @functools.lru_cache(maxsize=2**10)
    def _get_thought_vector(self, tokenized_text):
        """Sum the embedding of every word in ``tokenized_text``."""
        # BUG FIX: torch.sum() does not accept a Python list of tensors;
        # stack the per-word embeddings and reduce along the word dimension.
        word_embeddings = [self.word_embedding[word] for word in tokenized_text.words]
        return torch.sum(torch.stack(word_embeddings), dim=0)

    def __call__(self, x, x_adv):
        """ Returns true if (x, x_adv) are closer than `self.min_cos_sim`
            and `self.max_mse_dist`. """
        if not isinstance(x, TokenizedText):
            raise TypeError('x must be of type TokenizedText')
        if not isinstance(x_adv, TokenizedText):
            raise TypeError('x_adv must be of type TokenizedText')

        thought_vector_1 = self._get_thought_vector(x)
        thought_vector_2 = self._get_thought_vector(x_adv)

        # Check cosine similarity (only when a threshold is configured;
        # `is not None` so a threshold of 0.0 would still be honored).
        if self.min_cos_sim is not None:
            cos_sim = torch.nn.CosineSimilarity(dim=0)(thought_vector_1, thought_vector_2)
            if cos_sim < self.min_cos_sim:
                return False
        # Check squared Euclidean (MSE) distance.
        if self.max_mse_dist is not None:
            # BUG FIX: the original referenced undefined names `e1`/`e2`.
            mse_dist = torch.sum((thought_vector_1 - thought_vector_2) ** 2)
            if mse_dist > self.max_mse_dist:
                return False
        return True

    def extra_repr_keys(self):
        """Set the extra representation of the constraint using these keys.

        To print customized extra information, you should reimplement
        this method in your own constraint. Both single-line and multi-line
        strings are acceptable.
        """
        # Report only the metric actually in use alongside the embedding type.
        if self.min_cos_sim is None:
            metric = 'max_mse_dist'
        else:
            metric = 'min_cos_sim'
        return ['embedding_type', metric]
|
||||
@@ -1,5 +1,5 @@
|
||||
# Helper stuff, like embeddings.
|
||||
from . import helper_utils
|
||||
from . import utils
|
||||
from .glove_embedding_layer import GloveEmbeddingLayer
|
||||
|
||||
# Helper modules.
|
||||
|
||||
@@ -5,7 +5,7 @@ import torch.nn as nn
|
||||
from textattack.shared import utils
|
||||
|
||||
from textattack.models.helpers import GloveEmbeddingLayer
|
||||
from textattack.models.helpers.helper_utils import load_cached_state_dict
|
||||
from textattack.models.helpers.utils import load_cached_state_dict
|
||||
|
||||
class LSTMForClassification(nn.Module):
|
||||
""" A long short-term memory neural network for text classification.
|
||||
|
||||
@@ -5,7 +5,7 @@ import torch.nn.functional as F
|
||||
|
||||
from textattack.shared import utils
|
||||
from textattack.models.helpers import GloveEmbeddingLayer
|
||||
from textattack.models.helpers.helper_utils import load_cached_state_dict
|
||||
from textattack.models.helpers.utils import load_cached_state_dict
|
||||
|
||||
class WordCNNForClassification(nn.Module):
|
||||
""" A convolutional neural network for text classification.
|
||||
|
||||
Reference in New Issue
Block a user