1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

learning to write language model + batch queries

This commit is contained in:
Jack Morris
2020-06-28 11:24:49 -04:00
parent ca311f286f
commit 5f3e2b2961
10 changed files with 188 additions and 99 deletions

View File

@@ -1,25 +1,27 @@
import math
import torch
from abc import ABC, abstractmethod
from textattack.constraints import Constraint
class LanguageModelConstraint(Constraint):
class LanguageModelConstraint(ABC, Constraint):
"""
Determines if two sentences have a swapped word that has a similar
probability according to a language model.
Args:
max_log_prob_diff (float): the maximum difference in log-probability
between x and x_adv
max_log_prob_diff (float): the maximum decrease in log-probability
in swapped words from x to x_adv
compare_against_original (bool): if True, compare against the original
text; otherwise compare against the most recent (current) text
"""
def __init__(self, max_log_prob_diff=None):
def __init__(self, max_log_prob_diff=None, compare_against_original=False):
if max_log_prob_diff is None:
raise ValueError("Must set max_log_prob_diff")
self.max_log_prob_diff = max_log_prob_diff
self.compare_against_original = compare_against_original
@abstractmethod
def get_log_probs_at_index(self, text_list, word_index):
""" Gets the log-probability of items in `text_list` at index
`word_index` according to a language model.
@@ -27,6 +29,9 @@ class LanguageModelConstraint(Constraint):
raise NotImplementedError()
def _check_constraint(self, transformed_text, current_text, original_text=None):
if self.compare_against_original:
current_text = original_text
try:
indices = transformed_text.attack_attrs["newly_modified_indices"]
except KeyError:
@@ -41,9 +46,7 @@ class LanguageModelConstraint(Constraint):
f"Error: get_log_probs_at_index returned {len(probs)} values for 2 inputs"
)
cur_prob, transformed_prob = probs
if self.max_log_prob_diff is None:
cur_prob, transformed_prob = math.log(p1), math.log(p2)
if abs(cur_prob - transformed_prob) > self.max_log_prob_diff:
if transformed_prob <= cur_prob - self.max_log_prob_diff:
return False
return True