mirror of
https://github.com/QData/TextAttack.git
synced 2021-10-13 00:05:06 +03:00
merge
This commit is contained in:
@@ -8,7 +8,7 @@ optional but will greatly improve code speed.
|
||||
```
|
||||
conda create -n text-attack python=3.7
|
||||
conda activate text-attack
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
We use the list of stopwords from nltk. To download them, run the following in a Python shell:
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
|
||||
BIN
docs/_build/doctrees/attacks/attack.doctree
vendored
Normal file
BIN
docs/_build/doctrees/attacks/attack.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/attacks/genetic_algorithm.doctree
vendored
Normal file
BIN
docs/_build/doctrees/attacks/genetic_algorithm.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/attacks/greedy_word_swap.doctree
vendored
Normal file
BIN
docs/_build/doctrees/attacks/greedy_word_swap.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/constraints/constraint.doctree
vendored
Normal file
BIN
docs/_build/doctrees/constraints/constraint.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/constraints/semantics/google_language_model.doctree
vendored
Normal file
BIN
docs/_build/doctrees/constraints/semantics/google_language_model.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/constraints/semantics/semantics.doctree
vendored
Normal file
BIN
docs/_build/doctrees/constraints/semantics/semantics.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/datasets/built-in_datasets.doctree
vendored
Normal file
BIN
docs/_build/doctrees/datasets/built-in_datasets.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/datasets/user_defined_datasets.doctree
vendored
Normal file
BIN
docs/_build/doctrees/datasets/user_defined_datasets.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/environment.pickle
vendored
Normal file
BIN
docs/_build/doctrees/environment.pickle
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/index.doctree
vendored
Normal file
BIN
docs/_build/doctrees/index.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/models/bert.doctree
vendored
Normal file
BIN
docs/_build/doctrees/models/bert.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/models/infer_sent.doctree
vendored
Normal file
BIN
docs/_build/doctrees/models/infer_sent.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/transformations/transformation.doctree
vendored
Normal file
BIN
docs/_build/doctrees/transformations/transformation.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/transformations/word_swap.doctree
vendored
Normal file
BIN
docs/_build/doctrees/transformations/word_swap.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/_build/doctrees/users/examples.doctree
vendored
Normal file
BIN
docs/_build/doctrees/users/examples.doctree
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
docs/_build/doctrees/users/introduction.doctree
vendored
Normal file
BIN
docs/_build/doctrees/users/introduction.doctree
vendored
Normal file
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
# Sphinx build info version 1
|
||||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
||||
config: 5767419702565dfe6bf824077d7a8882
|
||||
config: 2d09ffbcef6e05bce8419451285fcd74
|
||||
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
||||
479
docs/_build/html/_modules/attacks/attack.html
vendored
Normal file
479
docs/_build/html/_modules/attacks/attack.html
vendored
Normal file
@@ -0,0 +1,479 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>attacks.attack — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>attacks.attack</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for attacks.attack</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">difflib</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">import</span> <span class="nn">random</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack</span> <span class="k">import</span> <span class="n">utils</span> <span class="k">as</span> <span class="n">utils</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack.constraints</span> <span class="k">import</span> <span class="n">Constraint</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.tokenized_text</span> <span class="k">import</span> <span class="n">TokenizedText</span>
|
||||
|
||||
<div class="viewcode-block" id="Attack"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack">[docs]</a><span class="k">class</span> <span class="nc">Attack</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> An attack generates adversarial examples on text. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> model: A PyTorch or TensorFlow model to attack</span>
|
||||
<span class="sd"> constraints: A list of constraints to add to the attack</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">model</span><span class="p">,</span> <span class="n">constraints</span><span class="o">=</span><span class="p">[]):</span>
|
||||
<span class="sd">""" Initialize an attack object.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Attacks can be run multiple times</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> @TODO should `tokenizer` be an additional parameter or should</span>
|
||||
<span class="sd"> we assume every model has a .tokenizer ?</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">model</span>
|
||||
<span class="c1"># Transformation and corresponding constraints.</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">constraints</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">if</span> <span class="n">constraints</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">add_constraints</span><span class="p">(</span><span class="n">constraints</span><span class="p">)</span>
|
||||
<span class="c1"># List of files to output to.</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output_files</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output_to_terminal</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output_to_visdom</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<div class="viewcode-block" id="Attack.add_output_file"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack.add_output_file">[docs]</a> <span class="k">def</span> <span class="nf">add_output_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> When attack runs, it will output to this file. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> file (str): The path to the output file</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="n">directory</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">file</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">directory</span><span class="p">):</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">directory</span><span class="p">)</span>
|
||||
<span class="n">file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">file</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Attack.add_constraint"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack.add_constraint">[docs]</a> <span class="k">def</span> <span class="nf">add_constraint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">constraint</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Adds a constraint to the attack. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> constraint: A constraint to add, see constraints</span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> ValueError: If the constraint is not of type :obj:`Constraint`</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">constraint</span><span class="p">,</span> <span class="n">Constraint</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Cannot add constraint of type'</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">constraint</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">constraints</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">constraint</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Attack.add_constraints"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack.add_constraints">[docs]</a> <span class="k">def</span> <span class="nf">add_constraints</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">constraints</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Adds multiple constraints to the attack. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> constraints: An iterable of constraints to add, see constraints. </span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> TypeError: If the constraints are not iterable</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># Make sure constraints are iterable.</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="nb">iter</span><span class="p">(</span><span class="n">constraints</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">TypeError</span> <span class="k">as</span> <span class="n">te</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="n">f</span><span class="s1">'Constraint list type {type(constraints)} is not iterable.'</span><span class="p">)</span>
|
||||
<span class="c1"># Store each constraint after validating its type.</span>
|
||||
<span class="k">for</span> <span class="n">constraint</span> <span class="ow">in</span> <span class="n">constraints</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">add_constraint</span><span class="p">(</span><span class="n">constraint</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Attack.get_transformations"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack.get_transformations">[docs]</a> <span class="k">def</span> <span class="nf">get_transformations</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transformation</span><span class="p">,</span> <span class="n">text</span><span class="p">,</span> <span class="n">original_text</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Filters a list of transformations by self.constraints. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> transformation:</span>
|
||||
<span class="sd"> text:</span>
|
||||
<span class="sd"> original_text (:obj:`type`, optional): Defaults to None. </span>
|
||||
<span class="sd"> **kwargs:</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> A filtered list of transformations where each transformation matches the constraints</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">transformations</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">transformation</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">))</span>
|
||||
<span class="k">for</span> <span class="n">C</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">constraints</span><span class="p">:</span>
|
||||
<span class="n">transformations</span> <span class="o">=</span> <span class="n">C</span><span class="o">.</span><span class="n">call_many</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">transformations</span><span class="p">,</span> <span class="n">original_text</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">transformations</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_attack_one</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Perturbs `text` until `self.model` gives a different label</span>
|
||||
<span class="sd"> than `label`. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_call_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokenized_text_list</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Returns model predictions for a list of TokenizedText objects. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">ids</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">([</span><span class="n">t</span><span class="o">.</span><span class="n">ids</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">tokenized_text_list</span><span class="p">])</span>
|
||||
<span class="n">ids</span> <span class="o">=</span> <span class="n">ids</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">utils</span><span class="o">.</span><span class="n">get_device</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">ids</span><span class="p">)</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
|
||||
|
||||
<div class="viewcode-block" id="Attack.attack"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.Attack.attack">[docs]</a> <span class="k">def</span> <span class="nf">attack</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Runs an attack on the given dataset and outputs the results to the console and the output file.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> dataset: An iterable of (label, text) pairs</span>
|
||||
<span class="sd"> shuffle (:obj:`bool`, optional): Whether to shuffle the data. Defaults to False.</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The results of the attack on the dataset</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">shuffle</span><span class="p">:</span>
|
||||
<span class="n">random</span><span class="o">.</span><span class="n">shuffle</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">label</span><span class="p">,</span> <span class="n">text</span> <span class="ow">in</span> <span class="n">dataset</span><span class="p">:</span>
|
||||
<span class="n">tokenized_text</span> <span class="o">=</span> <span class="n">TokenizedText</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attack_one</span><span class="p">(</span><span class="n">label</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">)</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_to_terminal</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">result</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">results</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'-'</span><span class="o">*</span><span class="mi">35</span><span class="p">,</span> <span class="s1">'Result'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">),</span> <span class="s1">'-'</span><span class="o">*</span><span class="mi">35</span><span class="p">)</span>
|
||||
<span class="n">result</span><span class="o">.</span><span class="n">print_</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">()</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_files</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">output_file</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_files</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">results</span><span class="p">:</span>
|
||||
<span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">output_to_visdom</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'-'</span><span class="o">*</span><span class="mi">80</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div></div>
|
||||
|
||||
<div class="viewcode-block" id="AttackResult"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.AttackResult">[docs]</a><span class="k">class</span> <span class="nc">AttackResult</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Result of an Attack run on a single (label, text_input) pair. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> original_text (str): The original text</span>
|
||||
<span class="sd"> perturbed_text (str): The perturbed text resulting from the attack</span>
|
||||
<span class="sd"> original_label (int): The classification label of the original text</span>
|
||||
<span class="sd"> perturbed_label (int): The classification label of the perturbed text</span>
|
||||
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> @TODO support attacks that fail (no perturbed label/text)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">original_text</span><span class="p">,</span> <span class="n">perturbed_text</span><span class="p">,</span> <span class="n">original_label</span><span class="p">,</span>
|
||||
<span class="n">perturbed_label</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">original_text</span> <span class="o">=</span> <span class="n">original_text</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">perturbed_text</span> <span class="o">=</span> <span class="n">perturbed_text</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">original_label</span> <span class="o">=</span> <span class="n">original_label</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">perturbed_label</span> <span class="o">=</span> <span class="n">perturbed_label</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__data__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">original_text</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">original_label</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">perturbed_text</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">perturbed_label</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">data</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data__</span><span class="p">())</span>
|
||||
|
||||
<div class="viewcode-block" id="AttackResult.diff"><a class="viewcode-back" href="../../attacks/attack.html#attacks.attack.AttackResult.diff">[docs]</a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Highlights the difference between two texts using color.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">_color</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">color_text_terminal</span>
|
||||
<span class="n">t1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">original_text</span>
|
||||
<span class="n">t2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">perturbed_text</span>
|
||||
|
||||
<span class="n">words1</span> <span class="o">=</span> <span class="n">t1</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
<span class="n">words2</span> <span class="o">=</span> <span class="n">t2</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
|
||||
<span class="n">c1</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">color_from_label</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">original_label</span><span class="p">)</span>
|
||||
<span class="n">c2</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">color_from_label</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">perturbed_label</span><span class="p">)</span>
|
||||
<span class="n">new_is</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">new_w1s</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">new_w2s</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">min</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">words1</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">words2</span><span class="p">))):</span>
|
||||
<span class="n">w1</span> <span class="o">=</span> <span class="n">words1</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="n">w2</span> <span class="o">=</span> <span class="n">words2</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">w1</span> <span class="o">!=</span> <span class="n">w2</span><span class="p">:</span>
|
||||
<span class="n">new_is</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
|
||||
<span class="n">new_w1s</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_color</span><span class="p">(</span><span class="n">w1</span><span class="p">,</span> <span class="n">c1</span><span class="p">))</span>
|
||||
<span class="n">new_w2s</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_color</span><span class="p">(</span><span class="n">w2</span><span class="p">,</span> <span class="n">c2</span><span class="p">))</span>
|
||||
|
||||
<span class="n">t1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">original_text</span><span class="o">.</span><span class="n">replace_words_at_indices</span><span class="p">(</span><span class="n">new_is</span><span class="p">,</span> <span class="n">new_w1s</span><span class="p">)</span>
|
||||
<span class="n">t2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">original_text</span><span class="o">.</span><span class="n">replace_words_at_indices</span><span class="p">(</span><span class="n">new_is</span><span class="p">,</span> <span class="n">new_w2s</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">t1</span><span class="p">),</span> <span class="nb">str</span><span class="p">(</span><span class="n">t2</span><span class="p">))</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">print_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">original_label</span><span class="p">),</span> <span class="s1">'-->'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">perturbed_label</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">diff</span><span class="p">()))</span></div>
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">textattack.attacks</span> <span class="k">as</span> <span class="nn">attacks</span>
|
||||
<span class="kn">import</span> <span class="nn">textattack.constraints</span> <span class="k">as</span> <span class="nn">constraints</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.datasets</span> <span class="k">import</span> <span class="n">YelpSentiment</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.models</span> <span class="k">import</span> <span class="n">BertForSentimentClassification</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.transformations</span> <span class="k">import</span> <span class="n">WordSwapCounterfit</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="c1"># Only use one GPU, if we have one.</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'CUDA_VISIBLE_DEVICES'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span>
|
||||
<span class="c1"># Disable tensorflow logs, except in the case of an error.</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'TF_CPP_MIN_LOG_LEVEL'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'3'</span>
|
||||
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">BertForSentimentClassification</span><span class="p">()</span>
|
||||
|
||||
<span class="n">transformation</span> <span class="o">=</span> <span class="n">WordSwapCounterfit</span><span class="p">(</span><span class="n">max_candidates</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||
|
||||
<span class="n">attack</span> <span class="o">=</span> <span class="n">attacks</span><span class="o">.</span><span class="n">GreedyWordSwapWIR</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">transformation</span><span class="p">)</span>
|
||||
|
||||
<span class="n">attack</span><span class="o">.</span><span class="n">add_constraints</span><span class="p">(</span>
|
||||
<span class="p">(</span>
|
||||
<span class="n">constraints</span><span class="o">.</span><span class="n">semantics</span><span class="o">.</span><span class="n">GoogleLanguageModel</span><span class="p">(</span><span class="n">top_n</span><span class="o">=</span><span class="mi">2</span><span class="p">),</span>
|
||||
<span class="c1"># constraints.syntax.LanguageTool(1),</span>
|
||||
<span class="c1"># constraints.semantics.UniversalSentenceEncoder(0.9, metric='cosine'),</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">yelp_data</span> <span class="o">=</span> <span class="n">YelpSentiment</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="c1"># yelp_data = [</span>
|
||||
<span class="c1"># (1, 'I hate this Restaurant!'), </span>
|
||||
<span class="c1"># (0, "Texas Jack's has amazing food.")</span>
|
||||
<span class="c1"># ]</span>
|
||||
|
||||
<span class="c1"># attack.enable_visdom()</span>
|
||||
<span class="n">attack</span><span class="o">.</span><span class="n">add_output_file</span><span class="p">(</span><span class="s1">'outputs/test.txt'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">attack</span><span class="o">.</span><span class="n">attack</span><span class="p">(</span><span class="n">yelp_data</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
456
docs/_build/html/_modules/attacks/genetic_algorithm.html
vendored
Normal file
456
docs/_build/html/_modules/attacks/genetic_algorithm.html
vendored
Normal file
@@ -0,0 +1,456 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>attacks.genetic_algorithm — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>attacks.genetic_algorithm</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for attacks.genetic_algorithm</h1><div class="highlight"><pre>
|
||||
<span></span><span class="sd">'''</span>
|
||||
<span class="sd">Algorithm from Generating Natural Language Adversarial Examples by Alzantot et al.</span>
|
||||
|
||||
<span class="sd">`<arxiv.org/abs/1804.07998>`_</span>
|
||||
|
||||
<span class="sd">`<github.com/nesl/nlp_adversarial_examples>`_</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack.attacks</span> <span class="k">import</span> <span class="n">Attack</span><span class="p">,</span> <span class="n">AttackResult</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.transformations</span> <span class="k">import</span> <span class="n">WordSwap</span>
|
||||
|
||||
<div class="viewcode-block" id="GeneticAlgorithm"><a class="viewcode-back" href="../../attacks/genetic_algorithm.html#attacks.genetic_algorithm.GeneticAlgorithm">[docs]</a><span class="k">class</span> <span class="nc">GeneticAlgorithm</span><span class="p">(</span><span class="n">Attack</span><span class="p">):</span>
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> Attacks a model using a genetic algorithm. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> model: A PyTorch or TensorFlow model to attack.</span>
|
||||
<span class="sd"> transformation: The type of transformation to use. Should be a subclass of WordSwap. </span>
|
||||
<span class="sd"> pop_size (:obj:`int`, optional): The population size. Defaults to 20. </span>
|
||||
<span class="sd"> max_iters (:obj:`int`, optional): The maximum number of iterations to use. Defaults to 100. </span>
|
||||
<span class="sd"> n1 (:obj:`int`, optional): The number of similar words. Defaults to 20. </span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> ValueError: If the transformation is not a subclass of WordSwap. </span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">model</span><span class="p">,</span> <span class="n">transformation</span><span class="p">,</span> <span class="n">pop_size</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">max_iters</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n1</span><span class="o">=</span><span class="mi">20</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">transformation</span><span class="p">,</span> <span class="n">WordSwap</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">f</span><span class="s1">'Transformation is of type {type(transformation)}, should be a subclass of WordSwap'</span><span class="p">)</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">model</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transformation</span> <span class="o">=</span> <span class="n">transformation</span>
|
||||
<span class="c1"># self.batch_model = batch_model</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_iters</span> <span class="o">=</span> <span class="n">max_iters</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span> <span class="o">=</span> <span class="n">pop_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">top_n</span> <span class="o">=</span> <span class="n">n1</span> <span class="c1"># similar words</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">temp</span> <span class="o">=</span> <span class="mf">0.3</span>
|
||||
|
||||
<div class="viewcode-block" id="GeneticAlgorithm.select_best_replacement"><a class="viewcode-back" href="../../attacks/genetic_algorithm.html#attacks.genetic_algorithm.GeneticAlgorithm.select_best_replacement">[docs]</a> <span class="k">def</span> <span class="nf">select_best_replacement</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pos</span><span class="p">,</span> <span class="n">x_cur</span><span class="p">,</span> <span class="n">x_orig</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">replace_list</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Select the most effective replacement for word at position (pos)</span>
|
||||
<span class="sd"> in (x_cur) between the words in replace_list.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> pos:</span>
|
||||
<span class="sd"> x_cur:</span>
|
||||
<span class="sd"> x_orig:</span>
|
||||
<span class="sd"> target:</span>
|
||||
<span class="sd"> replace_list:</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">orig_words</span> <span class="o">=</span> <span class="n">x_orig</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
<span class="n">new_x_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">x_cur</span><span class="o">.</span><span class="n">replace_word_at_index</span><span class="p">(</span>
|
||||
<span class="n">pos</span><span class="p">,</span> <span class="n">w</span><span class="p">)</span> <span class="k">if</span> <span class="n">orig_words</span><span class="p">[</span><span class="n">pos</span><span class="p">]</span> <span class="o">!=</span> <span class="n">w</span> <span class="ow">and</span> <span class="n">w</span> <span class="o">!=</span> <span class="s1">'0.0'</span> <span class="k">else</span> <span class="n">x_cur</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="n">replace_list</span><span class="p">]</span>
|
||||
<span class="n">new_x_preds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">(</span><span class="n">new_x_list</span><span class="p">)</span>
|
||||
|
||||
<span class="n">new_x_scores</span> <span class="o">=</span> <span class="n">new_x_preds</span><span class="p">[:,</span> <span class="n">target</span><span class="p">]</span>
|
||||
<span class="n">orig_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">([</span><span class="n">x_cur</span><span class="p">])[</span><span class="n">target</span><span class="p">]</span>
|
||||
<span class="n">new_x_scores</span> <span class="o">=</span> <span class="n">new_x_scores</span> <span class="o">-</span> <span class="n">orig_score</span>
|
||||
|
||||
<span class="c1"># Eliminate words that are not that close</span>
|
||||
<span class="n">new_x_scores</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">top_n</span><span class="p">:]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">10000000</span>
|
||||
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> if self.use_lm:</span>
|
||||
<span class="sd"> prefix = ""</span>
|
||||
<span class="sd"> suffix = None</span>
|
||||
<span class="sd"> if pos > 0:</span>
|
||||
<span class="sd"> prefix = self.dataset.inv_dict[x_cur[pos-1]]</span>
|
||||
<span class="sd"> #</span>
|
||||
<span class="sd"> orig_word = self.dataset.inv_dict[x_orig[pos]]</span>
|
||||
<span class="sd"> if self.use_suffix and pos < x_cur.shape[0]-1:</span>
|
||||
<span class="sd"> if (x_cur[pos+1] != 0):</span>
|
||||
<span class="sd"> suffix = self.dataset.inv_dict[x_cur[pos+1]]</span>
|
||||
<span class="sd"> # print('** ', orig_word)</span>
|
||||
<span class="sd"> replace_words_and_orig = [</span>
|
||||
<span class="sd"> self.dataset.inv_dict[w] if w in self.dataset.inv_dict else 'UNK' for w in replace_list[:self.top_n]] + [orig_word]</span>
|
||||
<span class="sd"> # print(replace_words_and_orig)</span>
|
||||
<span class="sd"> replace_words_lm_scores = self.lm.get_words_probs(</span>
|
||||
<span class="sd"> prefix, replace_words_and_orig, suffix)</span>
|
||||
<span class="sd"> # print(replace_words_lm_scores)</span>
|
||||
<span class="sd"> # for i in range(len(replace_words_and_orig)):</span>
|
||||
<span class="sd"> # print(replace_words_and_orig[i], ' -- ', replace_words_lm_scores[i])</span>
|
||||
|
||||
<span class="sd"> # select words</span>
|
||||
<span class="sd"> new_words_lm_scores = np.array(replace_words_lm_scores[:-1])</span>
|
||||
<span class="sd"> # abs_diff_lm_scores = np.abs(new_words_lm_scores - replace_words_lm_scores[-1])</span>
|
||||
<span class="sd"> # rank_replaces_by_lm = np.argsort(abs_diff_lm_scores)</span>
|
||||
<span class="sd"> rank_replaces_by_lm = np.argsort(-new_words_lm_scores)</span>
|
||||
|
||||
<span class="sd"> filtered_words_idx = rank_replaces_by_lm[self.top_n2:]</span>
|
||||
<span class="sd"> # print(filtered_words_idx)</span>
|
||||
<span class="sd"> new_x_scores[filtered_words_idx] = -10000000</span>
|
||||
<span class="sd"> '''</span>
|
||||
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">new_x_scores</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">new_x_list</span><span class="p">[</span><span class="n">new_x_scores</span><span class="o">.</span><span class="n">argmax</span><span class="p">()]</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">x_cur</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GeneticAlgorithm.perturb"><a class="viewcode-back" href="../../attacks/genetic_algorithm.html#attacks.genetic_algorithm.GeneticAlgorithm.perturb">[docs]</a> <span class="k">def</span> <span class="nf">perturb</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x_cur</span><span class="p">,</span> <span class="n">x_orig</span><span class="p">,</span> <span class="n">neighbors</span><span class="p">,</span> <span class="n">w_select_probs</span><span class="p">,</span> <span class="n">target</span><span class="p">):</span>
|
||||
<span class="c1"># Pick a word that is not modified and is not UNK</span>
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> Replaces a word that has not been modified. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x_cur:</span>
|
||||
<span class="sd"> x_orig:</span>
|
||||
<span class="sd"> neighbors:</span>
|
||||
<span class="sd"> w_select_probs:</span>
|
||||
<span class="sd"> target:</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
<span class="n">x_len</span> <span class="o">=</span> <span class="n">w_select_probs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">rand_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">x_len</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">w_select_probs</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">diff_set</span> <span class="o">=</span> <span class="n">x_cur</span><span class="o">.</span><span class="n">all_words_diff</span><span class="p">(</span><span class="n">x_orig</span><span class="p">)</span>
|
||||
<span class="n">num_replaceable_words</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sign</span><span class="p">(</span><span class="n">w_select_probs</span><span class="p">))</span>
|
||||
<span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">diff_set</span><span class="p">)</span> <span class="o"><</span> <span class="n">num_replaceable_words</span> <span class="ow">and</span> <span class="n">x_cur</span><span class="o">.</span><span class="n">ith_word_diff</span><span class="p">(</span><span class="n">x_orig</span><span class="p">,</span> <span class="n">rand_idx</span><span class="p">):</span>
|
||||
<span class="sd">''' The condition above is a quick hack to prevent getting stuck in an infinite loop when processing examples that are too short, where all words (excluding articles) have already been replaced and still no successful attack is found. A more elegant way to handle this could be implemented in Attack to abort early based on the status of all population members, or to improve select_best_replacement by making it stochastic.</span>
|
||||
<span class="sd"> '''</span>
|
||||
<span class="n">rand_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">x_len</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">w_select_probs</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">replace_list</span> <span class="o">=</span> <span class="n">neighbors</span><span class="p">[</span><span class="n">rand_idx</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">replace_list</span><span class="p">)</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_n</span><span class="p">:</span>
|
||||
<span class="n">replace_list</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span>
|
||||
<span class="p">(</span><span class="n">replace_list</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">top_n</span> <span class="o">-</span> <span class="n">replace_list</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])))</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">select_best_replacement</span><span class="p">(</span><span class="n">rand_idx</span><span class="p">,</span> <span class="n">x_cur</span><span class="p">,</span> <span class="n">x_orig</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">replace_list</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GeneticAlgorithm.generate_population"><a class="viewcode-back" href="../../attacks/genetic_algorithm.html#attacks.genetic_algorithm.GeneticAlgorithm.generate_population">[docs]</a> <span class="k">def</span> <span class="nf">generate_population</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x_orig</span><span class="p">,</span> <span class="n">neigbhors_list</span><span class="p">,</span> <span class="n">w_select_probs</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">pop_size</span><span class="p">):</span>
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> Generates the population of replacement words. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x_orig:</span>
|
||||
<span class="sd"> neigbhors_list:</span>
|
||||
<span class="sd"> w_select_probs:</span>
|
||||
<span class="sd"> target:</span>
|
||||
<span class="sd"> pop_size:</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The population</span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
|
||||
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">perturb</span><span class="p">(</span><span class="n">x_orig</span><span class="p">,</span> <span class="n">x_orig</span><span class="p">,</span> <span class="n">neigbhors_list</span><span class="p">,</span> <span class="n">w_select_probs</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">pop_size</span><span class="p">)]</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GeneticAlgorithm.crossover"><a class="viewcode-back" href="../../attacks/genetic_algorithm.html#attacks.genetic_algorithm.GeneticAlgorithm.crossover">[docs]</a> <span class="k">def</span> <span class="nf">crossover</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x1</span><span class="p">,</span> <span class="n">x2</span><span class="p">):</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x1:</span>
|
||||
<span class="sd"> x2:</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
|
||||
<span class="n">indices_to_replace</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">words_to_replace</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">x2_words</span> <span class="o">=</span> <span class="n">x2</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">x1</span><span class="o">.</span><span class="n">words</span><span class="p">())):</span>
|
||||
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">()</span> <span class="o"><</span> <span class="mf">0.5</span><span class="p">:</span>
|
||||
<span class="n">indices_to_replace</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
|
||||
<span class="n">words_to_replace</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x2_words</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">x1</span><span class="o">.</span><span class="n">replace_words_at_indices</span><span class="p">(</span><span class="n">indices_to_replace</span><span class="p">,</span> <span class="n">words_to_replace</span><span class="p">)</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_neighbors</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">,</span> <span class="n">original_tokenized_text</span><span class="p">):</span>
|
||||
<span class="n">words</span> <span class="o">=</span> <span class="n">tokenized_text</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
<span class="n">neighbors_list</span> <span class="o">=</span> <span class="p">[[]</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">words</span><span class="p">))]</span>
|
||||
<span class="n">transformations</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_transformations</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">transformation</span><span class="p">,</span>
|
||||
<span class="n">tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">original_text</span><span class="o">=</span><span class="n">original_tokenized_text</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">transformed_text</span> <span class="ow">in</span> <span class="n">transformations</span><span class="p">:</span>
|
||||
<span class="n">diff_idx</span> <span class="o">=</span> <span class="n">tokenized_text</span><span class="o">.</span><span class="n">first_word_diff_index</span><span class="p">(</span><span class="n">transformed_text</span><span class="p">)</span>
|
||||
<span class="n">neighbors_list</span><span class="p">[</span><span class="n">diff_idx</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">transformed_text</span><span class="o">.</span><span class="n">words</span><span class="p">()[</span><span class="n">diff_idx</span><span class="p">])</span>
|
||||
<span class="n">neighbors_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">neighbors_list</span><span class="p">]</span>
|
||||
<span class="n">neighbors_len</span> <span class="o">=</span> <span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">neighbors_list</span><span class="p">]</span>
|
||||
<span class="n">w_select_probs</span> <span class="o">=</span> <span class="n">neighbors_len</span> <span class="o">/</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">neighbors_len</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">neighbors_list</span><span class="p">,</span> <span class="n">w_select_probs</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_attack_one</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">original_label</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">):</span>
|
||||
<span class="n">target</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">original_label</span>
|
||||
<span class="n">original_tokenized_text</span> <span class="o">=</span> <span class="n">tokenized_text</span>
|
||||
<span class="n">neighbors_list</span><span class="p">,</span> <span class="n">w_select_probs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_neighbors</span><span class="p">(</span>
|
||||
<span class="n">tokenized_text</span><span class="p">,</span> <span class="n">original_tokenized_text</span><span class="p">)</span>
|
||||
<span class="n">pop</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">generate_population</span><span class="p">(</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span> <span class="n">neighbors_list</span><span class="p">,</span> <span class="n">w_select_probs</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max_iters</span><span class="p">):</span>
|
||||
<span class="n">pop_preds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">(</span><span class="n">pop</span><span class="p">)</span>
|
||||
<span class="n">pop_scores</span> <span class="o">=</span> <span class="n">pop_preds</span><span class="p">[:,</span> <span class="n">target</span><span class="p">]</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'</span><span class="se">\t\t</span><span class="s1">'</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="s1">' -- '</span><span class="p">,</span> <span class="n">pop_scores</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
|
||||
<span class="n">top_attack</span> <span class="o">=</span> <span class="n">pop_scores</span><span class="o">.</span><span class="n">argmax</span><span class="p">()</span>
|
||||
|
||||
<span class="n">logits</span> <span class="o">=</span> <span class="p">(</span><span class="n">pop_scores</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp</span><span class="p">)</span><span class="o">.</span><span class="n">exp</span><span class="p">()</span>
|
||||
<span class="n">select_probs</span> <span class="o">=</span> <span class="p">(</span><span class="n">logits</span> <span class="o">/</span> <span class="n">logits</span><span class="o">.</span><span class="n">sum</span><span class="p">())</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
|
||||
<span class="n">top_attack_probs</span> <span class="o">=</span> <span class="n">pop_preds</span><span class="p">[</span><span class="n">top_attack</span><span class="p">,</span> <span class="p">:]</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">top_attack_probs</span><span class="p">)</span> <span class="o">==</span> <span class="n">target</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">AttackResult</span><span class="p">(</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">pop</span><span class="p">[</span><span class="n">top_attack</span><span class="p">],</span>
|
||||
<span class="n">original_label</span><span class="p">,</span>
|
||||
<span class="n">target</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">elite</span> <span class="o">=</span> <span class="p">[</span><span class="n">pop</span><span class="p">[</span><span class="n">top_attack</span><span class="p">]]</span> <span class="c1"># elite</span>
|
||||
<span class="c1"># print(select_probs.shape)</span>
|
||||
<span class="n">parent1_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">select_probs</span><span class="p">)</span>
|
||||
<span class="n">parent2_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">select_probs</span><span class="p">)</span>
|
||||
|
||||
<span class="n">initial_children</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">crossover</span><span class="p">(</span><span class="n">pop</span><span class="p">[</span><span class="n">parent1_idx</span><span class="p">[</span><span class="n">i</span><span class="p">]],</span>
|
||||
<span class="n">pop</span><span class="p">[</span><span class="n">parent2_idx</span><span class="p">[</span><span class="n">i</span><span class="p">]])</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pop_size</span><span class="o">-</span><span class="mi">1</span><span class="p">)]</span>
|
||||
<span class="n">children</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">child</span> <span class="ow">in</span> <span class="n">initial_children</span><span class="p">:</span>
|
||||
<span class="n">neighbors_list</span><span class="p">,</span> <span class="n">w_select_probs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_neighbors</span><span class="p">(</span>
|
||||
<span class="n">child</span><span class="p">,</span> <span class="n">original_tokenized_text</span><span class="p">)</span>
|
||||
<span class="n">children</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">perturb</span><span class="p">(</span>
|
||||
<span class="n">child</span><span class="p">,</span> <span class="n">original_tokenized_text</span><span class="p">,</span> <span class="n">neighbors_list</span><span class="p">,</span> <span class="n">w_select_probs</span><span class="p">,</span> <span class="n">target</span><span class="p">))</span>
|
||||
|
||||
<span class="n">pop</span> <span class="o">=</span> <span class="n">elite</span> <span class="o">+</span> <span class="n">children</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">AttackResult</span><span class="p">(</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">pop</span><span class="p">[</span><span class="n">top_attack</span><span class="p">],</span>
|
||||
<span class="n">original_label</span><span class="p">,</span>
|
||||
<span class="n">target</span>
|
||||
<span class="p">)</span></div>
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
270
docs/_build/html/_modules/attacks/greedy_word_swap.html
vendored
Normal file
270
docs/_build/html/_modules/attacks/greedy_word_swap.html
vendored
Normal file
@@ -0,0 +1,270 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>attacks.greedy_word_swap — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>attacks.greedy_word_swap</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for attacks.greedy_word_swap</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">textattack.attacks</span> <span class="k">import</span> <span class="n">Attack</span><span class="p">,</span> <span class="n">AttackResult</span>
|
||||
|
||||
<div class="viewcode-block" id="GreedyWordSwap"><a class="viewcode-back" href="../../attacks/greedy_word_swap.html#attacks.greedy_word_swap.GreedyWordSwap">[docs]</a><span class="k">class</span> <span class="nc">GreedyWordSwap</span><span class="p">(</span><span class="n">Attack</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> An attack that greedily chooses from a list of possible </span>
|
||||
<span class="sd"> perturbations.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> model: The PyTorch NLP model to attack.</span>
|
||||
<span class="sd"> transformation: The type of transformation.</span>
|
||||
<span class="sd"> max_depth (:obj:`int`, optional): The maximum number of words to change. Defaults to 32. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">model</span><span class="p">,</span> <span class="n">transformation</span><span class="p">,</span> <span class="n">max_depth</span><span class="o">=</span><span class="mi">32</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transformation</span> <span class="o">=</span> <span class="n">transformation</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_depth</span> <span class="o">=</span> <span class="n">max_depth</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_attack_one</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">original_label</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">):</span>
|
||||
<span class="n">original_tokenized_text</span> <span class="o">=</span> <span class="n">tokenized_text</span>
|
||||
<span class="n">num_words_changed</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">unswapped_word_indices</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tokenized_text</span><span class="o">.</span><span class="n">words</span><span class="p">())))</span>
|
||||
<span class="n">new_tokenized_text</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">new_text_label</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">while</span> <span class="n">num_words_changed</span> <span class="o"><=</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_depth</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">unswapped_word_indices</span><span class="p">):</span>
|
||||
<span class="n">num_words_changed</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="n">transformed_text_candidates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_transformations</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transformation</span><span class="p">,</span>
|
||||
<span class="n">tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">indices_to_replace</span><span class="o">=</span><span class="n">unswapped_word_indices</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">transformed_text_candidates</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="c1"># If we did not find any possible perturbations, give up.</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">(</span><span class="n">transformed_text_candidates</span><span class="p">)</span>
|
||||
<span class="c1"># The best choice is the one that minimizes the original class label.</span>
|
||||
<span class="n">best_index</span> <span class="o">=</span> <span class="n">scores</span><span class="p">[:,</span> <span class="n">original_label</span><span class="p">]</span><span class="o">.</span><span class="n">argmin</span><span class="p">()</span>
|
||||
<span class="n">new_tokenized_text</span> <span class="o">=</span> <span class="n">transformed_text_candidates</span><span class="p">[</span><span class="n">best_index</span><span class="p">]</span>
|
||||
<span class="c1"># If we changed the label, break.</span>
|
||||
<span class="n">new_text_label</span> <span class="o">=</span> <span class="n">scores</span><span class="p">[</span><span class="n">best_index</span><span class="p">]</span><span class="o">.</span><span class="n">argmax</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">new_text_label</span> <span class="o">!=</span> <span class="n">original_label</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
<span class="c1"># Otherwise, remove this word from list of words to change and</span>
|
||||
<span class="c1"># iterate.</span>
|
||||
<span class="n">word_swap_loc</span> <span class="o">=</span> <span class="n">tokenized_text</span><span class="o">.</span><span class="n">first_word_diff_index</span><span class="p">(</span><span class="n">new_tokenized_text</span><span class="p">)</span>
|
||||
<span class="n">tokenized_text</span> <span class="o">=</span> <span class="n">new_tokenized_text</span>
|
||||
<span class="n">unswapped_word_indices</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">word_swap_loc</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">AttackResult</span><span class="p">(</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">new_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">original_label</span><span class="p">,</span>
|
||||
<span class="n">new_text_label</span>
|
||||
<span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
288
docs/_build/html/_modules/attacks/greedy_word_swap_wir.html
vendored
Normal file
288
docs/_build/html/_modules/attacks/greedy_word_swap_wir.html
vendored
Normal file
@@ -0,0 +1,288 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>attacks.greedy_word_swap_wir — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>attacks.greedy_word_swap_wir</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for attacks.greedy_word_swap_wir</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">textattack.attacks</span> <span class="k">import</span> <span class="n">Attack</span><span class="p">,</span> <span class="n">AttackResult</span>
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
|
||||
<div class="viewcode-block" id="GreedyWordSwapWIR"><a class="viewcode-back" href="../../attacks/greedy_word_swap.html#attacks.greedy_word_swap_wir.GreedyWordSwapWIR">[docs]</a><span class="k">class</span> <span class="nc">GreedyWordSwapWIR</span><span class="p">(</span><span class="n">Attack</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> An attack that greedily chooses from a list of possible </span>
|
||||
<span class="sd"> perturbations for each index, after ranking indices by importance.</span>
|
||||
<span class="sd"> Reimplementation of paper:</span>
|
||||
<span class="sd"> Is BERT Really Robust? A Strong Baseline for Natural Language Attack on </span>
|
||||
<span class="sd"> Text Classification and Entailment by Jin et al., 2019</span>
|
||||
<span class="sd"> https://github.com/jind11/TextFooler </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> model: The PyTorch NLP model to attack.</span>
|
||||
<span class="sd"> transformation: The type of transformation.</span>
|
||||
<span class="sd"> max_depth (:obj:`int`, optional): The maximum number of words to change. Defaults to 32. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">model</span><span class="p">,</span> <span class="n">transformation</span><span class="p">,</span> <span class="n">max_depth</span><span class="o">=</span><span class="mi">32</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transformation</span> <span class="o">=</span> <span class="n">transformation</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_depth</span> <span class="o">=</span> <span class="n">max_depth</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_attack_one</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">original_label</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">):</span>
|
||||
<span class="n">original_tokenized_text</span> <span class="o">=</span> <span class="n">tokenized_text</span>
|
||||
<span class="n">num_words_changed</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
|
||||
<span class="c1"># Sort words by order of importance</span>
|
||||
<span class="n">orig_probs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">([</span><span class="n">tokenized_text</span><span class="p">])</span>
|
||||
<span class="n">orig_prob</span> <span class="o">=</span> <span class="n">orig_probs</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
||||
<span class="n">len_text</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">tokenized_text</span><span class="o">.</span><span class="n">words</span><span class="p">())</span>
|
||||
<span class="n">leave_one_texts</span> <span class="o">=</span> \
|
||||
<span class="p">[</span><span class="n">tokenized_text</span><span class="o">.</span><span class="n">replace_word_at_index</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="s1">'[UNKNOWN]'</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">len_text</span><span class="p">)]</span>
|
||||
<span class="n">leave_one_probs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">(</span><span class="n">leave_one_texts</span><span class="p">)</span>
|
||||
<span class="n">leave_one_probs_argmax</span> <span class="o">=</span> <span class="n">leave_one_probs</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">importance_scores</span> <span class="o">=</span> <span class="p">(</span><span class="n">orig_prob</span> <span class="o">-</span> <span class="n">leave_one_probs</span><span class="p">[:,</span> <span class="n">original_label</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="p">(</span><span class="n">leave_one_probs_argmax</span> <span class="o">!=</span> <span class="n">original_label</span><span class="p">)</span><span class="o">.</span><span class="n">float</span><span class="p">()</span> <span class="o">*</span>
|
||||
<span class="p">(</span><span class="n">leave_one_probs</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="o">-</span> <span class="n">torch</span><span class="o">.</span><span class="n">index_select</span><span class="p">(</span><span class="n">orig_probs</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">leave_one_probs_argmax</span><span class="p">)))</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
<span class="n">index_order</span> <span class="o">=</span> <span class="p">(</span><span class="o">-</span><span class="n">importance_scores</span><span class="p">)</span><span class="o">.</span><span class="n">argsort</span><span class="p">()</span>
|
||||
|
||||
<span class="n">new_tokenized_text</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">new_text_label</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">while</span> <span class="n">num_words_changed</span> <span class="o"><=</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_depth</span> <span class="ow">and</span> <span class="n">i</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="n">index_order</span><span class="p">):</span>
|
||||
<span class="n">transformed_text_candidates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_transformations</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">transformation</span><span class="p">,</span>
|
||||
<span class="n">tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">indices_to_replace</span><span class="o">=</span><span class="p">[</span><span class="n">index_order</span><span class="p">[</span><span class="n">i</span><span class="p">]])</span>
|
||||
<span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">transformed_text_candidates</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">num_words_changed</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_model</span><span class="p">(</span><span class="n">transformed_text_candidates</span><span class="p">)</span>
|
||||
<span class="c1"># The best choice is the one that minimizes the original class label.</span>
|
||||
<span class="n">best_index</span> <span class="o">=</span> <span class="n">scores</span><span class="p">[:,</span> <span class="n">original_label</span><span class="p">]</span><span class="o">.</span><span class="n">argmin</span><span class="p">()</span>
|
||||
<span class="n">new_tokenized_text</span> <span class="o">=</span> <span class="n">transformed_text_candidates</span><span class="p">[</span><span class="n">best_index</span><span class="p">]</span>
|
||||
<span class="c1"># If we changed the label, break.</span>
|
||||
<span class="n">new_text_label</span> <span class="o">=</span> <span class="n">scores</span><span class="p">[</span><span class="n">best_index</span><span class="p">]</span><span class="o">.</span><span class="n">argmax</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">new_text_label</span> <span class="o">!=</span> <span class="n">original_label</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
<span class="n">tokenized_text</span> <span class="o">=</span> <span class="n">new_tokenized_text</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">AttackResult</span><span class="p">(</span>
|
||||
<span class="n">original_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">new_tokenized_text</span><span class="p">,</span>
|
||||
<span class="n">original_label</span><span class="p">,</span>
|
||||
<span class="n">new_text_label</span>
|
||||
<span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
243
docs/_build/html/_modules/constraints/constraint.html
vendored
Normal file
243
docs/_build/html/_modules/constraints/constraint.html
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>constraints.constraint — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>constraints.constraint</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for constraints.constraint</h1><div class="highlight"><pre>
|
||||
<span></span><span class="sd">""" Abstract classes that represent constraints on adversarial text examples. </span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Constraint"><a class="viewcode-back" href="../../constraints/constraint.html#constraints.constraint.Constraint">[docs]</a><span class="k">class</span> <span class="nc">Constraint</span><span class="p">:</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> An abstract class that represents constraints on adversarial text examples. </span>
|
||||
<span class="sd"> A constraint evaluates if (x,x_adv) meets a certain constraint. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<div class="viewcode-block" id="Constraint.call_many"><a class="viewcode-back" href="../../constraints/constraint.html#constraints.constraint.Constraint.call_many">[docs]</a> <span class="k">def</span> <span class="nf">call_many</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">,</span> <span class="n">original_text</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Filters x_adv_list to x_adv where C(x,x_adv) is true.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x:</span>
|
||||
<span class="sd"> x_adv_list:</span>
|
||||
<span class="sd"> original_text(:obj:`type`, optional): Defaults to None. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">):</span>
|
||||
<span class="sd">""" Returns True if C(x,x_adv) is true. """</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
327
docs/_build/html/_modules/constraints/semantics/google_language_model/alzantot_goog_lm.html
vendored
Normal file
327
docs/_build/html/_modules/constraints/semantics/google_language_model/alzantot_goog_lm.html
vendored
Normal file
@@ -0,0 +1,327 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>constraints.semantics.google_language_model.alzantot_goog_lm — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../../../index.html">Module code</a> »</li>
|
||||
|
||||
<li>constraints.semantics.google_language_model.alzantot_goog_lm</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for constraints.semantics.google_language_model.alzantot_goog_lm</h1><div class="highlight"><pre>
|
||||
<span></span><span class="sd">"""</span>
|
||||
<span class="sd"> Author: Moustafa Alzantot (malzantot@ucla.edu)</span>
|
||||
<span class="sd"> All rights reserved.</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">tensorflow</span> <span class="k">as</span> <span class="nn">tf</span>
|
||||
<span class="kn">import</span> <span class="nn">sys</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">google.protobuf</span> <span class="k">import</span> <span class="n">text_format</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">textattack.constraints.semantics.google_language_model.lm_utils</span> <span class="k">as</span> <span class="nn">lm_utils</span>
|
||||
<span class="kn">import</span> <span class="nn">textattack.constraints.semantics.google_language_model.lm_data_utils</span> <span class="k">as</span> <span class="nn">lm_data_utils</span>
|
||||
|
||||
<span class="n">tf</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="s1">'INFO'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># @TODO automatically choose between GPU and CPU.</span>
|
||||
|
||||
<span class="n">ROOT_FOLDER</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research_OLD/nlp_adversarial_examples_new/goog_lm'</span>
|
||||
|
||||
<div class="viewcode-block" id="GoogLMHelper"><a class="viewcode-back" href="../../../../constraints/semantics/google_language_model.html#constraints.semantics.google_language_model.alzantot_goog_lm.GoogLMHelper">[docs]</a><span class="k">class</span> <span class="nc">GoogLMHelper</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> An implementation of `<https://arxiv.org/abs/1804.07998>`_</span>
|
||||
<span class="sd"> adapted from `<https://github.com/nesl/nlp_adversarial_examples>`_. </span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">PBTXT_PATH</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ROOT_FOLDER</span><span class="p">,</span> <span class="s1">'graph-2016-09-10-gpu.pbtxt'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">CKPT_PATH</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ROOT_FOLDER</span><span class="p">,</span> <span class="s1">'ckpt-*'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">VOCAB_PATH</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ROOT_FOLDER</span><span class="p">,</span> <span class="s1">'vocab-2016-09-10.txt'</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">BATCH_SIZE</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">NUM_TIMESTEPS</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">MAX_WORD_LEN</span> <span class="o">=</span> <span class="mi">50</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocab</span> <span class="o">=</span> <span class="n">lm_data_utils</span><span class="o">.</span><span class="n">CharsVocabulary</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">VOCAB_PATH</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">MAX_WORD_LEN</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s2">"/gpu:1"</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">graph</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">Graph</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sess</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">compat</span><span class="o">.</span><span class="n">v1</span><span class="o">.</span><span class="n">Session</span><span class="p">(</span><span class="n">graph</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">graph</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">graph</span><span class="o">.</span><span class="n">as_default</span><span class="p">():</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span> <span class="o">=</span> <span class="n">lm_utils</span><span class="o">.</span><span class="n">LoadModel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sess</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">graph</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">PBTXT_PATH</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">CKPT_PATH</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="GoogLMHelper.get_words_probs"><a class="viewcode-back" href="../../../../constraints/semantics/google_language_model.html#constraints.semantics.google_language_model.alzantot_goog_lm.GoogLMHelper.get_words_probs">[docs]</a> <span class="k">def</span> <span class="nf">get_words_probs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">prefix_words</span><span class="p">,</span> <span class="n">list_words</span><span class="p">,</span> <span class="n">suffix</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">'''</span>
|
||||
<span class="sd"> Retrieves the probability of words.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> prefix_words:</span>
|
||||
<span class="sd"> list_words:</span>
|
||||
<span class="sd"> suffix (:obj:`type`, optional): Defaults to None. </span>
|
||||
|
||||
<span class="sd"> '''</span>
|
||||
<span class="n">targets</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">BATCH_SIZE</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">NUM_TIMESTEPS</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
|
||||
<span class="n">weights</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">BATCH_SIZE</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">NUM_TIMESTEPS</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">prefix_words</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'<S>'</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">prefix_words</span> <span class="o">=</span> <span class="s1">'<S> '</span> <span class="o">+</span> <span class="n">prefix_words</span>
|
||||
<span class="n">prefix</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">word_to_id</span><span class="p">(</span><span class="n">w</span><span class="p">)</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="n">prefix_words</span><span class="o">.</span><span class="n">split</span><span class="p">()]</span>
|
||||
<span class="n">prefix_char_ids</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">word_to_char_ids</span><span class="p">(</span><span class="n">w</span><span class="p">)</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="n">prefix_words</span><span class="o">.</span><span class="n">split</span><span class="p">()]</span>
|
||||
|
||||
<span class="n">inputs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">BATCH_SIZE</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">NUM_TIMESTEPS</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
|
||||
<span class="n">char_ids_inputs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">BATCH_SIZE</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">NUM_TIMESTEPS</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">max_word_length</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples</span> <span class="o">=</span> <span class="n">prefix</span><span class="p">[:]</span>
|
||||
<span class="n">char_ids_samples</span> <span class="o">=</span> <span class="n">prefix_char_ids</span><span class="p">[:]</span>
|
||||
<span class="n">inputs</span> <span class="o">=</span> <span class="p">[</span> <span class="p">[</span><span class="n">samples</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]]</span>
|
||||
<span class="n">char_ids_inputs</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="p">:]</span> <span class="o">=</span> <span class="n">char_ids_samples</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">softmax</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'softmax_out'</span><span class="p">],</span>
|
||||
<span class="n">feed_dict</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'char_inputs_in'</span><span class="p">]:</span> <span class="n">char_ids_inputs</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'inputs_in'</span><span class="p">]:</span> <span class="n">inputs</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'targets_in'</span><span class="p">]:</span> <span class="n">targets</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'target_weights_in'</span><span class="p">]:</span> <span class="n">weights</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">words_ids</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">word_to_id</span><span class="p">(</span><span class="n">w</span><span class="p">)</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="n">list_words</span><span class="p">]</span>
|
||||
<span class="n">word_probs</span> <span class="o">=</span><span class="p">[</span><span class="n">softmax</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="n">w_id</span><span class="p">]</span> <span class="k">for</span> <span class="n">w_id</span> <span class="ow">in</span> <span class="n">words_ids</span><span class="p">]</span>
|
||||
<span class="n">word_probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">word_probs</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">suffix</span> <span class="o">==</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">suffix_probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">word_probs</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">suffix_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">word_to_id</span><span class="p">(</span><span class="n">suffix</span><span class="p">)</span>
|
||||
<span class="n">suffix_probs</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">idx</span><span class="p">,</span> <span class="n">w_id</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">words_ids</span><span class="p">):</span>
|
||||
<span class="n">inputs</span> <span class="o">=</span> <span class="p">[[</span><span class="n">w_id</span><span class="p">]]</span>
|
||||
<span class="n">w_char_ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocab</span><span class="o">.</span><span class="n">word_to_char_ids</span><span class="p">(</span><span class="n">list_words</span><span class="p">[</span><span class="n">idx</span><span class="p">])</span>
|
||||
<span class="n">char_ids_inputs</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="p">:]</span> <span class="o">=</span> <span class="n">w_char_ids</span>
|
||||
<span class="n">softmax</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'softmax_out'</span><span class="p">],</span>
|
||||
<span class="n">feed_dict</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'char_inputs_in'</span><span class="p">]:</span> <span class="n">char_ids_inputs</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'inputs_in'</span><span class="p">]:</span> <span class="n">inputs</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'targets_in'</span><span class="p">]:</span> <span class="n">targets</span><span class="p">,</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">t</span><span class="p">[</span><span class="s1">'target_weights_in'</span><span class="p">]:</span> <span class="n">weights</span>
|
||||
<span class="p">})</span>
|
||||
<span class="n">suffix_probs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">softmax</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="n">suffix_id</span><span class="p">])</span>
|
||||
<span class="n">suffix_probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">suffix_probs</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">suffix_probs</span> <span class="o">*</span> <span class="n">word_probs</span></div></div>
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">my_lm</span> <span class="o">=</span> <span class="n">LM</span><span class="p">()</span>
|
||||
<span class="n">list_words</span> <span class="o">=</span> <span class="s1">'play will playing played afternoon'</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
||||
<span class="n">prefix</span> <span class="o">=</span> <span class="s1">'i'</span>
|
||||
<span class="n">suffix</span> <span class="o">=</span> <span class="s1">'yesterday'</span>
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="p">(</span><span class="n">my_lm</span><span class="o">.</span><span class="n">get_words_probs</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">list_words</span><span class="p">,</span> <span class="n">suffix</span><span class="p">))</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">w</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">list_words</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">w</span><span class="p">,</span> <span class="s1">' - '</span><span class="p">,</span> <span class="n">probs</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
320
docs/_build/html/_modules/constraints/semantics/google_language_model/google_language_model.html
vendored
Normal file
320
docs/_build/html/_modules/constraints/semantics/google_language_model/google_language_model.html
vendored
Normal file
@@ -0,0 +1,320 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>constraints.semantics.google_language_model.google_language_model — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../../../index.html">Module code</a> »</li>
|
||||
|
||||
<li>constraints.semantics.google_language_model.google_language_model</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for constraints.semantics.google_language_model.google_language_model</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">collections</span> <span class="k">import</span> <span class="n">defaultdict</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack.constraints</span> <span class="k">import</span> <span class="n">Constraint</span>
|
||||
<span class="kn">from</span> <span class="nn">.alzantot_goog_lm</span> <span class="k">import</span> <span class="n">GoogLMHelper</span>
|
||||
|
||||
<div class="viewcode-block" id="GoogleLanguageModel"><a class="viewcode-back" href="../../../../constraints/semantics/google_language_model.html#constraints.semantics.google_language_model.google_language_model.GoogleLanguageModel">[docs]</a><span class="k">class</span> <span class="nc">GoogleLanguageModel</span><span class="p">(</span><span class="n">Constraint</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Constraint that uses the Google 1 Billion Words Language Model to </span>
|
||||
<span class="sd"> determine the difference in perplexity between x and x_adv. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> top_n (int):</span>
|
||||
<span class="sd"> top_n_per_index (int):</span>
|
||||
<span class="sd"> print_step (:obj:`bool`, optional): Whether to print each step. Defaults to False. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The :obj:`top_n` sentences.</span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> ValueError: If :obj:`top_n` or :obj:`top_n_per_index` are not provided. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> @TODO allow user to set perplexity threshold; implement __call__.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> @TODO this use of the language model only really makes sense for </span>
|
||||
<span class="sd"> adversarial examples based on word swaps</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">top_n</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">top_n_per_index</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">print_step</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">top_n</span> <span class="ow">or</span> <span class="n">top_n_per_index</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Cannot instantiate GoogleLanguageModel without top_n or top_n_per_index'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">lm</span> <span class="o">=</span> <span class="n">GoogLMHelper</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">top_n</span> <span class="o">=</span> <span class="n">top_n</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">top_n_per_index</span> <span class="o">=</span> <span class="n">top_n_per_index</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">print_step</span> <span class="o">=</span> <span class="n">print_step</span>
|
||||
|
||||
<div class="viewcode-block" id="GoogleLanguageModel.call_many"><a class="viewcode-back" href="../../../../constraints/semantics/google_language_model.html#constraints.semantics.google_language_model.google_language_model.GoogleLanguageModel.call_many">[docs]</a> <span class="k">def</span> <span class="nf">call_many</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">,</span> <span class="n">original_text</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Returns the `top_n` of x_adv_list, as evaluated by the language </span>
|
||||
<span class="sd"> model. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x:</span>
|
||||
<span class="sd">            x_adv_list:</span>
|
||||
<span class="sd"> original_text (:obj:`type`, optional): Defaults to None. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">x_adv_list</span><span class="p">):</span> <span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_probs</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">):</span>
|
||||
<span class="n">word_swap_index</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">first_word_diff_index</span><span class="p">(</span><span class="n">x_adv_list</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">prefix</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">text_until_word_index</span><span class="p">(</span><span class="n">word_swap_index</span><span class="p">)</span>
|
||||
<span class="n">swapped_words</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">t</span><span class="o">.</span><span class="n">words</span><span class="p">()[</span><span class="n">word_swap_index</span><span class="p">]</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">x_adv_list</span><span class="p">])</span>
|
||||
<span class="n">suffix</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">text_after_word_index</span><span class="p">(</span><span class="n">word_swap_index</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">print_step</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">swapped_words</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span>
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lm</span><span class="o">.</span><span class="n">get_words_probs</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">swapped_words</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">probs</span>
|
||||
|
||||
<span class="c1"># This creates a dictionary where each new key is initialized to [].</span>
|
||||
<span class="n">word_swap_index_map</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="nb">list</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">idx</span><span class="p">,</span> <span class="n">x_adv</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">x_adv_list</span><span class="p">):</span>
|
||||
<span class="n">word_swap_index</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">first_word_diff_index</span><span class="p">(</span><span class="n">x_adv</span><span class="p">)</span>
|
||||
<span class="n">word_swap_index_map</span><span class="p">[</span><span class="n">word_swap_index</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">idx</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">))</span>
|
||||
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">word_swap_index</span><span class="p">,</span> <span class="n">item_list</span> <span class="ow">in</span> <span class="n">word_swap_index_map</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="c1"># zip(*some_list) is the inverse operator of zip!</span>
|
||||
<span class="n">item_indices</span><span class="p">,</span> <span class="n">this_x_adv_list</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="n">item_list</span><span class="p">)</span>
|
||||
<span class="n">t1</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">probs_of_swaps_at_index</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">item_indices</span><span class="p">,</span> <span class="n">get_probs</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">this_x_adv_list</span><span class="p">)))</span>
|
||||
<span class="c1"># Sort by probability in descending order and take the top n for this index.</span>
|
||||
<span class="n">probs_of_swaps_at_index</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="o">-</span><span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_n_per_index</span><span class="p">:</span>
|
||||
<span class="n">probs_of_swaps_at_index</span> <span class="o">=</span> <span class="n">probs_of_swaps_at_index</span><span class="p">[:</span><span class="bp">self</span><span class="o">.</span><span class="n">top_n_per_index</span><span class="p">]</span>
|
||||
<span class="n">probs</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">probs_of_swaps_at_index</span><span class="p">)</span>
|
||||
<span class="n">t2</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">print_step</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="s1">'LM {len(item_list)} items in {t2-t1}s'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Probs is a list of (index, prob) where index is the corresponding </span>
|
||||
<span class="c1"># position in x_adv_list.</span>
|
||||
<span class="n">probs</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># Now that they're in order, reduce to just a list of probabilities.</span>
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span><span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">probs</span><span class="p">)))</span>
|
||||
|
||||
<span class="c1"># Get the indices of the maximum elements.</span>
|
||||
<span class="n">max_el_indices</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="o">-</span><span class="n">probs</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_n</span><span class="p">:</span>
|
||||
<span class="n">max_el_indices</span> <span class="o">=</span> <span class="n">max_el_indices</span><span class="p">[:</span><span class="bp">self</span><span class="o">.</span><span class="n">top_n</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># Put indices in order, now, so that the examples are returned in the</span>
|
||||
<span class="c1"># same order they were passed in.</span>
|
||||
<span class="n">max_el_indices</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">x_adv_list</span><span class="p">)[</span><span class="n">max_el_indices</span><span class="p">]</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
350
docs/_build/html/_modules/constraints/semantics/universal_sentence_encoder.html
vendored
Normal file
350
docs/_build/html/_modules/constraints/semantics/universal_sentence_encoder.html
vendored
Normal file
@@ -0,0 +1,350 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>constraints.semantics.universal_sentence_encoder — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../../index.html">Module code</a> »</li>
|
||||
|
||||
<li>constraints.semantics.universal_sentence_encoder</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for constraints.semantics.universal_sentence_encoder</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack.constraints</span> <span class="k">import</span> <span class="n">Constraint</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.models</span> <span class="k">import</span> <span class="n">InferSent</span>
|
||||
<span class="kn">from</span> <span class="nn">textattack.utils</span> <span class="k">import</span> <span class="n">download_if_needed</span><span class="p">,</span> <span class="n">get_device</span>
|
||||
|
||||
<div class="viewcode-block" id="UniversalSentenceEncoder"><a class="viewcode-back" href="../../../constraints/semantics/semantics.html#constraints.semantics.universal_sentence_encoder.UniversalSentenceEncoder">[docs]</a><span class="k">class</span> <span class="nc">UniversalSentenceEncoder</span><span class="p">(</span><span class="n">Constraint</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Constraint using cosine similarity between Universal Sentence Encodings</span>
|
||||
<span class="sd"> of x and x_adv where the text embeddings are created using InferSent.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd">        threshold (:obj:`float`, optional): The threshold for the constraint to be met.</span>
|
||||
<span class="sd"> Defaults to 0.8</span>
|
||||
<span class="sd"> metric (:obj:`str`, optional): The metric function to use. Must be one of TODO. </span>
|
||||
<span class="sd"> Defaults to cosine. </span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> ValueError: If :obj:`metric` is not supported</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">MODEL_PATH</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research/text_attacks/RobustNLP/AttackGeneration/infersent-encoder'</span>
|
||||
<span class="n">WORD_EMBEDDING_PATH</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research/text_attacks/RobustNLP/AttackGeneration/word_embeddings'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">threshold</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">metric</span><span class="o">=</span><span class="s1">'cosine'</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">threshold</span> <span class="o">=</span> <span class="n">threshold</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_infersent_model</span><span class="p">()</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">metric</span><span class="o">==</span><span class="s1">'cosine'</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dist</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CosineSimilarity</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">f</span><span class="s1">'Unsupported metric </span><span class="si">{metric}</span><span class="s1">.'</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="UniversalSentenceEncoder.get_infersent_model"><a class="viewcode-back" href="../../../constraints/semantics/semantics.html#constraints.semantics.universal_sentence_encoder.UniversalSentenceEncoder.get_infersent_model">[docs]</a> <span class="k">def</span> <span class="nf">get_infersent_model</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Retrieves the InferSent model. </span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The pretrained InferSent model. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">infersent_version</span> <span class="o">=</span> <span class="mi">2</span>
|
||||
<span class="n">model_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">UniversalSentenceEncoder</span><span class="o">.</span><span class="n">MODEL_PATH</span><span class="p">,</span> <span class="n">f</span><span class="s1">'infersent</span><span class="si">{infersent_version}</span><span class="s1">.pkl'</span><span class="p">)</span>
|
||||
<span class="n">download_if_needed</span><span class="p">(</span><span class="n">model_path</span><span class="p">)</span>
|
||||
<span class="n">params_model</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'bsize'</span><span class="p">:</span> <span class="mi">64</span><span class="p">,</span> <span class="s1">'word_emb_dim'</span><span class="p">:</span> <span class="mi">300</span><span class="p">,</span> <span class="s1">'enc_lstm_dim'</span><span class="p">:</span> <span class="mi">2048</span><span class="p">,</span>
|
||||
<span class="s1">'pool_type'</span><span class="p">:</span> <span class="s1">'max'</span><span class="p">,</span> <span class="s1">'dpout_model'</span><span class="p">:</span> <span class="mf">0.0</span><span class="p">,</span> <span class="s1">'version'</span><span class="p">:</span> <span class="n">infersent_version</span><span class="p">}</span>
|
||||
<span class="n">infersent</span> <span class="o">=</span> <span class="n">InferSent</span><span class="p">(</span><span class="n">params_model</span><span class="p">)</span>
|
||||
<span class="n">infersent</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">model_path</span><span class="p">))</span>
|
||||
<span class="n">W2V_PATH</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">UniversalSentenceEncoder</span><span class="o">.</span><span class="n">WORD_EMBEDDING_PATH</span><span class="p">,</span>
|
||||
<span class="s1">'fastText'</span><span class="p">,</span> <span class="s1">'crawl-300d-2M.vec'</span><span class="p">)</span>
|
||||
<span class="n">download_if_needed</span><span class="p">(</span><span class="n">W2V_PATH</span><span class="p">)</span>
|
||||
<span class="n">infersent</span><span class="o">.</span><span class="n">set_w2v_path</span><span class="p">(</span><span class="n">W2V_PATH</span><span class="p">)</span>
|
||||
<span class="n">infersent</span><span class="o">.</span><span class="n">build_vocab_k_words</span><span class="p">(</span><span class="n">K</span><span class="o">=</span><span class="mi">100000</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">infersent</span></div>
|
||||
|
||||
<div class="viewcode-block" id="UniversalSentenceEncoder.score"><a class="viewcode-back" href="../../../constraints/semantics/semantics.html#constraints.semantics.universal_sentence_encoder.UniversalSentenceEncoder.score">[docs]</a> <span class="k">def</span> <span class="nf">score</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Returns the metric similarity between embeddings of the text and </span>
|
||||
<span class="sd"> the perturbed text. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x (str): The original text</span>
|
||||
<span class="sd"> x_adv (str): The perturbed text</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The similarity between the original and perturbed text using the metric. </span>
|
||||
|
||||
<span class="sd"> @TODO should this support multiple sentences for x_adv?</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">original_embedding</span><span class="p">,</span> <span class="n">perturbed_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">encode</span><span class="p">([</span><span class="n">x</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">],</span> <span class="n">tokenize</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">original_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">original_embedding</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">get_device</span><span class="p">())</span>
|
||||
<span class="n">perturbed_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">perturbed_embedding</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">get_device</span><span class="p">())</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dist</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)(</span><span class="n">original_embedding</span><span class="p">,</span> <span class="n">perturbed_embedding</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="UniversalSentenceEncoder.score_list"><a class="viewcode-back" href="../../../constraints/semantics/semantics.html#constraints.semantics.universal_sentence_encoder.UniversalSentenceEncoder.score_list">[docs]</a> <span class="k">def</span> <span class="nf">score_list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Returns the metric similarity between the embedding of the text and a list</span>
|
||||
<span class="sd"> of perturbed text. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x (str): The original text</span>
|
||||
<span class="sd"> x_adv_list (list(str)): A list of perturbed texts</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> A list with the similarity between the original text and each perturbed text in :obj:`x_adv_list`. </span>
|
||||
<span class="sd"> If x_adv_list is empty, an empty tensor is returned</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># Return an empty tensor if x_adv_list is empty.</span>
|
||||
<span class="c1"># This prevents us from calling .repeat(x, 0), which throws an</span>
|
||||
<span class="c1"># error on machines with multiple GPUs (pytorch 1.2).</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">x_adv_list</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">([])</span>
|
||||
|
||||
<span class="n">x_text</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">text</span>
|
||||
<span class="n">x_adv_list_text</span> <span class="o">=</span> <span class="p">[</span><span class="n">x_adv</span><span class="o">.</span><span class="n">text</span> <span class="k">for</span> <span class="n">x_adv</span> <span class="ow">in</span> <span class="n">x_adv_list</span><span class="p">]</span>
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">encode</span><span class="p">([</span><span class="n">x_text</span><span class="p">]</span> <span class="o">+</span> <span class="n">x_adv_list_text</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">original_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">embeddings</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">get_device</span><span class="p">())</span>
|
||||
<span class="n">perturbed_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">embeddings</span><span class="p">[</span><span class="mi">1</span><span class="p">:])</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">get_device</span><span class="p">())</span>
|
||||
|
||||
<span class="c1"># Repeat original embedding to size of perturbed embedding.</span>
|
||||
<span class="n">original_embedding</span> <span class="o">=</span> <span class="n">original_embedding</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">perturbed_embedding</span><span class="p">),</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dist</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)(</span><span class="n">original_embedding</span><span class="p">,</span> <span class="n">perturbed_embedding</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="UniversalSentenceEncoder.call_many"><a class="viewcode-back" href="../../../constraints/semantics/semantics.html#constraints.semantics.universal_sentence_encoder.UniversalSentenceEncoder.call_many">[docs]</a> <span class="k">def</span> <span class="nf">call_many</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">,</span> <span class="n">original_text</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Filters the list of perturbed texts so that the similarity between the original text</span>
|
||||
<span class="sd"> and the perturbed text is greater than the :obj:`threshold`. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> x (str): The original text</span>
|
||||
<span class="sd"> x_adv_list (list(str)): A list of perturbed texts</span>
|
||||
<span class="sd"> original_text(:obj:`type`, optional): Defaults to None. </span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> A filtered list of perturbed texts where each perturbed text meets the similarity threshold. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># @TODO can we rename this function `filter`? (`filter` is a built-in function name in Python, not a reserved keyword)</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">score_list</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">x_adv_list</span><span class="p">)</span>
|
||||
<span class="n">mask</span> <span class="o">=</span> <span class="n">scores</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span>
|
||||
<span class="n">mask</span> <span class="o">=</span> <span class="n">mask</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">x_adv_list</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">x_adv</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="n">x_adv</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span> </div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
256
docs/_build/html/_modules/datasets/dataset.html
vendored
Normal file
256
docs/_build/html/_modules/datasets/dataset.html
vendored
Normal file
@@ -0,0 +1,256 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>datasets.dataset — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>datasets.dataset</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for datasets.dataset</h1><div class="highlight"><pre>
|
||||
<div class="viewcode-block" id="TextAttackDataset"><a class="viewcode-back" href="../../datasets/user_defined_datasets.html#datasets.dataset.TextAttackDataset">[docs]</a><span></span><span class="k">class</span> <span class="nc">TextAttackDataset</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> A dataset for text attacks.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Any iterable of (label, text_input) pairs qualifies as </span>
|
||||
<span class="sd"> a TextAttackDataset.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">""" Loads a full dataset from disk. Typically stores tuples in</span>
|
||||
<span class="sd"> `self.examples`.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">examples</span><span class="o">.</span><span class="fm">__iter__</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__next__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">examples</span><span class="o">.</span><span class="fm">__next__</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_load_text_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">text_file_name</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">""" Loads (label, text) pairs from a text file. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Format must look like:</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> 1 this is a great little ...</span>
|
||||
<span class="sd"> 0 "i love hot n juicy . ...</span>
|
||||
<span class="sd"> 0 "\""this world needs a ...</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">examples</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">for</span> <span class="n">raw_line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">text_file_name</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">():</span>
|
||||
<span class="n">tokens</span> <span class="o">=</span> <span class="n">raw_line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
||||
<span class="n">label</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">tokens</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">text</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">tokens</span><span class="p">[</span><span class="mi">1</span><span class="p">:])</span>
|
||||
<span class="n">examples</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">label</span><span class="p">,</span> <span class="n">text</span><span class="p">))</span>
|
||||
<span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="n">n</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">>=</span> <span class="n">n</span><span class="p">:</span> <span class="k">break</span>
|
||||
<span class="k">return</span> <span class="n">examples</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
234
docs/_build/html/_modules/datasets/yelp_sentiment.html
vendored
Normal file
234
docs/_build/html/_modules/datasets/yelp_sentiment.html
vendored
Normal file
@@ -0,0 +1,234 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>datasets.yelp_sentiment — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>datasets.yelp_sentiment</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for datasets.yelp_sentiment</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">textattack</span> <span class="k">import</span> <span class="n">utils</span> <span class="k">as</span> <span class="n">utils</span>
|
||||
<span class="kn">from</span> <span class="nn">.dataset</span> <span class="k">import</span> <span class="n">TextAttackDataset</span>
|
||||
|
||||
<div class="viewcode-block" id="YelpSentiment"><a class="viewcode-back" href="../../datasets/built-in_datasets.html#datasets.yelp_sentiment.YelpSentiment">[docs]</a><span class="k">class</span> <span class="nc">YelpSentiment</span><span class="p">(</span><span class="n">TextAttackDataset</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Loads the Yelp Sentiment dataset</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> n (int): The number of examples to load</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">DATA_PATH</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research_OLD/TextFooler/data/yelp'</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">""" Loads a full dataset from disk. """</span>
|
||||
<span class="n">utils</span><span class="o">.</span><span class="n">download_if_needed</span><span class="p">(</span><span class="n">YelpSentiment</span><span class="o">.</span><span class="n">DATA_PATH</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">examples</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_load_text_file</span><span class="p">(</span><span class="n">YelpSentiment</span><span class="o">.</span><span class="n">DATA_PATH</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'YelpSentiment loaded'</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">examples</span><span class="p">),</span> <span class="s1">'examples...'</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
230
docs/_build/html/_modules/index.html
vendored
Normal file
230
docs/_build/html/_modules/index.html
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Overview: module code — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../index.html">Docs</a> »</li>
|
||||
|
||||
<li>Overview: module code</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>All modules for which code is available</h1>
|
||||
<ul><li><a href="attacks/attack.html">attacks.attack</a></li>
|
||||
<li><a href="attacks/genetic_algorithm.html">attacks.genetic_algorithm</a></li>
|
||||
<li><a href="attacks/greedy_word_swap.html">attacks.greedy_word_swap</a></li>
|
||||
<li><a href="attacks/greedy_word_swap_wir.html">attacks.greedy_word_swap_wir</a></li>
|
||||
<li><a href="constraints/constraint.html">constraints.constraint</a></li>
|
||||
<li><a href="constraints/semantics/google_language_model/alzantot_goog_lm.html">constraints.semantics.google_language_model.alzantot_goog_lm</a></li>
|
||||
<li><a href="constraints/semantics/google_language_model/google_language_model.html">constraints.semantics.google_language_model.google_language_model</a></li>
|
||||
<li><a href="constraints/semantics/universal_sentence_encoder.html">constraints.semantics.universal_sentence_encoder</a></li>
|
||||
<li><a href="datasets/dataset.html">datasets.dataset</a></li>
|
||||
<li><a href="datasets/yelp_sentiment.html">datasets.yelp_sentiment</a></li>
|
||||
<li><a href="models/bert_for_sentiment_classification.html">models.bert_for_sentiment_classification</a></li>
|
||||
<li><a href="models/infer_sent.html">models.infer_sent</a></li>
|
||||
<li><a href="transformations/transformation.html">transformations.transformation</a></li>
|
||||
<li><a href="transformations/word_swap.html">transformations.word_swap</a></li>
|
||||
<li><a href="transformations/word_swap_counterfit.html">transformations.word_swap_counterfit</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
273
docs/_build/html/_modules/models/bert_for_sentiment_classification.html
vendored
Normal file
273
docs/_build/html/_modules/models/bert_for_sentiment_classification.html
vendored
Normal file
@@ -0,0 +1,273 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>models.bert_for_sentiment_classification — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>models.bert_for_sentiment_classification</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for models.bert_for_sentiment_classification</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">transformers.modeling_bert</span> <span class="k">import</span> <span class="n">BertForSequenceClassification</span>
|
||||
<span class="kn">from</span> <span class="nn">transformers.tokenization_bert</span> <span class="k">import</span> <span class="n">BertTokenizer</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">textattack.utils</span> <span class="k">as</span> <span class="nn">utils</span>
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
|
||||
<div class="viewcode-block" id="BertForSentimentClassification"><a class="viewcode-back" href="../../models/bert.html#models.bert_for_sentiment_classification.BertForSentimentClassification">[docs]</a><span class="k">class</span> <span class="nc">BertForSentimentClassification</span><span class="p">:</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> BERT fine-tuned on the Yelp Sentiment dataset for sentiment classification. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> max_seq_length(:obj:`int`, optional): Maximum length of a sequence after tokenizing.</span>
|
||||
<span class="sd"> Defaults to 32.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">MODEL_PATH</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research/text_attacks/RobustNLP/AttackGeneration/models/bert/models/bert-vanilla'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">max_seq_length</span><span class="o">=</span><span class="mi">32</span><span class="p">):</span>
|
||||
<span class="n">utils</span><span class="o">.</span><span class="n">download_if_needed</span><span class="p">(</span><span class="n">BertForSentimentClassification</span><span class="o">.</span><span class="n">MODEL_PATH</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">BertForSequenceClassification</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
|
||||
<span class="n">BertForSentimentClassification</span><span class="o">.</span><span class="n">MODEL_PATH</span><span class="p">,</span>
|
||||
<span class="n">num_labels</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tokenizer</span> <span class="o">=</span> <span class="n">BertTokenizer</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
|
||||
<span class="n">BertForSentimentClassification</span><span class="o">.</span><span class="n">MODEL_PATH</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">utils</span><span class="o">.</span><span class="n">get_device</span><span class="p">())</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_seq_length</span> <span class="o">=</span> <span class="n">max_seq_length</span>
|
||||
|
||||
<div class="viewcode-block" id="BertForSentimentClassification.convert_text_to_ids"><a class="viewcode-back" href="../../models/bert.html#models.bert_for_sentiment_classification.BertForSentimentClassification.convert_text_to_ids">[docs]</a> <span class="k">def</span> <span class="nf">convert_text_to_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_text</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Takes a string input, tokenizes, formats,</span>
|
||||
<span class="sd"> and returns a tensor with text IDs. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> input_text (str): The text to tokenize</span>
|
||||
|
||||
<span class="sd"> Returns:</span>
|
||||
<span class="sd"> The ID of the tokenized text</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">tokens</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">tokenize</span><span class="p">(</span><span class="n">input_text</span><span class="p">)</span>
|
||||
<span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">tokens</span><span class="p">)</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">max_seq_length</span><span class="p">:</span>
|
||||
<span class="n">tokens</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
|
||||
<span class="n">tokens</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"[CLS]"</span><span class="p">]</span> <span class="o">+</span> <span class="n">tokens</span> <span class="o">+</span> <span class="p">[</span><span class="s2">"[SEP]"</span><span class="p">]</span>
|
||||
<span class="n">ids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">convert_tokens_to_ids</span><span class="p">(</span><span class="n">tokens</span><span class="p">)</span>
|
||||
<span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">ids</span><span class="p">)</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_seq_length</span> <span class="o">+</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">ids</span> <span class="o">=</span> <span class="n">ids</span> <span class="o">+</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># @TODO Is it correct to just pad with zeros?</span>
|
||||
<span class="k">return</span> <span class="n">ids</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">text_ids</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">text_ids</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">f</span><span class="s1">'Object of type {type(text_ids)} must be of type torch.tensor'</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="n">pred</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">text_ids</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">pred</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
481
docs/_build/html/_modules/models/infer_sent.html
vendored
Normal file
481
docs/_build/html/_modules/models/infer_sent.html
vendored
Normal file
@@ -0,0 +1,481 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>models.infer_sent — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>models.infer_sent</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for models.infer_sent</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># Copyright (c) 2017-present, Facebook, Inc.</span>
|
||||
<span class="c1"># All rights reserved.</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># This source code is licensed under the license found in the</span>
|
||||
<span class="c1"># LICENSE file in the root directory of this source tree.</span>
|
||||
<span class="c1">#</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">This file contains the definition of encoders used in https://arxiv.org/pdf/1705.02364.pdf</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
||||
|
||||
<div class="viewcode-block" id="InferSent"><a class="viewcode-back" href="../../models/infer_sent.html#models.infer_sent.InferSent">[docs]</a><span class="k">class</span> <span class="nc">InferSent</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">config</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">InferSent</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bsize</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'bsize'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_emb_dim</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'word_emb_dim'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm_dim</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'enc_lstm_dim'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pool_type</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'pool_type'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dpout_model</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">'dpout_model'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">version</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="s1">'version'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">config</span> <span class="k">else</span> <span class="n">config</span><span class="p">[</span><span class="s1">'version'</span><span class="p">]</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">word_emb_dim</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm_dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">dropout</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dpout_model</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">version</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">version</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bos</span> <span class="o">=</span> <span class="s1">'<s>'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">eos</span> <span class="o">=</span> <span class="s1">'</s>'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_pad</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">moses_tok</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">version</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bos</span> <span class="o">=</span> <span class="s1">'<p>'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">eos</span> <span class="o">=</span> <span class="s1">'</p>'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_pad</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">moses_tok</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">is_cuda</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="c1"># either all weights are on cpu or they are on gpu</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm</span><span class="o">.</span><span class="n">bias_hh_l0</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">is_cuda</span>
|
||||
|
||||
<div class="viewcode-block" id="InferSent.forward"><a class="viewcode-back" href="../../models/infer_sent.html#models.infer_sent.InferSent.forward">[docs]</a> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sent_tuple</span><span class="p">):</span>
|
||||
<span class="c1"># sent_len: [max_len, ..., min_len] (bsize)</span>
|
||||
<span class="c1"># sent: (seqlen x bsize x worddim)</span>
|
||||
<span class="n">sent</span><span class="p">,</span> <span class="n">sent_len</span> <span class="o">=</span> <span class="n">sent_tuple</span>
|
||||
|
||||
<span class="c1"># Sort by length (keep idx)</span>
|
||||
<span class="n">sent_len_sorted</span><span class="p">,</span> <span class="n">idx_sort</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">sent_len</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="o">-</span><span class="n">sent_len</span><span class="p">)</span>
|
||||
<span class="n">sent_len_sorted</span> <span class="o">=</span> <span class="n">sent_len_sorted</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="n">idx_unsort</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">idx_sort</span><span class="p">)</span>
|
||||
|
||||
<span class="n">idx_sort</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">idx_sort</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">()</span> \
|
||||
<span class="k">else</span> <span class="n">torch</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">idx_sort</span><span class="p">)</span>
|
||||
<span class="n">sent</span> <span class="o">=</span> <span class="n">sent</span><span class="o">.</span><span class="n">index_select</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">idx_sort</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Handling padding in Recurrent Networks</span>
|
||||
<span class="n">sent_packed</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">pack_padded_sequence</span><span class="p">(</span><span class="n">sent</span><span class="p">,</span> <span class="n">sent_len_sorted</span><span class="p">)</span>
|
||||
<span class="n">sent_output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm</span><span class="p">(</span><span class="n">sent_packed</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># seqlen x batch x 2*nhid</span>
|
||||
<span class="n">sent_output</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">rnn</span><span class="o">.</span><span class="n">pad_packed_sequence</span><span class="p">(</span><span class="n">sent_output</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># Un-sort by length</span>
|
||||
<span class="n">idx_unsort</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">idx_unsort</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">()</span> \
|
||||
<span class="k">else</span> <span class="n">torch</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">idx_unsort</span><span class="p">)</span>
|
||||
<span class="n">sent_output</span> <span class="o">=</span> <span class="n">sent_output</span><span class="o">.</span><span class="n">index_select</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">idx_unsort</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Pooling</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool_type</span> <span class="o">==</span> <span class="s2">"mean"</span><span class="p">:</span>
|
||||
<span class="n">sent_len</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">FloatTensor</span><span class="p">(</span><span class="n">sent_len</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
||||
<span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sent_output</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">emb</span> <span class="o">=</span> <span class="n">emb</span> <span class="o">/</span> <span class="n">sent_len</span><span class="o">.</span><span class="n">expand_as</span><span class="p">(</span><span class="n">emb</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool_type</span> <span class="o">==</span> <span class="s2">"max"</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_pad</span><span class="p">:</span>
|
||||
<span class="n">sent_output</span><span class="p">[</span><span class="n">sent_output</span> <span class="o">==</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mf">1e9</span>
|
||||
<span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">sent_output</span><span class="p">,</span> <span class="mi">0</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">emb</span><span class="o">.</span><span class="n">ndimension</span><span class="p">()</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
|
||||
<span class="n">emb</span> <span class="o">=</span> <span class="n">emb</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="n">emb</span><span class="o">.</span><span class="n">ndimension</span><span class="p">()</span> <span class="o">==</span> <span class="mi">2</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">emb</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">set_w2v_path</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">w2v_path</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">w2v_path</span> <span class="o">=</span> <span class="n">w2v_path</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_word_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="c1"># create vocab of words</span>
|
||||
<span class="n">word_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">sentences</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="o">.</span><span class="n">split</span><span class="p">()</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">tokenize</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">tokenize</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">sent</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="n">sent</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">word_dict</span><span class="p">:</span>
|
||||
<span class="n">word_dict</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">word_dict</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">word_dict</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="k">return</span> <span class="n">word_dict</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_w2v</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word_dict</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'w2v_path'</span><span class="p">),</span> <span class="s1">'w2v path not set'</span>
|
||||
<span class="c1"># create word_vec with w2v vectors</span>
|
||||
<span class="n">word_vec</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">w2v_path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">word</span><span class="p">,</span> <span class="n">vec</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">' '</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="n">word_dict</span><span class="p">:</span>
|
||||
<span class="n">word_vec</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">vec</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">' '</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Found </span><span class="si">%s</span><span class="s1">(/</span><span class="si">%s</span><span class="s1">) words with w2v vectors'</span> <span class="o">%</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">word_vec</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">word_dict</span><span class="p">)))</span>
|
||||
<span class="k">return</span> <span class="n">word_vec</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_w2v_k</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">K</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'w2v_path'</span><span class="p">),</span> <span class="s1">'w2v path not set'</span>
|
||||
<span class="c1"># create word_vec with k first w2v vectors</span>
|
||||
<span class="n">k</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">word_vec</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">w2v_path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">f</span><span class="p">:</span>
|
||||
<span class="n">word</span><span class="p">,</span> <span class="n">vec</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">' '</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">k</span> <span class="o"><=</span> <span class="n">K</span><span class="p">:</span>
|
||||
<span class="n">word_vec</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">vec</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">' '</span><span class="p">)</span>
|
||||
<span class="n">k</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="n">k</span> <span class="o">></span> <span class="n">K</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]:</span>
|
||||
<span class="n">word_vec</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">vec</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">' '</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">k</span> <span class="o">></span> <span class="n">K</span> <span class="ow">and</span> <span class="nb">all</span><span class="p">([</span><span class="n">w</span> <span class="ow">in</span> <span class="n">word_vec</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]]):</span>
|
||||
<span class="k">break</span>
|
||||
<span class="k">return</span> <span class="n">word_vec</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">build_vocab</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'w2v_path'</span><span class="p">),</span> <span class="s1">'w2v path not set'</span>
|
||||
<span class="n">word_dict</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_word_dict</span><span class="p">(</span><span class="n">sentences</span><span class="p">,</span> <span class="n">tokenize</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_w2v</span><span class="p">(</span><span class="n">word_dict</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Vocab size : </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">)))</span>
|
||||
|
||||
<span class="c1"># build w2v vocab with k most frequent words</span>
|
||||
<span class="k">def</span> <span class="nf">build_vocab_k_words</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">K</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'w2v_path'</span><span class="p">),</span> <span class="s1">'w2v path not set'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_w2v_k</span><span class="p">(</span><span class="n">K</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Vocab size : </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">K</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">update_vocab</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'w2v_path'</span><span class="p">),</span> <span class="s1">'warning : w2v path not set'</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'word_vec'</span><span class="p">),</span> <span class="s1">'build_vocab before updating it'</span>
|
||||
<span class="n">word_dict</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_word_dict</span><span class="p">(</span><span class="n">sentences</span><span class="p">,</span> <span class="n">tokenize</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># keep only new words</span>
|
||||
<span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="n">word_dict</span><span class="p">:</span>
|
||||
<span class="k">del</span> <span class="n">word_dict</span><span class="p">[</span><span class="n">word</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># udpate vocabulary</span>
|
||||
<span class="k">if</span> <span class="n">word_dict</span><span class="p">:</span>
|
||||
<span class="n">new_word_vec</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_w2v</span><span class="p">(</span><span class="n">word_dict</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_word_vec</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">new_word_vec</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'New vocab size : </span><span class="si">%s</span><span class="s1"> (added </span><span class="si">%s</span><span class="s1"> words)'</span><span class="o">%</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">new_word_vec</span><span class="p">)))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_batch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch</span><span class="p">):</span>
|
||||
<span class="c1"># sent in batch in decreasing order of lengths</span>
|
||||
<span class="c1"># batch: (bsize, max_len, word_dim)</span>
|
||||
<span class="n">embed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_emb_dim</span><span class="p">))</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)):</span>
|
||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="n">i</span><span class="p">])):</span>
|
||||
<span class="n">embed</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="p">:]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">[</span><span class="n">batch</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">j</span><span class="p">]]</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">FloatTensor</span><span class="p">(</span><span class="n">embed</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">tokenize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s</span><span class="p">):</span>
|
||||
<span class="kn">from</span> <span class="nn">nltk.tokenize</span> <span class="k">import</span> <span class="n">word_tokenize</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">moses_tok</span><span class="p">:</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">word_tokenize</span><span class="p">(</span><span class="n">s</span><span class="p">))</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">" n't "</span><span class="p">,</span> <span class="s2">"n 't "</span><span class="p">)</span> <span class="c1"># HACK to get ~MOSES tokenization</span>
|
||||
<span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">word_tokenize</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">prepare_samples</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">bsize</span><span class="p">,</span> <span class="n">tokenize</span><span class="p">,</span> <span class="n">verbose</span><span class="p">):</span>
|
||||
<span class="n">sentences</span> <span class="o">=</span> <span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">]</span> <span class="o">+</span> <span class="n">s</span><span class="o">.</span><span class="n">split</span><span class="p">()</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">tokenize</span> <span class="k">else</span>
|
||||
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">]</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">tokenize</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">]</span>
|
||||
<span class="n">n_w</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># filters words without w2v vectors</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sentences</span><span class="p">)):</span>
|
||||
<span class="n">s_f</span> <span class="o">=</span> <span class="p">[</span><span class="n">word</span> <span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">s_f</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">warnings</span>
|
||||
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s1">'No words in "</span><span class="si">%s</span><span class="s1">" (idx=</span><span class="si">%s</span><span class="s1">) have w2v vectors. </span><span class="se">\</span>
|
||||
<span class="s1"> Replacing by "</s>"..'</span> <span class="o">%</span> <span class="p">(</span><span class="n">sentences</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">i</span><span class="p">))</span>
|
||||
<span class="n">s_f</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]</span>
|
||||
<span class="n">sentences</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">s_f</span>
|
||||
|
||||
<span class="n">lengths</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">sentences</span><span class="p">])</span>
|
||||
<span class="n">n_wk</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">lengths</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Nb words kept : </span><span class="si">%s</span><span class="s1">/</span><span class="si">%s</span><span class="s1"> (</span><span class="si">%.1f%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span>
|
||||
<span class="n">n_wk</span><span class="p">,</span> <span class="n">n_w</span><span class="p">,</span> <span class="mf">100.0</span> <span class="o">*</span> <span class="n">n_wk</span> <span class="o">/</span> <span class="n">n_w</span><span class="p">,</span> <span class="s1">'%'</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># sort by decreasing length</span>
|
||||
<span class="n">lengths</span><span class="p">,</span> <span class="n">idx_sort</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">lengths</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="o">-</span><span class="n">lengths</span><span class="p">)</span>
|
||||
<span class="n">sentences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">sentences</span><span class="p">)[</span><span class="n">idx_sort</span><span class="p">]</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">lengths</span><span class="p">,</span> <span class="n">idx_sort</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sentences</span><span class="p">,</span> <span class="n">bsize</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="n">tic</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">sentences</span><span class="p">,</span> <span class="n">lengths</span><span class="p">,</span> <span class="n">idx_sort</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">prepare_samples</span><span class="p">(</span>
|
||||
<span class="n">sentences</span><span class="p">,</span> <span class="n">bsize</span><span class="p">,</span> <span class="n">tokenize</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
||||
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">stidx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">sentences</span><span class="p">),</span> <span class="n">bsize</span><span class="p">):</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_batch</span><span class="p">(</span><span class="n">sentences</span><span class="p">[</span><span class="n">stidx</span><span class="p">:</span><span class="n">stidx</span> <span class="o">+</span> <span class="n">bsize</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">():</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">forward</span><span class="p">((</span><span class="n">batch</span><span class="p">,</span> <span class="n">lengths</span><span class="p">[</span><span class="n">stidx</span><span class="p">:</span><span class="n">stidx</span> <span class="o">+</span> <span class="n">bsize</span><span class="p">]))</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
<span class="n">embeddings</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># unsort</span>
|
||||
<span class="n">idx_unsort</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">idx_sort</span><span class="p">)</span>
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="n">embeddings</span><span class="p">[</span><span class="n">idx_unsort</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Speed : </span><span class="si">%.1f</span><span class="s1"> sentences/s (</span><span class="si">%s</span><span class="s1"> mode, bsize=</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="p">(</span>
|
||||
<span class="nb">len</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tic</span><span class="p">),</span>
|
||||
<span class="s1">'gpu'</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">()</span> <span class="k">else</span> <span class="s1">'cpu'</span><span class="p">,</span> <span class="n">bsize</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="n">embeddings</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">visualize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sent</span><span class="p">,</span> <span class="n">tokenize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
|
||||
<span class="n">sent</span> <span class="o">=</span> <span class="n">sent</span><span class="o">.</span><span class="n">split</span><span class="p">()</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">tokenize</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">tokenize</span><span class="p">(</span><span class="n">sent</span><span class="p">)</span>
|
||||
<span class="n">sent</span> <span class="o">=</span> <span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">word</span> <span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="n">sent</span> <span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_vec</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">]]</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">sent</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">==</span> <span class="s1">'</span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">):</span>
|
||||
<span class="kn">import</span> <span class="nn">warnings</span>
|
||||
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s1">'No words in "</span><span class="si">%s</span><span class="s1">" have w2v vectors. Replacing </span><span class="se">\</span>
|
||||
<span class="s1"> by "</span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">"..'</span> <span class="o">%</span> <span class="p">(</span><span class="n">sent</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bos</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">eos</span><span class="p">))</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_batch</span><span class="p">(</span><span class="n">sent</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">():</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">enc_lstm</span><span class="p">(</span><span class="n">batch</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">output</span><span class="p">,</span> <span class="n">idxs</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
||||
<span class="c1"># output, idxs = output.squeeze(), idxs.squeeze()</span>
|
||||
<span class="n">idxs</span> <span class="o">=</span> <span class="n">idxs</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
<span class="n">argmaxs</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">idxs</span> <span class="o">==</span> <span class="n">k</span><span class="p">))</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sent</span><span class="p">[</span><span class="mi">0</span><span class="p">]))]</span>
|
||||
|
||||
<span class="c1"># visualize model</span>
|
||||
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sent</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="mf">100.0</span> <span class="o">*</span> <span class="n">n</span> <span class="o">/</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">argmaxs</span><span class="p">)</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">argmaxs</span><span class="p">]</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">xticks</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">sent</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s1">'%'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Visualisation of words importance'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">output</span><span class="p">,</span> <span class="n">idxs</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
227
docs/_build/html/_modules/transformations/transformation.html
vendored
Normal file
227
docs/_build/html/_modules/transformations/transformation.html
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>transformations.transformation — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>transformations.transformation</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for transformations.transformation</h1><div class="highlight"><pre>
|
||||
<div class="viewcode-block" id="Transformation"><a class="viewcode-back" href="../../transformations/transformation.html#transformations.transformation.Transformation">[docs]</a><span></span><span class="k">class</span> <span class="nc">Transformation</span><span class="p">:</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> An abstract class for transforming a string of text to produce</span>
|
||||
<span class="sd"> a potential adversarial example. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">):</span>
|
||||
<span class="sd">""" Returns a list of all possible transformations for `tokenized_text`."""</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
267
docs/_build/html/_modules/transformations/word_swap.html
vendored
Normal file
267
docs/_build/html/_modules/transformations/word_swap.html
vendored
Normal file
@@ -0,0 +1,267 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>transformations.word_swap — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>transformations.word_swap</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for transformations.word_swap</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">.transformation</span> <span class="k">import</span> <span class="n">Transformation</span>
|
||||
<span class="kn">from</span> <span class="nn">nltk.corpus</span> <span class="k">import</span> <span class="n">stopwords</span>
|
||||
|
||||
<div class="viewcode-block" id="WordSwap"><a class="viewcode-back" href="../../transformations/word_swap.html#transformations.word_swap.WordSwap">[docs]</a><span class="k">class</span> <span class="nc">WordSwap</span><span class="p">(</span><span class="n">Transformation</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> An abstract class that takes a sentence and transforms it by replacing</span>
|
||||
<span class="sd"> some of its words.</span>
|
||||
|
||||
<span class="sd"> Other classes can achieve this by inheriting from WordSwap and </span>
|
||||
<span class="sd"> overriding self._get_replacement_words.</span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> replace_stopwords(:obj:`bool`, optional): Whether to replace stopwords. Defaults to False. </span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">replace_stopwords</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">replace_stopwords</span> <span class="o">=</span> <span class="n">replace_stopwords</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">stopwords</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">stopwords</span><span class="o">.</span><span class="n">words</span><span class="p">(</span><span class="s1">'english'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_replacement_words</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">):</span>
|
||||
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokenized_text</span><span class="p">,</span> <span class="n">indices_to_replace</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Returns a list of all possible transformations for `tokenized_text`.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> If indices_to_replace is set, only replaces words at those indices.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">words</span> <span class="o">=</span> <span class="n">tokenized_text</span><span class="o">.</span><span class="n">words</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">indices_to_replace</span><span class="p">:</span>
|
||||
<span class="n">indices_to_replace</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">words</span><span class="p">)))</span>
|
||||
|
||||
<span class="n">transformations</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">word_swaps</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">indices_to_replace</span><span class="p">:</span>
|
||||
<span class="n">word_to_replace</span> <span class="o">=</span> <span class="n">words</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">replace_stopwords</span> <span class="ow">and</span> <span class="n">word_to_replace</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">stopwords</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">replacement_words</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_replacement_words</span><span class="p">(</span><span class="n">word_to_replace</span><span class="p">)</span>
|
||||
<span class="n">new_tokenized_texts</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">replacement_words</span><span class="p">:</span>
|
||||
<span class="n">new_tokenized_texts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tokenized_text</span><span class="o">.</span><span class="n">replace_word_at_index</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">r</span><span class="p">))</span>
|
||||
<span class="n">transformations</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">new_tokenized_texts</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">transformations</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
288
docs/_build/html/_modules/transformations/word_swap_counterfit.html
vendored
Normal file
288
docs/_build/html/_modules/transformations/word_swap_counterfit.html
vendored
Normal file
@@ -0,0 +1,288 @@
|
||||
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
|
||||
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>transformations.word_swap_counterfit — TextAttack 0.0.1 documentation</title>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
|
||||
|
||||
|
||||
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
|
||||
<script type="text/javascript" src="../../_static/jquery.js"></script>
|
||||
<script type="text/javascript" src="../../_static/underscore.js"></script>
|
||||
<script type="text/javascript" src="../../_static/doctools.js"></script>
|
||||
<script type="text/javascript" src="../../_static/language_data.js"></script>
|
||||
|
||||
<script type="text/javascript" src="../../_static/js/theme.js"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
|
||||
|
||||
<div class="wy-grid-for-nav">
|
||||
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home"> TextAttack
|
||||
|
||||
|
||||
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="caption"><span class="caption-text">User Documentation</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/introduction.html">What is TextAttack?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/installation.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../users/examples.html">Examples</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Attack Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/attack.html">Attack Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/greedy_word_swap.html">Greedy Word Swap</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../attacks/genetic_algorithm.html">Genetic Algorithm</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Models Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/bert.html">BERT</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../models/infer_sent.html">InferSent</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Transformations Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/transformation.html">Transformation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../transformations/word_swap.html">Word Swap</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Constraints Documentation:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/constraint.html">Constraints</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/semantics.html">Semantics</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../constraints/semantics/google_language_model.html">Google Language Model</a></li>
|
||||
</ul>
|
||||
<p class="caption"><span class="caption-text">Datasets:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/built-in_datasets.html">Built-in Datasets</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../datasets/user_defined_datasets.html">User-Defined Datasets</a></li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
|
||||
|
||||
|
||||
<nav class="wy-nav-top" aria-label="top navigation">
|
||||
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">TextAttack</a>
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
<div class="wy-nav-content">
|
||||
|
||||
<div class="rst-content">
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div role="navigation" aria-label="breadcrumbs navigation">
|
||||
|
||||
<ul class="wy-breadcrumbs">
|
||||
|
||||
<li><a href="../../index.html">Docs</a> »</li>
|
||||
|
||||
<li><a href="../index.html">Module code</a> »</li>
|
||||
|
||||
<li>transformations.word_swap_counterfit</li>
|
||||
|
||||
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for transformations.word_swap_counterfit</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">textattack</span> <span class="k">import</span> <span class="n">utils</span> <span class="k">as</span> <span class="n">utils</span>
|
||||
<span class="kn">from</span> <span class="nn">.word_swap</span> <span class="k">import</span> <span class="n">WordSwap</span>
|
||||
|
||||
<div class="viewcode-block" id="WordSwapCounterfit"><a class="viewcode-back" href="../../transformations/word_swap.html#transformations.word_swap_counterfit.WordSwapCounterfit">[docs]</a><span class="k">class</span> <span class="nc">WordSwapCounterfit</span><span class="p">(</span><span class="n">WordSwap</span><span class="p">):</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd"> Transforms an input by replacing its words with synonyms in the counter-fitted</span>
|
||||
<span class="sd"> embedding space. </span>
|
||||
|
||||
<span class="sd"> Args:</span>
|
||||
<span class="sd"> replace_stopwords (:obj:`bool`, optional): Whether to replace the stopwords in the text. Defaults to False. </span>
|
||||
<span class="sd"> max_candidates (:obj:`int`, optional): The default number of words to replace. Defaults to None. </span>
|
||||
<span class="sd"> word_embedding_folder (:obj:`str`, optional): The path to the word_embedding folder. Defaults to paragram_300_sl999</span>
|
||||
|
||||
<span class="sd"> Raises:</span>
|
||||
<span class="sd"> ValueError: If the word_embedding_folder is not found. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">PATH</span> <span class="o">=</span> <span class="s1">'/p/qdata/jm8wx/research/text_attacks/RobustNLP/AttackGeneration/word_embeddings/'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">replace_stopwords</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">max_candidates</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">word_embedding_folder</span><span class="o">=</span><span class="s1">'paragram_300_sl999'</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">replace_stopwords</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_candidates</span> <span class="o">=</span> <span class="n">max_candidates</span>
|
||||
<span class="k">if</span> <span class="n">word_embedding_folder</span> <span class="o">==</span> <span class="s1">'paragram_300_sl999'</span><span class="p">:</span>
|
||||
<span class="n">word_embeddings_file</span> <span class="o">=</span> <span class="s1">'paragram_300_sl999.npy'</span>
|
||||
<span class="n">word_list_file</span> <span class="o">=</span> <span class="s1">'wordlist.pickle'</span>
|
||||
<span class="n">word_embedding_matrix_file</span> <span class="o">=</span> <span class="s1">'paragramnn.py'</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">f</span><span class="s1">'Could not find word embedding </span><span class="si">{word_embedding}</span><span class="s1">'</span><span class="p">)</span><span class="c1"># Concatenate folder names to create full path.</span>
|
||||
|
||||
<span class="n">utils</span><span class="o">.</span><span class="n">download_if_needed</span><span class="p">(</span><span class="n">WordSwapCounterfit</span><span class="o">.</span><span class="n">PATH</span><span class="p">)</span>
|
||||
<span class="n">word_embeddings_file</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">WordSwapCounterfit</span><span class="o">.</span><span class="n">PATH</span><span class="p">,</span> <span class="n">word_embedding_folder</span><span class="p">,</span> <span class="n">word_embeddings_file</span><span class="p">)</span>
|
||||
<span class="n">word_list_file</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">WordSwapCounterfit</span><span class="o">.</span><span class="n">PATH</span><span class="p">,</span> <span class="n">word_embedding_folder</span><span class="p">,</span> <span class="n">word_list_file</span><span class="p">)</span>
|
||||
<span class="n">word_embedding_matrix_file</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">WordSwapCounterfit</span><span class="o">.</span><span class="n">PATH</span><span class="p">,</span> <span class="n">word_embedding_folder</span><span class="p">,</span> <span class="n">word_embedding_matrix_file</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Actually load the files from disk.</span>
|
||||
<span class="n">word_embeddings</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">word_embeddings_file</span><span class="p">)</span>
|
||||
<span class="n">word_embedding_word2index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">word_list_file</span><span class="p">,</span> <span class="n">allow_pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">word_embedding_matrix</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">word_embedding_matrix_file</span><span class="p">)</span>
|
||||
<span class="c1"># Build glove dict and index.</span>
|
||||
<span class="n">word_embedding_index2word</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">word</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="n">word_embedding_word2index</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="n">word_embedding_index2word</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="n">word</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embeddings</span> <span class="o">=</span> <span class="n">word_embeddings</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">nn</span> <span class="o">=</span> <span class="n">word_embedding_matrix</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding_index2word</span> <span class="o">=</span> <span class="n">word_embedding_index2word</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding_word2index</span> <span class="o">=</span> <span class="n">word_embedding_word2index</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_replacement_words</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">,</span> <span class="n">max_candidates</span><span class="o">=</span><span class="mi">10</span><span class="p">):</span>
|
||||
<span class="sd">""" </span>
|
||||
<span class="sd"> Returns a list of possible 'candidate words' to replace a word in a sentence </span>
|
||||
<span class="sd"> or phrase. </span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Based on nearest neighbors selected word embeddings.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_candidates</span><span class="p">:</span>
|
||||
<span class="n">max_candidates</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">max_candidates</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_candidates</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">word_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding_word2index</span><span class="p">[</span><span class="n">word</span><span class="p">]</span>
|
||||
<span class="n">nnids</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nn</span><span class="p">[</span><span class="n">word_id</span><span class="p">][</span><span class="mi">1</span><span class="p">:</span><span class="n">max_candidates</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">candidate_words</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">wi</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">nnids</span><span class="p">):</span>
|
||||
<span class="n">candidate_words</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">word_embedding_index2word</span><span class="p">[</span><span class="n">wi</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">candidate_words</span>
|
||||
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
|
||||
<span class="c1"># This word is not in our word embedding database, so return an empty list.</span>
|
||||
<span class="k">return</span> <span class="p">[]</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>
|
||||
© Copyright 2019, UVA QData Lab
|
||||
|
||||
</p>
|
||||
</div>
|
||||
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
</footer>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
6
docs/_build/html/_sources/attacks/attack.rst.txt
vendored
Normal file
6
docs/_build/html/_sources/attacks/attack.rst.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
=====================
|
||||
Attack Documentation
|
||||
=====================
|
||||
|
||||
.. automodule:: attacks.attack
|
||||
:members:
|
||||
6
docs/_build/html/_sources/attacks/genetic_algorithm.rst.txt
vendored
Normal file
6
docs/_build/html/_sources/attacks/genetic_algorithm.rst.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
===================
|
||||
Genetic Algorithm
|
||||
===================
|
||||
|
||||
.. automodule:: attacks.genetic_algorithm
|
||||
:members:
|
||||
9
docs/_build/html/_sources/attacks/greedy_word_swap.rst.txt
vendored
Normal file
9
docs/_build/html/_sources/attacks/greedy_word_swap.rst.txt
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
===================
|
||||
Greedy Word Swap
|
||||
===================
|
||||
|
||||
.. automodule:: attacks.greedy_word_swap
|
||||
:members:
|
||||
|
||||
.. automodule:: attacks.greedy_word_swap_wir
|
||||
:members:
|
||||
6
docs/_build/html/_sources/constraints/constraint.rst.txt
vendored
Normal file
6
docs/_build/html/_sources/constraints/constraint.rst.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
============
|
||||
Constraints
|
||||
============
|
||||
|
||||
.. automodule:: constraints.constraint
|
||||
:members:
|
||||
9
docs/_build/html/_sources/constraints/semantics/google_language_model.rst.txt
vendored
Normal file
9
docs/_build/html/_sources/constraints/semantics/google_language_model.rst.txt
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
=========================
|
||||
Google Language Model
|
||||
=========================
|
||||
|
||||
.. automodule:: constraints.semantics.google_language_model.google_language_model
|
||||
:members:
|
||||
|
||||
.. automodule:: constraints.semantics.google_language_model.alzantot_goog_lm
|
||||
:members:
|
||||
11
docs/_build/html/_sources/constraints/semantics/semantics.rst.txt
vendored
Normal file
11
docs/_build/html/_sources/constraints/semantics/semantics.rst.txt
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
===========
|
||||
Semantics
|
||||
===========
|
||||
|
||||
Semantic constraints.
|
||||
|
||||
Universal Sentence Encoder
|
||||
##########################
|
||||
|
||||
.. automodule:: constraints.semantics.universal_sentence_encoder
|
||||
:members:
|
||||
9
docs/_build/html/_sources/datasets/built-in_datasets.rst.txt
vendored
Normal file
9
docs/_build/html/_sources/datasets/built-in_datasets.rst.txt
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
===================
|
||||
Built-in Datasets
|
||||
===================
|
||||
|
||||
Yelp Sentiment
|
||||
###############
|
||||
.. automodule:: datasets.yelp_sentiment
|
||||
:members:
|
||||
|
||||
6
docs/_build/html/_sources/datasets/user_defined_datasets.rst.txt
vendored
Normal file
6
docs/_build/html/_sources/datasets/user_defined_datasets.rst.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
======================
|
||||
User-Defined Datasets
|
||||
======================
|
||||
|
||||
.. automodule:: datasets.dataset
|
||||
:members:
|
||||
61
docs/_build/html/_sources/index.rst.txt
vendored
Normal file
61
docs/_build/html/_sources/index.rst.txt
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
.. TextAttack documentation master file, created by
|
||||
sphinx-quickstart on Sat Oct 19 20:54:30 2019.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to TextAttack's documentation!
|
||||
======================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: User Documentation
|
||||
|
||||
users/introduction
|
||||
users/installation
|
||||
users/examples
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Attack Documentation:
|
||||
|
||||
attacks/attack
|
||||
attacks/greedy_word_swap
|
||||
attacks/genetic_algorithm
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Models Documentation:
|
||||
|
||||
models/bert.rst
|
||||
models/infer_sent.rst
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Transformations Documentation:
|
||||
|
||||
transformations/transformation
|
||||
transformations/word_swap
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Constraints Documentation:
|
||||
|
||||
constraints/constraint
|
||||
constraints/semantics/semantics
|
||||
constraints/semantics/google_language_model
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Datasets:
|
||||
|
||||
datasets/built-in_datasets
|
||||
datasets/user_defined_datasets
|
||||
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
7
docs/_build/html/_sources/models/bert.rst.txt
vendored
Normal file
7
docs/_build/html/_sources/models/bert.rst.txt
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
=====
|
||||
BERT
|
||||
=====
|
||||
|
||||
.. automodule:: models.bert_for_sentiment_classification
|
||||
:members:
|
||||
|
||||
8
docs/_build/html/_sources/models/infer_sent.rst.txt
vendored
Normal file
8
docs/_build/html/_sources/models/infer_sent.rst.txt
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
=========
|
||||
InferSent
|
||||
=========
|
||||
|
||||
Adapted from Facebook Research (https://arxiv.org/abs/1705.02364).
|
||||
|
||||
.. automodule:: models.infer_sent
|
||||
:members:
|
||||
6
docs/_build/html/_sources/transformations/transformation.rst.txt
vendored
Normal file
6
docs/_build/html/_sources/transformations/transformation.rst.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
================
|
||||
Transformation
|
||||
================
|
||||
|
||||
.. automodule:: transformations.transformation
|
||||
:members:
|
||||
10
docs/_build/html/_sources/transformations/word_swap.rst.txt
vendored
Normal file
10
docs/_build/html/_sources/transformations/word_swap.rst.txt
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
==========
|
||||
Word Swap
|
||||
==========
|
||||
|
||||
|
||||
.. automodule:: transformations.word_swap
|
||||
:members:
|
||||
|
||||
.. automodule:: transformations.word_swap_counterfit
|
||||
:members:
|
||||
26
docs/_build/html/_sources/users/examples.rst.txt
vendored
Normal file
26
docs/_build/html/_sources/users/examples.rst.txt
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
=========
|
||||
Examples
|
||||
=========
|
||||
|
||||
|
||||
BERT Example
|
||||
############
|
||||
|
||||
.. parsed-literal::
|
||||
model = BertForSentimentClassification()
|
||||
|
||||
transformation = WordSwapCounterfit()
|
||||
|
||||
attack = attacks.GreedyWordSwap(model, transformation)
|
||||
|
||||
attack.add_constraints(
|
||||
(
|
||||
constraints.semantics.UniversalSentenceEncoder(0.9, metric='cosine'),
|
||||
)
|
||||
)
|
||||
|
||||
yelp_data = YelpSentiment(n=2)
|
||||
|
||||
attack.add_output_file(open('outputs/test.txt', 'w'))
|
||||
|
||||
attack.attack(yelp_data, shuffle=False)
|
||||
5
docs/_build/html/_sources/users/installation.rst.txt
vendored
Normal file
5
docs/_build/html/_sources/users/installation.rst.txt
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
==============
|
||||
Installation
|
||||
==============
|
||||
|
||||
TextAttack requires Python 3.6 or later.
|
||||
5
docs/_build/html/_sources/users/introduction.rst.txt
vendored
Normal file
5
docs/_build/html/_sources/users/introduction.rst.txt
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
=====================
|
||||
What is TextAttack?
|
||||
=====================
|
||||
|
||||
TextAttack is a Python package...
|
||||
@@ -1,6 +1,6 @@
|
||||
var DOCUMENTATION_OPTIONS = {
|
||||
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
|
||||
VERSION: '',
|
||||
VERSION: '0.0.1',
|
||||
LANGUAGE: 'None',
|
||||
COLLAPSE_INDEX: false,
|
||||
FILE_SUFFIX: '.html',
|
||||
|
Before Width: | Height: | Size: 286 B After Width: | Height: | Size: 286 B |
|
Before Width: | Height: | Size: 434 KiB After Width: | Height: | Size: 434 KiB |
|
Before Width: | Height: | Size: 90 B After Width: | Height: | Size: 90 B |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user