diff --git a/.github/workflows/python-test.yml b/.github/workflows/check-formatting.yml
similarity index 82%
rename from .github/workflows/python-test.yml
rename to .github/workflows/check-formatting.yml
index 8fcdd4c0..b6e7d3f7 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/check-formatting.yml
@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

-name: Github PyTest
+name: Formatting with black & isort

on:
  push:
@@ -15,7 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.8]

    steps:
    - uses: actions/checkout@v2
@@ -26,7 +26,7 @@
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip setuptools wheel
-        pip install black flake8 isort pytest pytest-xdist
+        pip install black flake8 isort # Testing packages
        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
        pip install -e .
    - name: Check code format with black and isort
@@ -34,6 +34,3 @@
        black . --check
        isort --check-only --recursive tests textattack
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-    - name: Test with pytest
-      run: |
-        pytest tests -vx --dist=loadfile -n auto
diff --git a/.github/workflows/make-docs.yml b/.github/workflows/make-docs.yml
new file mode 100644
index 00000000..33ddf569
--- /dev/null
+++ b/.github/workflows/make-docs.yml
@@ -0,0 +1,35 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Build documentation with Sphinx
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        sudo apt-get install pandoc # install pandoc
+        python -m pip install --upgrade pip setuptools wheel # update python
+        pip install ipython --upgrade # needed for Github for whatever reason
+        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
+        pip install -e . ".[dev]" # This should install all packages for development
+    - name: Build docs with Sphinx and check for errors
+      run: |
+        sphinx-build -b html docs docs/_build/html -W
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/publish-to-pypi.yml
similarity index 95%
rename from .github/workflows/python-publish.yml
rename to .github/workflows/publish-to-pypi.yml
index 7e958c6e..fe037a12 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -1,7 +1,7 @@
# This workflows will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

-name: Upload Python Package
+name: Upload Python Package to PyPI

on:
  release:
diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml
new file mode 100644
index 00000000..ee16e7c7
--- /dev/null
+++ b/.github/workflows/run-pytest.yml
@@ -0,0 +1,34 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Test with PyTest
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip setuptools wheel
+        pip install pytest pytest-xdist # Testing packages
+        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
+        pip install -e .
+    - name: Test with pytest
+      run: |
+        pytest tests -vx --dist=loadfile -n auto
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 858ef797..7eed7873 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -179,11 +179,25 @@ Follow these steps to start contributing:
   $ git push -u origin a-descriptive-name-for-my-changes
   ```

-6. Once you are satisfied (**and the checklist below is happy too**), go to the
+6. Add documentation.
+
+   Our docs are in the `docs/` folder. Thanks to `sphinx-automodule`, this
+   should just be two lines. Our docs will automatically generate from the
+   comments you added to your code. If you're adding an attack recipe, add a
+   reference in `attack_recipes.rst`. If you're adding a transformation, add
+   a reference in `transformation.rst`, etc.
+
+   You can build the docs and view the updates using `make docs`. If you're
+   adding a tutorial or something where you want to update the docs multiple
+   times, you can run `make docs-auto`. This will run a server using
+   `sphinx-autobuild` that should automatically reload whenever you change
+   a file.
+
+7. Once you are satisfied (**and the checklist below is happy too**), go to the
   webpage of your fork on GitHub. Click on 'Pull request' to send your changes
   to the project maintainers for review.

-7. It's ok if maintainers ask you for changes. It happens to core contributors
+8. It's ok if maintainers ask you for changes. It happens to core contributors
   too! So everyone can see the changes in the Pull request, work in your local
   branch and push the changes to your fork. They will automatically appear in
   the pull request.
diff --git a/Makefile b/Makefile
index f22a6848..f4bb32f0 100644
--- a/Makefile
+++ b/Makefile
@@ -3,9 +3,10 @@ format: FORCE ## Run black and isort (rewriting files)
	isort --atomic --recursive tests textattack

-lint: FORCE ## Run black (in check mode)
+lint: FORCE ## Run black, isort, flake8 (in check mode)
	black . --check
	isort --check-only --recursive tests textattack
+	flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude=build,dist # catch certain syntax errors using flake8

test: FORCE ## Run tests using pytest
	python -m pytest --dist=loadfile -n auto

@@ -13,10 +14,13 @@ test: FORCE ## Run tests using pytest
docs: FORCE ## Build docs using Sphinx.
	sphinx-build -b html docs docs/_build/html

+docs-check: FORCE ## Builds docs using Sphinx. If there is an error, exit with an error code (instead of warning & continuing).
+	sphinx-build -b html docs docs/_build/html -W
+
docs-auto: FORCE ## Build docs using Sphinx and run hotreload server using Sphinx autobuild.
	sphinx-autobuild docs docs/_build/html -H 0.0.0.0 -p 8765

-all: format lint test ## Format, lint, and test.
+all: format lint docs-check test ## Format, lint, and test.

.PHONY: help
diff --git a/README.md b/README.md
index b90aaad1..7995f723 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
-
+
## About
@@ -97,16 +97,19 @@ We include attack recipes which implement attacks from the literature. You can l
To run an attack recipe: `textattack attack --recipe [recipe_name]`
-The first are for classification tasks, like sentiment classification and entailment:
+Attacks on classification tasks, like sentiment classification and entailment:
- **alzantot**: Genetic algorithm attack from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998)).
+- **bae**: BERT masked language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)).
+- **bert-attack**: BERT masked language model transformation attack with subword replacements (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984)).
- **deepwordbug**: Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)).
- **hotflip**: Beam search and gradient-based word swap (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751)).
+- **input-reduction**: Reducing the input while maintaining the prediction through word importance ranking (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf)).
- **kuleshov**: Greedy search and counterfitted embedding swap (["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)).
+- **pwws**: Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/)).
- **textbugger**: Greedy attack with word importance ranking and character-based swaps (["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271)).
- **textfooler**: Greedy attack with word importance ranking and counter-fitted embedding swap (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932)).
-- **PWWS**: Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/)).
-The final is for sequence-to-sequence models:
+Attacks on sequence-to-sequence models:
- **seq2sick**: Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)).
#### Recipe Usage Examples
@@ -120,7 +123,7 @@ textattack attack --model bert-base-uncased-sst2 --recipe textfooler --num-examp
*seq2sick (black-box) against T5 fine-tuned for English-German translation:*
```bash
-textattack attack --recipe seq2sick --model t5-en2de --num-examples 100
+textattack attack --model t5-en-de --recipe seq2sick --num-examples 100
```
### Augmenting Text
@@ -301,6 +304,8 @@ A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a fi
We welcome suggestions and contributions! Submit an issue or pull request and we will do our best to respond in a timely manner. TextAttack is currently in an "alpha" stage in which we are working to improve its capabilities and design.
+See [CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md) for detailed information on contributing.
+
## Citing TextAttack
If you use TextAttack for your research, please cite [TextAttack: A Framework for Adversarial Attacks in Natural Language Processing](https://arxiv.org/abs/2005.05909).
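
Several recipes in the README list above (deepwordbug, pwws, textbugger, textfooler) share the same first step: rank words by importance, then greedily swap the most important ones. The sketch below is a framework-independent illustration of deletion-based word importance ranking, not TextAttack's implementation; the function name and the toy scorer are hypothetical.

```python
from typing import Callable, List


def rank_words_by_importance(
    words: List[str], score_fn: Callable[[str], float]
) -> List[int]:
    """Return word indices ordered by how much deleting each word drops the score.

    `score_fn` maps a sentence to the model's confidence in the original class;
    a larger drop when a word is removed marks that word as more important.
    """
    base_score = score_fn(" ".join(words))
    drops = []
    for i in range(len(words)):
        reduced = " ".join(words[:i] + words[i + 1:])
        drops.append(base_score - score_fn(reduced))
    return sorted(range(len(words)), key=lambda i: drops[i], reverse=True)


if __name__ == "__main__":
    # Toy scorer: pretends the word "terrible" drives a negative-sentiment prediction.
    toy_score = lambda s: 0.9 if "terrible" in s else 0.4
    print(rank_words_by_importance("the movie was terrible".split(), toy_score))
```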
diff --git a/docs/attacks/attack_recipes.rst b/docs/attacks/attack_recipes.rst
index 9511c6b9..50c7aa24 100644
--- a/docs/attacks/attack_recipes.rst
+++ b/docs/attacks/attack_recipes.rst
@@ -5,59 +5,76 @@ We provide a number of pre-built attack recipes. To run an attack recipe, run::
textattack attack --recipe [recipe_name]
-Alzantot Genetic Algorithm
-###########
+Alzantot Genetic Algorithm (Generating Natural Language Adversarial Examples)
+###################################################################################
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
:members:
-Faster Alzantot Genetic Algorithm
-###########
+Faster Alzantot Genetic Algorithm (Certified Robustness to Adversarial Word Substitutions)
+##############################################################################################
.. automodule:: textattack.attack_recipes.faster_genetic_algorithm_jia_2019
:members:
+
+BAE (BAE: BERT-Based Adversarial Examples)
+#############################################
-DeepWordBug
-############
+.. automodule:: textattack.attack_recipes.bae_garg_2019
+ :members:
+
+BERT-Attack (BERT-Attack: Adversarial Attack Against BERT Using BERT)
+#########################################################################
+
+.. automodule:: textattack.attack_recipes.bert_attack_li_2020
+ :members:
+
+DeepWordBug (Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers)
+######################################################################################################
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
:members:
-HotFlip
-###########
+HotFlip (HotFlip: White-Box Adversarial Examples for Text Classification)
+##############################################################################
+
+.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
+ :members:
+
+Input Reduction (Pathologies of Neural Models Make Interpretation Difficult)
+##############################################################################
.. automodule:: textattack.attack_recipes.input_reduction_feng_2018
:members:
-Kuleshov
-###########
+Kuleshov (Adversarial Examples for Natural Language Classification Problems)
+##############################################################################
.. automodule:: textattack.attack_recipes.kuleshov_2017
:members:
-Seq2Sick
-###########
+PWWS (Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency)
+###################################################################################################
+
+.. automodule:: textattack.attack_recipes.pwws_ren_2019
+ :members:
+
+Seq2Sick (Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples)
+#########################################################################################################
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
:members:
-TextFooler
-###########
+TextFooler (Is BERT Really Robust? A Strong Baseline for Natural Language Attack on Text Classification and Entailment)
+########################################################################################################################
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
:members:
-PWWS
-###########
-
-.. automodule:: textattack.attack_recipes.pwws_ren_2019
- :members:
-
-
-TextBugger
-###########
+TextBugger (TextBugger: Generating Adversarial Text Against Real-world Applications)
+########################################################################################
.. automodule:: textattack.attack_recipes.textbugger_li_2018
:members:
diff --git a/docs/attacks/constraint.rst b/docs/attacks/constraint.rst
index 9af35f0d..db11f159 100644
--- a/docs/attacks/constraint.rst
+++ b/docs/attacks/constraint.rst
@@ -85,7 +85,7 @@ GPT-2
:members:
"Learning To Write" Language Model
-*******
+************************************
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.learning_to_write
:members:
@@ -142,7 +142,7 @@ Maximum Words Perturbed
.. _pre_transformation:
Pre-Transformation
-----------
+-------------------------
Pre-transformation constraints determine if a transformation is valid based on
only the original input and the position of the replacement. These constraints
@@ -151,7 +151,7 @@ constraints can prevent search methods from swapping words at the same index
twice, or from replacing stopwords.
Pre-Transformation Constraint
-########################
+###############################
.. automodule:: textattack.constraints.pre_transformation.pre_transformation_constraint
:special-members: __call__
:private-members:
@@ -166,3 +166,13 @@ Repeat Modification
########################
.. automodule:: textattack.constraints.pre_transformation.repeat_modification
:members:
+
+Input Column Modification
+#############################
+.. automodule:: textattack.constraints.pre_transformation.input_column_modification
+ :members:
+
+Max Word Index Modification
+###############################
+.. automodule:: textattack.constraints.pre_transformation.max_word_index_modification
+ :members:
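
The pre-transformation constraints documented in the constraint.rst hunk above decide, before any transformation runs, which word positions may be modified (for example, never stopwords and never the same index twice). The snippet below is only a rough, framework-independent illustration of that idea, not the `PreTransformationConstraint` API; the stopword set and helper name are made up.

```python
STOPWORDS = {"a", "an", "and", "in", "of", "or", "the", "to"}


def modifiable_indices(words, already_modified=()):
    """Word indices a transformation may touch: skip stopwords and repeated edits."""
    blocked = set(already_modified)
    return {
        i for i, word in enumerate(words)
        if word.lower() not in STOPWORDS and i not in blocked
    }


print(sorted(modifiable_indices("the attack perturbs the important words".split())))
```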
diff --git a/docs/attacks/transformation.rst b/docs/attacks/transformation.rst
index d37f205a..b61a8eab 100644
--- a/docs/attacks/transformation.rst
+++ b/docs/attacks/transformation.rst
@@ -69,7 +69,7 @@ Word Swap by Random Character Insertion
:members:
Word Swap by Random Character Substitution
----------------------------------------
+-------------------------------------------
.. automodule:: textattack.transformations.word_swap_random_character_substitution
:members:
diff --git a/docs/conf.py b/docs/conf.py
index feb823ca..383855af 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,7 +22,7 @@ copyright = "2020, UVA QData Lab"
author = "UVA QData Lab"
# The full version, including alpha/beta/rc tags
-release = "0.1.2"
+release = "0.1.5"
# Set master doc to `index.rst`.
master_doc = "index"
diff --git a/docs/datasets_models/datasets.rst b/docs/datasets_models/datasets.rst
index bcd5b407..158d87d7 100644
--- a/docs/datasets_models/datasets.rst
+++ b/docs/datasets_models/datasets.rst
@@ -6,19 +6,10 @@ Datasets
:members:
:private-members:
-Classification
-###############
-.. automodule:: textattack.datasets.classification.classification_dataset
+.. automodule:: textattack.datasets.huggingface_nlp_dataset
:members:
-Entailment
-############
-.. automodule:: textattack.datasets.entailment.entailment_dataset
+.. automodule:: textattack.datasets.translation.ted_multi
:members:
-Translation
-#############
-.. automodule:: textattack.datasets.translation.translation_datasets
- :members:
-
diff --git a/docs/datasets_models/models.rst b/docs/datasets_models/models.rst
index 15bbeb45..e3069415 100644
--- a/docs/datasets_models/models.rst
+++ b/docs/datasets_models/models.rst
@@ -11,7 +11,7 @@ We split models up into two broad categories:
**Classification models:**
- :ref:`BERT`: ``bert-base-uncased`` fine-tuned on various datasets using transformers_.
+ :ref:`BERT`: ``bert-base-uncased`` fine-tuned on various datasets using ``transformers``.
:ref:`LSTM`: a standard LSTM fine-tuned on various datasets.
@@ -20,30 +20,29 @@ We split models up into two broad categories:
**Text-to-text models:**
- :ref:`T5`: ``T5`` fine-tuned on various datasets using transformers_.
+ :ref:`T5`: ``T5`` fine-tuned on various datasets using ``transformers``.
+.. _BERT:
BERT
********
-.. _BERT:
-
.. automodule:: textattack.models.helpers.bert_for_classification
:members:
-LSTM
-*******
.. _LSTM:
+LSTM
+*******
.. automodule:: textattack.models.helpers.lstm_for_classification
:members:
-Word-CNN
-************
.. _CNN:
+Word-CNN
+************
.. automodule:: textattack.models.helpers.word_cnn_for_classification
:members:
diff --git a/docs/examples/1_Introduction_and_Transformations.ipynb b/docs/examples/1_Introduction_and_Transformations.ipynb
index f39fb9b4..fb37db23 100644
--- a/docs/examples/1_Introduction_and_Transformations.ipynb
+++ b/docs/examples/1_Introduction_and_Transformations.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# The TextAttack🐙 ecosystem: search, transformations, and constraints\n",
+ "# The TextAttack ecosystem: search, transformations, and constraints\n",
"\n",
"An attack in TextAttack consists of four parts.\n",
"\n",
@@ -31,9 +31,9 @@
"This lesson explains how to create a custom transformation. In TextAttack, many transformations involve *word swaps*: they take a word and try and find suitable substitutes. Some attacks focus on replacing characters with neighboring characters to create \"typos\" (these don't intend to preserve the grammaticality of inputs). Other attacks rely on semantics: they take a word and try to replace it with semantic equivalents.\n",
"\n",
"\n",
- "### Banana word swap 🍌\n",
+ "### Banana word swap \n",
"\n",
- "As an introduction to writing transformations for TextAttack, we're going to try a very simple transformation: one that replaces any given word with the word 'banana'. In TextAttack, there's an abstract `WordSwap` class that handles the heavy lifting of breaking sentences into words and avoiding replacement of stopwords. We can extend `WordSwap` and implement a single method, `_get_replacement_words`, to indicate to replace each word with 'banana'."
+ "As an introduction to writing transformations for TextAttack, we're going to try a very simple transformation: one that replaces any given word with the word 'banana'. In TextAttack, there's an abstract `WordSwap` class that handles the heavy lifting of breaking sentences into words and avoiding replacement of stopwords. We can extend `WordSwap` and implement a single method, `_get_replacement_words`, to indicate to replace each word with 'banana'. 🍌"
]
},
{
@@ -308,9 +308,9 @@
"collapsed": true
},
"source": [
- "### Conclusion 🍌\n",
+ "### Conclusion n",
"\n",
- "We can examine these examples for a good idea of how many words had to be changed to \"banana\" to change the prediction score from the correct class to another class. The examples without perturbed words were originally misclassified, so they were skipped by the attack. Looks like some examples needed only a single \"banana\", while others needed up to 17 \"banana\" substitutions to change the class score. Wow!"
+ "We can examine these examples for a good idea of how many words had to be changed to \"banana\" to change the prediction score from the correct class to another class. The examples without perturbed words were originally misclassified, so they were skipped by the attack. Looks like some examples needed only a couple \"banana\"s, while others needed up to 17 \"banana\" substitutions to change the class score. Wow! 🍌"
]
}
],
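
The notebook hunks above walk through extending `WordSwap` and overriding `_get_replacement_words` so that every word maps to 'banana'. A minimal sketch of that transformation, assuming `WordSwap` is importable from `textattack.transformations` as in the tutorial:

```python
from textattack.transformations import WordSwap


class BananaWordSwap(WordSwap):
    """Transforms an input by replacing any word with 'banana'."""

    def _get_replacement_words(self, word):
        # Candidate replacements for `word`; the tutorial uses the single word "banana".
        return ["banana"]
```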
diff --git a/docs/index.rst b/docs/index.rst
index 52f88a92..c2abc5f4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -35,7 +35,6 @@ TextAttack has some other features that make it a pleasure to use:
Installation