1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

try: remove tokenizers version dependency for flair

This commit is contained in:
Jack Morris
2020-07-06 21:39:26 -04:00
parent cd40129e42
commit 11013b3a4f
7 changed files with 18 additions and 16 deletions

View File

@@ -205,12 +205,12 @@ textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 5
textattack train --model bert-base-uncased --dataset glue:cola --batch-size 32 --epochs 5
```
## `textattack peek-dataset`
### `textattack peek-dataset`
To take a closer look at a dataset, use `textattack peek-dataset`. TextAttack will print some cursory statistics about the inputs and outputs from the dataset. For example, `textattack peek-dataset --dataset-from-nlp snli` will show information about the SNLI dataset from the NLP package.
## `textattack list`
### `textattack list`
There are lots of pieces in TextAttack, and it can be difficult to keep track of all of them. You can use `textattack list` to list components, for example, pretrained models (`textattack list models`) or available search methods (`textattack list search-methods`).
@@ -229,8 +229,11 @@ TextAttack is model-agnostic! You can use `TextAttack` to analyze any model that
TextAttack also comes built-in with models and datasets. Our command-line interface will automatically match the correct
dataset to the correct model. We include various pre-trained models for each of the nine [GLUE](https://gluebenchmark.com/)
tasks, as well as some common datasets for classification, translation, and summarization. You can
see the full list of provided models & datasets via `textattack attack --help`.
tasks, as well as some common datasets for classification, translation, and summarization.
The pretrained models and their validation accuracies are listed in
[models/README.md](models/README.md). You can also view a full list of provided models
& datasets via `textattack attack --help`.
Here's an example of using one of the built-in models (the SST-2 dataset is automatically loaded):

View File

@@ -1,6 +1,6 @@
bert-score
editdistance
flair==0.5.1
flair>=0.5
filelock
language_tool_python
lru-dict
@@ -10,14 +10,14 @@ numpy
pandas>=1.0.1
scikit-learn
scipy==1.4.1
sentence_transformers==0.2.6.1
sentence_transformers>0.2.6
torch
transformers>=3
tensorflow>=2
tensorflow_hub
tensorboardX
terminaltables
tokenizers==0.8.0-rc4
tokenizers
tqdm
visdom
wandb

View File

@@ -7,6 +7,6 @@ model_path = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = textattack.models.tokenizers.AutoTokenizer(model_path)
model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path)
dataset = textattack.datasets.HuggingFaceNLPDataset(
dataset = textattack.datasets.HuggingFaceNlpDataset(
"glue", subset="sst2", split="train", shuffle=False
)

View File

@@ -1,4 +1,4 @@
/.*/Attack(
Attack(
(search_method): GreedyWordSwapWIR(
(wir_method): unk
)
@@ -34,15 +34,15 @@
)
(is_black_box): True
)
/.*/
Running in interactive mode
----------------------------
Enter a sentence to attack or "q" to quit:
Attacking...
1 (59%) --> 0 (72%)
1 (96%) --> 0 (66%)
All that glitters is not gold
All that glitters is not gold
All that glisten is not gold
Any that shinning is not gold
Enter a sentence to attack or "q" to quit:

View File

@@ -125,7 +125,7 @@ attack_test_params = [
(
"run_attack_kuleshov_nn",
(
"textattack attack --recipe kuleshov --num-examples 2 --model cnn-sst --attack-n --query-budget 200"
"textattack attack --recipe kuleshov --num-examples 2 --model cnn-sst2 --attack-n --query-budget 200"
),
"tests/sample_outputs/kuleshov_cnn_sst_2.txt",
),

View File

@@ -8,7 +8,6 @@ from textattack.commands import TextAttackCommand
from textattack.commands.attack.attack_args import *
from textattack.commands.attack.attack_args_helpers import *
logger = textattack.shared.logger

View File

@@ -132,7 +132,7 @@ class HuggingFaceNlpDataset(TextAttackDataset):
return self._format_raw_example(raw_example)
def __getitem__(self, i):
if isinstance(self.examples[i], str):
if isinstance(i, int):
return self._format_raw_example(self.examples[i])
else:
# `i` could be a slice or an integer. if it's a slice,