try: remove tokenizers version dependency for flair

2021-10-13 00:05:06 +03:00 · 2020-07-06 21:39:26 -04:00
parent cd40129e42
commit 11013b3a4f
7 changed files with 18 additions and 16 deletions
--- a/README.md
+++ b/README.md
@@ -205,12 +205,12 @@ textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 5
 textattack train --model bert-base-uncased --dataset glue:cola --batch-size 32 --epochs 5
 ```

-## `textattack peek-dataset`
+### `textattack peek-dataset`

 To take a closer look at a dataset, use `textattack peek-dataset`. TextAttack will print some cursory statistics about the inputs and outputs from the dataset. For example, `textattack peek-dataset --dataset-from-nlp snli` will show information about the SNLI dataset from the NLP package.


-## `textattack list`
+### `textattack list`

 There are lots of pieces in TextAttack, and it can be difficult to keep track of all of them. You can use `textattack list` to list components, for example, pretrained models (`textattack list models`) or available search methods (`textattack list search-methods`).

@@ -229,8 +229,11 @@ TextAttack is model-agnostic! You can use `TextAttack` to analyze any model that

 TextAttack also comes built-in with models and datasets. Our command-line interface will automatically match the correct 
 dataset to the correct model. We include various pre-trained models for each of the nine [GLUE](https://gluebenchmark.com/) 
-tasks, as well as some common datasets for classification, translation, and summarization. You can 
-see the full list of provided models & datasets via `textattack attack --help`.
+tasks, as well as some common datasets for classification, translation, and summarization. 
+
+A list of pretrained models and their validation accuracies is available at
+[models/README.md](models/README.md). You can also view a full list of provided models 
+& datasets via `textattack attack --help`.

 Here's an example of using one of the built-in models (the SST-2 dataset is automatically loaded):

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 bert-score
 editdistance
-flair==0.5.1
+flair>=0.5
 filelock
 language_tool_python
 lru-dict
@@ -10,14 +10,14 @@ numpy
 pandas>=1.0.1
 scikit-learn
 scipy==1.4.1
-sentence_transformers==0.2.6.1
+sentence_transformers>0.2.6
 torch
 transformers>=3
 tensorflow>=2
 tensorflow_hub
 tensorboardX
 terminaltables
-tokenizers==0.8.0-rc4
+tokenizers
 tqdm
 visdom
 wandb
--- a/tests/sample_inputs/sst_model_and_dataset.py
+++ b/tests/sample_inputs/sst_model_and_dataset.py
@@ -7,6 +7,6 @@ model_path = "distilbert-base-uncased-finetuned-sst-2-english"
 tokenizer = textattack.models.tokenizers.AutoTokenizer(model_path)
 model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path)

-dataset = textattack.datasets.HuggingFaceNLPDataset(
+dataset = textattack.datasets.HuggingFaceNlpDataset(
    "glue", subset="sst2", split="train", shuffle=False
 )
--- a/tests/sample_outputs/interactive_mode.txt
+++ b/tests/sample_outputs/interactive_mode.txt
@@ -1,4 +1,4 @@
-/.*/Attack(
+Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  unk
  )
@@ -34,15 +34,15 @@
      )
  (is_black_box):  True
 ) 
-/.*/
+
 Running in interactive mode
 ----------------------------
 Enter a sentence to attack or "q" to quit:
 Attacking...
-[92m1 (59%)[0m --> [91m0 (72%)[0m
+[92m1 (96%)[0m --> [91m0 (66%)[0m

-All that [92mglitters[0m is not gold
+[92mAll[0m that [92mglitters[0m is not gold

-All that [91mglisten[0m is not gold
+[91mAny[0m that [91mshinning[0m is not gold

 Enter a sentence to attack or "q" to quit:
--- a/tests/test_command_line/test_attack.py
+++ b/tests/test_command_line/test_attack.py
@@ -125,7 +125,7 @@ attack_test_params = [
    (
        "run_attack_kuleshov_nn",
        (
-            "textattack attack --recipe kuleshov --num-examples 2 --model cnn-sst --attack-n --query-budget 200"
+            "textattack attack --recipe kuleshov --num-examples 2 --model cnn-sst2 --attack-n --query-budget 200"
        ),
        "tests/sample_outputs/kuleshov_cnn_sst_2.txt",
    ),
--- a/textattack/commands/eval_model/eval_model_command.py
+++ b/textattack/commands/eval_model/eval_model_command.py
@@ -8,7 +8,6 @@ from textattack.commands import TextAttackCommand
 from textattack.commands.attack.attack_args import *
 from textattack.commands.attack.attack_args_helpers import *

-
 logger = textattack.shared.logger


--- a/textattack/datasets/huggingface_nlp_dataset.py
+++ b/textattack/datasets/huggingface_nlp_dataset.py
@@ -132,7 +132,7 @@ class HuggingFaceNlpDataset(TextAttackDataset):
        return self._format_raw_example(raw_example)

    def __getitem__(self, i):
-        if isinstance(self.examples[i], str):
+        if isinstance(i, int):
            return self._format_raw_example(self.examples[i])
        else:
            # `i` could be a slice or an integer. if it's a slice,