1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

examples for tensorflow and sklearn

This commit is contained in:
Jack Morris
2020-07-31 15:35:03 -04:00
parent d9ad73b8ff
commit b0f473685d
11 changed files with 1198 additions and 112 deletions

View File

@@ -1,7 +1,7 @@
Attack Recipes
===============
We provide a number of pre-built attack recipes. To run an attack recipe, run::
We provide a number of pre-built attack recipes, which correspond to attacks from the literature. To run an attack recipe, run::
textattack attack --recipe [recipe_name]
@@ -13,6 +13,11 @@ Attacks on classification models
Alzantot Genetic Algorithm (Generating Natural Language Adversarial Examples)
###################################################################################
.. warning::
This attack uses a very slow language model. Consider using the ``fast-alzantot``
recipe instead.
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
:members:

View File

@@ -43,6 +43,11 @@ extensions = [
"recommonmark",
]
# Allow nbsphinx errors
# (so we don't have to import everything if something
# needed in the notebook isn't installed)
nbsphinx_allow_errors = True
# Add any paths that contain templates here, relative to this directory.
templates_path = []

View File

@@ -0,0 +1,416 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "[TextAttack] tensorflow/keras example",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "ItXfxkxvosLH"
},
"source": [
"# TensorFlow and TextAttack"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WooZ9pGnNJbv",
"colab_type": "text"
},
"source": [
"## Training\n",
"\n",
"\n",
"\n",
"The following is code for training a text classification model using TensorFlow (and on top of it, the Keras API). This comes from the TensorFlow documentation ([see here](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub)).\n",
"\n",
"This cell loads the IMDB dataset (using `tensorflow_datasets`, not `nlp`), initializes a simple classifier, and trains it using Keras."
]
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "2ew7HTbPpCJH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "1c1711e1-cf82-4b09-899f-db7c9bb68513"
},
"source": [
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"import tensorflow_datasets as tfds\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"print(\"Version: \", tf.__version__)\n",
"print(\"Eager mode: \", tf.executing_eagerly())\n",
"print(\"Hub version: \", hub.__version__)\n",
"print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")\n",
"\n",
"train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n",
" batch_size=-1, as_supervised=True)\n",
"\n",
"train_examples, train_labels = tfds.as_numpy(train_data)\n",
"test_examples, test_labels = tfds.as_numpy(test_data)\n",
"\n",
"model = \"https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1\"\n",
"hub_layer = hub.KerasLayer(model, output_shape=[20], input_shape=[], \n",
" dtype=tf.string, trainable=True)\n",
"hub_layer(train_examples[:3])\n",
"\n",
"model = tf.keras.Sequential()\n",
"model.add(hub_layer)\n",
"model.add(tf.keras.layers.Dense(16, activation='relu'))\n",
"model.add(tf.keras.layers.Dense(1))\n",
"\n",
"model.summary()\n",
"\n",
"x_val = train_examples[:10000]\n",
"partial_x_train = train_examples[10000:]\n",
"\n",
"y_val = train_labels[:10000]\n",
"partial_y_train = train_labels[10000:]\n",
"\n",
"model.compile(optimizer='adam',\n",
" loss=tf.losses.BinaryCrossentropy(from_logits=True),\n",
" metrics=['accuracy'])\n",
"\n",
"history = model.fit(partial_x_train,\n",
" partial_y_train,\n",
" epochs=40,\n",
" batch_size=512,\n",
" validation_data=(x_val, y_val),\n",
" verbose=1)"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:absl:No config specified, defaulting to first: imdb_reviews/plain_text\n",
"INFO:absl:Overwrite dataset info from restored data version.\n",
"INFO:absl:Reusing dataset imdb_reviews (/root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0)\n",
"INFO:absl:Constructing tf.data.Dataset for split ['train', 'test'], from /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Version: 2.2.0\n",
"Eager mode: True\n",
"Hub version: 0.8.0\n",
"GPU is NOT AVAILABLE\n",
"Model: \"sequential_1\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"keras_layer_1 (KerasLayer) (None, 20) 400020 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 16) 336 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 1) 17 \n",
"=================================================================\n",
"Total params: 400,373\n",
"Trainable params: 400,373\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"Epoch 1/40\n",
"30/30 [==============================] - 2s 75ms/step - loss: 0.6652 - accuracy: 0.5760 - val_loss: 0.6214 - val_accuracy: 0.6253\n",
"Epoch 2/40\n",
"30/30 [==============================] - 2s 72ms/step - loss: 0.5972 - accuracy: 0.6523 - val_loss: 0.5783 - val_accuracy: 0.6646\n",
"Epoch 3/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.5533 - accuracy: 0.6951 - val_loss: 0.5424 - val_accuracy: 0.7026\n",
"Epoch 4/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.5126 - accuracy: 0.7319 - val_loss: 0.5082 - val_accuracy: 0.7335\n",
"Epoch 5/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.4739 - accuracy: 0.7641 - val_loss: 0.4763 - val_accuracy: 0.7590\n",
"Epoch 6/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.4385 - accuracy: 0.7911 - val_loss: 0.4478 - val_accuracy: 0.7828\n",
"Epoch 7/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.4038 - accuracy: 0.8133 - val_loss: 0.4227 - val_accuracy: 0.7892\n",
"Epoch 8/40\n",
"30/30 [==============================] - 2s 72ms/step - loss: 0.3712 - accuracy: 0.8327 - val_loss: 0.3987 - val_accuracy: 0.8119\n",
"Epoch 9/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.3416 - accuracy: 0.8504 - val_loss: 0.3784 - val_accuracy: 0.8234\n",
"Epoch 10/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.3162 - accuracy: 0.8623 - val_loss: 0.3619 - val_accuracy: 0.8410\n",
"Epoch 11/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.2914 - accuracy: 0.8761 - val_loss: 0.3476 - val_accuracy: 0.8471\n",
"Epoch 12/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.2705 - accuracy: 0.8869 - val_loss: 0.3367 - val_accuracy: 0.8512\n",
"Epoch 13/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.2518 - accuracy: 0.8956 - val_loss: 0.3288 - val_accuracy: 0.8495\n",
"Epoch 14/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.2351 - accuracy: 0.9043 - val_loss: 0.3208 - val_accuracy: 0.8591\n",
"Epoch 15/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.2193 - accuracy: 0.9133 - val_loss: 0.3156 - val_accuracy: 0.8590\n",
"Epoch 16/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.2050 - accuracy: 0.9202 - val_loss: 0.3112 - val_accuracy: 0.8651\n",
"Epoch 17/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.1923 - accuracy: 0.9276 - val_loss: 0.3114 - val_accuracy: 0.8580\n",
"Epoch 18/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.1814 - accuracy: 0.9303 - val_loss: 0.3069 - val_accuracy: 0.8677\n",
"Epoch 19/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.1696 - accuracy: 0.9370 - val_loss: 0.3067 - val_accuracy: 0.8663\n",
"Epoch 20/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.1594 - accuracy: 0.9419 - val_loss: 0.3091 - val_accuracy: 0.8634\n",
"Epoch 21/40\n",
"30/30 [==============================] - 2s 74ms/step - loss: 0.1495 - accuracy: 0.9439 - val_loss: 0.3066 - val_accuracy: 0.8748\n",
"Epoch 22/40\n",
"30/30 [==============================] - 2s 75ms/step - loss: 0.1403 - accuracy: 0.9502 - val_loss: 0.3075 - val_accuracy: 0.8706\n",
"Epoch 23/40\n",
"30/30 [==============================] - 2s 73ms/step - loss: 0.1323 - accuracy: 0.9539 - val_loss: 0.3114 - val_accuracy: 0.8680\n",
"Epoch 24/40\n",
"30/30 [==============================] - 2s 73ms/step - loss: 0.1232 - accuracy: 0.9578 - val_loss: 0.3126 - val_accuracy: 0.8716\n",
"Epoch 25/40\n",
"30/30 [==============================] - 2s 72ms/step - loss: 0.1157 - accuracy: 0.9604 - val_loss: 0.3158 - val_accuracy: 0.8710\n",
"Epoch 26/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.1090 - accuracy: 0.9630 - val_loss: 0.3181 - val_accuracy: 0.8725\n",
"Epoch 27/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.1017 - accuracy: 0.9665 - val_loss: 0.3234 - val_accuracy: 0.8697\n",
"Epoch 28/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.0954 - accuracy: 0.9697 - val_loss: 0.3291 - val_accuracy: 0.8686\n",
"Epoch 29/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.0894 - accuracy: 0.9720 - val_loss: 0.3305 - val_accuracy: 0.8717\n",
"Epoch 30/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.0833 - accuracy: 0.9753 - val_loss: 0.3362 - val_accuracy: 0.8723\n",
"Epoch 31/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.0776 - accuracy: 0.9771 - val_loss: 0.3422 - val_accuracy: 0.8721\n",
"Epoch 32/40\n",
"30/30 [==============================] - 2s 71ms/step - loss: 0.0726 - accuracy: 0.9798 - val_loss: 0.3484 - val_accuracy: 0.8744\n",
"Epoch 33/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.0678 - accuracy: 0.9825 - val_loss: 0.3538 - val_accuracy: 0.8722\n",
"Epoch 34/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.0631 - accuracy: 0.9837 - val_loss: 0.3616 - val_accuracy: 0.8736\n",
"Epoch 35/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.0586 - accuracy: 0.9861 - val_loss: 0.3680 - val_accuracy: 0.8724\n",
"Epoch 36/40\n",
"30/30 [==============================] - 2s 69ms/step - loss: 0.0550 - accuracy: 0.9875 - val_loss: 0.3772 - val_accuracy: 0.8742\n",
"Epoch 37/40\n",
"30/30 [==============================] - 2s 69ms/step - loss: 0.0506 - accuracy: 0.9887 - val_loss: 0.3821 - val_accuracy: 0.8709\n",
"Epoch 38/40\n",
"30/30 [==============================] - 2s 70ms/step - loss: 0.0471 - accuracy: 0.9901 - val_loss: 0.3907 - val_accuracy: 0.8692\n",
"Epoch 39/40\n",
"30/30 [==============================] - 2s 68ms/step - loss: 0.0436 - accuracy: 0.9914 - val_loss: 0.3980 - val_accuracy: 0.8703\n",
"Epoch 40/40\n",
"30/30 [==============================] - 2s 69ms/step - loss: 0.0405 - accuracy: 0.9922 - val_loss: 0.4070 - val_accuracy: 0.8699\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3varlQvrnHqV",
"colab_type": "text"
},
"source": [
"## Attacking\n",
"\n",
"For each input, our classifier outputs a single number that indicates how positive or negative the model finds the input. For binary classification, TextAttack expects two numbers for each input (a score for each class, positive and negative). We have to post-process each output to fit this TextAttack format. To add this post-processing we need to implement a custom model wrapper class (instead of using the built-in `textattack.models.wrappers.TensorFlowModelWrapper`).\n",
"\n",
"Each `ModelWrapper` must implement a single method, `__call__`, which takes a list of strings and returns a `List`, `np.ndarray`, or `torch.Tensor` of predictions."
]
},
{
"cell_type": "code",
"metadata": {
"id": "fHX3Lo7wU2LM",
"colab_type": "code",
"colab": {}
},
"source": [
"import numpy as np\n",
"import torch\n",
"\n",
"from textattack.models.wrappers import ModelWrapper\n",
"\n",
"class CustomTensorFlowModelWrapper(ModelWrapper):\n",
" def __init__(self, model):\n",
" self.model = model\n",
"\n",
" def __call__(self, text_input_list):\n",
" text_array = np.array(text_input_list)\n",
" preds = self.model(text_array).numpy()\n",
" logits = torch.exp(-torch.tensor(preds))\n",
" logits = 1 / (1 + logits)\n",
" logits = logits.squeeze(dim=-1)\n",
" # Since this model only has a single output (between 0 and 1),\n",
" # we have to add the second dimension.\n",
" final_preds = torch.stack((1-logits, logits), dim=1)\n",
" return final_preds\n"
],
"execution_count": 13,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ku71HuZ4n7ih",
"colab_type": "text"
},
"source": [
"Let's test our model wrapper out to make sure it can use our model to return predictions in the correct format."
]
},
{
"cell_type": "code",
"metadata": {
"id": "9hgiLQC4ejmM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 52
},
"outputId": "132c3be5-fe5e-4be4-ef98-5c2efedc0dfd"
},
"source": [
"CustomTensorFlowModelWrapper(model)(['I hate you so much', 'I love you'])"
],
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[0.2745, 0.7255],\n",
" [0.0072, 0.9928]])"
]
},
"metadata": {
"tags": []
},
"execution_count": 14
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-Bs14Hr4n_Sp",
"colab_type": "text"
},
"source": [
"Looks good! Now we can initialize our model wrapper with the model we trained and pass it to an instance of `textattack.attack.Attack`. \n",
"\n",
"We'll use the `PWWSRen2019` recipe as our attack, and attack 10 samples."
]
},
{
"cell_type": "code",
"metadata": {
"id": "07mOE-wLVQDR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 780
},
"outputId": "e47a099e-c0f6-4c21-8e52-1a437741bc16"
},
"source": [
"model_wrapper = CustomTensorFlowModelWrapper(model)\n",
"\n",
"from textattack.datasets import HuggingFaceNlpDataset\n",
"from textattack.attack_recipes import PWWSRen2019\n",
"\n",
"dataset = HuggingFaceNlpDataset(\"rotten_tomatoes\", None, \"test\", shuffle=True)\n",
"attack = PWWSRen2019(model_wrapper)\n",
"\n",
"results_iterable = attack.attack_dataset(dataset, indices=range(10))\n",
"for result in results_iterable:\n",
" print(result.__str__(color_method='ansi'))"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:nlp.builder:Using custom data configuration default\n",
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94mrotten_tomatoes\u001b[0m, split \u001b[94mtest\u001b[0m.\n",
"\u001b[34;1mtextattack\u001b[0m: Unknown if model of class <class '__main__.CustomTensorFlowModelWrapper'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\u001b[92mPositive (60%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
"\n",
"kaufman's script is never especially clever and often is rather pretentious .\n",
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[92mPositive (59%)\u001b[0m\n",
"\n",
"an \u001b[91munfortunate\u001b[0m title for a film that has \u001b[91mnothing\u001b[0m endearing about it .\n",
"\n",
"an \u001b[92minauspicious\u001b[0m title for a film that has \u001b[92mzip\u001b[0m endearing about it .\n",
"\u001b[91mNegative (73%)\u001b[0m --> \u001b[92mPositive (59%)\u001b[0m\n",
"\n",
"sade achieves the near-impossible : it \u001b[91mturns\u001b[0m the marquis de sade into a dullard .\n",
"\n",
"sade achieves the near-impossible : it \u001b[92mtour\u001b[0m the marquis de sade into a dullard .\n",
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
"\n",
". . . planos fijos , tomas largas , un ritmo pausado y una sutil observación de sus personajes , sin estridencias ni grandes revelaciones .\n",
"\u001b[91mNegative (97%)\u001b[0m --> \u001b[92mPositive (62%)\u001b[0m\n",
"\n",
"charly comes off as emotionally manipulative and \u001b[91msadly\u001b[0m imitative of innumerable past love story derisions .\n",
"\n",
"charly comes off as emotionally manipulative and \u001b[92mdeplorably\u001b[0m imitative of innumerable past love story derisions .\n",
"\u001b[91mNegative (70%)\u001b[0m --> \u001b[92mPositive (93%)\u001b[0m\n",
"\n",
"any intellectual \u001b[91marguments\u001b[0m being made about the nature of god are framed in a drama so clumsy , there is a real danger less sophisticated audiences will mistake it for an endorsement of the very things that bean abhors .\n",
"\n",
"any intellectual \u001b[92mcontention\u001b[0m being made about the nature of god are framed in a drama so clumsy , there is a real danger less sophisticated audiences will mistake it for an endorsement of the very things that bean abhors .\n",
"\u001b[92mPositive (97%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
"\n",
"a handsome but unfulfilling suspense drama more suited to a quiet evening on pbs than a night out at an amc .\n",
"\u001b[91mNegative (93%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
"\n",
"you will likely prefer to keep on watching .\n",
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (74%)\u001b[0m\n",
"\n",
"what ensues are \u001b[91mmuch\u001b[0m blood-splattering , \u001b[91mmass\u001b[0m drug-induced \u001b[91mbowel\u001b[0m evacuations , and none-too-funny commentary on the cultural \u001b[91mdistinctions\u001b[0m between americans and \u001b[91mbrits\u001b[0m .\n",
"\n",
"what ensues are \u001b[92mlots\u001b[0m blood-splattering , \u001b[92mplenty\u001b[0m drug-induced \u001b[92mintestine\u001b[0m evacuations , and none-too-funny commentary on the cultural \u001b[92mdistinction\u001b[0m between americans and \u001b[92mBrits\u001b[0m .\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
"\n",
"a film without surprise geared toward maximum comfort and familiarity .\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "P3L9ccqGoS-J",
"colab_type": "text"
},
"source": [
"## Conclusion \n",
"\n",
"Looks good! We successfully loaded a model, adapted it for TextAttack's `ModelWrapper`, and used that object in an attack. This is basically how you would adapt any model, using TensorFlow or any other library, for use with TextAttack."
]
}
]
}

View File

@@ -0,0 +1,700 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## sklearn and TextAttack\n",
"\n",
"The following code trains two different text classification models using sklearn. Both use logistic regression models: the difference is in the features. \n",
"\n",
"We will load data using `nlp`, train the models, and subsequently attack them using TextAttack.\n",
"\n",
"### Training\n",
"\n",
"This code trains two models: one on bag-of-words statistics (`bow_unstemmed`) and one on tfidf statistics (`tfidf_unstemmed`). The dataset is the IMDB movie review dataset."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"...successfully loaded training data\n",
"Total length of training data: 25000\n",
"...augmented data with len_tokens and average_words\n",
"...successfully loaded testing data\n",
"Total length of testing data: 25000\n",
"...augmented data with len_tokens and average_words\n",
"...successfully created the unstemmed BOW data\n",
"...successfully created the unstemmed TFIDF data\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/u/jm8wx/.conda/envs/torch/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:762: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training accuracy of BOW Unstemmed: 1.0\n",
"Testing accuracy of BOW Unstemmed: 0.8368\n",
" precision recall f1-score support\n",
"\n",
" 0 0.83 0.85 0.84 12500\n",
" 1 0.84 0.83 0.83 12500\n",
"\n",
" accuracy 0.84 25000\n",
" macro avg 0.84 0.84 0.84 25000\n",
"weighted avg 0.84 0.84 0.84 25000\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/u/jm8wx/.conda/envs/torch/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:762: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training accuracy of TFIDF Unstemmed: 0.98836\n",
"Testing accuracy of TFIDF Unstemmed: 0.85656\n",
" precision recall f1-score support\n",
"\n",
" 0 0.85 0.87 0.86 12500\n",
" 1 0.86 0.85 0.85 12500\n",
"\n",
" accuracy 0.86 25000\n",
" macro avg 0.86 0.86 0.86 25000\n",
"weighted avg 0.86 0.86 0.86 25000\n",
"\n"
]
}
],
"source": [
"import nlp\n",
"import os\n",
"import pandas as pd\n",
"import re\n",
"from nltk import word_tokenize\n",
"from nltk.stem import PorterStemmer\n",
"from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS\n",
"from sklearn import preprocessing\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Nice to see additional metrics\n",
"from sklearn.metrics import classification_report\n",
"\n",
"def load_data(dataset_split='train'):\n",
" dataset = nlp.load_dataset('imdb')[dataset_split]\n",
" # Load the review texts and labels into a DataFrame\n",
" df = pd.DataFrame()\n",
" df['Review'] = [review['text'] for review in dataset]\n",
" df['Sentiment'] = [review['label'] for review in dataset]\n",
" # Replace non-alphabetic characters with spaces\n",
" df['Review'] = df['Review'].apply(lambda x: re.sub(\"[^a-zA-Z]\", ' ', str(x)))\n",
" # Tokenize the training and testing data\n",
" df_tokenized = tokenize_review(df)\n",
" return df_tokenized\n",
"\n",
"def tokenize_review(df):\n",
" # Tokenize Reviews in training\n",
" tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n",
" # Create word stems\n",
" stemmed_tokens = []\n",
" porter = PorterStemmer()\n",
" for i in range(len(tokened_reviews)):\n",
" stems = [porter.stem(token) for token in tokened_reviews[i]]\n",
" stems = ' '.join(stems)\n",
" stemmed_tokens.append(stems)\n",
" df.insert(1, column='Stemmed', value=stemmed_tokens)\n",
" return df\n",
"\n",
"def transform_BOW(training, testing, column_name):\n",
" vect = CountVectorizer(max_features=10000, ngram_range=(1,3), stop_words=ENGLISH_STOP_WORDS)\n",
" vectFit = vect.fit(training[column_name])\n",
" BOW_training = vectFit.transform(training[column_name])\n",
" BOW_training_df = pd.DataFrame(BOW_training.toarray(), columns=vect.get_feature_names())\n",
" BOW_testing = vectFit.transform(testing[column_name])\n",
" BOW_testing_Df = pd.DataFrame(BOW_testing.toarray(), columns=vect.get_feature_names())\n",
" return vectFit, BOW_training_df, BOW_testing_Df\n",
"\n",
"def transform_tfidf(training, testing, column_name):\n",
" Tfidf = TfidfVectorizer(ngram_range=(1,3), max_features=10000, stop_words=ENGLISH_STOP_WORDS)\n",
" Tfidf_fit = Tfidf.fit(training[column_name])\n",
" Tfidf_training = Tfidf_fit.transform(training[column_name])\n",
" Tfidf_training_df = pd.DataFrame(Tfidf_training.toarray(), columns=Tfidf.get_feature_names())\n",
" Tfidf_testing = Tfidf_fit.transform(testing[column_name])\n",
" Tfidf_testing_df = pd.DataFrame(Tfidf_testing.toarray(), columns=Tfidf.get_feature_names())\n",
" return Tfidf_fit, Tfidf_training_df, Tfidf_testing_df\n",
"\n",
"def add_augmenting_features(df):\n",
" tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n",
" # Create feature that measures length of reviews\n",
" len_tokens = []\n",
" for i in range(len(tokened_reviews)):\n",
" len_tokens.append(len(tokened_reviews[i]))\n",
" len_tokens = preprocessing.scale(len_tokens)\n",
" df.insert(0, column='Lengths', value=len_tokens)\n",
"\n",
" # Create average word length (training)\n",
" Average_Words = [len(x)/(len(x.split())) for x in df['Review'].tolist()]\n",
" Average_Words = preprocessing.scale(Average_Words)\n",
" df['averageWords'] = Average_Words\n",
" return df\n",
"\n",
"def build_model(X_train, y_train, X_test, y_test, name_of_test):\n",
" log_reg = LogisticRegression(C=30, max_iter=200).fit(X_train, y_train)\n",
" y_pred = log_reg.predict(X_test)\n",
" print('Training accuracy of '+name_of_test+': ', log_reg.score(X_train, y_train))\n",
" print('Testing accuracy of '+name_of_test+': ', log_reg.score(X_test, y_test))\n",
" print(classification_report(y_test, y_pred)) # Evaluating prediction ability\n",
" return log_reg\n",
"\n",
"# Load training and test sets\n",
"# Loading reviews into DF\n",
"df_train = load_data('train')\n",
"\n",
"print('...successfully loaded training data')\n",
"print('Total length of training data: ', len(df_train))\n",
"# Add augmenting features\n",
"df_train = add_augmenting_features(df_train)\n",
"print('...augmented data with len_tokens and average_words')\n",
"\n",
"# Load test DF\n",
"df_test = load_data('test')\n",
"\n",
"print('...successfully loaded testing data')\n",
"print('Total length of testing data: ', len(df_test))\n",
"df_test = add_augmenting_features(df_test)\n",
"print('...augmented data with len_tokens and average_words')\n",
"\n",
"# Create unstemmed BOW features for training set\n",
"unstemmed_BOW_vect_fit, df_train_bow_unstem, df_test_bow_unstem = transform_BOW(df_train, df_test, 'Review')\n",
"print('...successfully created the unstemmed BOW data')\n",
"\n",
"# Create TfIdf features for training set\n",
"unstemmed_tfidf_vect_fit, df_train_tfidf_unstem, df_test_tfidf_unstem = transform_tfidf(df_train, df_test, 'Review')\n",
"print('...successfully created the unstemmed TFIDF data')\n",
"\n",
"# Running logistic regression on dataframes\n",
"bow_unstemmed = build_model(df_train_bow_unstem, df_train['Sentiment'], df_test_bow_unstem, df_test['Sentiment'], 'BOW Unstemmed')\n",
"\n",
"tfidf_unstemmed = build_model(df_train_tfidf_unstem, df_train['Sentiment'], df_test_tfidf_unstem, df_test['Sentiment'], 'TFIDF Unstemmed')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attacking\n",
"\n",
"TextAttack includes a built-in `SklearnModelWrapper` that can run attacks on most sklearn models. (If your tokenization strategy differs from the one above, you may need to subclass `SklearnModelWrapper` to make sure the model inputs & outputs come in the correct format.)\n",
"\n",
"Once we initialize the model wrapper, we load a few samples from the IMDB dataset and run the `TextFoolerJin2019` attack on our model."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from textattack.models.wrappers import SklearnModelWrapper\n",
"\n",
"model_wrapper = SklearnModelWrapper(bow_unstemmed, unstemmed_BOW_vect_fit)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 0:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (94%)\u001b[0m\n",
"\n",
"Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as \"Teachers\". My 35 years in the teaching profession lead me to believe that Bromwell High's \u001b[92msatire\u001b[0m is much closer to reality than is \"Teachers\". The scramble to \u001b[92msurvive\u001b[0m financially, the insightful students who can see right through their pathetic teachers' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I'm here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a \u001b[92mpity\u001b[0m that it isn't!\n",
"\n",
"Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as \"Teachers\". My 35 years in the teaching profession lead me to believe that Bromwell High's \u001b[91msatirical\u001b[0m is much closer to reality than is \"Teachers\". The scramble to \u001b[91mlived\u001b[0m financially, the insightful students who can see right through their pathetic teachers' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I'm here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a \u001b[91mshame\u001b[0m that it isn't!\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 1:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (52%)\u001b[0m\n",
"\n",
"Homelessness (or Houselessness as George Carlin stated) has been an issue for years but never a plan to help those on the street that were once considered human who did everything from going to school, work, or vote for the matter. Most people think of the homeless as just a lost cause while worrying about things such as racism, the war on Iraq, pressuring kids to succeed, technology, the elections, inflation, or worrying if they'll be next to end up on the streets.<br /><br />But what if you were given a bet to live on the streets for a month without the luxuries you once had from a home, the entertainment sets, a bathroom, pictures on the wall, a computer, and everything you once treasure to see what it's like to be homeless? That is Goddard Bolt's lesson.<br /><br />Mel Brooks (who directs) who stars as Bolt plays a rich man who has everything in the world until \u001b[92mdeciding\u001b[0m to make a bet with a sissy rival (Jeffery Tambor) to see if he can live in the streets for thirty days without the luxuries; if Bolt succeeds, he can do what he wants with a future project of making more buildings. The bet's on where Bolt is thrown on the street with a bracelet on his leg to monitor his every move where he can't step off the sidewalk. He's given the nickname Pepto by a vagrant after it's written on his forehead where Bolt meets other characters including a woman by the name of Molly (Lesley Ann Warren) an ex-dancer who got divorce before losing her home, and her pals Sailor (Howard Morris) and Fumes (Teddy Wilson) who are already used to the streets. They're survivors. Bolt isn't. 
He's not used to reaching mutual agreements like he once did when being rich where it's fight or \u001b[92mflight\u001b[0m, kill or be killed.<br /><br />While the \u001b[92mlove\u001b[0m connection between Molly and Bolt wasn't necessary to plot, I found \"Life Stinks\" to be one of Mel Brooks' observant films where prior to being a comedy, it shows a tender side compared to his slapstick work such as Blazing Saddles, Young Frankenstein, or Spaceballs for the matter, to show what it's like having something valuable before losing it the next day or on the other hand making a stupid bet like all rich people do when they don't know what to do with their money. Maybe they should give it to the homeless instead of using it like Monopoly money.<br /><br />Or maybe this film will inspire you to help others.\n",
"\n",
"Homelessness (or Houselessness as George Carlin stated) has been an issue for years but never a plan to help those on the street that were once considered human who did everything from going to school, work, or vote for the matter. Most people think of the homeless as just a lost cause while worrying about things such as racism, the war on Iraq, pressuring kids to succeed, technology, the elections, inflation, or worrying if they'll be next to end up on the streets.<br /><br />But what if you were given a bet to live on the streets for a month without the luxuries you once had from a home, the entertainment sets, a bathroom, pictures on the wall, a computer, and everything you once treasure to see what it's like to be homeless? That is Goddard Bolt's lesson.<br /><br />Mel Brooks (who directs) who stars as Bolt plays a rich man who has everything in the world until \u001b[91mchooses\u001b[0m to make a bet with a sissy rival (Jeffery Tambor) to see if he can live in the streets for thirty days without the luxuries; if Bolt succeeds, he can do what he wants with a future project of making more buildings. The bet's on where Bolt is thrown on the street with a bracelet on his leg to monitor his every move where he can't step off the sidewalk. He's given the nickname Pepto by a vagrant after it's written on his forehead where Bolt meets other characters including a woman by the name of Molly (Lesley Ann Warren) an ex-dancer who got divorce before losing her home, and her pals Sailor (Howard Morris) and Fumes (Teddy Wilson) who are already used to the streets. They're survivors. Bolt isn't. 
He's not used to reaching mutual agreements like he once did when being rich where it's fight or \u001b[91mplane\u001b[0m, kill or be killed.<br /><br />While the \u001b[91madore\u001b[0m connection between Molly and Bolt wasn't necessary to plot, I found \"Life Stinks\" to be one of Mel Brooks' observant films where prior to being a comedy, it shows a tender side compared to his slapstick work such as Blazing Saddles, Young Frankenstein, or Spaceballs for the matter, to show what it's like having something valuable before losing it the next day or on the other hand making a stupid bet like all rich people do when they don't know what to do with their money. Maybe they should give it to the homeless instead of using it like Monopoly money.<br /><br />Or maybe this film will inspire you to help others.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 2:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (52%)\u001b[0m\n",
"\n",
"Brilliant over-acting by Lesley Ann Warren. Best dramatic hobo lady I have ever seen, and love scenes in clothes warehouse are second to none. The corn on face is a classic, as good as anything in Blazing Saddles. The take on lawyers is also \u001b[92msuperb\u001b[0m. After being accused of being a turncoat, selling out his boss, and being dishonest the lawyer of Pepto Bolt shrugs indifferently \"I'm a lawyer\" he says. Three funny words. Jeffrey Tambor, a \u001b[92mfavorite\u001b[0m from the later Larry Sanders show, is \u001b[92mfantastic\u001b[0m here too as a mad millionaire who wants to crush the ghetto. His character is more malevolent than usual. The hospital scene, and the \u001b[92mscene\u001b[0m where the homeless invade a demolition site, are all-time classics. Look for the \u001b[92mlegs\u001b[0m scene and the two big diggers fighting (one bleeds). This movie \u001b[92mgets\u001b[0m \u001b[92mbetter\u001b[0m each time I see it (which is quite often).\n",
"\n",
"Brilliant over-acting by Lesley Ann Warren. Best dramatic hobo lady I have ever seen, and love scenes in clothes warehouse are second to none. The corn on face is a classic, as good as anything in Blazing Saddles. The take on lawyers is also \u001b[91mextraordinaire\u001b[0m. After being accused of being a turncoat, selling out his boss, and being dishonest the lawyer of Pepto Bolt shrugs indifferently \"I'm a lawyer\" he says. Three funny words. Jeffrey Tambor, a \u001b[91mchoose\u001b[0m from the later Larry Sanders show, is \u001b[91mnoteworthy\u001b[0m here too as a mad millionaire who wants to crush the ghetto. His character is more malevolent than usual. The hospital scene, and the \u001b[91mscenes\u001b[0m where the homeless invade a demolition site, are all-time classics. Look for the \u001b[91mpaws\u001b[0m scene and the two big diggers fighting (one bleeds). This movie \u001b[91mgain\u001b[0m \u001b[91mgood\u001b[0m each time I see it (which is quite often).\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 3:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (69%)\u001b[0m\n",
"\n",
"This is easily the most \u001b[92munderrated\u001b[0m film inn the Brooks cannon. Sure, its \u001b[92mflawed\u001b[0m. It does not give a realistic view of homelessness (\u001b[92munlike\u001b[0m, say, how Citizen Kane gave a \u001b[92mrealistic\u001b[0m view of lounge singers, or Titanic gave a \u001b[92mrealistic\u001b[0m view of Italians YOU IDIOTS). Many of the jokes fall flat. But still, this film is very lovable in a way many comedies are not, and to pull that off in a story about some of the most traditionally reviled members of society is truly impressive. Its not The Fisher King, but its not crap, either. My only \u001b[92mcomplaint\u001b[0m is that Brooks should have cast someone else in the lead (I love Mel as a Director and Writer, not so much as a lead).\n",
"\n",
"This is easily the most \u001b[91moverrated\u001b[0m film inn the Brooks cannon. Sure, its \u001b[91mrotten\u001b[0m. It does not give a realistic view of homelessness (\u001b[91malthough\u001b[0m, say, how Citizen Kane gave a \u001b[91mactual\u001b[0m view of lounge singers, or Titanic gave a \u001b[91mactual\u001b[0m view of Italians YOU IDIOTS). Many of the jokes fall flat. But still, this film is very lovable in a way many comedies are not, and to pull that off in a story about some of the most traditionally reviled members of society is truly impressive. Its not The Fisher King, but its not crap, either. My only \u001b[91margue\u001b[0m is that Brooks should have cast someone else in the lead (I love Mel as a Director and Writer, not so much as a lead).\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 4:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (50%)\u001b[0m\n",
"\n",
"This is not the typical Mel Brooks film. It was much less slapstick than most of his movies and actually had a plot that was followable. Leslie Ann Warren made the movie, she is such a \u001b[92mfantastic\u001b[0m, under-rated actress. There were some moments that could have been fleshed out a bit more, and some scenes that could probably have been cut to make the room to do so, but all in all, this is worth the price to rent and see it. The acting was good overall, Brooks himself did a \u001b[92mgood\u001b[0m \u001b[92mjob\u001b[0m without his characteristic speaking to directly to the audience. Again, Warren was the best actor in the movie, but \"Fume\" and \"Sailor\" both played their parts well.\n",
"\n",
"This is not the typical Mel Brooks film. It was much less slapstick than most of his movies and actually had a plot that was followable. Leslie Ann Warren made the movie, she is such a \u001b[91mbrilliant\u001b[0m, under-rated actress. There were some moments that could have been fleshed out a bit more, and some scenes that could probably have been cut to make the room to do so, but all in all, this is worth the price to rent and see it. The acting was good overall, Brooks himself did a \u001b[91madequate\u001b[0m \u001b[91mstaff\u001b[0m without his characteristic speaking to directly to the audience. Again, Warren was the best actor in the movie, but \"Fume\" and \"Sailor\" both played their parts well.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 5:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (64%)\u001b[0m\n",
"\n",
"This isn't the comedic Robin Williams, nor is it the quirky/\u001b[92minsane\u001b[0m Robin Williams of recent thriller fame. This is a hybrid of the classic drama without over-dramatization, mixed with Robin's new love of the thriller. But this isn't a thriller, per se. This is more a mystery/suspense vehicle through which Williams attempts to locate a sick boy and his keeper.<br /><br />Also starring Sandra Oh and Rory Culkin, this Suspense Drama plays pretty much like a news report, until William's character gets close to achieving his goal.<br /><br />I must say that I was highly entertained, though this movie fails to teach, guide, inspect, or amuse. It felt more like I was watching a guy (Williams), as he was actually performing the actions, from a third person perspective. In other words, it felt real, and I was able to subscribe to the premise of the story.<br /><br />All in all, it's worth a watch, though it's definitely not Friday/Saturday night \u001b[92mfare\u001b[0m.<br /><br />It rates a 7.7/10 from...<br /><br />the Fiend :.\n",
"\n",
"This isn't the comedic Robin Williams, nor is it the quirky/\u001b[91mstupidity\u001b[0m Robin Williams of recent thriller fame. This is a hybrid of the classic drama without over-dramatization, mixed with Robin's new love of the thriller. But this isn't a thriller, per se. This is more a mystery/suspense vehicle through which Williams attempts to locate a sick boy and his keeper.<br /><br />Also starring Sandra Oh and Rory Culkin, this Suspense Drama plays pretty much like a news report, until William's character gets close to achieving his goal.<br /><br />I must say that I was highly entertained, though this movie fails to teach, guide, inspect, or amuse. It felt more like I was watching a guy (Williams), as he was actually performing the actions, from a third person perspective. In other words, it felt real, and I was able to subscribe to the premise of the story.<br /><br />All in all, it's worth a watch, though it's definitely not Friday/Saturday night \u001b[91mcharge\u001b[0m.<br /><br />It rates a 7.7/10 from...<br /><br />the Fiend :.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 6:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (53%)\u001b[0m\n",
"\n",
"Yes its an art... to successfully make a slow paced thriller.<br /><br />The story unfolds in nice volumes while you don't even notice it happening.<br /><br />Fine performance by Robin Williams. The sexuality angles in the film can seem unnecessary and can probably \u001b[92maffect\u001b[0m how much you enjoy the film. However, the core plot is very engaging. The movie doesn't rush onto you and still grips you enough to keep you wondering. The direction is good. Use of lights to achieve desired affects of suspense and unexpectedness is good.<br /><br />Very nice 1 \u001b[92mtime\u001b[0m \u001b[92mwatch\u001b[0m if you are looking to lay back and hear a thrilling short story!\n",
"\n",
"Yes its an art... to successfully make a slow paced thriller.<br /><br />The story unfolds in nice volumes while you don't even notice it happening.<br /><br />Fine performance by Robin Williams. The sexuality angles in the film can seem unnecessary and can probably \u001b[91mdamage\u001b[0m how much you enjoy the film. However, the core plot is very engaging. The movie doesn't rush onto you and still grips you enough to keep you wondering. The direction is good. Use of lights to achieve desired affects of suspense and unexpectedness is good.<br /><br />Very nice 1 \u001b[91mmoment\u001b[0m \u001b[91mwatcher\u001b[0m if you are looking to lay back and hear a thrilling short story!\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 7:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (80%)\u001b[0m\n",
"\n",
"In this \"critically acclaimed psychological thriller based on \u001b[92mtrue\u001b[0m events, Gabriel (Robin Williams), a celebrated writer and late-night talk show host, becomes captivated by the harrowing story of a young listener and his adoptive mother (Toni Collette). When troubling questions arise about this boy's (story), however, Gabriel finds himself drawn into a widening mystery that hides a deadly secret…",
"\" according to film's official synopsis.<br /><br />You really should STOP reading these comments, and watch the film NOW...<br /><br />The \"How did he lose his leg?\" ending, with Ms. Collette planning her new life, should be chopped off, and \u001b[92msent\u001b[0m to \"deleted scenes\" land. It's overkill. The true nature of her physical and mental ailments should be obvious, by the time Mr. Williams returns to New York. Possibly, her blindness could be in question - but a revelation could have be made certain in either the \"highway\" or \"video tape\" scenes. The film would benefit from a re-editing - how about a \"director's cut\"? <br /><br />Williams and Bobby Cannavale (as Jess) don't seem, initially, believable as a couple. A scene or two establishing their relationship might have helped set the stage. Otherwise, the cast is exemplary. Williams offers an exceptionally strong characterization, and not a \"gay impersonation\". Sandra Oh (as Anna), Joe Morton (as Ashe), and Rory Culkin (Pete Logand) are all \u001b[92mperfect\u001b[0m.<br /><br />Best of all, Collette's \"Donna\" belongs in the creepy hall of fame. Ms. Oh is correct in saying Collette might be, \"you know, like that guy from 'Psycho'.\" There have been several years when organizations giving acting awards seemed to reach for women, due to a slighter dispersion of roles; certainly, they could have noticed Collette with some award consideration. She is that good. And, director Patrick Stettner definitely evokes Hitchcock - he even makes getting a sandwich from a vending machine suspenseful.<br /><br />Finally, writers Stettner, Armistead Maupin, and Terry Anderson deserve gratitude from \u001b[92mflight\u001b[0m attendants everywhere.<br /><br />******* The Night Listener (1/21/06) Patrick Stettner ~ Robin Williams, Toni Collette, Sandra Oh, Rory Culkin\n",
"\n",
"In this \"critically acclaimed psychological thriller based on \u001b[91mhonestly\u001b[0m events, Gabriel (Robin Williams), a celebrated writer and late-night talk show host, becomes captivated by the harrowing story of a young listener and his adoptive mother (Toni Collette). When troubling questions arise about this boy's (story), however, Gabriel finds himself drawn into a widening mystery that hides a deadly secret…",
"\" according to film's official synopsis.<br /><br />You really should STOP reading these comments, and watch the film NOW...<br /><br />The \"How did he lose his leg?\" ending, with Ms. Collette planning her new life, should be chopped off, and \u001b[91msending\u001b[0m to \"deleted scenes\" land. It's overkill. The true nature of her physical and mental ailments should be obvious, by the time Mr. Williams returns to New York. Possibly, her blindness could be in question - but a revelation could have be made certain in either the \"highway\" or \"video tape\" scenes. The film would benefit from a re-editing - how about a \"director's cut\"? <br /><br />Williams and Bobby Cannavale (as Jess) don't seem, initially, believable as a couple. A scene or two establishing their relationship might have helped set the stage. Otherwise, the cast is exemplary. Williams offers an exceptionally strong characterization, and not a \"gay impersonation\". Sandra Oh (as Anna), Joe Morton (as Ashe), and Rory Culkin (Pete Logand) are all \u001b[91mblameless\u001b[0m.<br /><br />Best of all, Collette's \"Donna\" belongs in the creepy hall of fame. Ms. Oh is correct in saying Collette might be, \"you know, like that guy from 'Psycho'.\" There have been several years when organizations giving acting awards seemed to reach for women, due to a slighter dispersion of roles; certainly, they could have noticed Collette with some award consideration. She is that good. And, director Patrick Stettner definitely evokes Hitchcock - he even makes getting a sandwich from a vending machine suspenseful.<br /><br />Finally, writers Stettner, Armistead Maupin, and Terry Anderson deserve gratitude from \u001b[91mairplane\u001b[0m attendants everywhere.<br /><br />******* The Night Listener (1/21/06) Patrick Stettner ~ Robin Williams, Toni Collette, Sandra Oh, Rory Culkin\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 8:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
"\n",
"THE NIGHT LISTENER (2006) **1/2 Robin Williams, Toni Collette, Bobby Cannavale, Rory Culkin, Joe Morton, Sandra Oh, John Cullum, Lisa Emery, Becky Ann Baker. (Dir: Patrick Stettner) <br /><br />Hitchcockian suspenser gives Williams a stand-out low-key performance.<br /><br />What is it about celebrities and fans? What is the near paranoia one associates with the other and why is it almost the norm? <br /><br />In the latest derange fan scenario, based on true events no less, Williams stars as a talk-radio personality named Gabriel No one, who reads stories he's penned over the airwaves and has accumulated an interesting fan in the form of a young boy named Pete Logand (Culkin) who has submitted a manuscript about the travails of his troubled youth to No one's editor Ashe (Morton) who gives it to No one to read for himself. <br /><br />No one is naturally disturbed but ultimately intrigued about the nightmarish existence of Pete being abducted and sexually abused for years until he was finally rescued by a nurse named Donna (Collette giving an excellent performance) who has adopted the boy but her correspondence with No one reveals that Pete is dying from AIDS. Naturally No one wants to meet the fans but is suddenly in doubt to their possibly devious ulterior motives when the seed is planted by his estranged lover Jess (Cannavale) whose sudden departure from their New York City apartment has No one in an emotional tailspin that has only now grown into a tempest in a teacup when he decides to do some investigating into Donna and Pete's backgrounds discovering some truths that he didn't anticipate.<br /><br />Written by Armistead Maupin (who co-wrote the screenplay with his former lover Terry Anderson and the film's novice director Stettner) and based on a true story about a fan's hoax found out has some Hitchcockian moments that run on full tilt like any good old fashioned pot-boiler does. 
It helps that Williams gives a stand-out, low-key performance as the conflicted good-hearted personality who genuinely wants to believe that his number one fan is in fact real and does love him (the one thing that has escaped his own reality) and has some unsettling dreadful moments with the creepy Collette whose one physical trait I will leave unmentioned but underlines the desperation of her character that can rattle you to the core.<br /><br />However the film runs out of gas and eventually becomes a bit repetitive and predictable despite a finely directed piece of hoodwink and mystery by Stettner, it pays to listen to your own inner voice: be careful of what you hope for.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 9:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (86%)\u001b[0m\n",
"\n",
"You know, Robin Williams, God bless him, is constantly shooting himself in the foot lately with all these dumb comedies he has done this decade (with perhaps the exception of \"Death To Smoochy\", which bombed when it came out but is now a cult classic). The dramas he has made lately have been fantastic, especially \"Insomnia\" and \"One Hour Photo\". \"The Night Listener\", despite mediocre reviews and a quick DVD \u001b[92mrelease\u001b[0m, is among his \u001b[92mbest\u001b[0m work, period.<br /><br />This is a very \u001b[92mchilling\u001b[0m story, even though it doesn't include a serial killer or anyone that physically dangerous for that matter. The concept of the film is based on an actual case of fraud that still has yet to be officially confirmed. In high school, I read an autobiography by a child named Anthony Godby Johnson, who suffered horrific abuse and eventually contracted AIDS as a result. I was moved by the story until I read reports online that Johnson may not actually exist. When I saw this movie, the confused feelings that Robin Williams so brilliantly portrayed resurfaced in my mind.<br /><br />Toni Collette probably gives her best dramatic performance too as the ultimately sociopathic \"caretaker\". Her role was a far cry from those she had in movies like \"Little Miss Sunshine\". There were even times she looked into the camera where I thought she was staring right at me. It takes a good actress to play that sort of role, and it's this understated (yet well reviewed) role that makes Toni Collette probably one of the best actresses of this generation not to have even been nominated for an Academy Award (as of 2008). It's \u001b[92mincredible\u001b[0m that there is at least one woman in this world who is like this, and it's scary too.<br /><br />This is a good, dark film that I \u001b[92mhighly\u001b[0m recommend. Be prepared to be unsettled, though, because this movie leaves you with a strange feeling at the end.\n",
"\n",
"You know, Robin Williams, God bless him, is constantly shooting himself in the foot lately with all these dumb comedies he has done this decade (with perhaps the exception of \"Death To Smoochy\", which bombed when it came out but is now a cult classic). The dramas he has made lately have been fantastic, especially \"Insomnia\" and \"One Hour Photo\". \"The Night Listener\", despite mediocre reviews and a quick DVD \u001b[91mreleasing\u001b[0m, is among his \u001b[91malright\u001b[0m work, period.<br /><br />This is a very \u001b[91mchilled\u001b[0m story, even though it doesn't include a serial killer or anyone that physically dangerous for that matter. The concept of the film is based on an actual case of fraud that still has yet to be officially confirmed. In high school, I read an autobiography by a child named Anthony Godby Johnson, who suffered horrific abuse and eventually contracted AIDS as a result. I was moved by the story until I read reports online that Johnson may not actually exist. When I saw this movie, the confused feelings that Robin Williams so brilliantly portrayed resurfaced in my mind.<br /><br />Toni Collette probably gives her best dramatic performance too as the ultimately sociopathic \"caretaker\". Her role was a far cry from those she had in movies like \"Little Miss Sunshine\". There were even times she looked into the camera where I thought she was staring right at me. It takes a good actress to play that sort of role, and it's this understated (yet well reviewed) role that makes Toni Collette probably one of the best actresses of this generation not to have even been nominated for an Academy Award (as of 2008). It's \u001b[91mimplausible\u001b[0m that there is at least one woman in this world who is like this, and it's scary too.<br /><br />This is a good, dark film that I \u001b[91mdramatically\u001b[0m recommend. Be prepared to be unsettled, though, because this movie leaves you with a strange feeling at the end.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 10:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (76%)\u001b[0m\n",
"\n",
"When I first read Armistead Maupins story I was taken in by the human drama displayed by Gabriel No one and those he cares about and loves. That being said, we have now been given the film version of an excellent story and are expected to see past the gloss of Hollywood...<br /><br />Writer Armistead Maupin and director Patrick Stettner have truly succeeded! <br /><br />With just the \u001b[92mright\u001b[0m amount of restraint Robin Williams \u001b[92mcaptures\u001b[0m the fragile essence of Gabriel and lets us see his struggle with issues of trust both in his personnel life(Jess) and the world around him(Donna).<br /><br />As we are introduced to the players in this drama we are reminded that nothing is ever as it seems and that the smallest event can change our lives irrevocably. The request to review a book written by a young man turns into a life changing event that helps Gabriel find the strength within himself to carry on and move forward.<br /><br />It's to bad that most people will avoid this film. I only say that because the average American will probably think \"Robin Williams in a serious role? That didn't work before!\" PLEASE GIVE THIS MOVIE A CHANCE! Robin Williams touches the darkness we all must find and go through in ourselves to be better people. Like his movie One Hour Photo he has stepped up as an actor and made another quality piece of art.<br /><br />Oh and before I forget, I believe Bobby Cannavale as Jess steals every scene he is in. He has the 1940's leading man looks and screen presence. It's this hacks opinion he could carry his own movie right now!!<br /><br />S~\n",
"\n",
"When I first read Armistead Maupins story I was taken in by the human drama displayed by Gabriel No one and those he cares about and loves. That being said, we have now been given the film version of an excellent story and are expected to see past the gloss of Hollywood...<br /><br />Writer Armistead Maupin and director Patrick Stettner have truly succeeded! <br /><br />With just the \u001b[91mcorrect\u001b[0m amount of restraint Robin Williams \u001b[91mcatches\u001b[0m the fragile essence of Gabriel and lets us see his struggle with issues of trust both in his personnel life(Jess) and the world around him(Donna).<br /><br />As we are introduced to the players in this drama we are reminded that nothing is ever as it seems and that the smallest event can change our lives irrevocably. The request to review a book written by a young man turns into a life changing event that helps Gabriel find the strength within himself to carry on and move forward.<br /><br />It's to bad that most people will avoid this film. I only say that because the average American will probably think \"Robin Williams in a serious role? That didn't work before!\" PLEASE GIVE THIS MOVIE A CHANCE! Robin Williams touches the darkness we all must find and go through in ourselves to be better people. Like his movie One Hour Photo he has stepped up as an actor and made another quality piece of art.<br /><br />Oh and before I forget, I believe Bobby Cannavale as Jess steals every scene he is in. He has the 1940's leading man looks and screen presence. It's this hacks opinion he could carry his own movie right now!!<br /><br />S~\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 11:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (66%)\u001b[0m\n",
"\n",
"I liked the film. Some of the action scenes were very interesting, tense and well done. I especially \u001b[92mliked\u001b[0m the opening scene which had a semi truck in it. A very tense action \u001b[92mscene\u001b[0m that seemed well done.<br /><br />Some of the transitional scenes were filmed in interesting ways such as time lapse photography, unusual colors, or interesting angles. Also the film is funny is several parts. I also \u001b[92mliked\u001b[0m how the evil guy was portrayed too. I'd give the film an 8 out of 10.\n",
"\n",
"I liked the film. Some of the action scenes were very interesting, tense and well done. I especially \u001b[91mprefer\u001b[0m the opening scene which had a semi truck in it. A very tense action \u001b[91mfilmmaking\u001b[0m that seemed well done.<br /><br />Some of the transitional scenes were filmed in interesting ways such as time lapse photography, unusual colors, or interesting angles. Also the film is funny is several parts. I also \u001b[91mprefer\u001b[0m how the evil guy was portrayed too. I'd give the film an 8 out of 10.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 12:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (77%)\u001b[0m\n",
"\n",
"There are many illnesses born in the mind of man which have been given life in modern times. Constant vigilance or accrued information in the realm of Pyschosis, have kept psychologists, counselors and psychiatrists busy with enough work to last them decades. Occasionally, some of these mental phenomenon are discover by those with no knowledge of their remedy or even of their existence. That is the premise of the film entitled \" The Night Listner.\" It tells the story of a popular radio host called Gabriel Noon (Robin Williams) who spends his evenings enthralling his audiences with \u001b[92mvivid\u001b[0m stories about Gay lifestyles. Perhaps its because his show is losing it's authentic veneer which causes Noon to admit he is no longer himself. Feeling abandoned by both his lover Jess (Bobby Cannavale) and his and \u001b[92mbest\u001b[0m friend (Joe Morton), he seeks shelter in his deepening despair and isolation. It is here, a mysterious voice in the night asks him for help. Noon needs to feel useful and \u001b[92mreaches\u001b[0m out to the desperate voice which belongs to a 14 year old boy called Peter (Rory Culkin). In reading the boy's harrowing manuscript which depicts the early life and sexual abuse at the hands of his brutal parents, Noon is captivated and wants to help. However, things are not what they seem and Noon soon finds himself en-wrapped in an elusive and bizarre tale torn right out of a medical nightmare. This movie is pure Robin Williams and were it not for Toni Collette who plays \u001b[92mDonna\u001b[0m D. Logand, Sandra Oh as Anna and John Cullum as pop, this might be comical. Instead, this may prove to be one of William's more serious performances. ***\n",
"\n",
"There are many illnesses born in the mind of man which have been given life in modern times. Constant vigilance or accrued information in the realm of Pyschosis, have kept psychologists, counselors and psychiatrists busy with enough work to last them decades. Occasionally, some of these mental phenomenon are discover by those with no knowledge of their remedy or even of their existence. That is the premise of the film entitled \" The Night Listner.\" It tells the story of a popular radio host called Gabriel Noon (Robin Williams) who spends his evenings enthralling his audiences with \u001b[91mloud\u001b[0m stories about Gay lifestyles. Perhaps its because his show is losing it's authentic veneer which causes Noon to admit he is no longer himself. Feeling abandoned by both his lover Jess (Bobby Cannavale) and his and \u001b[91mbestest\u001b[0m friend (Joe Morton), he seeks shelter in his deepening despair and isolation. It is here, a mysterious voice in the night asks him for help. Noon needs to feel useful and \u001b[91mreach\u001b[0m out to the desperate voice which belongs to a 14 year old boy called Peter (Rory Culkin). In reading the boy's harrowing manuscript which depicts the early life and sexual abuse at the hands of his brutal parents, Noon is captivated and wants to help. However, things are not what they seem and Noon soon finds himself en-wrapped in an elusive and bizarre tale torn right out of a medical nightmare. This movie is pure Robin Williams and were it not for Toni Collette who plays \u001b[91mFemales\u001b[0m D. Logand, Sandra Oh as Anna and John Cullum as pop, this might be comical. Instead, this may prove to be one of William's more serious performances. ***\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 13:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (98%)\u001b[0m\n",
"\n",
"I enjoyed The Night Listener very much. It's one of the better movies of the summer.<br /><br />Robin Williams gives one of his \u001b[92mbest\u001b[0m performances. In fact, the entire cast was very good. All played just the right notes for their characters - not too much and not too little. Sandra Oh adds a wonderful comic touch. Toni Collette is great as the Mom, and never goes over the top. Everyone is very believable.<br /><br />It's a short movie, just under an hour and a half. I noticed the general release version is nine minutes shorter than the Sundance version. I wonder if some of the more disturbing images were cut from the movie.<br /><br />The director told a story and did it in straightforward fashion, which is a \u001b[92mrefreshing\u001b[0m change from many directors these days who seem to think their job is to impress the audience rather than tell a story and tell it well.<br /><br />Do not be sucker punched by the previews and ads. It is not a Hitchcockian thriller. See The Night Listener because you want to see a good story told well. If you go expecting Hitchcock you will be disappointed.<br /><br />My only complaint with the movie was the ending. The director could have left a little more to the audience's imagination, but this is a minor quibble.\n",
"\n",
"I enjoyed The Night Listener very much. It's one of the better movies of the summer.<br /><br />Robin Williams gives one of his \u001b[91malright\u001b[0m performances. In fact, the entire cast was very good. All played just the right notes for their characters - not too much and not too little. Sandra Oh adds a wonderful comic touch. Toni Collette is great as the Mom, and never goes over the top. Everyone is very believable.<br /><br />It's a short movie, just under an hour and a half. I noticed the general release version is nine minutes shorter than the Sundance version. I wonder if some of the more disturbing images were cut from the movie.<br /><br />The director told a story and did it in straightforward fashion, which is a \u001b[91mbracing\u001b[0m change from many directors these days who seem to think their job is to impress the audience rather than tell a story and tell it well.<br /><br />Do not be sucker punched by the previews and ads. It is not a Hitchcockian thriller. See The Night Listener because you want to see a good story told well. If you go expecting Hitchcock you will be disappointed.<br /><br />My only complaint with the movie was the ending. The director could have left a little more to the audience's imagination, but this is a minor quibble.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 14:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (66%)\u001b[0m\n",
"\n",
"The Night Listener is probably not one of William's best roles, but he makes a very interesting character in a somewhat odd but very different movie. I can guarantee you that you have never seen this kind of \u001b[92mmovie\u001b[0m before. Some people maybe won't like the slow pacing of this movie, but I think it's the great plus of the movie. It is definitely one of the top movies that have come out the year 2006. It has a intriguing performance in a movie with a great content, dramatic feeling. This is no americanized movie. Neither is it a predictable movie. You just feel that it is a movie that has secrets which you have a hard time to determine what the \u001b[92moutcome\u001b[0m of it may be. This is no \u001b[92mexcellent\u001b[0m \u001b[92mmovie\u001b[0m that has everything, but hell, it's a damn good and very original movie.\n",
"\n",
"The Night Listener is probably not one of William's best roles, but he makes a very interesting character in a somewhat odd but very different movie. I can guarantee you that you have never seen this kind of \u001b[91mfilmmaking\u001b[0m before. Some people maybe won't like the slow pacing of this movie, but I think it's the great plus of the movie. It is definitely one of the top movies that have come out the year 2006. It has a intriguing performance in a movie with a great content, dramatic feeling. This is no americanized movie. Neither is it a predictable movie. You just feel that it is a movie that has secrets which you have a hard time to determine what the \u001b[91mconsequence\u001b[0m of it may be. This is no \u001b[91mnoteworthy\u001b[0m \u001b[91mfilmmaking\u001b[0m that has everything, but hell, it's a damn good and very original movie.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 15:\n",
"\u001b[92mPositive (98%)\u001b[0m --> \u001b[91mNegative (55%)\u001b[0m\n",
"\n",
"Like one of the previous commenters said, this had the foundations of a great movie but something happened on the way to delivery. Such a waste because Collette's performance was \u001b[92meerie\u001b[0m and Williams was believable. I just kept waiting for it to get better. I don't think it was bad editing or needed another director, it could have just been the film. It came across as a Canadian movie, something like the first few seasons of X-Files. Not cheap, just hokey. Also, it needed a little more suspense. Something that makes you jump off your seat. The movie reached that moment then faded away; kind of like a false climax. I can see how being too suspenseful would have taken away from the \"reality\" of the story but I thought that part was reached when Gabriel was in the hospital looking for the boy. This movie needs to have a Director's cut that tries to fix these problems.\n",
"\n",
"Like one of the previous commenters said, this had the foundations of a great movie but something happened on the way to delivery. Such a waste because Collette's performance was \u001b[91mominous\u001b[0m and Williams was believable. I just kept waiting for it to get better. I don't think it was bad editing or needed another director, it could have just been the film. It came across as a Canadian movie, something like the first few seasons of X-Files. Not cheap, just hokey. Also, it needed a little more suspense. Something that makes you jump off your seat. The movie reached that moment then faded away; kind of like a false climax. I can see how being too suspenseful would have taken away from the \"reality\" of the story but I thought that part was reached when Gabriel was in the hospital looking for the boy. This movie needs to have a Director's cut that tries to fix these problems.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 16:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (66%)\u001b[0m\n",
"\n",
"The Night Listener held my attention, with Robin Williams shining as a New York City radio host who becomes enamored with his \u001b[92mfriendship\u001b[0m with a 14 year old boy (Rory Culkin) who is very ill. \u001b[92mWilliams\u001b[0m has never met the boy in person, as they have only been in contact by talking on the telephone. However, Williams' ex-boyfriend (nice job from Bobby Cannavale) raises doubt about the boy, which prompts \u001b[92mWilliams\u001b[0m to arrange a meeting with him in person. What follows makes a permanent \u001b[92mimpact\u001b[0m on Williams in a way he does not expect. I will leave it at that. Toni \u001b[92mCollette\u001b[0m also stars.<br /><br />I enjoyed this film, with Toni \u001b[92mCollette\u001b[0m giving a memorable portrayal of Culkin's adoptive mother. Sandra Oh also starred as Williams' friend. The Night Listener is inspired by actual events, and it has a somber, almost creepy silence throughout. At times it is predictable, no thanks to some of the reviews I read before seeing the movie and just due to logic, but I \u001b[92mliked\u001b[0m it anyway. I enjoy \u001b[92mWilliams\u001b[0m in roles like this, more so than his \u001b[92mcomedic\u001b[0m characters so that was an added bonus for me. Recommended. 8/10\n",
"\n",
"The Night Listener held my attention, with Robin Williams shining as a New York City radio host who becomes enamored with his \u001b[91mamigo\u001b[0m with a 14 year old boy (Rory Culkin) who is very ill. \u001b[91mRoberts\u001b[0m has never met the boy in person, as they have only been in contact by talking on the telephone. However, Williams' ex-boyfriend (nice job from Bobby Cannavale) raises doubt about the boy, which prompts \u001b[91mWilliam\u001b[0m to arrange a meeting with him in person. What follows makes a permanent \u001b[91mconsequence\u001b[0m on Williams in a way he does not expect. I will leave it at that. Toni \u001b[91mColette\u001b[0m also stars.<br /><br />I enjoyed this film, with Toni \u001b[91mColette\u001b[0m giving a memorable portrayal of Culkin's adoptive mother. Sandra Oh also starred as Williams' friend. The Night Listener is inspired by actual events, and it has a somber, almost creepy silence throughout. At times it is predictable, no thanks to some of the reviews I read before seeing the movie and just due to logic, but I \u001b[91mloves\u001b[0m it anyway. I enjoy \u001b[91mWilliam\u001b[0m in roles like this, more so than his \u001b[91munfunny\u001b[0m characters so that was an added bonus for me. Recommended. 8/10\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 17:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (58%)\u001b[0m\n",
"\n",
"Popular radio storyteller Gabriel No one(Robin Williams,scraggy and speaking in hushed,hypnotic tones) becomes acquainted and friends with a fourteen-year-old boy from Wisconsin named Pete Logand(Rory Culkin),who has written a book detailing sexual abuse from his parents. To boot,Pete has AIDS and this compels Gabriel further still,since his partner Jess(Bobby Cannavale,good)happens to be a survivor of HIV himself. <br /><br />He also acquaints himself with Pete's guardian,a woman named Donna(Toni Collette,brilliant!)and when Gabriel decides he wants to meet and talk to the two of them in person and goes to Wisconsin,he discovers some secrets he was(naturally)not prepared to find.<br /><br />\u001b[92mBased\u001b[0m on \u001b[92mreal\u001b[0m events that happened to Armistead Maupin(who co-wrote the screenplay with Terry Anderson)and directed by Patrick Stetner,this film moves a lot faster(90 min.,maybe a few minutes longer)than one might think a movie of this genre would run. That's good in that it keeps the action and storyline lean and clear. It's bad in that it leaves various holes in the plot and doesn't sew-up any of the plot openings or back-story. I'd rather not go into any great detail except to say that,if you are not familiar with Mr.Maupin's works or his personal story,you feel a little bit out of the loop here. Still,the performances by Williams( I would've \u001b[92mloved\u001b[0m to heard more of his narration,personally),Collette,Cannavale,Culkin and much of the supporting cast(the Waitress at the restaurant Collete's Donna frequents \u001b[92mdoes\u001b[0m a \u001b[92mgreat\u001b[0m \u001b[92mjob\u001b[0m with what small part she has!)are top-notch and the mood established here--namely,the chilly,lonely dark exteriors of Wisconsin and New York--give a terrific framing for this story. It may have ends that don't tie together particularly well,but it's still a compelling enough story to stick with.\n",
"\n",
"Popular radio storyteller Gabriel No one(Robin Williams,scraggy and speaking in hushed,hypnotic tones) becomes acquainted and friends with a fourteen-year-old boy from Wisconsin named Pete Logand(Rory Culkin),who has written a book detailing sexual abuse from his parents. To boot,Pete has AIDS and this compels Gabriel further still,since his partner Jess(Bobby Cannavale,good)happens to be a survivor of HIV himself. <br /><br />He also acquaints himself with Pete's guardian,a woman named Donna(Toni Collette,brilliant!)and when Gabriel decides he wants to meet and talk to the two of them in person and goes to Wisconsin,he discovers some secrets he was(naturally)not prepared to find.<br /><br />\u001b[91mJustified\u001b[0m on \u001b[91mactual\u001b[0m events that happened to Armistead Maupin(who co-wrote the screenplay with Terry Anderson)and directed by Patrick Stetner,this film moves a lot faster(90 min.,maybe a few minutes longer)than one might think a movie of this genre would run. That's good in that it keeps the action and storyline lean and clear. It's bad in that it leaves various holes in the plot and doesn't sew-up any of the plot openings or back-story. I'd rather not go into any great detail except to say that,if you are not familiar with Mr.Maupin's works or his personal story,you feel a little bit out of the loop here. Still,the performances by Williams( I would've \u001b[91mbeloved\u001b[0m to heard more of his narration,personally),Collette,Cannavale,Culkin and much of the supporting cast(the Waitress at the restaurant Collete's Donna frequents \u001b[91mdoing\u001b[0m a \u001b[91mnoteworthy\u001b[0m \u001b[91mstaff\u001b[0m with what small part she has!)are top-notch and the mood established here--namely,the chilly,lonely dark exteriors of Wisconsin and New York--give a terrific framing for this story. It may have ends that don't tie together particularly well,but it's still a compelling enough story to stick with.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 18:\n",
"\u001b[92mPositive (99%)\u001b[0m --> \u001b[91mNegative (70%)\u001b[0m\n",
"\n",
"If there is one thing to recommend about this film is that it is intriguing. The premise certainly \u001b[92mdraws\u001b[0m the audience in because it is a mystery, and throughout the film there are hints that there is something dark lurking about. However, there is not much tension, and Williams' mild mannered portrayal doesn't do much to makes us relate to his obsession with the boy.<br /><br />Collete fares much better as the woman whose true nature and intentions are not very clear. The production felt rushed and holes are apparent. It certainly feels like a preview for a much more complete and better effort. The book is probably better.<br /><br />One thing is certain: Taupin must have written something truly good to have inspired at least one commendable effort.\n",
"\n",
"If there is one thing to recommend about this film is that it is intriguing. The premise certainly \u001b[91mrelies\u001b[0m the audience in because it is a mystery, and throughout the film there are hints that there is something dark lurking about. However, there is not much tension, and Williams' mild mannered portrayal doesn't do much to makes us relate to his obsession with the boy.<br /><br />Collete fares much better as the woman whose true nature and intentions are not very clear. The production felt rushed and holes are apparent. It certainly feels like a preview for a much more complete and better effort. The book is probably better.<br /><br />One thing is certain: Taupin must have written something truly good to have inspired at least one commendable effort.\n",
"\n",
"****************************************\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/p/qdata/jm8wx/research/text_attacks/textattack/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:148: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" transformed_embeddings = torch.tensor(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result 19:\n",
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (69%)\u001b[0m\n",
"\n",
"I absolutely \u001b[92mLOVED\u001b[0m this \u001b[92mfilm\u001b[0m! I do not at all relate to all the other comments I have read about it. I was COMPLETELY enthralled through every second! <br /><br />I found the story gripping, the acting intense, and the direction spot-on. I would literally jump every time the phone would ring close to the end of the movie. Even though there was nothing \"scary\" about the story itself, I was soundly on edge through the whole movie - and for the rest of my evening. <br /><br />I found that there were so many \u001b[92mperfect\u001b[0m choices made...the casting, the script, the little bits of humor sprinkled in it. There were so many points where the film could've gone for the cheap thrill, but it never did, and that for me put this movie above so many of the mediocre \u001b[92mthrillers\u001b[0m that have come out lately...and for the last number of years.\n",
"\n",
"I absolutely \u001b[91mDEAR\u001b[0m this \u001b[91mfilmmaking\u001b[0m! I do not at all relate to all the other comments I have read about it. I was COMPLETELY enthralled through every second! <br /><br />I found the story gripping, the acting intense, and the direction spot-on. I would literally jump every time the phone would ring close to the end of the movie. Even though there was nothing \"scary\" about the story itself, I was soundly on edge through the whole movie - and for the rest of my evening. <br /><br />I found that there were so many \u001b[91mexemplary\u001b[0m choices made...the casting, the script, the little bits of humor sprinkled in it. There were so many points where the film could've gone for the cheap thrill, but it never did, and that for me put this movie above so many of the mediocre \u001b[91mthriller\u001b[0m that have come out lately...and for the last number of years.\n",
"\n",
"****************************************\n",
"\n"
]
}
],
"source": [
"from textattack.datasets import HuggingFaceNlpDataset\n",
"from textattack.attack_recipes import TextFoolerJin2019\n",
"\n",
"dataset = HuggingFaceNlpDataset(\"imdb\", None, \"train\")\n",
"attack = TextFoolerJin2019(model_wrapper)\n",
"\n",
"results = attack.attack_dataset(dataset, indices=range(20))\n",
"for idx, result in enumerate(results):\n",
" print(f'Result {idx}:')\n",
" print(result.__str__(color_method='ansi'))\n",
" print('\\n' + ('*' * 40) + '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conclusion\n",
"\n",
"We were able to train a model on the IMDB dataset using `sklearn` and use it in TextAttack by initializing with the `SklearnModelWrapper`. It's that simple!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "torch",
"language": "python",
"name": "build_central"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -1,15 +0,0 @@
=================
Datasets
=================
.. automodule:: textattack.datasets.dataset
:members:
:private-members:
.. automodule:: textattack.datasets.huggingface_nlp_dataset
:members:
.. automodule:: textattack.datasets.translation.ted_multi
:members:

View File

@@ -1,55 +1,39 @@
Models
===============
TextAttack provides different pre-trained models for testing NLP attacks.
User-specified models
=========================
We split models up into two broad categories:
TextAttack allows users to provide their own models for testing. Models can be loaded in three ways:

1. ``--model`` for pre-trained models and models trained with TextAttack
2. ``--model-from-huggingface`` which will attempt to load any model from the `HuggingFace model hub <https://huggingface.co/models>`_
3. ``--model-from-file`` which will dynamically load a Python file and look for the ``model`` variable
- **Classification**: models that output probability scores for some number of classes. These include models for sentiment classification, topic classification, and entailment.
- **Text-to-text**: models that output a sequence of text. These include models that do translation and summarization.
Model Wrappers
*************************
TextAttack can attack any model that takes a list of strings as input and outputs a list of predictions. To help your model conform to this API, we've provided the ``textattack.models.wrappers.ModelWrapper`` abstract class.
**Classification models:**
:ref:`BERT`: ``bert-base-uncased`` fine-tuned on various datasets using ``transformers``.
:ref:`LSTM`: a standard LSTM fine-tuned on various datasets.
:ref:`CNN`: a Word-CNN fine-tuned on various datasets.
**Text-to-text models:**
:ref:`T5`: ``T5`` fine-tuned on various datasets using ``transformers``.
.. _BERT:
BERT
********
.. automodule:: textattack.models.helpers.bert_for_classification
.. automodule:: textattack.models.wrappers.model_wrapper
:members:
.. _LSTM:
We've also provided implementations of model wrappers for common patterns in some popular machine learning frameworks:
LSTM
*******
.. automodule:: textattack.models.helpers.lstm_for_classification
.. automodule:: textattack.models.wrappers.pytorch_model_wrapper
:members:
.. automodule:: textattack.models.wrappers.tensorflow_model_wrapper
:members:
.. automodule:: textattack.models.wrappers.sklearn_model_wrapper
:members:
.. automodule:: textattack.models.wrappers.huggingface_model_wrapper
:members:
.. _CNN:
Pre-trained models
=====================
Word-CNN
************
.. automodule:: textattack.models.helpers.word_cnn_for_classification
:members:
TextAttack also provides lots of pre-trained models for common tasks. Testing different attacks on the same model ensures attack comparisons are fair.
.. _T5:
T5
*****************
.. automodule:: textattack.models.helpers.t5_for_text_to_text
:members:
Any of these models can be provided to ``textattack attack`` via ``--model``, for example, ``--model bert-base-uncased-mr``. For a full list of pre-trained models, see the `pre-trained models README <https://github.com/QData/TextAttack/tree/master/textattack/models>`_.

View File

@@ -1,15 +0,0 @@
===========
Tokenizers
===========
.. automodule:: textattack.models.tokenizers.auto_tokenizer
:members:
.. automodule:: textattack.models.tokenizers.glove_tokenizer
:members:
.. automodule:: textattack.models.tokenizers.t5_tokenizer
:members:
.. automodule:: textattack.models.tokenizers.bert_tokenizer
:members:

View File

@@ -62,13 +62,13 @@ TextAttack has some other features that make it a pleasure to use:
augmentation/augmenter
.. toctree::
:maxdepth: 3
:maxdepth: 1
:hidden:
:caption: Models, Datasets and Tokenizers
:caption: Models and Tokenizers
datasets_models/models
datasets_models/datasets
datasets_models/tokenizers
Example: Attacking TensorFlow models <datasets_models/Example_0_tensorflow>
Example: Attacking scikit-learn models <datasets_models/Example_1_sklearn.ipynb>
.. toctree::
:maxdepth: 3

View File

@@ -1,19 +1,27 @@
import pickle
import pandas as pd
import textattack
from .model_wrapper import ModelWrapper
class SklearnModelWrapper(ModelWrapper):
"""Loads an sklearn model and tokenizer."""
"""Loads a scikit-learn model and tokenizer (tokenizer implements
`transform` and model implements `predict_proba`).
May need to be extended and modified for different types of
tokenizers.
"""
def __init__(self, model, tokenizer):
raise NotImplementedError()
self.model = model.to(textattack.shared.utils.device)
self.model = model
self.tokenizer = tokenizer
def tokenize(self, text_input_list):
raise NotImplementedError()
def __call__(self, text_input_list):
raise NotImplementedError()
encoded_text_matrix = self.tokenizer.transform(text_input_list).toarray()
tokenized_text_df = pd.DataFrame(
encoded_text_matrix, columns=self.tokenizer.get_feature_names()
)
return self.model.predict_proba(tokenized_text_df)

View File

@@ -1,19 +1,23 @@
import textattack
import numpy as np
from .model_wrapper import ModelWrapper
class TensorFlowModelWrapper(ModelWrapper):
"""Loads a TensorFlow model and tokenizer."""
"""Loads a TensorFlow model and tokenizer.
def __init__(self, model, tokenizer):
raise NotImplementedError()
TensorFlow models can use many different architectures and
tokenization strategies. This assumes that the model takes an
np.array of strings as input and returns a tf.Tensor of outputs, as
is typical with Keras modules. You may need to subclass this for
models that have dedicated tokenizers or otherwise take input
differently.
"""
self.model = model.to(textattack.shared.utils.device)
self.tokenizer = tokenizer
def tokenize(self, text_input_list):
raise NotImplementedError()
def __init__(self, model):
self.model = model
def __call__(self, text_input_list):
raise NotImplementedError()
text_array = np.array(text_input_list)
preds = self.model(text_array)
return preds.numpy()

View File

@@ -165,15 +165,9 @@ class Attack:
original_text: The original ``AttackedText`` from which the attack started.
"""
# Remove any occurrences of current_text in transformed_texts
original_num_texts = len(transformed_texts)
transformed_texts = [
t for t in transformed_texts if t.text != current_text.text
]
if len(transformed_texts) < original_num_texts:
# If this happened, warn the user
utils.logger.warn(
"Warning: transformation returned text with no changes. Skipping."
)
# Populate cache with transformed_texts
uncached_texts = []
for transformed_text in transformed_texts:
@@ -239,25 +233,25 @@ class Attack:
i = indices.popleft()
try:
text_input, ground_truth_output = dataset[i]
try:
# get label names from dataset, if possible
label_names = dataset.label_names
except AttributeError:
label_names = None
attacked_text = AttackedText(
text_input, attack_attrs={"label_names": label_names}
)
goal_function_result, _ = self.goal_function.init_attack_example(
attacked_text, ground_truth_output
)
yield goal_function_result
except IndexError:
utils.logger.warn(
f"Dataset has {len(dataset)} samples but tried to access index {i}. Ending attack early."
)
break
try:
# get label names from dataset, if possible
label_names = dataset.label_names
except AttributeError:
label_names = None
attacked_text = AttackedText(
text_input, attack_attrs={"label_names": label_names}
)
goal_function_result, _ = self.goal_function.init_attack_example(
attacked_text, ground_truth_output
)
yield goal_function_result
def attack_dataset(self, dataset, indices=None):
"""Runs an attack on the given dataset and outputs the results to the
console and the output file.