Created using Colaboratory

2021-10-04 01:29:04 +03:00 · 2019-03-18 16:59:19 -05:00
parent 89259e85a5
commit 802238fc26
1 changed files with 21 additions and 11 deletions
--- a/keras_tokenizer_fix.ipynb
+++ b/keras_tokenizer_fix.ipynb
@@ -6,8 +6,14 @@
      "name": "keras_tokenizer_fix.ipynb",
      "version": "0.3.2",
      "provenance": [],
-      "collapsed_sections": []
-    }
+      "collapsed_sections": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
  },
  "cells": [
    {
@@ -17,7 +23,7 @@
        "colab_type": "text"
      },
      "source": [
-        "[View in Colaboratory](https://colab.research.google.com/github/gmihaila/deep_learning_toolbox/blob/master/keras_tokenizer_fix.ipynb)"
+        "<a href=\"https://colab.research.google.com/github/gmihaila/machine_learning_toolbox/blob/master/keras_tokenizer_fix.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
@@ -36,14 +42,18 @@
      "metadata": {
        "id": "GSmIeoaP7mEE",
        "colab_type": "code",
+        "outputId": "1849ed29-8a68-4166-dcd6-89bfac5ea498",
        "colab": {
          "base_uri": "https://localhost:8080/",
-          "height": 212
-        },
-        "outputId": "74d87a5e-5e93-4e34-c5d4-cf2601bf782d"
+          "height": 215
+        }
      },
      "cell_type": "code",
      "source": [
+        "from keras.preprocessing.text import Tokenizer\n",
+        "import string\n",
+        "\n",
+        "\n",
        "n_words = 3\n",
        "\n",
        "\n",
@@ -71,7 +81,7 @@
        "print(tk.word_index)\n",
        "print('%s  <-RIGHT!'%tk.texts_to_sequences(texts))\n"
      ],
-      "execution_count": 18,
+      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
@@ -81,12 +91,12 @@
            "Only use top 3 words\n",
            "\n",
            "\n",
-            "{'name': 1, 'far': 4, 'is': 2, 'asdasd': 6, 'faraway': 5, 'my': 3, 'your': 7, '<unk>': 8}\n",
-            "[[3, 1, 2], [3, 1, 2], [1, 2]]  <-WRONG!\n",
+            "{'<unk>': 1, 'name': 2, 'is': 3, 'my': 4, 'far': 5, 'faraway': 6, 'asdasd': 7, 'your': 8}\n",
+            "[[1, 2, 3, 1, 1, 1], [1, 2, 3], [1, 2, 3]]  <-WRONG!\n",
            "\n",
            "\n",
-            "{'is': 2, 'my': 3, 'name': 1, '<unk>': 4}\n",
-            "[[3, 1, 2, 4, 4, 4], [3, 1, 2], [4, 1, 2]]  <-RIGHT!\n"
+            "{'<unk>': 4, 'name': 2, 'is': 3}\n",
+            "[[4, 2, 3, 4, 4, 4], [4, 2, 3], [4, 2, 3]]  <-RIGHT!\n"
          ],
          "name": "stdout"
        }