mirror of
https://github.com/fchollet/deep-learning-with-python-notebooks.git
synced 2021-07-27 01:28:40 +03:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"# Deep learning for text"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Natural Language Processing: the bird's eye view"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Preparing text data"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Text standardization"
]
},
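{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"*Added example (not from the book):* a minimal sketch of the standardization step described in this section, i.e. lowercasing and stripping punctuation. The helper name `naive_standardize` is hypothetical."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import string\n",
"\n",
"def naive_standardize(text):\n",
"    # Hypothetical helper (not from the book): lowercase the text and strip punctuation.\n",
"    text = text.lower()\n",
"    return \"\".join(char for char in text if char not in string.punctuation)\n",
"\n",
"print(naive_standardize(\"I write, erase, rewrite!\"))"
]
},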
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Text splitting (tokenization)"
]
},
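{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"*Added example (not from the book):* a minimal sketch of word-level tokenization by splitting on whitespace, applied to text that has already been standardized. The helper name `whitespace_tokenize` is hypothetical."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"def whitespace_tokenize(text):\n",
"    # Hypothetical helper (not from the book): word-level tokenization on whitespace.\n",
"    return text.split()\n",
"\n",
"print(whitespace_tokenize(\"i write erase rewrite\"))"
]
},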
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Vocabulary indexing"
]
},
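{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"*Added example (not from the book):* a minimal sketch of vocabulary indexing, mapping each token to an integer and reserving index 0 for padding and index 1 for out-of-vocabulary tokens. The token lists are toy examples."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Hypothetical sketch (not from the book): build a token-to-index mapping,\n",
"# reserving 0 for padding and 1 for out-of-vocabulary ([UNK]) tokens.\n",
"vocabulary = {\"\": 0, \"[UNK]\": 1}\n",
"for token in [\"i\", \"write\", \"erase\", \"rewrite\"]:\n",
"    if token not in vocabulary:\n",
"        vocabulary[token] = len(vocabulary)\n",
"print(vocabulary)\n",
"print([vocabulary.get(token, 1) for token in [\"i\", \"erase\", \"again\"]])"
]
},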
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Using the `TextVectorization` layer"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import string\n",
"\n",
"class Vectorizer:\n",
"    def standardize(self, text):\n",
"        text = text.lower()\n",
"        return \"\".join(char for char in text if char not in string.punctuation)\n",
"\n",
"    def tokenize(self, text):\n",
"        text = self.standardize(text)\n",
"        return text.split()\n",
"\n",
"    def make_vocabulary(self, dataset):\n",
"        self.vocabulary = {\"\": 0, \"[UNK]\": 1}\n",
"        for text in dataset:\n",
"            text = self.standardize(text)\n",
"            tokens = self.tokenize(text)\n",
"            for token in tokens:\n",
"                if token not in self.vocabulary:\n",
"                    self.vocabulary[token] = len(self.vocabulary)\n",
"        self.inverse_vocabulary = dict(\n",
"            (v, k) for k, v in self.vocabulary.items())\n",
"\n",
"    def encode(self, text):\n",
"        text = self.standardize(text)\n",
"        tokens = self.tokenize(text)\n",
"        return [self.vocabulary.get(token, 1) for token in tokens]\n",
"\n",
"    def decode(self, int_sequence):\n",
"        return \" \".join(\n",
"            self.inverse_vocabulary.get(i, \"[UNK]\") for i in int_sequence)\n",
"\n",
"vectorizer = Vectorizer()\n",
"dataset = [\n",
"    \"I write, erase, rewrite\",\n",
"    \"Erase again, and then\",\n",
"    \"A poppy blooms.\",\n",
"]\n",
"vectorizer.make_vocabulary(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"test_sentence = \"I write, rewrite, and still rewrite again\"\n",
"encoded_sentence = vectorizer.encode(test_sentence)\n",
"print(encoded_sentence)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"decoded_sentence = vectorizer.decode(encoded_sentence)\n",
"print(decoded_sentence)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow.keras.layers.experimental.preprocessing import TextVectorization\n",
"text_vectorization = TextVectorization(\n",
"    output_mode=\"int\",\n",
")"
]
},
|
|
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import re\n",
"import string\n",
"import tensorflow as tf\n",
"\n",
"def custom_standardization_fn(string_tensor):\n",
"    lowercase_string = tf.strings.lower(string_tensor)\n",
"    return tf.strings.regex_replace(\n",
"        lowercase_string, f\"[{re.escape(string.punctuation)}]\", \"\")\n",
"\n",
"def custom_split_fn(string_tensor):\n",
"    return tf.strings.split(string_tensor)\n",
"\n",
"text_vectorization = TextVectorization(\n",
"    output_mode=\"int\",\n",
"    standardize=custom_standardization_fn,\n",
"    split=custom_split_fn,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"dataset = [\n",
"    \"I write, erase, rewrite\",\n",
"    \"Erase again, and then\",\n",
"    \"A poppy blooms.\",\n",
"]\n",
"text_vectorization.adapt(dataset)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Displaying the vocabulary**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization.get_vocabulary()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"vocabulary = text_vectorization.get_vocabulary()\n",
"test_sentence = \"I write, rewrite, and still rewrite again\"\n",
"encoded_sentence = text_vectorization(test_sentence)\n",
"print(encoded_sentence)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"inverse_vocab = dict(enumerate(vocabulary))\n",
"decoded_sentence = \" \".join(inverse_vocab[int(i)] for i in encoded_sentence)\n",
"print(decoded_sentence)"
]
},
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Two approaches for representing groups of words: sets and sequences"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Preparing the IMDB movie reviews data"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n",
"!tar -xf aclImdb_v1.tar.gz"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!rm -r aclImdb/train/unsup"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!cat aclImdb/train/pos/4077_10.txt"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import os, pathlib, shutil, random\n",
"\n",
"base_dir = pathlib.Path(\"aclImdb\")\n",
"val_dir = base_dir / \"val\"\n",
"train_dir = base_dir / \"train\"\n",
"for category in (\"neg\", \"pos\"):\n",
"    os.makedirs(val_dir / category)\n",
"    files = os.listdir(train_dir / category)\n",
"    random.Random(1337).shuffle(files)\n",
"    num_val_samples = int(0.2 * len(files))\n",
"    val_files = files[-num_val_samples:]\n",
"    for fname in val_files:\n",
"        shutil.move(train_dir / category / fname,\n",
"                    val_dir / category / fname)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"batch_size = 32\n",
"\n",
"train_ds = keras.preprocessing.text_dataset_from_directory(\n",
"    \"aclImdb/train\", batch_size=batch_size\n",
")\n",
"val_ds = keras.preprocessing.text_dataset_from_directory(\n",
"    \"aclImdb/val\", batch_size=batch_size\n",
")\n",
"test_ds = keras.preprocessing.text_dataset_from_directory(\n",
"    \"aclImdb/test\", batch_size=batch_size\n",
")"
]
},
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Displaying the shapes and dtypes of the first batch**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"for inputs, targets in train_ds:\n",
"    print(\"inputs.shape:\", inputs.shape)\n",
"    print(\"inputs.dtype:\", inputs.dtype)\n",
"    print(\"targets.shape:\", targets.shape)\n",
"    print(\"targets.dtype:\", targets.dtype)\n",
"    print(\"inputs[0]:\", inputs[0])\n",
"    print(\"targets[0]:\", targets[0])\n",
"    break"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Processing words as a set: the bag-of-words approach"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"#### Single words (unigrams) with binary encoding"
]
},
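{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"*Added example (not from the book):* a minimal sketch of what binary (multi-hot) unigram encoding produces, using a toy vocabulary and a toy tokenized review; each position records the presence of a word, discarding counts and order."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Hypothetical sketch (not from the book): multi-hot encoding of one tokenized review\n",
"# against a toy vocabulary; 1.0 marks presence, regardless of how often a word occurs.\n",
"toy_vocabulary = [\"[UNK]\", \"the\", \"movie\", \"was\", \"great\", \"terrible\"]\n",
"tokens = [\"the\", \"movie\", \"was\", \"great\", \"great\"]\n",
"multi_hot = [1.0 if word in tokens else 0.0 for word in toy_vocabulary]\n",
"print(multi_hot)"
]
},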
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Preprocessing our datasets with a `TextVectorization` layer**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization = TextVectorization(\n",
"    max_tokens=20000,\n",
"    output_mode=\"binary\",\n",
")\n",
"text_only_train_ds = train_ds.map(lambda x, y: x)\n",
"text_vectorization.adapt(text_only_train_ds)\n",
"\n",
"binary_1gram_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"binary_1gram_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"binary_1gram_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Inspecting the output of our binary unigram dataset**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"for inputs, targets in binary_1gram_train_ds:\n",
"    print(\"inputs.shape:\", inputs.shape)\n",
"    print(\"inputs.dtype:\", inputs.dtype)\n",
"    print(\"targets.shape:\", targets.shape)\n",
"    print(\"targets.dtype:\", targets.dtype)\n",
"    print(\"inputs[0]:\", inputs[0])\n",
"    print(\"targets[0]:\", targets[0])\n",
"    break"
]
},
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Our model-building utility**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"\n",
"def get_model(max_tokens=20000, hidden_dim=16):\n",
"    inputs = keras.Input(shape=(max_tokens,))\n",
"    x = layers.Dense(hidden_dim, activation=\"relu\")(inputs)\n",
"    x = layers.Dropout(0.5)(x)\n",
"    outputs = layers.Dense(1, activation=\"sigmoid\")(x)\n",
"    model = keras.Model(inputs, outputs)\n",
"    model.compile(optimizer=\"rmsprop\",\n",
"                  loss=\"binary_crossentropy\",\n",
"                  metrics=[\"accuracy\"])\n",
"    return model"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Training and testing the binary unigram model**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model = get_model()\n",
"model.summary()\n",
"callbacks = [\n",
"    keras.callbacks.ModelCheckpoint(\"binary_1gram.keras\",\n",
"                                    save_best_only=True)\n",
"]\n",
"model.fit(binary_1gram_train_ds.cache(),\n",
"          validation_data=binary_1gram_val_ds.cache(),\n",
"          epochs=10,\n",
"          callbacks=callbacks)\n",
"model = keras.models.load_model(\"binary_1gram.keras\")\n",
"print(f\"Test acc: {model.evaluate(binary_1gram_test_ds)[1]:.3f}\")"
]
},
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"#### Bigrams with binary encoding"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Configuring the `TextVectorization` layer to return bigrams**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization = TextVectorization(\n",
"    ngrams=2,\n",
"    max_tokens=20000,\n",
"    output_mode=\"binary\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Training and testing the binary bigram model**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization.adapt(text_only_train_ds)\n",
"binary_2gram_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"binary_2gram_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"binary_2gram_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"\n",
"model = get_model()\n",
"model.summary()\n",
"callbacks = [\n",
"    keras.callbacks.ModelCheckpoint(\"binary_2gram.keras\",\n",
"                                    save_best_only=True)\n",
"]\n",
"model.fit(binary_2gram_train_ds.cache(),\n",
"          validation_data=binary_2gram_val_ds.cache(),\n",
"          epochs=10,\n",
"          callbacks=callbacks)\n",
"model = keras.models.load_model(\"binary_2gram.keras\")\n",
"print(f\"Test acc: {model.evaluate(binary_2gram_test_ds)[1]:.3f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"#### Bigrams with TF-IDF encoding"
]
},
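{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"*Added example (not from the book):* a minimal sketch of classic TF-IDF weighting for a single term, on a toy tokenized dataset. Keras' `TextVectorization` applies its own smoothed IDF formula, so the exact values it produces will differ; the point is only that terms appearing in every document get down-weighted relative to rarer ones."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import math\n",
"\n",
"# Hypothetical sketch (not from the book): classic TF-IDF for one term in one document.\n",
"# Keras' TextVectorization uses its own smoothed IDF, so exact values will differ.\n",
"def tf_idf(term, document, dataset):\n",
"    term_freq = document.count(term)\n",
"    doc_freq = sum(1 for doc in dataset if term in doc)\n",
"    return term_freq * math.log(1 + len(dataset) / doc_freq)\n",
"\n",
"docs = [[\"the\", \"movie\", \"was\", \"great\"],\n",
"        [\"the\", \"movie\", \"was\", \"terrible\"],\n",
"        [\"the\", \"cast\", \"was\", \"great\"]]\n",
"print(tf_idf(\"great\", docs[0], docs))\n",
"print(tf_idf(\"the\", docs[0], docs))"
]
},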
|
|
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Configuring the `TextVectorization` layer to return token counts**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization = TextVectorization(\n",
"    ngrams=2,\n",
"    max_tokens=20000,\n",
"    output_mode=\"count\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Configuring the `TextVectorization` layer to return TF-IDF-weighted outputs**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization = TextVectorization(\n",
"    ngrams=2,\n",
"    max_tokens=20000,\n",
"    output_mode=\"tf-idf\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Training and testing the TF-IDF bigram model**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"text_vectorization.adapt(text_only_train_ds)\n",
"\n",
"tfidf_2gram_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"tfidf_2gram_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"tfidf_2gram_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y))\n",
"\n",
"model = get_model()\n",
"model.summary()\n",
"callbacks = [\n",
"    keras.callbacks.ModelCheckpoint(\"tfidf_2gram.keras\",\n",
"                                    save_best_only=True)\n",
"]\n",
"model.fit(tfidf_2gram_train_ds.cache(),\n",
"          validation_data=tfidf_2gram_val_ds.cache(),\n",
"          epochs=10,\n",
"          callbacks=callbacks)\n",
"model = keras.models.load_model(\"tfidf_2gram.keras\")\n",
"print(f\"Test acc: {model.evaluate(tfidf_2gram_test_ds)[1]:.3f}\")"
]
},
|
|
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"inputs = keras.Input(shape=(1,), dtype=\"string\")\n",
"processed_inputs = text_vectorization(inputs)\n",
"outputs = model(processed_inputs)\n",
"inference_model = keras.Model(inputs, outputs)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"raw_text_data = tf.convert_to_tensor([\n",
"    [\"That was an excellent movie, I loved it.\"],\n",
"])\n",
"predictions = inference_model(raw_text_data)\n",
"print(f\"{float(predictions[0] * 100):.2f} percent positive\")"
]
}
|
|
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "chapter11_part01_introduction.i",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}