{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Interpreting what convnets learn"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Visualizing intermediate activations"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# You can use this to load the file \"convnet_from_scratch_with_augmentation.keras\"\n",
"# you obtained in the last chapter.\n",
"from google.colab import files\n",
"files.upload()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"model = keras.models.load_model(\"convnet_from_scratch_with_augmentation.keras\")\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Preprocessing a single image**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"import numpy as np\n",
"\n",
"img_path = keras.utils.get_file(\n",
" fname=\"cat.jpg\",\n",
" origin=\"https://img-datasets.s3.amazonaws.com/cat.jpg\")\n",
"\n",
"def get_img_array(img_path, target_size):\n",
" img = keras.preprocessing.image.load_img(\n",
" img_path, target_size=target_size)\n",
" array = keras.preprocessing.image.img_to_array(img)\n",
" array = np.expand_dims(array, axis=0)\n",
" return array\n",
"\n",
"img_tensor = get_img_array(img_path, target_size=(180, 180))"
]
},
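{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"A quick sanity check (an addition, not in the book): `get_img_array` adds a batch dimension, so the tensor should have shape `(1, 180, 180, 3)`."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional check: confirm the batch dimension was added\n",
"print(img_tensor.shape)"
]
},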
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Displaying the test picture**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.axis(\"off\")\n",
"plt.imshow(img_tensor[0].astype(\"uint8\"))\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Instantiating a model that returns layer activations**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from tensorflow.keras import layers\n",
"\n",
"layer_outputs = []\n",
"layer_names = []\n",
"for layer in model.layers:\n",
" if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D)):\n",
" layer_outputs.append(layer.output)\n",
" layer_names.append(layer.name)\n",
"activation_model = keras.Model(inputs=model.input, outputs=layer_outputs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Using the model to compute layer activations**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"activations = activation_model.predict(img_tensor)"
]
},
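{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"As an optional check (not in the book), you can list each recorded layer next to the shape of its activation; moving deeper, the spatial size shrinks while the channel count grows."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional: one activation per recorded Conv2D/MaxPooling2D layer, in order\n",
"for name, layer_activation in zip(layer_names, activations):\n",
"    print(name, layer_activation.shape)"
]
},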
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"first_layer_activation = activations[0]\n",
"print(first_layer_activation.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Visualizing the fifth channel**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.matshow(first_layer_activation[0, :, :, 5], cmap=\"viridis\")"
]
},
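{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"For contrast, an optional extra plot (not in the book): the same channel index in the deepest recorded activation responds to much more abstract patterns. Any index below that layer's channel count would work here."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional: channel 5 of the deepest recorded activation, for comparison\n",
"plt.matshow(activations[-1][0, :, :, 5], cmap=\"viridis\")"
]
},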
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Visualizing every channel in every intermediate activation**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"images_per_row = 16\n",
"for layer_name, layer_activation in zip(layer_names, activations):\n",
" n_features = layer_activation.shape[-1]\n",
" size = layer_activation.shape[1]\n",
" n_cols = n_features // images_per_row\n",
" display_grid = np.zeros(((size + 1) * n_cols - 1,\n",
" images_per_row * (size + 1) - 1))\n",
" for col in range(n_cols):\n",
" for row in range(images_per_row):\n",
" channel_index = col * images_per_row + row\n",
" channel_image = layer_activation[0, :, :, channel_index].copy()\n",
" if channel_image.sum() != 0:\n",
" channel_image -= channel_image.mean()\n",
" channel_image /= channel_image.std()\n",
" channel_image *= 64\n",
" channel_image += 128\n",
" channel_image = np.clip(channel_image, 0, 255).astype(\"uint8\")\n",
" display_grid[\n",
" col * (size + 1): (col + 1) * size + col,\n",
" row * (size + 1) : (row + 1) * size + row] = channel_image\n",
" scale = 1. / size\n",
" plt.figure(figsize=(scale * display_grid.shape[1],\n",
" scale * display_grid.shape[0]))\n",
" plt.title(layer_name)\n",
" plt.grid(False)\n",
" plt.axis(\"off\")\n",
" plt.imshow(display_grid, aspect=\"auto\", cmap=\"viridis\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Visualizing convnet filters"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Instantiating the Xception convolutional base**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model = keras.applications.xception.Xception(\n",
" weights=\"imagenet\",\n",
" include_top=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Printing the names of all convolutional layers in Xception**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"for layer in model.layers:\n",
" if isinstance(layer, (keras.layers.Conv2D, keras.layers.SeparableConv2D)):\n",
" print(layer.name)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Creating a \"feature extractor\" model that returns the output of a specific layer**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"layer_name = \"block3_sepconv1\"\n",
"layer = model.get_layer(name=layer_name)\n",
"feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Using the feature extractor**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"activation = feature_extractor(\n",
" keras.applications.xception.preprocess_input(img_tensor)\n",
")"
]
},
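{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"A small optional check (not in the book): the extractor returns the activation of `block3_sepconv1`, so the last axis should match that layer's filter count (the spatial size depends on the input image)."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional check, not in the book\n",
"print(activation.shape)"
]
},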
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"def compute_loss(image, filter_index):\n",
" activation = feature_extractor(image)\n",
" filter_activation = activation[:, 2:-2, 2:-2, filter_index]\n",
" return tf.reduce_mean(filter_activation)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Loss maximization via stochastic gradient ascent**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"@tf.function\n",
"def gradient_ascent_step(image, filter_index, learning_rate):\n",
" with tf.GradientTape() as tape:\n",
" tape.watch(image)\n",
" loss = compute_loss(image, filter_index)\n",
" grads = tape.gradient(loss, image)\n",
" grads = tf.math.l2_normalize(grads)\n",
" image += learning_rate * grads\n",
" return image"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Function to generate filter visualizations**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"img_width = 200\n",
"img_height = 200\n",
"\n",
"def generate_filter_pattern(filter_index):\n",
" iterations = 30\n",
" learning_rate = 10.\n",
" image = tf.random.uniform(\n",
" minval=0.4,\n",
" maxval=0.6,\n",
" shape=(1, img_width, img_height, 3))\n",
" for i in range(iterations):\n",
" image = gradient_ascent_step(image, filter_index, learning_rate)\n",
" return image[0].numpy()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Utility function to convert a tensor into a valid image**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"def deprocess_image(image):\n",
" image -= image.mean()\n",
" image /= image.std()\n",
" image *= 64\n",
" image += 128\n",
" image = np.clip(image, 0, 255).astype(\"uint8\")\n",
" image = image[25:-25, 25:-25, :]\n",
" return image"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"plt.axis(\"off\")\n",
"plt.imshow(deprocess_image(generate_filter_pattern(filter_index=2)))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Generating a grid of all filter response patterns in a layer**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"all_images = []\n",
"for filter_index in range(64):\n",
" print(f\"Processing filter {filter_index}\")\n",
" image = deprocess_image(\n",
" generate_filter_pattern(filter_index)\n",
" )\n",
" all_images.append(image)\n",
"\n",
"margin = 5\n",
"n = 8\n",
"cropped_width = img_width - 25 * 2\n",
"cropped_height = img_height - 25 * 2\n",
"width = n * cropped_width + (n - 1) * margin\n",
"height = n * cropped_height + (n - 1) * margin\n",
"stitched_filters = np.zeros((width, height, 3))\n",
"\n",
"for i in range(n):\n",
" for j in range(n):\n",
" image = all_images[i * n + j]\n",
" stitched_filters[\n",
" (cropped_width + margin) * i : (cropped_width + margin) * i + cropped_width,\n",
" (cropped_height + margin) * j : (cropped_height + margin) * j\n",
" + cropped_height,\n",
" :,\n",
" ] = image\n",
"\n",
"keras.preprocessing.image.save_img(\n",
" f\"filters_for_layer_{layer_name}.png\", stitched_filters)"
]
},
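{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"To inspect the stitched grid inline rather than opening the saved PNG, a small optional cell (the grid values are already in the 0-255 range):"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional: display the grid that was just saved to disk\n",
"plt.figure(figsize=(20, 20))\n",
"plt.axis(\"off\")\n",
"plt.imshow(stitched_filters.astype(\"uint8\"))"
]
},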
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"### Visualizing heatmaps of class activation"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Loading the Xception network with pretrained weights**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model = keras.applications.xception.Xception(weights=\"imagenet\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Preprocessing an input image for Xception**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"img_path = keras.utils.get_file(\n",
" fname=\"elephant.jpg\",\n",
" origin=\"https://img-datasets.s3.amazonaws.com/elephant.jpg\")\n",
"\n",
"def get_img_array(img_path, target_size):\n",
" img = keras.preprocessing.image.load_img(img_path, target_size=target_size)\n",
" array = keras.preprocessing.image.img_to_array(img)\n",
" array = np.expand_dims(array, axis=0)\n",
" array = keras.applications.xception.preprocess_input(array)\n",
" return array\n",
"\n",
"img_array = get_img_array(img_path, target_size=(299, 299))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"preds = model.predict(img_array)\n",
"print(keras.applications.xception.decode_predictions(preds, top=3)[0])"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"np.argmax(preds[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Setting up a model that returns the last convolutional output**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"last_conv_layer_name = \"block14_sepconv2_act\"\n",
"classifier_layer_names = [\n",
" \"avg_pool\",\n",
" \"predictions\",\n",
"]\n",
"last_conv_layer = model.get_layer(last_conv_layer_name)\n",
"last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)"
]
},
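{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"An optional check (not in the book): for a 299 × 299 input, `block14_sepconv2_act` outputs a 10 × 10 feature map with 2,048 channels."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional: the batch dimension shows as None until the model is called\n",
"print(last_conv_layer.output.shape)"
]
},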
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Setting up a model that goes from the last convolutional output to the final predictions**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])\n",
"x = classifier_input\n",
"for layer_name in classifier_layer_names:\n",
" x = model.get_layer(layer_name)(x)\n",
"classifier_model = keras.Model(classifier_input, x)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Retrieving the gradients of the top predicted class with regard to the last convolutional output**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"with tf.GradientTape() as tape:\n",
" last_conv_layer_output = last_conv_layer_model(img_array)\n",
" tape.watch(last_conv_layer_output)\n",
" preds = classifier_model(last_conv_layer_output)\n",
" top_pred_index = tf.argmax(preds[0])\n",
" top_class_channel = preds[:, top_pred_index]\n",
"\n",
"grads = tape.gradient(top_class_channel, last_conv_layer_output)"
]
},
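{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"Optionally, you can verify that the gradient tensor has the same shape as the convolutional output it was taken with regard to:"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Optional: one gradient value per position and channel of the conv output\n",
"print(grads.shape)"
]
},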
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Gradient pooling and channel importance weighting**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()\n",
"last_conv_layer_output = last_conv_layer_output.numpy()[0]\n",
"for i in range(pooled_grads.shape[-1]):\n",
" last_conv_layer_output[:, :, i] *= pooled_grads[i]\n",
"heatmap = np.mean(last_conv_layer_output, axis=-1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Heatmap post-processing**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"heatmap = np.maximum(heatmap, 0)\n",
"heatmap /= np.max(heatmap)\n",
"plt.matshow(heatmap)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"**Superimposing the heatmap with the original picture**"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import matplotlib.cm as cm\n",
"\n",
"img = keras.preprocessing.image.load_img(img_path)\n",
"img = keras.preprocessing.image.img_to_array(img)\n",
"\n",
"heatmap = np.uint8(255 * heatmap)\n",
"\n",
"jet = cm.get_cmap(\"jet\")\n",
"jet_colors = jet(np.arange(256))[:, :3]\n",
"jet_heatmap = jet_colors[heatmap]\n",
"\n",
"jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)\n",
"jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))\n",
"jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)\n",
"\n",
"superimposed_img = jet_heatmap * 0.4 + img\n",
"superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)\n",
"\n",
"save_path = \"elephant_cam.jpg\"\n",
"superimposed_img.save(save_path)"
]
},
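{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"To view the class activation map inline instead of opening the saved file, a small optional cell (`plt.imshow` accepts the PIL image directly):"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"plt.axis(\"off\")\n",
"plt.imshow(superimposed_img)\n",
"plt.show()"
]
},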
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Chapter summary"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "chapter09_part03_interpreting-what-convnets-learn.i",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}