{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Neural style transfer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### The content loss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### The style loss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### Neural style transfer in Keras"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Getting the style and content images**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# All imports live in this first cell so that every later cell (including\n",
    "# the loss functions, which reference `tf`) works regardless of run order.\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "base_image_path = keras.utils.get_file(\n",
    "    \"sf.jpg\", origin=\"https://img-datasets.s3.amazonaws.com/sf.jpg\")\n",
    "style_reference_image_path = keras.utils.get_file(\n",
    "    \"starry_night.jpg\", origin=\"https://img-datasets.s3.amazonaws.com/starry_night.jpg\")\n",
    "\n",
    "original_width, original_height = keras.utils.load_img(base_image_path).size\n",
    "# Work at a fixed height; the width follows the content image's aspect ratio.\n",
    "img_height = 400\n",
    "img_width = round(original_width * img_height / original_height)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Auxiliary functions**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def preprocess_image(image_path):\n",
    "    \"\"\"Load the image at `image_path` as a VGG19-ready batch of one.\n",
    "\n",
    "    The image is resized to (img_height, img_width), converted to an array,\n",
    "    given a leading batch axis, and passed through\n",
    "    `keras.applications.vgg19.preprocess_input`.\n",
    "    \"\"\"\n",
    "    img = keras.utils.load_img(\n",
    "        image_path, target_size=(img_height, img_width))\n",
    "    img = keras.utils.img_to_array(img)\n",
    "    img = np.expand_dims(img, axis=0)\n",
    "    img = keras.applications.vgg19.preprocess_input(img)\n",
    "    return img\n",
    "\n",
    "def deprocess_image(img):\n",
    "    \"\"\"Convert a preprocessed image array into a uint8 image array.\n",
    "\n",
    "    Adds back the per-channel offsets and reverses the channel order (the\n",
    "    inverse of the RGB->BGR conversion and zero-centering documented for\n",
    "    `vgg19.preprocess_input`), then clips to [0, 255].\n",
    "\n",
    "    Returns a new array of shape (img_height, img_width, 3); the array\n",
    "    passed in is left unmodified.\n",
    "    \"\"\"\n",
    "    # reshape() may return a view of its input, so copy before the in-place\n",
    "    # channel shifts below; otherwise they would write into the caller's data.\n",
    "    img = img.reshape((img_height, img_width, 3)).copy()\n",
    "    img[:, :, 0] += 103.939\n",
    "    img[:, :, 1] += 116.779\n",
    "    img[:, :, 2] += 123.68\n",
    "    # Reverse the channel axis.\n",
    "    img = img[:, :, ::-1]\n",
    "    img = np.clip(img, 0, 255).astype(\"uint8\")\n",
    "    return img"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Loading the pretrained VGG19 network and using it to define a feature extractor**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "model = keras.applications.vgg19.VGG19(weights=\"imagenet\", include_top=False)\n",
    "\n",
    "# Map every layer name to its output so that one forward pass returns the\n",
    "# activations of all layers as a dict.\n",
    "outputs_dict = {layer.name: layer.output for layer in model.layers}\n",
    "feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Content loss**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def content_loss(base_img, combination_img):\n",
    "    \"\"\"Return the sum of squared differences between the two tensors.\"\"\"\n",
    "    return tf.reduce_sum(tf.square(combination_img - base_img))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Style loss**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def gram_matrix(x):\n",
    "    \"\"\"Return the Gram matrix of a rank-3 tensor over its last axis.\n",
    "\n",
    "    The last axis of `x` is moved to the front and the other two axes are\n",
    "    flattened, giving a 2D `features` matrix; the result is\n",
    "    `features @ transpose(features)`.\n",
    "    \"\"\"\n",
    "    x = tf.transpose(x, (2, 0, 1))\n",
    "    features = tf.reshape(x, (tf.shape(x)[0], -1))\n",
    "    gram = tf.matmul(features, tf.transpose(features))\n",
    "    return gram\n",
    "\n",
    "def style_loss(style_img, combination_img):\n",
    "    \"\"\"Return the scaled squared distance between two Gram matrices.\n",
    "\n",
    "    The sum of squared differences is divided by\n",
    "    4 * channels**2 * size**2, where `channels` is fixed at 3 and `size` is\n",
    "    img_height * img_width.\n",
    "    \"\"\"\n",
    "    S = gram_matrix(style_img)\n",
    "    C = gram_matrix(combination_img)\n",
    "    channels = 3\n",
    "    size = img_height * img_width\n",
    "    return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Total variation loss**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "def total_variation_loss(x):\n",
    "    \"\"\"Return the total variation of a rank-4 image tensor.\n",
    "\n",
    "    Squared differences between neighbouring entries along axis 1 and along\n",
    "    axis 2 are added, raised to the power 1.25, and summed. Slices are taken\n",
    "    relative to the shape of `x` itself, so no global image size is needed.\n",
    "    \"\"\"\n",
    "    a = tf.square(x[:, :-1, :-1, :] - x[:, 1:, :-1, :])\n",
    "    b = tf.square(x[:, :-1, :-1, :] - x[:, :-1, 1:, :])\n",
    "    return tf.reduce_sum(tf.pow(a + b, 1.25))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Defining the final loss that you'll minimize**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "style_layer_names = [\n",
    "    \"block1_conv1\",\n",
    "    \"block2_conv1\",\n",
    "    \"block3_conv1\",\n",
    "    \"block4_conv1\",\n",
    "    \"block5_conv1\",\n",
    "]\n",
    "content_layer_name = \"block5_conv2\"\n",
    "total_variation_weight = 1e-6\n",
    "style_weight = 1e-6\n",
    "content_weight = 2.5e-8\n",
    "\n",
    "def compute_loss(combination_image, base_image, style_reference_image):\n",
    "    \"\"\"Return the weighted sum of content, style and total variation losses.\n",
    "\n",
    "    The three images are concatenated along axis 0 and sent through\n",
    "    `feature_extractor` in a single call, so in every feature tensor index 0\n",
    "    is the base image, 1 the style reference and 2 the combination image.\n",
    "    \"\"\"\n",
    "    input_tensor = tf.concat(\n",
    "        [base_image, style_reference_image, combination_image], axis=0\n",
    "    )\n",
    "    features = feature_extractor(input_tensor)\n",
    "    loss = tf.zeros(shape=())\n",
    "    # Content term: compared at a single layer.\n",
    "    layer_features = features[content_layer_name]\n",
    "    base_image_features = layer_features[0, :, :, :]\n",
    "    combination_features = layer_features[2, :, :, :]\n",
    "    loss = loss + content_weight * content_loss(\n",
    "        base_image_features, combination_features\n",
    "    )\n",
    "    # Style term: style_weight is split evenly across the style layers.\n",
    "    for layer_name in style_layer_names:\n",
    "        layer_features = features[layer_name]\n",
    "        style_reference_features = layer_features[1, :, :, :]\n",
    "        combination_features = layer_features[2, :, :, :]\n",
    "        style_loss_value = style_loss(\n",
    "            style_reference_features, combination_features)\n",
    "        loss += (style_weight / len(style_layer_names)) * style_loss_value\n",
    "\n",
    "    loss += total_variation_weight * total_variation_loss(combination_image)\n",
    "    return loss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "**Setting up the gradient-descent process**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def compute_loss_and_grads(combination_image, base_image, style_reference_image):\n",
    "    \"\"\"Return the loss and its gradient with respect to `combination_image`.\"\"\"\n",
    "    with tf.GradientTape() as tape:\n",
    "        loss = compute_loss(combination_image, base_image, style_reference_image)\n",
    "    grads = tape.gradient(loss, combination_image)\n",
    "    return loss, grads\n",
    "\n",
    "optimizer = keras.optimizers.SGD(\n",
    "    keras.optimizers.schedules.ExponentialDecay(\n",
    "        initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96\n",
    "    )\n",
    ")\n",
    "\n",
    "base_image = preprocess_image(base_image_path)\n",
    "style_reference_image = preprocess_image(style_reference_image_path)\n",
    "# The image being optimized starts out as a copy of the content image.\n",
    "combination_image = tf.Variable(preprocess_image(base_image_path))\n",
    "\n",
    "iterations = 4000\n",
    "for i in range(1, iterations + 1):\n",
    "    loss, grads = compute_loss_and_grads(\n",
    "        combination_image, base_image, style_reference_image\n",
    "    )\n",
    "    optimizer.apply_gradients([(grads, combination_image)])\n",
    "    # Every 100 iterations, report the loss and save the current image.\n",
    "    if i % 100 == 0:\n",
    "        print(f\"Iteration {i}: loss={loss:.2f}\")\n",
    "        img = deprocess_image(combination_image.numpy())\n",
    "        fname = f\"combination_image_at_iteration_{i}.png\"\n",
    "        keras.utils.save_img(fname, img)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "### Wrapping up"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "chapter12_part03_neural-style-transfer.i",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}