mirror of
https://github.com/fchollet/deep-learning-with-python-notebooks.git
synced 2021-07-27 01:28:40 +03:00
356 lines
9.5 KiB
Plaintext
356 lines
9.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"## Neural style transfer"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"### The content loss"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"### The style loss"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"### Neural style transfer in Keras"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Getting the style and content images**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from tensorflow import keras\n",
|
|
"\n",
|
|
"base_image_path = keras.utils.get_file(\n",
|
|
" \"sf.jpg\", origin=\"https://img-datasets.s3.amazonaws.com/sf.jpg\")\n",
|
|
"style_reference_image_path = keras.utils.get_file(\n",
|
|
" \"starry_night.jpg\", origin=\"https://img-datasets.s3.amazonaws.com/starry_night.jpg\")\n",
|
|
"\n",
|
|
"original_width, original_height = keras.utils.load_img(base_image_path).size\n",
|
|
"img_height = 400\n",
|
|
"img_width = round(original_width * img_height / original_height)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Auxiliary functions**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"def preprocess_image(image_path):\n",
|
|
" img = keras.utils.load_img(\n",
|
|
" image_path, target_size=(img_height, img_width))\n",
|
|
" img = keras.utils.img_to_array(img)\n",
|
|
" img = np.expand_dims(img, axis=0)\n",
|
|
" img = keras.applications.vgg19.preprocess_input(img)\n",
|
|
" return img\n",
|
|
"\n",
|
|
"def deprocess_image(img):\n",
|
|
" img = img.reshape((img_height, img_width, 3))\n",
|
|
" img[:, :, 0] += 103.939\n",
|
|
" img[:, :, 1] += 116.779\n",
|
|
" img[:, :, 2] += 123.68\n",
|
|
" img = img[:, :, ::-1]\n",
|
|
" img = np.clip(img, 0, 255).astype(\"uint8\")\n",
|
|
" return img"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Loading the pretrained VGG19 network and using it to define a feature extractor**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = keras.applications.vgg19.VGG19(weights=\"imagenet\", include_top=False)\n",
|
|
"\n",
|
|
"outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])\n",
|
|
"feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Content loss**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def content_loss(base_img, combination_img):\n",
|
|
" return tf.reduce_sum(tf.square(combination_img - base_img))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Style loss**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def gram_matrix(x):\n",
|
|
" x = tf.transpose(x, (2, 0, 1))\n",
|
|
" features = tf.reshape(x, (tf.shape(x)[0], -1))\n",
|
|
" gram = tf.matmul(features, tf.transpose(features))\n",
|
|
" return gram\n",
|
|
"\n",
|
|
"def style_loss(style_img, combination_img):\n",
|
|
" S = gram_matrix(style_img)\n",
|
|
" C = gram_matrix(combination_img)\n",
|
|
" channels = 3\n",
|
|
" size = img_height * img_width\n",
|
|
" return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Total variation loss**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def total_variation_loss(x):\n",
|
|
" a = tf.square(\n",
|
|
" x[:, : img_height - 1, : img_width - 1, :] - x[:, 1:, : img_width - 1, :]\n",
|
|
" )\n",
|
|
" b = tf.square(\n",
|
|
" x[:, : img_height - 1, : img_width - 1, :] - x[:, : img_height - 1, 1:, :]\n",
|
|
" )\n",
|
|
" return tf.reduce_sum(tf.pow(a + b, 1.25))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Defining the final loss that you'll minimize**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"style_layer_names = [\n",
|
|
" \"block1_conv1\",\n",
|
|
" \"block2_conv1\",\n",
|
|
" \"block3_conv1\",\n",
|
|
" \"block4_conv1\",\n",
|
|
" \"block5_conv1\",\n",
|
|
"]\n",
|
|
"content_layer_name = \"block5_conv2\"\n",
|
|
"total_variation_weight = 1e-6\n",
|
|
"style_weight = 1e-6\n",
|
|
"content_weight = 2.5e-8\n",
|
|
"\n",
|
|
"def compute_loss(combination_image, base_image, style_reference_image):\n",
|
|
" input_tensor = tf.concat(\n",
|
|
" [base_image, style_reference_image, combination_image], axis=0\n",
|
|
" )\n",
|
|
" features = feature_extractor(input_tensor)\n",
|
|
" loss = tf.zeros(shape=())\n",
|
|
" layer_features = features[content_layer_name]\n",
|
|
" base_image_features = layer_features[0, :, :, :]\n",
|
|
" combination_features = layer_features[2, :, :, :]\n",
|
|
" loss = loss + content_weight * content_loss(\n",
|
|
" base_image_features, combination_features\n",
|
|
" )\n",
|
|
" for layer_name in style_layer_names:\n",
|
|
" layer_features = features[layer_name]\n",
|
|
" style_reference_features = layer_features[1, :, :, :]\n",
|
|
" combination_features = layer_features[2, :, :, :]\n",
|
|
" style_loss_value = style_loss(\n",
|
|
" style_reference_features, combination_features)\n",
|
|
" loss += (style_weight / len(style_layer_names)) * style_loss_value\n",
|
|
"\n",
|
|
" loss += total_variation_weight * total_variation_loss(combination_image)\n",
|
|
" return loss"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"**Setting up the gradient-descent process**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 0,
|
|
"metadata": {
|
|
"colab_type": "code"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import tensorflow as tf\n",
|
|
"\n",
|
|
"@tf.function\n",
|
|
"def compute_loss_and_grads(combination_image, base_image, style_reference_image):\n",
|
|
" with tf.GradientTape() as tape:\n",
|
|
" loss = compute_loss(combination_image, base_image, style_reference_image)\n",
|
|
" grads = tape.gradient(loss, combination_image)\n",
|
|
" return loss, grads\n",
|
|
"\n",
|
|
"optimizer = keras.optimizers.SGD(\n",
|
|
" keras.optimizers.schedules.ExponentialDecay(\n",
|
|
" initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96\n",
|
|
" )\n",
|
|
")\n",
|
|
"\n",
|
|
"base_image = preprocess_image(base_image_path)\n",
|
|
"style_reference_image = preprocess_image(style_reference_image_path)\n",
|
|
"combination_image = tf.Variable(preprocess_image(base_image_path))\n",
|
|
"\n",
|
|
"iterations = 4000\n",
|
|
"for i in range(1, iterations + 1):\n",
|
|
" loss, grads = compute_loss_and_grads(\n",
|
|
" combination_image, base_image, style_reference_image\n",
|
|
" )\n",
|
|
" optimizer.apply_gradients([(grads, combination_image)])\n",
|
|
" if i % 100 == 0:\n",
|
|
" print(f\"Iteration {i}: loss={loss:.2f}\")\n",
|
|
" img = deprocess_image(combination_image.numpy())\n",
|
|
" fname = f\"combination_image_at_iteration_{i}.png\"\n",
|
|
" keras.utils.save_img(fname, img)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"### Wrapping up"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "chapter12_part03_neural-style-transfer.ipynb",
|
|
"private_outputs": false,
|
|
"provenance": [],
|
|
"toc_visible": true
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |