ml_things/keras_checkpoins.ipynb

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "keras_checkpoins.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": []
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "[View in Colaboratory](https://colab.research.google.com/github/gmihaila/deep_learning_toy_models/blob/master/keras_checkpoins.ipynb)"
      ]
    },
    {
      "metadata": {
        "id": "cc_f84dJE4nX",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "### Keras checkpoints\n",
        "\n",
        "by GeorgeM"
      ]
    },
    {
      "metadata": {
        "id": "-bSqvSOyFZIX",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 233
        },
        "outputId": "d873c539-a252-4549-faee-dc95c4b9512c"
      },
      "cell_type": "code",
      "source": [
        "# Downlod dataset\n",
        "\n",
        "!wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv\n",
        "\n",
        "!ls"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2018-04-23 15:02:20--  https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv\r\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 23278 (23K) [text/plain]\n",
            "Saving to: ‘pima-indians-diabetes.data.csv’\n",
            "\n",
            "pima-indians-diabet 100%[===================>]  22.73K  --.-KB/s    in 0.01s   \n",
            "\n",
            "2018-04-23 15:02:20 (1.60 MB/s) - ‘pima-indians-diabetes.data.csv’ saved [23278/23278]\n",
            "\n",
            "datalab  pima-indians-diabetes.data.csv\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "QtPonUi2Fo1W",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "b435be73-eacf-44f3-d551-763dc9e595e9"
      },
      "cell_type": "code",
      "source": [
        "ls"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "data.h5   pima-indians-diabetes.data.csv\r\n",
            "\u001b[0m\u001b[01;34mdatalab\u001b[0m/  pima-indians-diabetes.data.csv.1\r\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "Sic9EKSME7af",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "c4c0be71-036e-424f-da43-5dea707d9607"
      },
      "cell_type": "code",
      "source": [
        "# Imports for training a Keras model and checkpointing the best one (lowest validation loss)\n",
        "from keras.models import Sequential\n",
        "from keras.layers import Dense\n",
        "from keras import callbacks\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy\n",
        "import time"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Using TensorFlow backend.\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "id": "bLeDJG0hFBsv",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# Import data\n",
        "\n",
        "# fix random seed for reproducibility\n",
        "seed = 7\n",
        "numpy.random.seed(seed)\n",
        "# load pima indians dataset\n",
        "dataset = numpy.loadtxt(\"pima-indians-diabetes.data.csv\", delimiter=\",\")\n",
        "# split into input (X) and output (Y) variables\n",
        "X = dataset[:,0:8]\n",
        "Y = dataset[:,8]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "QxwTKhnFF09z",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# create model\n",
        "\n",
        "model = Sequential()\n",
        "model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))\n",
        "model.add(Dense(8, kernel_initializer='uniform', activation='relu'))\n",
        "model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))\n",
        "\n",
        "# Compile model\n",
        "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
        "\n",
        "# checkpoint\n",
        "filepath=\"epoch_{epoch:02d}_acc_{val_loss:.4f}.hdf5\"\n",
        "best_model = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='min', period=1)\n",
        "# filepath : path and name of file to save.\n",
        "# verbose : 0 won't print 1 will print on each epoch if improved or not.\n",
        "# save_best_only : the latest best model according to the quantity monitored will not be overwritten.\n",
        "# save_weights_only : True - only model weights saved. False - whole model saved model.save(filepath).\n",
        "# mode : maximization or the minimization of the monitored quantity e.g. val_loss mode=min\n",
        "# period : how often to cheack. e.g. period=2 ever 2 epoch will check\n",
        "\n",
        "\n",
        "early_stop = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='min')\n",
        "# monitor : can be val_loss or val_acc. We want to decrease val_loss and increase val_acc\n",
        "# min_delta : the minimum value that count as improvement e.g. delta = current_val - previous_val\n",
        "# patience : how many times we allow the model to NOT improve e.g. patience=2 at the second time the model did not improve will STOP\n",
        "# verbose : will print a message of the epoch where it early stopped\n",
        "# mode : which direction we want to go max, min or auto e.g. val_loss we use mode=min val_acc we use mode=max\n",
        "\n",
        "\n",
        "# When NaN loss is encounter, it will stop\n",
        "nan_terminate = callbacks.TerminateOnNaN()\n",
        "\n",
        "# Use tensor board\n",
        "tensor_board = callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)\n",
        "\n",
        "callbacks_list = [best_model, nan_terminate, early_stop, tensor_board]\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "-nCYL_nOF9ui",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 449
        },
        "outputId": "272753b8-7ca5-4c7e-af73-e3bf7716c648"
      },
      "cell_type": "code",
      "source": [
        "# Fit the model\n",
        "s = time.time()\n",
        "model.fit(X, Y, validation_split=0.33, epochs=5, batch_size=2, callbacks=callbacks_list, verbose=1)\n",
        "# model.fit(X, Y, validation_split=0.33, epochs=5, batch_size=2, verbose=0)\n",
        "\n",
        "e = time.time()\n",
        "print '\\n\\nTIME: ',(e-s)"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Train on 514 samples, validate on 254 samples\n",
            "Epoch 1/5\n",
            "514/514 [==============================] - 1s 2ms/step - loss: 0.6810 - acc: 0.6420 - val_loss: 0.6619 - val_acc: 0.6732\n",
            "\n",
            "Epoch 00001: val_loss improved from inf to 0.66191, saving model to epoch_01_acc_0.6619.hdf5\n",
            "Epoch 2/5\n",
            "514/514 [==============================] - 1s 1ms/step - loss: 0.6597 - acc: 0.6401 - val_loss: 0.6535 - val_acc: 0.6732\n",
            "\n",
            "Epoch 00002: val_loss improved from 0.66191 to 0.65349, saving model to epoch_02_acc_0.6535.hdf5\n",
            "Epoch 3/5\n",
            "514/514 [==============================] - 1s 1ms/step - loss: 0.6510 - acc: 0.6401 - val_loss: 0.6276 - val_acc: 0.6732\n",
            "\n",
            "Epoch 00003: val_loss improved from 0.65349 to 0.62758, saving model to epoch_03_acc_0.6276.hdf5\n",
            "Epoch 4/5\n",
            "514/514 [==============================] - 1s 991us/step - loss: 0.6382 - acc: 0.6518 - val_loss: 0.6220 - val_acc: 0.7323\n",
            "\n",
            "Epoch 00004: val_loss improved from 0.62758 to 0.62203, saving model to epoch_04_acc_0.6220.hdf5\n",
            "Epoch 5/5\n",
            "514/514 [==============================] - 0s 955us/step - loss: 0.6315 - acc: 0.6537 - val_loss: 0.6229 - val_acc: 0.7244\n",
            "\n",
            "Epoch 00005: val_loss did not improve\n",
            "\n",
            "\n",
            "TIME:  3.8915579319\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "nas2x17RIBvQ",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "rm ./*.hdf5"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "WpGHviEEMFkX",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 89
        },
        "outputId": "650692fb-648c-48d9-ee09-9549074b10b5"
      },
      "cell_type": "code",
      "source": [
        "ls"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[0m\u001b[01;34mdatalab\u001b[0m/                  epoch_05_acc_0.6182.hdf5\r\n",
            "epoch_01_acc_0.6536.hdf5  \u001b[01;34mGraph\u001b[0m/\r\n",
            "epoch_03_acc_0.6351.hdf5  pima-indians-diabetes.data.csv\r\n",
            "epoch_04_acc_0.6316.hdf5\r\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "tj3QsSJbesQp",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 109
        },
        "outputId": "0a342c24-f609-4f03-9f00-f0bf041b0399"
      },
      "cell_type": "code",
      "source": [
        "!tensorboard --logdir ./Graph "
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python2.7/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\r\n",
            "  from ._conv import register_converters as _register_converters\n",
            "TensorBoard 1.7.0 at http://54bb71543568:6006 (Press CTRL+C to quit)\n",
            "^C\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}