{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "keras_generator.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"[View in Colaboratory](https://colab.research.google.com/github/gmihaila/deep_learning_toolbox/blob/master/keras_generator.ipynb)"
]
},
{
"metadata": {
"id": "g8LvSrDRLJc9",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"### Use data Generator in Keras\n",
"\n",
"by GeorgeM."
]
},
{
"metadata": {
"id": "pEF8oMXXLIkx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 212
},
"outputId": "3f519e62-695b-497b-8c92-3016932bd6f7"
},
"cell_type": "code",
"source": [
"!wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"--2018-05-28 19:23:55-- https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv\r\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\r\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 23278 (23K) [text/plain]\n",
"Saving to: pima-indians-diabetes.data.csv\n",
"\n",
"pima-indians-diabet 100%[===================>] 22.73K --.-KB/s in 0.01s \n",
"\n",
"2018-05-28 19:23:55 (1.58 MB/s) - pima-indians-diabetes.data.csv saved [23278/23278]\n",
"\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "d0iOq0HYLkPu",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "2a5df017-04dc-49a9-e4ea-d35f14b74dc9"
},
"cell_type": "code",
"source": [
"ls"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[0m\u001b[01;34mdatalab\u001b[0m/ pima-indians-diabetes.data.csv\r\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "MfFKQB9oLa4n",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 52
},
"outputId": "d312ce22-099a-4d71-84db-fd93cdcc8079"
},
"cell_type": "code",
"source": [
"# Create your first MLP in Keras\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"import time\n",
"import numpy as np\n",
"\n",
"# fix random seed for reproducibility\n",
"np.random.seed(7)\n",
"# load pima indians dataset\n",
"dataset = np.loadtxt('pima-indians-diabetes.data.csv', delimiter=\",\")\n",
"# split into input (X) and output (Y) variables\n",
"X = dataset[:,0:8]\n",
"Y = dataset[:,8]\n",
"data_size = len(X)\n",
"print 'Data size: ', data_size\n",
"\n",
"data = []\n",
"for x,y in zip(X,Y):\n",
" data.append([x,y])\n",
"print 'Data created'\n",
"\n",
"# create model\n",
"model = Sequential()\n",
"model.add(Dense(12, input_dim=8, activation='relu'))\n",
"model.add(Dense(8, activation='relu'))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"# Compile model\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"\n",
"\n",
"def gen(data, mini_batch, n_epochs):\n",
" print('Generator initiated!\\n')\n",
" # read or split data\n",
" data_size = len(data)\n",
" x = [] # input batch examples\n",
" y = [] # output batch examples\n",
" epoch = 0\n",
" \n",
" while epoch < n_epochs: # <- VERY IMPORTANT\n",
" for index, pair in enumerate(data):\n",
" # append data \n",
" x.append(pair[0])\n",
" y.append(pair[1])\n",
"\n",
" if (index+1)%mini_batch == 0:\n",
" x = np.array(x)\n",
" y = np.array(y)\n",
" # output data\n",
" yield x, y\n",
" x = []; y = []\n",
" \n",
" epoch += 1\n",
" return"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"Data size: 768\n",
"Data created\n"
],
"name": "stdout"
}
]
},
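{
"metadata": {},
"cell_type": "markdown",
"source": [
"*Added sketch (not part of the original notebook):* pull a single mini-batch from `gen` to confirm it yields `(x_batch, y_batch)` NumPy arrays of the expected shape before handing a fresh generator to `fit_generator`. The batch size of 32 is an arbitrary value chosen for illustration."
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Sanity check (added sketch): draw one mini-batch from a throwaway generator instance\n",
"demo_gen = gen(data, mini_batch=32, n_epochs=1)\n",
"x_batch, y_batch = next(demo_gen)\n",
"print(x_batch.shape, y_batch.shape)  # e.g. (32, 8) (32,)"
],
"execution_count": 0,
"outputs": []
},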
{
"metadata": {
"id": "XMaurYBBZFw8",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "f9bc62b7-ac12-4299-de0f-343e7fd7139d"
},
"cell_type": "code",
"source": [
"tr_gen = gen(data, 1, n_epochs=10)\n",
"s = time.time()\n",
"model.fit_generator(generator=tr_gen, steps_per_epoch=768, epochs=10, max_queue_size=0, workers=0, verbose=2)\n",
"e = time.time()\n",
"print '\\n\\n Generator: ',(e-s)\n",
"\n",
"\n",
"# steps_per_epoch: batches of examples/examples form Generator to finish 1 epoch\n",
"# max_queue_size: Integer. Maximum size for the generator queue. Default 10.\n",
"# workers: Integer. Maximum number of processes to spin up when using process-based threading. Default to 1. \n",
" # 0, will execute the generator on the main thread.\n",
"# epochs: number of times model sees data\n",
"\n",
"\n",
"\n",
"# Fit the model\n",
"# s = time.time()\n",
"# model.fit(X, Y, epochs=1, batch_size=1)\n",
"# e = time.time()\n",
"# print '\\n\\n Generator: ',(e-s)\n",
"\n",
"# evaluate the model\n",
"\n",
"scores = model.evaluate(X, Y)\n",
"print(\"\\n%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n",
"\n",
"del model"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"Generator initiated!\n",
"\n",
" - 2s - loss: 1.7948 - acc: 0.6068\n",
"Epoch 2/10\n",
" - 1s - loss: 0.7456 - acc: 0.6302\n",
"Epoch 3/10\n",
" - 1s - loss: 0.7028 - acc: 0.6576\n",
"Epoch 4/10\n",
" - 1s - loss: 0.6732 - acc: 0.6576\n",
"Epoch 5/10\n",
" - 1s - loss: 0.6522 - acc: 0.6836\n",
"Epoch 6/10\n",
" - 1s - loss: 0.6369 - acc: 0.6732\n",
"Epoch 7/10\n",
" - 1s - loss: 0.6223 - acc: 0.6901\n",
"Epoch 8/10\n",
" - 1s - loss: 0.6152 - acc: 0.6823\n",
"Epoch 9/10\n",
" - 1s - loss: 0.6047 - acc: 0.7031\n",
"Epoch 10/10\n",
" - 1s - loss: 0.5941 - acc: 0.7044\n",
"\n",
"\n",
" Generator: 11.5527789593\n",
"768/768 [==============================] - 0s 176us/step\n",
"\n",
"acc: 68.10%\n"
],
"name": "stdout"
}
]
},
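{
"metadata": {},
"cell_type": "markdown",
"source": [
"*Added sketch (not part of the original notebook):* the same batching idea expressed with `keras.utils.Sequence`. A `Sequence` is indexed rather than iterated, so Keras can infer the number of steps per epoch from `len()` and can safely use multiple workers. The class below is a hypothetical illustration, not code from the original demo; the `fit_generator` call is left commented out because `model` is deleted at the end of the previous cell."
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Alternative (added sketch): index-based batching with keras.utils.Sequence\n",
"from keras.utils import Sequence\n",
"\n",
"class DiabetesSequence(Sequence):  # hypothetical helper, not in the original notebook\n",
"    def __init__(self, X, Y, batch_size):\n",
"        self.X, self.Y, self.batch_size = X, Y, batch_size\n",
"\n",
"    def __len__(self):\n",
"        # number of batches per epoch\n",
"        return int(np.ceil(len(self.X) / float(self.batch_size)))\n",
"\n",
"    def __getitem__(self, idx):\n",
"        # return one mini-batch by index\n",
"        lo = idx * self.batch_size\n",
"        hi = lo + self.batch_size\n",
"        return self.X[lo:hi], self.Y[lo:hi]\n",
"\n",
"# seq = DiabetesSequence(X, Y, batch_size=32)\n",
"# model.fit_generator(seq, epochs=10, verbose=2)  # steps per epoch inferred from len(seq)"
],
"execution_count": 0,
"outputs": []
},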
{
"metadata": {
"id": "A-KDhE9Forgg",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# prediciton generator some code\n",
"\n",
"def TestGenerator(path_data, max_queue_size, n_articles):\n",
" x = [] # input batch examples\n",
" \n",
" index = 1\n",
" epoch = 0\n",
"\n",
" with (open('%s.pickle'%path_data, \"rb\")) as openfile:\n",
" while True:\n",
" try:\n",
" ID, content, title, media, source, published = pickle.load(openfile)\n",
" x.append(content)\n",
"\n",
" if ((index)%max_queue_size == 0) or ((n_articles - index) < max_queue_size):\n",
" x = np.array(x)\n",
" # output data\n",
" yield x\n",
" x = []\n",
"\n",
" index += 1\n",
" except EOFError:\n",
" break\n",
" \n",
"test_gen = TestGenerator(path_data='test', max_queue_size=2, n_articles=n_test) \n",
"\n",
"predict = model.predict_generator(generator=test_gen, steps=5, max_queue_size=10, workers=1)\n",
"print predict[0]"
],
"execution_count": 0,
"outputs": []
}
]
}