mirror of
https://github.com/gmihaila/ml_things.git
synced 2021-10-04 01:29:04 +03:00
292 lines
9.2 KiB
Plaintext
292 lines
9.2 KiB
Plaintext
{
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0,
|
||
"metadata": {
|
||
"colab": {
|
||
"name": "keras_generator.ipynb",
|
||
"version": "0.3.2",
|
||
"provenance": [],
|
||
"collapsed_sections": []
|
||
}
|
||
},
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "view-in-github",
|
||
"colab_type": "text"
|
||
},
|
||
"source": [
|
||
"[View in Colaboratory](https://colab.research.google.com/github/gmihaila/deep_learning_toolbox/blob/master/keras_generator.ipynb)"
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "g8LvSrDRLJc9",
|
||
"colab_type": "text"
|
||
},
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"### Use data Generator in Keras\n",
|
||
"\n",
|
||
"by GeorgeM."
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "pEF8oMXXLIkx",
|
||
"colab_type": "code",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 212
|
||
},
|
||
"outputId": "3f519e62-695b-497b-8c92-3016932bd6f7"
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
|
||
],
|
||
"execution_count": 1,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"text": [
|
||
"--2018-05-28 19:23:55-- https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv\r\n",
|
||
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\r\n",
|
||
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
|
||
"HTTP request sent, awaiting response... 200 OK\n",
|
||
"Length: 23278 (23K) [text/plain]\n",
|
||
"Saving to: ‘pima-indians-diabetes.data.csv’\n",
|
||
"\n",
|
||
"pima-indians-diabet 100%[===================>] 22.73K --.-KB/s in 0.01s \n",
|
||
"\n",
|
||
"2018-05-28 19:23:55 (1.58 MB/s) - ‘pima-indians-diabetes.data.csv’ saved [23278/23278]\n",
|
||
"\n"
|
||
],
|
||
"name": "stdout"
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "d0iOq0HYLkPu",
|
||
"colab_type": "code",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 34
|
||
},
|
||
"outputId": "2a5df017-04dc-49a9-e4ea-d35f14b74dc9"
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"ls"
|
||
],
|
||
"execution_count": 14,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\u001b[0m\u001b[01;34mdatalab\u001b[0m/ pima-indians-diabetes.data.csv\r\n"
|
||
],
|
||
"name": "stdout"
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "MfFKQB9oLa4n",
|
||
"colab_type": "code",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 52
|
||
},
|
||
"outputId": "d312ce22-099a-4d71-84db-fd93cdcc8079"
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Create your first MLP in Keras\n",
|
||
"from keras.models import Sequential\n",
|
||
"from keras.layers import Dense\n",
|
||
"import time\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"# fix random seed for reproducibility\n",
|
||
"np.random.seed(7)\n",
|
||
"# load pima indians dataset\n",
|
||
"dataset = np.loadtxt('pima-indians-diabetes.data.csv', delimiter=\",\")\n",
|
||
"# split into input (X) and output (Y) variables\n",
|
||
"X = dataset[:,0:8]\n",
|
||
"Y = dataset[:,8]\n",
|
||
"data_size = len(X)\n",
|
||
"print 'Data size: ', data_size\n",
|
||
"\n",
|
||
"data = []\n",
|
||
"for x,y in zip(X,Y):\n",
|
||
" data.append([x,y])\n",
|
||
"print 'Data created'\n",
|
||
"\n",
|
||
"# create model\n",
|
||
"model = Sequential()\n",
|
||
"model.add(Dense(12, input_dim=8, activation='relu'))\n",
|
||
"model.add(Dense(8, activation='relu'))\n",
|
||
"model.add(Dense(1, activation='sigmoid'))\n",
|
||
"# Compile model\n",
|
||
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
||
"\n",
|
||
"\n",
"def gen(data, mini_batch, n_epochs):\n",
"    \"\"\"Yield (x, y) mini-batches from `data`, making `n_epochs` passes over it.\n",
"\n",
"    data: sequence of pairs; pair[0] is the input example, pair[1] its target.\n",
"    mini_batch: number of examples per yielded batch.\n",
"    n_epochs: number of full passes over `data` before the generator is exhausted.\n",
"\n",
"    Every pass yields ceil(len(data) / mini_batch) batches as (np.array, np.array).\n",
"    The last batch of a pass is smaller when len(data) is not a multiple of\n",
"    mini_batch, so steps_per_epoch should be sized accordingly.\n",
"    \"\"\"\n",
"    print('Generator initiated!\\n')\n",
"\n",
"    # Looping over epochs lets a single generator object serve several passes.\n",
"    for _ in range(n_epochs):\n",
"        # Fresh buffers on every pass, so leftovers from one epoch can never\n",
"        # leak into (and enlarge) the first batch of the next epoch.\n",
"        x = []  # input batch examples\n",
"        y = []  # output batch examples\n",
"\n",
"        for index, pair in enumerate(data):\n",
"            # append data\n",
"            x.append(pair[0])\n",
"            y.append(pair[1])\n",
"\n",
"            if (index + 1) % mini_batch == 0:\n",
"                # output data\n",
"                yield np.array(x), np.array(y)\n",
"                x = []\n",
"                y = []\n",
"\n",
"        # Emit the trailing partial batch instead of silently dropping it.\n",
"        if x:\n",
"            yield np.array(x), np.array(y)"
|
||
],
|
||
"execution_count": 22,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Data size: 768\n",
|
||
"Data created\n"
|
||
],
|
||
"name": "stdout"
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "XMaurYBBZFw8",
|
||
"colab_type": "code",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 514
|
||
},
|
||
"outputId": "f9bc62b7-ac12-4299-de0f-343e7fd7139d"
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"tr_gen = gen(data, 1, n_epochs=10)\n",
|
||
"s = time.time()\n",
|
||
"model.fit_generator(generator=tr_gen, steps_per_epoch=768, epochs=10, max_queue_size=0, workers=0, verbose=2)\n",
|
||
"e = time.time()\n",
|
||
"print '\\n\\n Generator: ',(e-s)\n",
|
||
"\n",
|
||
"\n",
|
||
"# steps_per_epoch: number of batches to draw from the generator to finish 1 epoch\n",
|
||
"# max_queue_size: Integer. Maximum size for the generator queue. Default 10.\n",
|
||
"# workers: Integer. Maximum number of processes to spin up when using process-based threading. Defaults to 1.\n",
|
||
"#          If 0, the generator is executed on the main thread.\n",
|
||
"# epochs: number of times model sees data\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"# Fit the model\n",
|
||
"# s = time.time()\n",
|
||
"# model.fit(X, Y, epochs=1, batch_size=1)\n",
|
||
"# e = time.time()\n",
|
||
"# print '\\n\\n Generator: ',(e-s)\n",
|
||
"\n",
|
||
"# evaluate the model\n",
|
||
"\n",
|
||
"scores = model.evaluate(X, Y)\n",
|
||
"print(\"\\n%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n",
|
||
"\n",
|
||
"del model"
|
||
],
|
||
"execution_count": 23,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Epoch 1/10\n",
|
||
"Generator initiated!\n",
|
||
"\n",
|
||
" - 2s - loss: 1.7948 - acc: 0.6068\n",
|
||
"Epoch 2/10\n",
|
||
" - 1s - loss: 0.7456 - acc: 0.6302\n",
|
||
"Epoch 3/10\n",
|
||
" - 1s - loss: 0.7028 - acc: 0.6576\n",
|
||
"Epoch 4/10\n",
|
||
" - 1s - loss: 0.6732 - acc: 0.6576\n",
|
||
"Epoch 5/10\n",
|
||
" - 1s - loss: 0.6522 - acc: 0.6836\n",
|
||
"Epoch 6/10\n",
|
||
" - 1s - loss: 0.6369 - acc: 0.6732\n",
|
||
"Epoch 7/10\n",
|
||
" - 1s - loss: 0.6223 - acc: 0.6901\n",
|
||
"Epoch 8/10\n",
|
||
" - 1s - loss: 0.6152 - acc: 0.6823\n",
|
||
"Epoch 9/10\n",
|
||
" - 1s - loss: 0.6047 - acc: 0.7031\n",
|
||
"Epoch 10/10\n",
|
||
" - 1s - loss: 0.5941 - acc: 0.7044\n",
|
||
"\n",
|
||
"\n",
|
||
" Generator: 11.5527789593\n",
|
||
"768/768 [==============================] - 0s 176us/step\n",
|
||
"\n",
|
||
"acc: 68.10%\n"
|
||
],
|
||
"name": "stdout"
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"metadata": {
|
||
"id": "A-KDhE9Forgg",
|
||
"colab_type": "code",
|
||
"colab": {}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# prediction generator (example code)\n",
|
||
"\n",
"def TestGenerator(path_data, max_queue_size, n_articles):\n",
"    \"\"\"Yield batches of article contents read from '<path_data>.pickle'.\n",
"\n",
"    The file is read as a stream of consecutively pickled 6-item records\n",
"    (ID, content, title, media, source, published); only `content` is kept.\n",
"\n",
"    path_data: file path without the '.pickle' extension.\n",
"    max_queue_size: number of records per yielded batch (must be >= 1).\n",
"    n_articles: kept for backward compatibility only; the end of the data is\n",
"        detected from EOF, so this value does not affect batching.\n",
"\n",
"    Yields np.array batches holding `max_queue_size` contents each; the final\n",
"    batch holds whatever records are left over.\n",
"    Raises ValueError (on first iteration) if max_queue_size is smaller than 1.\n",
"    \"\"\"\n",
"    # Imported locally: no visible cell of this notebook imports pickle.\n",
"    import pickle\n",
"\n",
"    if max_queue_size < 1:\n",
"        raise ValueError('max_queue_size must be >= 1')\n",
"\n",
"    x = []  # input batch examples\n",
"\n",
"    # NOTE(review): pickle.load can execute arbitrary code; only read trusted files.\n",
"    with open('%s.pickle' % path_data, \"rb\") as openfile:\n",
"        while True:\n",
"            try:\n",
"                ID, content, title, media, source, published = pickle.load(openfile)\n",
"            except EOFError:\n",
"                # No more pickled records in the file.\n",
"                break\n",
"            x.append(content)\n",
"\n",
"            if len(x) == max_queue_size:\n",
"                # output data\n",
"                yield np.array(x)\n",
"                x = []\n",
"\n",
"    # Flush the final partial batch as ONE batch (the old tail condition emitted\n",
"    # the last records one at a time).\n",
"    if x:\n",
"        yield np.array(x)\n",
|
||
" \n",
|
||
"test_gen = TestGenerator(path_data='test', max_queue_size=2, n_articles=n_test) \n",
|
||
"\n",
|
||
"predict = model.predict_generator(generator=test_gen, steps=5, max_queue_size=10, workers=1)\n",
|
||
"print predict[0]"
|
||
],
|
||
"execution_count": 0,
|
||
"outputs": []
|
||
}
|
||
]
|
||
} |