{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "test_mxnet.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/gmihaila/machine_learning_toolbox/blob/master/test_mxnet.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"metadata": {
"id": "woJ4rgpXEEoh",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "822dc50b-d4c3-4823-ecd9-7f06b503c370"
},
"cell_type": "code",
"source": [
"!nvcc --version\n"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2018 NVIDIA Corporation\n",
"Built on Tue_Jun_12_23:07:04_CDT_2018\n",
"Cuda compilation tools, release 9.2, V9.2.148\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "bScEvYRoHJjv",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 53
},
"outputId": "1ef4d6d1-e2e2-4d20-9e5f-1474ca826de5"
},
"cell_type": "code",
"source": [
"!nvidia-smi"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.\n",
"\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "N9J1AaYtfHKq",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"# Handwritten Digit Recognition [GPU use]\n",
"\n",
"Inspiration: \n",
"\n",
"* https://gluon.mxnet.io/chapter07_distributed-learning/multiple-gpus-scratch.html\n",
"\n",
"\n",
"In this tutorial, we'll give you a step by step walk-through of how to build a hand-written digit classifier using the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) dataset. For someone new to deep learning, this exercise is arguably the \"Hello World\" equivalent.]"
]
},
{
"metadata": {
"id": "V3EeB8AWi-HA",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!pip install mxnet-cu92\n",
"!pip install mxnet-cu92mkl"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"colab_type": "code",
"outputId": "70b56d50-b9a0-4a2d-9d21-e700ab1fe7dd",
"id": "DY4p-tb3iZ58",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 233
}
},
"cell_type": "code",
"source": [
"from mxnet.test_utils import get_mnist\n",
"from mxnet.io import NDArrayIter\n",
"from mxnet import gpu\n",
"from mxnet import gluon\n",
"from mxnet import nd\n",
"from mxnet import autograd\n",
"from time import time\n",
"\n",
"\n",
"\n",
"# Given a list of data that spans multiple GPUs, we then define a function to sum the data and broadcast the results to each GPU.\n",
"def allreduce(data):\n",
"    # sum on data[0].context, and then broadcast\n",
"    for i in range(1, len(data)):\n",
"        data[0][:] += data[i].copyto(data[0].context)\n",
"    for i in range(1, len(data)):\n",
"        data[0].copyto(data[i])\n",
"\n",
"\n",
"\n",
"# Given a data batch, we define a function that splits this batch and copies each part into the corresponding GPU.\n",
"def split_and_load(data, ctx):\n",
"    n, k = data.shape[0], len(ctx)\n",
" assert (n//k)*k == n, '# examples is not divided by # devices'\n",
"    idx = list(range(0, n+1, n//k))\n",
"    return [data[idx[i]:idx[i+1]].as_in_context(ctx[i]) for i in range(k)]\n",
"\n",
"\n",
"def train_batch(batch, params, ctx, lr):\n",
"    # split the data batch and load them on GPUs\n",
"    data = split_and_load(batch.data[0], ctx)\n",
"    label = split_and_load(batch.label[0], ctx)\n",
"    # run forward on each GPU\n",
"    with autograd.record():\n",
"        losses = [loss(lenet(X, W), Y)\n",
"                  for X, Y, W in zip(data, label, params)]\n",
"    # run backward on each gpu\n",
"    for l in losses:\n",
"        l.backward()\n",
"    # aggregate gradient over GPUs\n",
"    for i in range(len(params[0])):\n",
"        allreduce([params[c][i].grad for c in range(len(ctx))])\n",
"    # update parameters with SGD on each GPU\n",
"    for p in params:\n",
"        SGD(p, lr/batch.data[0].shape[0])\n",
"\n",
"\n",
"\n",
"# initialize parameters\n",
"scale = .01\n",
"W1 = nd.random_normal(shape=(20,1,3,3))*scale\n",
"b1 = nd.zeros(shape=20)\n",
"W2 = nd.random_normal(shape=(50,20,5,5))*scale\n",
"b2 = nd.zeros(shape=50)\n",
"W3 = nd.random_normal(shape=(800,128))*scale\n",
"b3 = nd.zeros(shape=128)\n",
"W4 = nd.random_normal(shape=(128,10))*scale\n",
"b4 = nd.zeros(shape=10)\n",
"\n",
"params = [W1, b1, W2, b2, W3, b3, W4, b4]\n",
"\n",
"\n",
"# network and loss\n",
"def lenet(X, params):\n",
"    # first conv\n",
"    h1_conv = nd.Convolution(data=X, weight=params[0], bias=params[1], kernel=(3,3), num_filter=20)\n",
"    h1_activation = nd.relu(h1_conv)\n",
"    h1 = nd.Pooling(data=h1_activation, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n",
"    # second conv\n",
"    h2_conv = nd.Convolution(data=h1, weight=params[2], bias=params[3], kernel=(5,5), num_filter=50)\n",
"    h2_activation = nd.relu(h2_conv)\n",
"    h2 = nd.Pooling(data=h2_activation, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n",
"    h2 = nd.flatten(h2)\n",
"    # first fullc\n",
"    h3_linear = nd.dot(h2, params[4]) + params[5]\n",
"    h3 = nd.relu(h3_linear)\n",
"    # second fullc\n",
"    yhat = nd.dot(h3, params[6]) + params[7]\n",
"    return yhat\n",
"\n",
"loss = gluon.loss.SoftmaxCrossEntropyLoss()\n",
"\n",
"# plain SGD\n",
"def SGD(params, lr):\n",
"    for p in params:\n",
"        p[:] = p - lr * p.grad\n",
"\n",
"\n",
"\n",
"# The following function copies the parameters into a particular GPU and initializes the gradients.\n",
"def get_params(params, ctx):\n",
"    new_params = [p.copyto(ctx) for p in params]\n",
"    for p in new_params:\n",
"        p.attach_grad()\n",
"    return new_params\n",
"\n",
"\n",
"\n",
"# For inference, we simply let it run on the first GPU. We leave a data parallelism implementation as an exercise.\n",
"def valid_batch(batch, params, ctx):\n",
"    data = batch.data[0].as_in_context(ctx[0])\n",
"    pred = nd.argmax(lenet(data, params[0]), axis=1)\n",
"    return nd.sum(pred == batch.label[0].as_in_context(ctx[0])).asscalar()\n",
"\n",
"\n",
"\n",
"\n",
"def run(num_gpus, batch_size, lr):\n",
" # the list of GPUs will be used\n",
"    ctx = [gpu(i) for i in range(num_gpus)]\n",
"    print('Running on {}'.format(ctx))\n",
"\n",
"    # data iterator\n",
"    mnist = get_mnist()\n",
"    train_data = NDArrayIter(mnist[\"train_data\"], mnist[\"train_label\"], batch_size)\n",
"    valid_data = NDArrayIter(mnist[\"test_data\"], mnist[\"test_label\"], batch_size)\n",
"    print('Batch size is {}'.format(batch_size))\n",
"\n",
"    # copy parameters to all GPUs\n",
"    dev_params = [get_params(params, c) for c in ctx]\n",
"    for epoch in range(5):\n",
"        # train\n",
"        start = time()\n",
"        train_data.reset()\n",
"        for batch in train_data:\n",
"            train_batch(batch, dev_params, ctx, lr)\n",
" nd.waitall() # wait all computations are finished to benchmark the time\n",
"        print('Epoch %d, training time = %.1f sec'%(epoch, time()-start))\n",
"\n",
"        # validating\n",
"        valid_data.reset()\n",
"        correct, num = 0.0, 0.0\n",
"        for batch in valid_data:\n",
"            correct += valid_batch(batch, dev_params, ctx)\n",
"            num += batch.data[0].shape[0]\n",
"        print(' validation accuracy = %.4f'%(correct/num))\n",
"\n",
"\n",
"\n",
"GPU_COUNT = 1  # increase if you have more\n",
"\n",
"# RUN\n",
"run(GPU_COUNT, 64*GPU_COUNT, .3)\n"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Running on [gpu(0)]\n",
"Batch size is 64\n",
"Epoch 0, training time = 5.1 sec\n",
" validation accuracy = 0.9633\n",
"Epoch 1, training time = 5.0 sec\n",
" validation accuracy = 0.9807\n",
"Epoch 2, training time = 4.9 sec\n",
" validation accuracy = 0.9828\n",
"Epoch 3, training time = 4.9 sec\n",
" validation accuracy = 0.9856\n",
"Epoch 4, training time = 4.9 sec\n",
" validation accuracy = 0.9839\n"
],
"name": "stdout"
}
]
},
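{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"A minimal sanity check for the helpers above (an illustrative sketch, not part of the original run): it reuses `nd`, `split_and_load`, and `allreduce` from the previous cell, and substitutes two distinct CPU contexts for GPUs so the data flow can be inspected on any machine.\n"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Sketch (assumes the cell above has been run): two CPU contexts stand in for two GPUs.\n",
"from mxnet import cpu\n",
"\n",
"ctx_demo = [cpu(0), cpu(1)]\n",
"batch_demo = nd.arange(8).reshape((4, 2))\n",
"shards = split_and_load(batch_demo, ctx_demo)  # two (2, 2) shards, one per context\n",
"print([(s.shape, s.context) for s in shards])\n",
"\n",
"grads = [nd.ones((2, 2), ctx=c) for c in ctx_demo]\n",
"allreduce(grads)  # every entry is now the sum over contexts, i.e. 2.0\n",
"print(grads[0])"
],
"execution_count": 0,
"outputs": []
},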
{
"metadata": {
"id": "WKYIZbnJU4eN",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"# Handwritten Digit Recognition [CPU use]\n",
"\n",
"Inspiration: \n",
"* https://mxnet.incubator.apache.org/versions/master/tutorials/python/mnist.html\n",
"* https://mxnet.incubator.apache.org/versions/master/tutorials/python/mnist.ipynb\n",
"\n",
"In this tutorial, we'll give you a step by step walk-through of how to build a hand-written digit classifier using the [MNIST](https://en.wikipedia.org/wiki/MNIST_database) dataset. For someone new to deep learning, this exercise is arguably the \"Hello World\" equivalent."
]
},
{
"metadata": {
"id": "MiUlshQffDHz",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!pip install mxnet"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Gg4R9wtDTzVh",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1475
},
"outputId": "170deff3-c039-415a-bbd9-a9b9d19f87ce"
},
"cell_type": "code",
"source": [
"import mxnet as mx\n",
"\n",
"\n",
"mnist = mx.test_utils.get_mnist()\n",
"\n",
"\n",
"\n",
"# Fix the seed\n",
"mx.random.seed(42)\n",
"\n",
"# Set the compute context, GPU is available otherwise CPU\n",
"# ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()\n",
"ctx = mx.cpu()\n",
"\n",
"\n",
"\n",
"batch_size = 100\n",
"train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)\n",
"val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)\n",
"\n",
"\n",
"\n",
"data = mx.sym.var('data')\n",
"# Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height)\n",
"data = mx.sym.flatten(data=data)\n",
"\n",
"\n",
"\n",
"# The first fully-connected layer and the corresponding activation function\n",
"fc1 = mx.sym.FullyConnected(data=data, num_hidden=128)\n",
"act1 = mx.sym.Activation(data=fc1, act_type=\"relu\")\n",
"\n",
"# The second fully-connected layer and the corresponding activation function\n",
"fc2 = mx.sym.FullyConnected(data=act1, num_hidden=64)\n",
"act2 = mx.sym.Activation(data=fc2, act_type=\"relu\")\n",
"\n",
"\n",
"# MNIST has 10 classes\n",
"fc3 = mx.sym.FullyConnected(data=act2, num_hidden=10)\n",
"# Softmax with cross entropy loss\n",
"mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')\n",
"\n",
"\n",
"\n",
"import logging\n",
"logging.getLogger().setLevel(logging.DEBUG)  # logging to stdout\n",
"# create a trainable module on compute context\n",
"mlp_model = mx.mod.Module(symbol=mlp, context=ctx)\n",
"mlp_model.fit(train_iter,  # train data\n",
"              eval_data=val_iter,  # validation data\n",
"              optimizer='sgd',  # use SGD to train\n",
"              optimizer_params={'learning_rate':0.1},  # use fixed learning rate\n",
"              eval_metric='acc',  # report accuracy during training\n",
"              batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # output progress for each 100 data batches\n",
"              num_epoch=10)  # train for at most 10 dataset passes\n",
"\n",
"\n",
"\n",
"test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)\n",
"prob = mlp_model.predict(test_iter)\n",
"assert prob.shape == (10000, 10)\n",
"\n",
"\n",
"test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)\n",
"# predict accuracy of mlp\n",
"acc = mx.metric.Accuracy()\n",
"mlp_model.score(test_iter, acc)\n",
"print(acc)"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:root:Epoch[0] Batch [100]\tSpeed: 44518.15 samples/sec\taccuracy=0.114059\n",
"INFO:root:Epoch[0] Batch [200]\tSpeed: 42243.82 samples/sec\taccuracy=0.112900\n",
"INFO:root:Epoch[0] Batch [300]\tSpeed: 44004.01 samples/sec\taccuracy=0.108800\n",
"INFO:root:Epoch[0] Batch [400]\tSpeed: 42883.56 samples/sec\taccuracy=0.112100\n",
"INFO:root:Epoch[0] Batch [500]\tSpeed: 43843.43 samples/sec\taccuracy=0.137200\n",
"INFO:root:Epoch[0] Train-accuracy=0.221111\n",
"INFO:root:Epoch[0] Time cost=1.396\n",
"INFO:root:Epoch[0] Validation-accuracy=0.297000\n",
"INFO:root:Epoch[1] Batch [100]\tSpeed: 44087.78 samples/sec\taccuracy=0.452970\n",
"INFO:root:Epoch[1] Batch [200]\tSpeed: 43527.35 samples/sec\taccuracy=0.699900\n",
"INFO:root:Epoch[1] Batch [300]\tSpeed: 44139.42 samples/sec\taccuracy=0.772100\n",
"INFO:root:Epoch[1] Batch [400]\tSpeed: 43953.38 samples/sec\taccuracy=0.807200\n",
"INFO:root:Epoch[1] Batch [500]\tSpeed: 43395.98 samples/sec\taccuracy=0.821900\n",
"INFO:root:Epoch[1] Train-accuracy=0.840000\n",
"INFO:root:Epoch[1] Time cost=1.387\n",
"INFO:root:Epoch[1] Validation-accuracy=0.854100\n",
"INFO:root:Epoch[2] Batch [100]\tSpeed: 44921.13 samples/sec\taccuracy=0.857327\n",
"INFO:root:Epoch[2] Batch [200]\tSpeed: 42768.30 samples/sec\taccuracy=0.870000\n",
"INFO:root:Epoch[2] Batch [300]\tSpeed: 43300.78 samples/sec\taccuracy=0.885600\n",
"INFO:root:Epoch[2] Batch [400]\tSpeed: 42015.98 samples/sec\taccuracy=0.895400\n",
"INFO:root:Epoch[2] Batch [500]\tSpeed: 40254.64 samples/sec\taccuracy=0.900800\n",
"INFO:root:Epoch[2] Train-accuracy=0.909091\n",
"INFO:root:Epoch[2] Time cost=1.468\n",
"INFO:root:Epoch[2] Validation-accuracy=0.918300\n",
"INFO:root:Epoch[3] Batch [100]\tSpeed: 35172.51 samples/sec\taccuracy=0.917822\n",
"INFO:root:Epoch[3] Batch [200]\tSpeed: 33777.88 samples/sec\taccuracy=0.925900\n",
"INFO:root:Epoch[3] Batch [300]\tSpeed: 40715.47 samples/sec\taccuracy=0.923800\n",
"INFO:root:Epoch[3] Batch [400]\tSpeed: 42939.10 samples/sec\taccuracy=0.931400\n",
"INFO:root:Epoch[3] Batch [500]\tSpeed: 42444.22 samples/sec\taccuracy=0.931500\n",
"INFO:root:Epoch[3] Train-accuracy=0.934242\n",
"INFO:root:Epoch[3] Time cost=1.541\n",
"INFO:root:Epoch[3] Validation-accuracy=0.941400\n",
"INFO:root:Epoch[4] Batch [100]\tSpeed: 44073.19 samples/sec\taccuracy=0.941485\n",
"INFO:root:Epoch[4] Batch [200]\tSpeed: 43231.87 samples/sec\taccuracy=0.944200\n",
"INFO:root:Epoch[4] Batch [300]\tSpeed: 42820.52 samples/sec\taccuracy=0.939900\n",
"INFO:root:Epoch[4] Batch [400]\tSpeed: 43408.24 samples/sec\taccuracy=0.948900\n",
"INFO:root:Epoch[4] Batch [500]\tSpeed: 42745.98 samples/sec\taccuracy=0.947900\n",
"INFO:root:Epoch[4] Train-accuracy=0.948788\n",
"INFO:root:Epoch[4] Time cost=1.401\n",
"INFO:root:Epoch[4] Validation-accuracy=0.950600\n",
"INFO:root:Epoch[5] Batch [100]\tSpeed: 43826.90 samples/sec\taccuracy=0.952574\n",
"INFO:root:Epoch[5] Batch [200]\tSpeed: 42745.85 samples/sec\taccuracy=0.956700\n",
"INFO:root:Epoch[5] Batch [300]\tSpeed: 43263.62 samples/sec\taccuracy=0.952600\n",
"INFO:root:Epoch[5] Batch [400]\tSpeed: 43433.46 samples/sec\taccuracy=0.959300\n",
"INFO:root:Epoch[5] Batch [500]\tSpeed: 43419.97 samples/sec\taccuracy=0.957500\n",
"INFO:root:Epoch[5] Train-accuracy=0.957071\n",
"INFO:root:Epoch[5] Time cost=1.399\n",
"INFO:root:Epoch[5] Validation-accuracy=0.957900\n",
"INFO:root:Epoch[6] Batch [100]\tSpeed: 43489.12 samples/sec\taccuracy=0.961782\n",
"INFO:root:Epoch[6] Batch [200]\tSpeed: 43002.23 samples/sec\taccuracy=0.963800\n",
"INFO:root:Epoch[6] Batch [300]\tSpeed: 43164.15 samples/sec\taccuracy=0.959900\n",
"INFO:root:Epoch[6] Batch [400]\tSpeed: 43548.63 samples/sec\taccuracy=0.966800\n",
"INFO:root:Epoch[6] Batch [500]\tSpeed: 42582.33 samples/sec\taccuracy=0.964600\n",
"INFO:root:Epoch[6] Train-accuracy=0.964141\n",
"INFO:root:Epoch[6] Time cost=1.407\n",
"INFO:root:Epoch[6] Validation-accuracy=0.963600\n",
"INFO:root:Epoch[7] Batch [100]\tSpeed: 44449.03 samples/sec\taccuracy=0.969010\n",
"INFO:root:Epoch[7] Batch [200]\tSpeed: 42306.92 samples/sec\taccuracy=0.967500\n",
"INFO:root:Epoch[7] Batch [300]\tSpeed: 43638.66 samples/sec\taccuracy=0.964300\n",
"INFO:root:Epoch[7] Batch [400]\tSpeed: 43452.94 samples/sec\taccuracy=0.971500\n",
"INFO:root:Epoch[7] Batch [500]\tSpeed: 43321.71 samples/sec\taccuracy=0.971400\n",
"INFO:root:Epoch[7] Train-accuracy=0.970505\n",
"INFO:root:Epoch[7] Time cost=1.394\n",
"INFO:root:Epoch[7] Validation-accuracy=0.965000\n",
"INFO:root:Epoch[8] Batch [100]\tSpeed: 43498.46 samples/sec\taccuracy=0.973465\n",
"INFO:root:Epoch[8] Batch [200]\tSpeed: 42327.50 samples/sec\taccuracy=0.972800\n",
"INFO:root:Epoch[8] Batch [300]\tSpeed: 43347.19 samples/sec\taccuracy=0.970400\n",
"INFO:root:Epoch[8] Batch [400]\tSpeed: 41508.28 samples/sec\taccuracy=0.976000\n",
"INFO:root:Epoch[8] Batch [500]\tSpeed: 42635.14 samples/sec\taccuracy=0.975600\n",
"INFO:root:Epoch[8] Train-accuracy=0.974242\n",
"INFO:root:Epoch[8] Time cost=1.426\n",
"INFO:root:Epoch[8] Validation-accuracy=0.966000\n",
"INFO:root:Epoch[9] Batch [100]\tSpeed: 44216.75 samples/sec\taccuracy=0.976931\n",
"INFO:root:Epoch[9] Batch [200]\tSpeed: 43095.63 samples/sec\taccuracy=0.976500\n",
"INFO:root:Epoch[9] Batch [300]\tSpeed: 43557.18 samples/sec\taccuracy=0.975600\n",
"INFO:root:Epoch[9] Batch [400]\tSpeed: 46465.68 samples/sec\taccuracy=0.979700\n",
"INFO:root:Epoch[9] Batch [500]\tSpeed: 46187.23 samples/sec\taccuracy=0.980000\n",
"INFO:root:Epoch[9] Train-accuracy=0.976970\n",
"INFO:root:Epoch[9] Time cost=1.354\n",
"INFO:root:Epoch[9] Validation-accuracy=0.966600\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"EvalMetric: {'accuracy': 0.9666}\n"
],
"name": "stdout"
}
]
},
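{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"A minimal follow-up sketch (illustrative only, not executed in the original run): reuse the trained module to inspect a single prediction. It assumes `mx`, `mnist`, `mlp_model`, and `batch_size` from the cell above are still in scope.\n"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Sketch: classify the first test image with the trained module.\n",
"test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)\n",
"prob = mlp_model.predict(test_iter)  # NDArray of shape (10000, 10), as asserted above\n",
"pred = int(prob[0].asnumpy().argmax())  # class with the highest score for image 0\n",
"print('predicted digit:', pred, '| true label:', int(mnist['test_label'][0]))"
],
"execution_count": 0,
"outputs": []
},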
{
"metadata": {
"id": "l59kJSiIUjcK",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"outputId": "b8f27c16-92ea-406f-e120-e45472e89e89"
},
"cell_type": "code",
"source": [
""
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"EvalMetric: {'accuracy': 0.9674}\n"
],
"name": "stdout"
}
]
}
]
}