1
0
mirror of https://github.com/mmaithani/data-science.git synced 2022-04-24 02:56:41 +03:00
Files
gluonts-forecasting-noteboo…/Gluonts_twitter_volume_forecasting.ipynb
2020-12-29 06:53:30 +05:30

369 lines
30 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Gluonts twitter volume forecasting.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyPnECKy9/9x3uizOlZK+rEl",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mmaithani/data-science/blob/main/Gluonts_twitter_volume_forecasting.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "E3_PpfKsEYMS",
"outputId": "7c7222bc-100e-44bf-fc6a-774afa77d2e6"
},
"source": [
"# Pin the release used for the recorded run: the imports below rely on\n",
"# `gluonts.trainer`, which later GluonTS releases moved to `gluonts.mx.trainer`.\n",
"%pip install gluonts==0.6.4"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting gluonts\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/60/b4/4893bddfebbab963ab6406e2c33a241359463b25549a8a9937f87aa7aba9/gluonts-0.6.4-py3-none-any.whl (569kB)\n",
"\u001b[K |████████████████████████████████| 573kB 5.7MB/s \n",
"\u001b[?25hRequirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.6/dist-packages (from gluonts) (3.2.2)\n",
"Requirement already satisfied: numpy~=1.16 in /usr/local/lib/python3.6/dist-packages (from gluonts) (1.19.4)\n",
"Collecting ujson~=1.35\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/16/c4/79f3409bc710559015464e5f49b9879430d8f87498ecdc335899732e5377/ujson-1.35.tar.gz (192kB)\n",
"\u001b[K |████████████████████████████████| 194kB 19.0MB/s \n",
"\u001b[?25hCollecting pydantic<1.7,~=1.1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/aa/5f/855412ad12817ae87f1c77d3af2fc384eaed3adfb8f3994816d75483fa20/pydantic-1.6.1-cp36-cp36m-manylinux2014_x86_64.whl (8.7MB)\n",
"\u001b[K |████████████████████████████████| 8.7MB 16.2MB/s \n",
"\u001b[?25hRequirement already satisfied: toolz~=0.10 in /usr/local/lib/python3.6/dist-packages (from gluonts) (0.11.1)\n",
"Requirement already satisfied: tqdm~=4.23 in /usr/local/lib/python3.6/dist-packages (from gluonts) (4.41.1)\n",
"Requirement already satisfied: holidays>=0.9 in /usr/local/lib/python3.6/dist-packages (from gluonts) (0.10.4)\n",
"Requirement already satisfied: pandas>=1.0 in /usr/local/lib/python3.6/dist-packages (from gluonts) (1.1.5)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib~=3.0->gluonts) (0.10.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib~=3.0->gluonts) (2.4.7)\n",
"Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib~=3.0->gluonts) (2.8.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib~=3.0->gluonts) (1.3.1)\n",
"Requirement already satisfied: dataclasses>=0.6; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from pydantic<1.7,~=1.1->gluonts) (0.8)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from holidays>=0.9->gluonts) (1.15.0)\n",
"Requirement already satisfied: convertdate<=2.2.0 in /usr/local/lib/python3.6/dist-packages (from holidays>=0.9->gluonts) (2.2.0)\n",
"Requirement already satisfied: korean-lunar-calendar in /usr/local/lib/python3.6/dist-packages (from holidays>=0.9->gluonts) (0.2.1)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=1.0->gluonts) (2018.9)\n",
"Requirement already satisfied: pymeeus<=1,>=0.3.6 in /usr/local/lib/python3.6/dist-packages (from convertdate<=2.2.0->holidays>=0.9->gluonts) (0.3.7)\n",
"Building wheels for collected packages: ujson\n",
" Building wheel for ujson (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for ujson: filename=ujson-1.35-cp36-cp36m-linux_x86_64.whl size=68034 sha256=170e662944b3fefb82cb255a0c01d6cf53ce10bcceece9f3319f2213e06fe7e9\n",
" Stored in directory: /root/.cache/pip/wheels/28/77/e4/0311145b9c2e2f01470e744855131f9e34d6919687550f87d1\n",
"Successfully built ujson\n",
"Installing collected packages: ujson, pydantic, gluonts\n",
"Successfully installed gluonts-0.6.4 pydantic-1.6.1 ujson-1.35\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9IZeKRpXEndu",
"outputId": "f0912c8e-bd4a-4d25-c620-4e4890c6c870"
},
"source": [
"# Pin the MXNet build that the recorded run installed, so a re-run gets the same backend.\n",
"%pip install mxnet==1.7.0.post1"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting mxnet\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/29/bb/54cbabe428351c06d10903c658878d29ee7026efbe45133fd133598d6eb6/mxnet-1.7.0.post1-py2.py3-none-manylinux2014_x86_64.whl (55.0MB)\n",
"\u001b[K |████████████████████████████████| 55.0MB 76kB/s \n",
"\u001b[?25hCollecting graphviz<0.9.0,>=0.8.1\n",
" Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/graphviz-0.8.4-py2.py3-none-any.whl\n",
"Requirement already satisfied: requests<3,>=2.20.0 in /usr/local/lib/python3.6/dist-packages (from mxnet) (2.23.0)\n",
"Requirement already satisfied: numpy<2.0.0,>1.16.0 in /usr/local/lib/python3.6/dist-packages (from mxnet) (1.19.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (1.24.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (2020.12.5)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (2.10)\n",
"Installing collected packages: graphviz, mxnet\n",
" Found existing installation: graphviz 0.10.1\n",
" Uninstalling graphviz-0.10.1:\n",
" Successfully uninstalled graphviz-0.10.1\n",
"Successfully installed graphviz-0.8.4 mxnet-1.7.0.post1\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vVNMTd-BEWHG"
},
"source": [
"from gluonts.dataset import common\r\n",
"from gluonts.model import deepar\r\n",
"from gluonts.trainer import Trainer\r\n",
"\r\n",
"import pandas as pd"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "zp2hdhzlFhlg",
"outputId": "49cc7e56-9c58-4798-9e5a-d71c1c56b05a"
},
"source": [
"# AMZN tweet-volume series from the Numenta Anomaly Benchmark (NAB) repository.\r\n",
"url = \"https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv\"\r\n",
"# Parse the timestamp index into datetimes so that later label-based slicing\r\n",
"# compares real timestamps instead of relying on string ordering.\r\n",
"df = pd.read_csv(url, header=0, index_col=0, parse_dates=True)\r\n",
"df.head()"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timestamp</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-26 21:42:53</th>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-26 21:47:53</th>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-26 21:52:53</th>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-26 21:57:53</th>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-26 22:02:53</th>\n",
" <td>93</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value\n",
"timestamp \n",
"2015-02-26 21:42:53 57\n",
"2015-02-26 21:47:53 43\n",
"2015-02-26 21:52:53 55\n",
"2015-02-26 21:57:53 64\n",
"2015-02-26 22:02:53 93"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "AAMItQe-FmmX"
},
"source": [
"# Wrap the series up to the cutoff timestamp as a single-entry GluonTS dataset.\r\n",
"data = common.ListDataset(\r\n",
"    [{\"start\": df.index[0], \"target\": df.value[:\"2015-04-05 00:00:00\"]}],\r\n",
"    freq=\"5min\",\r\n",
")"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Gqxl1PoDFpjh",
"outputId": "fddc6dcf-d859-48a2-a561-cc3f546a6685"
},
"source": [
"# Fit a DeepAR estimator for 10 epochs; each forecast spans 12 steps at 5-minute frequency.\r\n",
"trainer = Trainer(epochs=10)\r\n",
"estimator = deepar.DeepAREstimator(\r\n",
"    trainer=trainer,\r\n",
"    prediction_length=12,\r\n",
"    freq=\"5min\",\r\n",
")\r\n",
"predictor = estimator.train(training_data=data)\r\n",
"\r\n",
"# predict() returns an iterator; take its first forecast.\r\n",
"prediction = next(predictor.predict(data))"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"\r 0%| | 0/50 [00:00<?, ?it/s]"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"learning rate from ``lr_scheduler`` has been overwritten by ``learning_rate`` in optimizer.\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:03<00:00, 15.25it/s, epoch=1/10, avg_epoch_loss=4.49]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.75it/s, epoch=2/10, avg_epoch_loss=4.09]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.64it/s, epoch=3/10, avg_epoch_loss=4.04]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.88it/s, epoch=4/10, avg_epoch_loss=4.02]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.62it/s, epoch=5/10, avg_epoch_loss=3.98]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.42it/s, epoch=6/10, avg_epoch_loss=3.97]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.48it/s, epoch=7/10, avg_epoch_loss=3.95]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.72it/s, epoch=8/10, avg_epoch_loss=3.96]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.62it/s, epoch=9/10, avg_epoch_loss=3.97]\n",
"100%|██████████| 50/50 [00:03<00:00, 15.68it/s, epoch=10/10, avg_epoch_loss=3.93]"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"[48.119385 45.482513 43.079456 40.907524 41.094902 38.321095 38.837597\n",
" 38.26018 39.68032 40.427383 42.762894 41.39221 ]\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0ig3yLnhF1l3",
"outputId": "b593b65c-c469-4f61-e684-b31ee5a4d660"
},
"source": [
"# Print the `mean` attribute of the forecast obtained in the training cell.\n",
"print(prediction.mean)"
],
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"text": [
"[48.119385 45.482513 43.079456 40.907524 41.094902 38.321095 38.837597\n",
" 38.26018 39.68032 40.427383 42.762894 41.39221 ]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 277
},
"id": "E27gbMFrEh0E",
"outputId": "4bdbba3c-31e8-4d35-c0a6-4ff0a17bd7c4"
},
"source": [
"# Plot the forecast; `output_file` is presumably where the figure is also saved\n",
"# (graph.png) - TODO confirm against the GluonTS Forecast.plot documentation.\n",
"prediction.plot(output_file='graph.png')"
],
"execution_count": 10,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "OBEvWiyjFRgK"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}