Last active
February 19, 2019 18:20
-
-
Save krishnachouhan/ff820f5b5631d8b1a764de6783bf6536 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"##" | |
] | |
}, | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"toc": true | |
}, | |
"source": [ | |
"<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", | |
"<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Snippets\" data-toc-modified-id=\"Snippets-1\"><span class=\"toc-item-num\">1 </span>Snippets</a></span><ul class=\"toc-item\"><li><span><a href=\"#Imports\" data-toc-modified-id=\"Imports-1.1\"><span class=\"toc-item-num\">1.1 </span>Imports</a></span></li><li><span><a href=\"#Code\" data-toc-modified-id=\"Code-1.2\"><span class=\"toc-item-num\">1.2 </span>Code</a></span><ul class=\"toc-item\"><li><span><a href=\"#GroupBy\" data-toc-modified-id=\"GroupBy-1.2.1\"><span class=\"toc-item-num\">1.2.1 </span>GroupBy</a></span></li><li><span><a href=\"#To-DateTime\" data-toc-modified-id=\"To-DateTime-1.2.2\"><span class=\"toc-item-num\">1.2.2 </span>To DateTime</a></span></li><li><span><a href=\"#List-Containing-Dates\" data-toc-modified-id=\"List-Containing-Dates-1.2.3\"><span class=\"toc-item-num\">1.2.3 </span>List Containing Dates</a></span></li><li><span><a href=\"#To-Csv\" data-toc-modified-id=\"To-Csv-1.2.4\"><span class=\"toc-item-num\">1.2.4 </span>To Csv</a></span></li></ul></li><li><span><a href=\"#MAPS\" data-toc-modified-id=\"MAPS-1.3\"><span class=\"toc-item-num\">1.3 </span>MAPS</a></span><ul class=\"toc-item\"><li><span><a href=\"#Plot-Size\" data-toc-modified-id=\"Plot-Size-1.3.1\"><span class=\"toc-item-num\">1.3.1 </span>Plot Size</a></span></li><li><span><a href=\"#Bar-Graph-1\" data-toc-modified-id=\"Bar-Graph-1-1.3.2\"><span class=\"toc-item-num\">1.3.2 </span>Bar Graph-1</a></span></li><li><span><a href=\"#Bar-Graph--2\" data-toc-modified-id=\"Bar-Graph--2-1.3.3\"><span class=\"toc-item-num\">1.3.3 </span>Bar Graph -2</a></span></li></ul></li><li><span><a href=\"#ANALYTICS\" data-toc-modified-id=\"ANALYTICS-1.4\"><span class=\"toc-item-num\">1.4 </span>ANALYTICS</a></span><ul class=\"toc-item\"><li><span><a href=\"#Mape\" data-toc-modified-id=\"Mape-1.4.1\"><span class=\"toc-item-num\">1.4.1 </span>Mape</a></span></li><li><span><a href=\"#ACF-PACF-ADF\" 
data-toc-modified-id=\"ACF-PACF-ADF-1.4.2\"><span class=\"toc-item-num\">1.4.2 </span>ACF PACF ADF</a></span></li><li><span><a href=\"#Mean-and-Windowed-Mean\" data-toc-modified-id=\"Mean-and-Windowed-Mean-1.4.3\"><span class=\"toc-item-num\">1.4.3 </span>Mean and Windowed Mean</a></span></li><li><span><a href=\"#Ensemble-Function\" data-toc-modified-id=\"Ensemble-Function-1.4.4\"><span class=\"toc-item-num\">1.4.4 </span>Ensemble Function</a></span></li></ul></li></ul></li></ul></div>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Snippets" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn import metrics\n", | |
"from sklearn.metrics import accuracy_score\n", | |
"\n", | |
"# from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor\n", | |
"\n", | |
"# Graphs and Viz\n", | |
"from matplotlib import pyplot\n", | |
"import matplotlib.pyplot as plt\n", | |
"from matplotlib import pylab\n", | |
"%matplotlib inline\n", | |
"import seaborn as sns\n", | |
"sns.set_style('whitegrid')\n", | |
"sns.set(color_codes=True)\n", | |
"\n", | |
"# Analytics\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"\n", | |
"# TIME-SERIES\n", | |
"from statsmodels.tsa.arima_model import ARIMA\n", | |
"from statsmodels.tsa.stattools import adfuller\n", | |
"from statsmodels.graphics.tsaplots import plot_acf\n", | |
"from statsmodels.graphics.tsaplots import plot_pacf\n", | |
"\n", | |
"# import xgboost as xgb\n", | |
"from sklearn import grid_search\n", | |
"\n", | |
"# DataBase\n", | |
"import psycopg2\n", | |
"\n", | |
"# Utility\n", | |
"import os\n", | |
"import re\n", | |
"import itertools\n", | |
"from math import sqrt\n", | |
"from json import dump\n", | |
"from os import linesep\n", | |
"import time\n", | |
"import datetime\n", | |
"import tqdm\n", | |
"from tqdm import tnrange, tqdm_notebook\n", | |
"\n", | |
"# Documentation and Presentation\n", | |
"# import doc\n", | |
"\n", | |
"\n", | |
"# def docs(a):\n", | |
"# print(a.__doc__)\n", | |
"\n", | |
"from IPython.core.display import display, HTML\n", | |
"display(HTML('<style>.container { width:90% !important; }</style>'))\n" ] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Code" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### GroupBy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"###INPUT:\n", | |
"# Date\tQty\n", | |
"# 0\t2014-09-01\t5\n", | |
"# 1\t2014-10-01\t5\n", | |
"# 2\t2014-10-01\t10\n", | |
"# 3\t2014-10-01\t19\n", | |
"# 4\t2014-10-01\t19\n", | |
"# 5\t2014-10-01\t8\n", | |
"# 6\t2014-11-01\t9\n", | |
"# 7\t2014-11-01\t10\n", | |
"# 8\t2014-11-01\t4\n", | |
"# 9\t2014-11-01\t3\n", | |
"# 10\t2014-12-01\t19\n", | |
"# 11\t2014-12-01\t13\n", | |
"# 12\t2014-12-01\t3\n", | |
"# 13\t2014-12-01\t19\n", | |
"# 14\t2014-12-01\t10\n", | |
"# 15\t2014-12-01\t13\n", | |
"# 16\t2015-01-01\t14\n", | |
"# 17\t2015-01-01\t11\n", | |
"# 18\t2015-01-01\t13\n", | |
"\n", | |
"###OUTPUT:\n", | |
"# Date\tCount\tQty\tEffective Qty\n", | |
"# 0\t2014-09-01\t1\t5\t5.000000\n", | |
"# 1\t2014-10-01\t5\t61\t12.200000\n", | |
"# 2\t2014-11-01\t4\t26\t6.500000\n", | |
"# 3\t2014-12-01\t6\t77\t12.833333\n", | |
"# 4\t2015-01-01\t3\t38\t12.666667\n", | |
"# 5\t2015-02-01\t3\t57\t19.000000\n", | |
"\n", | |
"data_splitted = data.groupby('Date').agg({\n", | |
"'Date': 'count',\n", | |
"'Qty': 'sum'\n", | |
"}).rename(columns={\n", | |
"'Date': 'Count'\n", | |
"}).reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### To DateTime" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data[\"Date\"] = pd.to_datetime(data[\"Date\"])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### List Containing Dates" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"time_list = np.arange(\"2005-01-01\", \"2014-01-01\", np.timedelta64(1, 'Y'), dtype=\"datetime64[Y]\" )\n", | |
"time_list = list( map(pd.to_datetime, time_list) )" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### To Csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"Required_Col_data.to_csv(\"./arima/Required_Col_data.csv\", index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MAPS" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Plot Size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Setting Plot size\n", | |
"plt.rcParams[\"figure.figsize\"] = [16,9]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Bar Graph-1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"all_accuracy = 1 - 0.70\n", | |
"G17_accuracy = 1 - 0.60\n", | |
"G18_accuracy = 1 - 0.10\n", | |
"G17_G18_accuracy = 1 - ((0.6 + 0.10) / 2)\n", | |
"# data to plot\n", | |
"n_groups = 4\n", | |
"mape_full = (1, 1, 1, 1)\n", | |
"mape_calc = (all_accuracy, G17_accuracy, G18_accuracy, G17_G18_accuracy )\n", | |
"\n", | |
"# create plot\n", | |
"fig, ax = plt.subplots()\n", | |
"index = np.arange(n_groups)\n", | |
"bar_width = 0.35\n", | |
"opacity = 0.8\n", | |
"\n", | |
"rects1 = plt.bar( index, mape_full, bar_width, alpha=opacity, color='b', label='Acutal')\n", | |
"rects2 = plt.bar( index + bar_width, mape_calc, bar_width, alpha=opacity, color='g', label='Mean')\n", | |
"\n", | |
"for v, i in enumerate(mape_calc):\n", | |
" ax.text(v + 0.25, i , str(int(mape_calc[v]*100+1)), color='blue', fontweight='bold', fontsize=24)\n", | |
"\n", | |
"plt.xlabel('Part-LEad')\n", | |
"plt.ylabel('Mean Predicted')\n", | |
"plt.title('Accuracy by simple mean calculation')\n", | |
"plt.xticks(index + bar_width, ('All'+str(), 'G17'+str(), 'G18'+str(), 'G17&G18'+str()) )\n", | |
"\n", | |
"plt.legend()\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Bar Graph -2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"all_accuracy = 1 - 0.70\n", | |
"G17_accuracy = 1 - 0.60\n", | |
"G18_accuracy = 1 - 0.10\n", | |
"G17_G18_accuracy = 1 - ((0.6 + 0.10) / 2)\n", | |
"# data to plot\n", | |
"n_groups = 4\n", | |
"mape_full = (1, 1, 1, 1)\n", | |
"mape_calc = (all_accuracy, G17_accuracy, G18_accuracy, G17_G18_accuracy )\n", | |
"\n", | |
"# create plot\n", | |
"fig, ax = plt.subplots()\n", | |
"index = np.arange(n_groups)\n", | |
"bar_width = 0.35\n", | |
"opacity = 0.8\n", | |
"\n", | |
"rects1 = plt.bar( index, mape_full, bar_width, alpha=opacity, color='salmon', label='Acutal')\n", | |
"rects2 = plt.bar( index , mape_calc, bar_width, alpha=opacity, color='g', label='Mean')\n", | |
"\n", | |
"for v, i in enumerate(mape_calc):\n", | |
" ax.text(v + 0.25, i , str(int(mape_calc[v]*100+1)), color='blue', fontweight='bold', fontsize=24)\n", | |
"\n", | |
"plt.xlabel('Part-LEad')\n", | |
"plt.ylabel('Mean Predicted')\n", | |
"plt.title('Accuracy by simple mean calculation')\n", | |
"plt.xticks(index , ('All'+str(), 'G17'+str(), 'G18'+str(), 'G17&G18'+str()) )\n", | |
"\n", | |
"plt.legend()\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## ANALYTICS" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Mape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### ACF PACF ADF" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# AutoCorrelation Function (ACF)\n", | |
"# Partial AutoCorrelation Function (PACF)\n", | |
"# Augmented Dickey-Fuller test (ADF)\n", | |
"\n", | |
"#ACF\n", | |
"from statsmodels.graphics.tsaplots import plot_acf\n", | |
"series = pd.Series.from_csv(\"./arima/Required_Col_data.csv\", header=0)\n", | |
"temp = plot_acf(series)\n", | |
"# pyplot.show()\n", | |
"\n", | |
"#PACF\n", | |
"from statsmodels.graphics.tsaplots import plot_pacf\n", | |
"series = pd.Series.from_csv(\"./arima/Required_Col_data.csv\", header=0)\n", | |
"temp = plot_pacf(series)\n", | |
"# pyplot.show()\n", | |
"\n", | |
"#ADF\n", | |
"from statsmodels.tsa.stattools import adfuller\n", | |
"series = pd.Series.from_csv(\"./arima/Required_Col_data.csv\", header=None)\n", | |
"X = series.values\n", | |
"result = adfuller(X)\n", | |
"print('ADF Statistic: %f' % result[0])\n", | |
"print('p-value: %f' % result[1])\n", | |
"print('Critical Values:')\n", | |
"for key, value in result[4].items():\n", | |
"print('\\t%s: %.3f' % (key, value))\n", | |
"\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Mean and Windowed Mean" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Ensemble Function" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def ensemble(estimates=[], actuals=[], errorMethod=\"RMSE\", MapeCutOff=1, loops = 0, importance_leveled=False):\n", | |
" \"\"\"\n", | |
" Ensembles: Estimates the coefficients used to combine multiple models.\n", | |
" Inputs:\n", | |
" estimates: List of estimated results. [List of List of values]\n", | |
" actuals: List of actual results. [List of values]\n", | |
" errorMethod: RMSE, MAPE, Difference. [Default: \"RMSE\"]\n", | |
" MapeCutOff: Value to divide by while calculating MAPE with zero or a certain threshold: (abs(act-pred))/MapeCutOff.\n", | |
" loops: Number of loops to run to mature the coefficients. [Default 0: continues till saturation]\n", | |
" importance_leveled: BETA functionality. Importance of models is in order of estimates list.\n", | |
" Output:\n", | |
" List of Coefficients.\n", | |
" \"\"\"\n", | |
"\n", | |
" if estimates != actual:\n", | |
" print(\"Error: Estimated and Actual value have different dimensions\")\n", | |
"\n", | |
" coeff = [1]*len(estimates)\n", | |
" delta = [0]*len(estimates)\n", | |
"\n", | |
" loop_count = 0\n", | |
" while saturation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with tqdm_notebook(total=total_count ) as pbar:\n", | |
" for product in new_df_grouped.keys():\n", | |
" part = product \n", | |
" parts_df = new_df_grouped[product]\n", | |
"\n", | |
" pbar_count += 1\n", | |
" pbar.update(pbar_count)\n" | |
] | |
} | |
], | |
"metadata": { | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": true, | |
"toc_position": { | |
"height": "calc(100% - 180px)", | |
"left": "10px", | |
"top": "150px", | |
"width": "326px" | |
}, | |
"toc_section_display": true, | |
"toc_window_display": true | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
}, | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment