Skip to content

Instantly share code, notes, and snippets.

@krishnachouhan
Last active February 19, 2019 18:20
Show Gist options
  • Save krishnachouhan/ff820f5b5631d8b1a764de6783bf6536 to your computer and use it in GitHub Desktop.
Save krishnachouhan/ff820f5b5631d8b1a764de6783bf6536 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##"
]
},
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"toc": true
},
"source": [
"<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
"<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Snippets\" data-toc-modified-id=\"Snippets-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Snippets</a></span><ul class=\"toc-item\"><li><span><a href=\"#Imports\" data-toc-modified-id=\"Imports-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href=\"#Code\" data-toc-modified-id=\"Code-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Code</a></span><ul class=\"toc-item\"><li><span><a href=\"#GroupBy\" data-toc-modified-id=\"GroupBy-1.2.1\"><span class=\"toc-item-num\">1.2.1&nbsp;&nbsp;</span>GroupBy</a></span></li><li><span><a href=\"#To-DateTime\" data-toc-modified-id=\"To-DateTime-1.2.2\"><span class=\"toc-item-num\">1.2.2&nbsp;&nbsp;</span>To DateTime</a></span></li><li><span><a href=\"#List-Containing-Dates\" data-toc-modified-id=\"List-Containing-Dates-1.2.3\"><span class=\"toc-item-num\">1.2.3&nbsp;&nbsp;</span>List Containing Dates</a></span></li><li><span><a href=\"#To-Csv\" data-toc-modified-id=\"To-Csv-1.2.4\"><span class=\"toc-item-num\">1.2.4&nbsp;&nbsp;</span>To Csv</a></span></li></ul></li><li><span><a href=\"#MAPS\" data-toc-modified-id=\"MAPS-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>MAPS</a></span><ul class=\"toc-item\"><li><span><a href=\"#Plot-Size\" data-toc-modified-id=\"Plot-Size-1.3.1\"><span class=\"toc-item-num\">1.3.1&nbsp;&nbsp;</span>Plot Size</a></span></li><li><span><a href=\"#Bar-Graph-1\" data-toc-modified-id=\"Bar-Graph-1-1.3.2\"><span class=\"toc-item-num\">1.3.2&nbsp;&nbsp;</span>Bar Graph-1</a></span></li><li><span><a href=\"#Bar-Graph--2\" data-toc-modified-id=\"Bar-Graph--2-1.3.3\"><span class=\"toc-item-num\">1.3.3&nbsp;&nbsp;</span>Bar Graph -2</a></span></li></ul></li><li><span><a href=\"#ANALYTICS\" data-toc-modified-id=\"ANALYTICS-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>ANALYTICS</a></span><ul class=\"toc-item\"><li><span><a href=\"#Mape\" 
data-toc-modified-id=\"Mape-1.4.1\"><span class=\"toc-item-num\">1.4.1&nbsp;&nbsp;</span>Mape</a></span></li><li><span><a href=\"#ACF-PACF-ADF\" data-toc-modified-id=\"ACF-PACF-ADF-1.4.2\"><span class=\"toc-item-num\">1.4.2&nbsp;&nbsp;</span>ACF PACF ADF</a></span></li><li><span><a href=\"#Mean-and-Windowed-Mean\" data-toc-modified-id=\"Mean-and-Windowed-Mean-1.4.3\"><span class=\"toc-item-num\">1.4.3&nbsp;&nbsp;</span>Mean and Windowed Mean</a></span></li><li><span><a href=\"#Ensemble-Function\" data-toc-modified-id=\"Ensemble-Function-1.4.4\"><span class=\"toc-item-num\">1.4.4&nbsp;&nbsp;</span>Ensemble Function</a></span></li></ul></li></ul></li></ul></div>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Snippets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import metrics\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor\n",
"\n",
"# Graphs and Viz\n",
"from matplotlib import pyplot\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import pylab\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"sns.set_style('whitegrid')\n",
"sns.set(color_codes=True)\n",
"\n",
"# Analytics\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# TIME-SERIES\n",
"from statsmodels.tsa.arima_model import ARIMA\n",
"from statsmodels.tsa.stattools import adfuller\n",
"from statsmodels.graphics.tsaplots import plot_acf\n",
"from statsmodels.graphics.tsaplots import plot_pacf\n",
"\n",
"# import xgboost as xgb\n",
"# sklearn.grid_search was removed in scikit-learn 0.20; model_selection is its\n",
"# replacement, so fall back to it under the same name on newer installs.\n",
"try:\n",
"    from sklearn import grid_search\n",
"except ImportError:\n",
"    from sklearn import model_selection as grid_search\n",
"\n",
"# DataBase\n",
"import psycopg2\n",
"\n",
"# Utility\n",
"import os\n",
"import re\n",
"import itertools\n",
"from math import sqrt\n",
"from json import dump\n",
"from os import linesep\n",
"import time\n",
"import datetime\n",
"import tqdm\n",
"from tqdm import tnrange, tqdm_notebook\n",
"\n",
"# Documentation and Presentation\n",
"# import doc\n",
"\n",
"\n",
"# def docs(a):\n",
"# print(a.__doc__)\n",
"\n",
"# IPython.display is the public import location; IPython.core.display is deprecated for these names.\n",
"from IPython.display import display, HTML\n",
"# Widen the notebook container to 90% of the browser window.\n",
"display(HTML('<style>.container { width:90% !important; }</style>'))\n" ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Code"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GroupBy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"###INPUT:\n",
"# Date\tQty\n",
"# 0\t2014-09-01\t5\n",
"# 1\t2014-10-01\t5\n",
"# 2\t2014-10-01\t10\n",
"# 3\t2014-10-01\t19\n",
"# 4\t2014-10-01\t19\n",
"# 5\t2014-10-01\t8\n",
"# 6\t2014-11-01\t9\n",
"# 7\t2014-11-01\t10\n",
"# 8\t2014-11-01\t4\n",
"# 9\t2014-11-01\t3\n",
"# 10\t2014-12-01\t19\n",
"# 11\t2014-12-01\t13\n",
"# 12\t2014-12-01\t3\n",
"# 13\t2014-12-01\t19\n",
"# 14\t2014-12-01\t10\n",
"# 15\t2014-12-01\t13\n",
"# 16\t2015-01-01\t14\n",
"# 17\t2015-01-01\t11\n",
"# 18\t2015-01-01\t13\n",
"\n",
"###OUTPUT:\n",
"# Date\tCount\tQty\tEffective Qty\n",
"# 0\t2014-09-01\t1\t5\t5.000000\n",
"# 1\t2014-10-01\t5\t61\t12.200000\n",
"# 2\t2014-11-01\t4\t26\t6.500000\n",
"# 3\t2014-12-01\t6\t77\t12.833333\n",
"# 4\t2015-01-01\t3\t38\t12.666667\n",
"# 5\t2015-02-01\t3\t57\t19.000000\n",
"\n",
"# Aggregate only the Qty column per Date: `size` is the number of rows in the\n",
"# group (Count) and `sum` the total quantity (Qty). Aggregating the grouping\n",
"# column itself, as before, is fragile, so it is avoided here.\n",
"data_splitted = (\n",
"    data.groupby('Date')['Qty']\n",
"    .agg(['size', 'sum'])\n",
"    .rename(columns={'size': 'Count', 'sum': 'Qty'})\n",
"    .reset_index()\n",
")\n",
"# Effective Qty (shown in the OUTPUT sample above) is the mean quantity per row.\n",
"data_splitted['Effective Qty'] = data_splitted['Qty'] / data_splitted['Count']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### To DateTime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[\"Date\"] = pd.to_datetime(data[\"Date\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### List Containing Dates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One pandas Timestamp per year start, from 2005 up to (not including) 2014.\n",
"time_list = [\n",
"    pd.to_datetime(year_start)\n",
"    for year_start in np.arange(\"2005-01-01\", \"2014-01-01\", np.timedelta64(1, 'Y'), dtype=\"datetime64[Y]\")\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### To Csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Required_Col_data.to_csv(\"./arima/Required_Col_data.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MAPS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot Size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Setting Plot size\n",
"plt.rcParams[\"figure.figsize\"] = [16,9]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bar Graph-1"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"all_accuracy = 1 - 0.70\n",
"G17_accuracy = 1 - 0.60\n",
"G18_accuracy = 1 - 0.10\n",
"G17_G18_accuracy = 1 - ((0.6 + 0.10) / 2)\n",
"\n",
"# data to plot: a full-scale reference bar (1.0) beside each computed accuracy\n",
"group_names = ('All', 'G17', 'G18', 'G17&G18')\n",
"mape_full = (1, 1, 1, 1)\n",
"mape_calc = (all_accuracy, G17_accuracy, G18_accuracy, G17_G18_accuracy)\n",
"n_groups = len(mape_calc)\n",
"\n",
"# create plot\n",
"fig, ax = plt.subplots()\n",
"index = np.arange(n_groups)\n",
"bar_width = 0.35\n",
"opacity = 0.8\n",
"\n",
"rects1 = ax.bar(index, mape_full, bar_width, alpha=opacity, color='b', label='Actual')\n",
"rects2 = ax.bar(index + bar_width, mape_calc, bar_width, alpha=opacity, color='g', label='Mean')\n",
"\n",
"# Print the percentage above each accuracy bar. round() replaces the earlier\n",
"# int(x * 100 + 1), which showed every value one point too high (e.g. 31 for 0.30).\n",
"for position, accuracy in enumerate(mape_calc):\n",
"    ax.text(position + bar_width, accuracy, str(round(accuracy * 100)),\n",
"            ha='center', va='bottom', color='blue', fontweight='bold', fontsize=24)\n",
"\n",
"ax.set_xlabel('Part-Lead')\n",
"ax.set_ylabel('Mean Predicted')\n",
"ax.set_title('Accuracy by simple mean calculation')\n",
"# Centre each tick between the two bars of its group.\n",
"ax.set_xticks(index + bar_width / 2)\n",
"ax.set_xticklabels(group_names)\n",
"\n",
"ax.legend()\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bar Graph -2"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"all_accuracy = 1 - 0.70\n",
"G17_accuracy = 1 - 0.60\n",
"G18_accuracy = 1 - 0.10\n",
"G17_G18_accuracy = 1 - ((0.6 + 0.10) / 2)\n",
"\n",
"# data to plot: each accuracy bar is drawn on top of a full-scale (1.0) bar\n",
"group_names = ('All', 'G17', 'G18', 'G17&G18')\n",
"mape_full = (1, 1, 1, 1)\n",
"mape_calc = (all_accuracy, G17_accuracy, G18_accuracy, G17_G18_accuracy)\n",
"n_groups = len(mape_calc)\n",
"\n",
"# create plot\n",
"fig, ax = plt.subplots()\n",
"index = np.arange(n_groups)\n",
"bar_width = 0.35\n",
"opacity = 0.8\n",
"\n",
"# Both series share the same x positions, so the green bar overlays the salmon one.\n",
"rects1 = ax.bar(index, mape_full, bar_width, alpha=opacity, color='salmon', label='Actual')\n",
"rects2 = ax.bar(index, mape_calc, bar_width, alpha=opacity, color='g', label='Mean')\n",
"\n",
"# Print the percentage above each accuracy bar. round() replaces the earlier\n",
"# int(x * 100 + 1), which showed every value one point too high (e.g. 31 for 0.30).\n",
"for position, accuracy in enumerate(mape_calc):\n",
"    ax.text(position, accuracy, str(round(accuracy * 100)),\n",
"            ha='center', va='bottom', color='blue', fontweight='bold', fontsize=24)\n",
"\n",
"ax.set_xlabel('Part-Lead')\n",
"ax.set_ylabel('Mean Predicted')\n",
"ax.set_title('Accuracy by simple mean calculation')\n",
"ax.set_xticks(index)\n",
"ax.set_xticklabels(group_names)\n",
"\n",
"ax.legend()\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ANALYTICS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ACF PACF ADF"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Autocorrelation Function (ACF)\n",
"# Partial Autocorrelation Function (PACF)\n",
"# Augmented Dickey-Fuller (ADF) test\n",
"\n",
"from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
"from statsmodels.tsa.stattools import adfuller\n",
"\n",
"# pd.Series.from_csv was removed in pandas 1.0. Read the file once with read_csv\n",
"# (first row as header, first column as a parsed date index) and keep the single\n",
"# value column as a Series. The ADF step previously re-read the file with\n",
"# header=None, which would treat the header row as a data point.\n",
"series = pd.read_csv(\"./arima/Required_Col_data.csv\", header=0, index_col=0, parse_dates=True).iloc[:, 0]\n",
"\n",
"#ACF\n",
"# Assigning the returned figure keeps the notebook from rendering it twice.\n",
"temp = plot_acf(series)\n",
"# pyplot.show()\n",
"\n",
"#PACF\n",
"temp = plot_pacf(series)\n",
"# pyplot.show()\n",
"\n",
"#ADF\n",
"X = series.values\n",
"result = adfuller(X)\n",
"print('ADF Statistic: %f' % result[0])\n",
"print('p-value: %f' % result[1])\n",
"print('Critical Values:')\n",
"# result[4] maps each significance level to its critical value.\n",
"for key, value in result[4].items():\n",
"    print('\\t%s: %.3f' % (key, value))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mean and Windowed Mean"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Ensemble Function"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def ensemble(estimates=None, actuals=None, errorMethod=\"RMSE\", MapeCutOff=1, loops=0, importance_leveled=False):\n",
"    \"\"\"\n",
"    Ensembles: estimate the coefficients used to combine multiple models.\n",
"\n",
"    Inputs:\n",
"        estimates: List of estimated results. [List of List of values]\n",
"        actuals: List of actual results. [List of values]\n",
"        errorMethod: RMSE, MAPE, Difference. [Default: \"RMSE\"]\n",
"        MapeCutOff: Value to divide by while calculating MAPE when the actual is zero\n",
"            or below a certain threshold: (abs(act - pred)) / MapeCutOff.\n",
"        loops: Number of loops to run to mature the coefficients.\n",
"            [Default 0: continue until saturation]\n",
"        importance_leveled: BETA functionality. Importance of models follows the\n",
"            order of the estimates list.\n",
"    Output:\n",
"        List of coefficients, one per entry in estimates.\n",
"    Raises:\n",
"        ValueError: if any estimate series differs in length from actuals.\n",
"        NotImplementedError: after validation, because the coefficient refinement\n",
"            loop was never written in this snippet.\n",
"    \"\"\"\n",
"    # None defaults avoid sharing one mutable list between calls.\n",
"    estimates = [] if estimates is None else estimates\n",
"    actuals = [] if actuals is None else actuals\n",
"\n",
"    # Every model's estimate series must line up with the actuals. The original\n",
"    # check compared the lists themselves against an undefined name `actual`.\n",
"    for series in estimates:\n",
"        if len(series) != len(actuals):\n",
"            raise ValueError(\"Error: Estimated and Actual value have different dimensions\")\n",
"\n",
"    # Starting state for the refinement: equal weights and no pending adjustment.\n",
"    coeff = [1] * len(estimates)\n",
"    delta = [0] * len(estimates)\n",
"    loop_count = 0\n",
"\n",
"    # TODO: the source stopped at an unfinished `while saturation` statement; the\n",
"    # update rule for coeff/delta and the saturation test still need to be defined.\n",
"    raise NotImplementedError(\"ensemble: coefficient refinement loop is not implemented yet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Progress-bar template: iterate over the grouped frames and tick once per group.\n",
"with tqdm_notebook(total=total_count) as pbar:\n",
"    for product in new_df_grouped.keys():\n",
"        part = product\n",
"        parts_df = new_df_grouped[product]\n",
"\n",
"        # update() takes an increment, not the absolute position; passing a\n",
"        # running counter (as before) made the bar advance by 1, 2, 3, ... per step.\n",
"        pbar.update(1)\n"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "326px"
},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
},
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment