Skip to content

Instantly share code, notes, and snippets.

@hadpro24
Created October 5, 2020 16:30
Show Gist options
  • Save hadpro24/637824f1111cb7be302e0e00840cbf60 to your computer and use it in GitHub Desktop.
Save hadpro24/637824f1111cb7be302e0e00840cbf60 to your computer and use it in GitHub Desktop.
import os
import os.path as path
import argparse
import smtplib, ssl
import mysql.connector
import pandas as pd
import numpy as np
def main(args):
#verify date
if args.get('date', False) and len(args['date'].split('-')) != 3:
print("[Error] Date format invalide, please use YYYY-MM-DD")
return None
print("[INFO] Processing started...")
#extract data to mysql db
print("[INFO] connecting database...")
try:
db = mysql.connector.connect(
host="172.20.8.249", user="root", passwd="passer",
database='way2see'
)
cursor = db.cursor()
except Exception as e:
print("[Error connection database] ", e)
return None
print("[INFO] successfy connecting !")
sql = f"SELECT api_emotion.label, api_conselier.matricule FROM api_emotion \
INNER JOIN api_conselier \
ON api_emotion.conselier_id = api_conselier.id \
WHERE api_emotion.create_at = '{args.date}' \
INTO OUTFILE '/var/lib/mysql-files/data_conselier_daily.csv' \
FIELDS ENCLOSED BY '"' TERMINATED BY ';' ESCAPED BY '"' LINES TERMINATED BY '\r\n';"
cursor.execute(sql)
print("[INFO] extracting data to database...")
cursor.fetchall()
print("[INFO] successfuly extracting !")
#cleaning data
print("[INFO] cleaning data....")
df_group = pd.read_csv('/var/lib/msql-files/data_conselier_daily.csv')
data_grouped = df_group.groupby(by=['label', 'name']).agg({'name': 'count'})
data_grouped.to_csv('intermediate_data.csv', header=None)
df = pd.read_csv('intermediate_data.csv', header=None)
df.columns = ['label', 'label_count', 'matricule']
#pivot dataframe
print("[INFO] pivot dataframe for new table...")
list_matricule_emotion = []
for matricule, emotions in df.groupby(by=['matricule']):
helper_dict = {}
for sub in emotions.values: help_dict[sub[0]] = sub[1]
list_matricule_emotions.append([matricule,
helper_dict.get("Angry", 0),
helper_dict.get("Happy", 0),
helper_dict.get("Mask", 0),
helper_dict.get("Neutral", 0),
helper_dict.get("Sad", 0),
helper_dict.get("Fear", 0),
helper_dict.get("Surprise", 0)
])
#create finished data cleaning
info = pd.DataFrame(list_matricule_emotion, columns=[
'Matricule', 'Angry', 'Happy', 'Mask', 'Neutral', 'Sad',
'Fear', 'Surprise']
)
info.to_csv('way2see_data_daily.csv', index=False)
print("Processing finished !!!")
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Extract data way2see daily to csv")
parser.add_argument('--date', help="date to extract format: YYYY-MM-DD", required=True)
args = vars(parser.parse_args())
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment