Created
October 5, 2020 16:30
-
-
Save hadpro24/637824f1111cb7be302e0e00840cbf60 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import os.path as path | |
import argparse | |
import smtplib, ssl | |
import mysql.connector | |
import pandas as pd | |
import numpy as np | |
def main(args): | |
#verify date | |
if args.get('date', False) and len(args['date'].split('-')) != 3: | |
print("[Error] Date format invalide, please use YYYY-MM-DD") | |
return None | |
print("[INFO] Processing started...") | |
#extract data to mysql db | |
print("[INFO] connecting database...") | |
try: | |
db = mysql.connector.connect( | |
host="172.20.8.249", user="root", passwd="passer", | |
database='way2see' | |
) | |
cursor = db.cursor() | |
except Exception as e: | |
print("[Error connection database] ", e) | |
return None | |
print("[INFO] successfy connecting !") | |
sql = f"SELECT api_emotion.label, api_conselier.matricule FROM api_emotion \ | |
INNER JOIN api_conselier \ | |
ON api_emotion.conselier_id = api_conselier.id \ | |
WHERE api_emotion.create_at = '{args.date}' \ | |
INTO OUTFILE '/var/lib/mysql-files/data_conselier_daily.csv' \ | |
FIELDS ENCLOSED BY '"' TERMINATED BY ';' ESCAPED BY '"' LINES TERMINATED BY '\r\n';" | |
cursor.execute(sql) | |
print("[INFO] extracting data to database...") | |
cursor.fetchall() | |
print("[INFO] successfuly extracting !") | |
#cleaning data | |
print("[INFO] cleaning data....") | |
df_group = pd.read_csv('/var/lib/msql-files/data_conselier_daily.csv') | |
data_grouped = df_group.groupby(by=['label', 'name']).agg({'name': 'count'}) | |
data_grouped.to_csv('intermediate_data.csv', header=None) | |
df = pd.read_csv('intermediate_data.csv', header=None) | |
df.columns = ['label', 'label_count', 'matricule'] | |
#pivot dataframe | |
print("[INFO] pivot dataframe for new table...") | |
list_matricule_emotion = [] | |
for matricule, emotions in df.groupby(by=['matricule']): | |
helper_dict = {} | |
for sub in emotions.values: help_dict[sub[0]] = sub[1] | |
list_matricule_emotions.append([matricule, | |
helper_dict.get("Angry", 0), | |
helper_dict.get("Happy", 0), | |
helper_dict.get("Mask", 0), | |
helper_dict.get("Neutral", 0), | |
helper_dict.get("Sad", 0), | |
helper_dict.get("Fear", 0), | |
helper_dict.get("Surprise", 0) | |
]) | |
#create finished data cleaning | |
info = pd.DataFrame(list_matricule_emotion, columns=[ | |
'Matricule', 'Angry', 'Happy', 'Mask', 'Neutral', 'Sad', | |
'Fear', 'Surprise'] | |
) | |
info.to_csv('way2see_data_daily.csv', index=False) | |
print("Processing finished !!!") | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser(description="Extract data way2see daily to csv") | |
parser.add_argument('--date', help="date to extract format: YYYY-MM-DD", required=True) | |
args = vars(parser.parse_args()) | |
main(args) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment