Last active
June 24, 2024 06:59
-
-
Save chandra-prakash-meghwal/fe08c298009a8712962a93a22fce6cf0 to your computer and use it in GitHub Desktop.
python script to split csv file month wise in separate folders year wise with selected columns in the output files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
from pytz import timezone | |
# Read the original CSV file | |
csv_file_path = input('enter csv path ') | |
df = pd.read_csv(csv_file_path) | |
# Extract month and year from 'created_at' | |
df['created_at'] = pd.to_datetime(df['created_at']) | |
df['month'] = df['created_at'].dt.month | |
df['year'] = df['created_at'].dt.year | |
# Convert UTC to IST | |
utc_timezone = timezone('UTC') | |
ist_timezone = timezone('Asia/Kolkata') | |
df['created_at'] = df['created_at'].dt.tz_localize(utc_timezone).dt.tz_convert(ist_timezone) | |
# Ensure data types for customer_id and product_id | |
df['customer_id'] = pd.to_numeric(df['customer_id'], errors='coerce').astype(pd.Int64Dtype()) | |
df['product_id'] = pd.to_numeric(df['product_id'], errors='coerce').astype(pd.Int64Dtype()) | |
# Create folders for each year | |
output_dir = 'output_month_wise_data' | |
os.makedirs(output_dir, exist_ok=True) | |
# Write data to separate CSV files | |
for year, group in df.groupby('year'): | |
year_folder = os.path.join(output_dir, str(year)) | |
os.makedirs(year_folder, exist_ok=True) | |
for month, month_data in group.groupby('month'): | |
month_name = pd.to_datetime(f'2022-{month}-01').strftime('%B').lower() | |
month_csv_path = os.path.join(year_folder, f'{month_name}.csv') | |
selected_columns = ['customer_id', 'product_id', 'created_at'] | |
month_data[selected_columns].to_csv(month_csv_path, index=False) | |
print("CSV files created successfully!") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment