Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save chandra-prakash-meghwal/fe08c298009a8712962a93a22fce6cf0 to your computer and use it in GitHub Desktop.
Save chandra-prakash-meghwal/fe08c298009a8712962a93a22fce6cf0 to your computer and use it in GitHub Desktop.
python script to split csv file month wise in separate folders year wise with selected columns in the output files
import os
import pandas as pd
from pytz import timezone
# Read the original CSV file
csv_file_path = input('enter csv path ')
df = pd.read_csv(csv_file_path)
# Extract month and year from 'created_at'
df['created_at'] = pd.to_datetime(df['created_at'])
df['month'] = df['created_at'].dt.month
df['year'] = df['created_at'].dt.year
# Convert UTC to IST
utc_timezone = timezone('UTC')
ist_timezone = timezone('Asia/Kolkata')
df['created_at'] = df['created_at'].dt.tz_localize(utc_timezone).dt.tz_convert(ist_timezone)
# Ensure data types for customer_id and product_id
df['customer_id'] = pd.to_numeric(df['customer_id'], errors='coerce').astype(pd.Int64Dtype())
df['product_id'] = pd.to_numeric(df['product_id'], errors='coerce').astype(pd.Int64Dtype())
# Create folders for each year
output_dir = 'output_month_wise_data'
os.makedirs(output_dir, exist_ok=True)
# Write data to separate CSV files
for year, group in df.groupby('year'):
year_folder = os.path.join(output_dir, str(year))
os.makedirs(year_folder, exist_ok=True)
for month, month_data in group.groupby('month'):
month_name = pd.to_datetime(f'2022-{month}-01').strftime('%B').lower()
month_csv_path = os.path.join(year_folder, f'{month_name}.csv')
selected_columns = ['customer_id', 'product_id', 'created_at']
month_data[selected_columns].to_csv(month_csv_path, index=False)
print("CSV files created successfully!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment