Last active
September 25, 2024 01:35
-
-
Save itrobotics/0d153425db99220ad3bdbde3c2dd2f40 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import statsmodels.api as sm | |
def process_stock_data(df: pd.DataFrame) -> pd.DataFrame:
    """Reduce a stock price table to rows that have a numeric ``Close`` value.

    The input frame is never modified; every step works on a new frame.

    Args:
        df: Price data containing a ``Close`` column. The columns ``Open``,
            ``High``, ``Low``, ``Volume``, ``Dividends`` and ``Stock Splits``
            are removed when present.

    Returns:
        A new DataFrame without the columns listed above, with ``Close``
        converted to a numeric dtype and rows whose ``Close`` value could
        not be parsed removed.

    Raises:
        KeyError: If ``df`` has no ``Close`` column.
    """
    cols_to_drop = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']

    # errors='ignore' lets frames that lack some of these columns pass
    # through instead of failing with a KeyError.
    trimmed = df.drop(columns=cols_to_drop, errors='ignore')

    # Unparseable prices become NaN here so they can be dropped below.
    trimmed = trimmed.assign(
        Close=pd.to_numeric(trimmed['Close'], errors='coerce')
    )

    return trimmed.dropna(subset=['Close'])
# Strip the raw frame down to the columns kept by process_stock_data.
filtered_dataframe = process_stock_data(df)

# Collapse to one row per 'Date' by summing that date's 'Close' values,
# then turn 'Date' back into an ordinary column.
close_by_date = filtered_dataframe.groupby('Date')['Close']
filtered_dataframe = close_by_date.sum().reset_index()

# Preview the first rows (only displayed in an interactive session).
filtered_dataframe.head()
# Data decomposition
from pylab import rcParams

# Default figure size for every figure created from here on.
rcParams['figure.figsize'] = (18, 8)

# Parse 'Date' into datetimes and use it as the index.
parsed_dates = pd.to_datetime(filtered_dataframe['Date'])
filtered_dataframe['Date'] = parsed_dates
filtered_dataframe.set_index('Date', inplace=True)

# Additive decomposition of the 'Close' series.
# NOTE(review): period=30 is the number of observations per seasonal cycle,
# not a calendar month; confirm this matches the sampling of the data.
close_series = filtered_dataframe['Close']
decomposition = sm.tsa.seasonal_decompose(
    close_series,
    model='additive',
    period=30,
)

# Pull the three components out for plotting.
trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)
# Plot the original series and its three components as four stacked panels.
plt.figure(figsize=(18, 8))

panels = (
    (filtered_dataframe['Close'], 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonal'),
    (residual, 'Residual'),
)

# One subplot per panel in a 4-row, 1-column grid, top to bottom.
for position, (series, label) in enumerate(panels, start=1):
    plt.subplot(4, 1, position)
    plt.plot(filtered_dataframe.index, series, label=label)
    plt.legend(loc='best')

plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment