I am working on an ML model that should take the following stock data as input and forecast it:
'Open Price', 'High Price', 'Low Price', 'Total Traded Quantity'
I think my code has problems. How can I properly forecast the next x days?
Prerequisites for running the code:
!git clone https://github.com/NSEDownload/NSEDownload
# installation of NSEDownload library
!pip3 install NSEDownload/dist/*
The following is my code:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from NSEDownload import stocks
# Ticker symbol to fetch; also used as the CSV file name and in the plot title.
stock_name = 'TCS'
# Look-back window: number of consecutive past days that form one model input.
n_days = 10

# Fetch the data frame for the symbol and convert its index to datetimes.
df = stocks.get_data(stock_symbol=stock_name, full_data=True)
df.index = pd.to_datetime(df.index)
# Keep a local copy of the fetched data.
df.to_csv(f'{stock_name}.csv')

# Quick sanity report: frame size and whether any cell is null.
has_nulls = df.isnull().values.any()
print("Dataframe shape (rows, columns): ", df.shape)
print("Are any null values present? ", has_nulls)
# Target: the single column the model is trained to predict.
training_df = pd.DataFrame(df['Last Price'])
# Input features fed to the model.
features = ['Open Price', 'High Price', 'Low Price', 'Total Traded Quantity']

# Chronological train/test split. Only the final split (largest training
# span, most recent test span) is used downstream, so take it directly.
# NOTE(review): the pasted code had lost its indentation; this assumes only
# the two slicing statements belonged to the original loop body.
timesplit = TimeSeriesSplit(n_splits=10)
*_, (train_index, test_index) = timesplit.split(df[features])

# Scaling (normalization). Fit on the training rows only so that the
# min/max of the test period do not leak into training, then apply the
# same transform to every row.
scaler = MinMaxScaler()
scaler.fit(df[features].iloc[train_index])
training_df_transform = pd.DataFrame(
    scaler.transform(df[features]), columns=features, index=df.index
)

X_train = training_df_transform.iloc[train_index]
X_test = training_df_transform.iloc[test_index]
y_train = training_df.iloc[train_index].values.ravel()
y_test = training_df.iloc[test_index].values.ravel()


def _make_windows(frame, window):
    """Return an array of shape (len(frame) - window, window, n_columns).

    Window i holds rows i .. i + window - 1 of *frame*. The last possible
    window is left out because no later row exists to serve as its target.
    An input with no more than *window* rows yields an empty array that
    still has three dimensions.
    """
    values = frame.to_numpy()
    windows = [values[i : i + window] for i in range(len(values) - window)]
    return np.array(windows).reshape(-1, window, values.shape[1])


# Create sequences of length n_days; already in the
# (samples, timesteps, features) layout the LSTM is given below.
X_train = _make_windows(X_train, n_days)
X_test = _make_windows(X_test, n_days)
# Drop the first n_days targets so y[i] is the value on the day right
# after window i.
y_train = y_train[n_days:]
y_test = y_test[n_days:]
# Define and train the model: one LSTM layer over windows of n_days steps,
# followed by a linear output layer.
n_features = X_train.shape[2]
model = Sequential()
model.add(LSTM(32, input_shape=(n_days, n_features), activation='relu', return_sequences=False))
# y_train holds one value per window (the 'Last Price' target), so the
# output layer has a single unit. To forecast several columns instead,
# y_train would need one column per forecast quantity and this width
# would have to match that column count.
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False keeps the samples in chronological order during training.
model.fit(X_train, y_train, epochs=50, batch_size=8, verbose=1, shuffle=False)
# Predict on the held-out windows; result has shape (len(X_test), 1).
y_pred = model.predict(X_test)
# Plot actual vs. predicted values against the trailing dates of the frame.
test_dates = df.index[-len(y_test):]
predicted = y_pred[:, 0]
plt.plot(test_dates, y_test, label='Actual Close Price')
plt.plot(test_dates, predicted, label='Predicted Close Price')

# X-axis: one major tick every two months, labelled like "Jan 2023".
x_axis = plt.gca().xaxis
x_axis.set_major_locator(mdates.MonthLocator(interval=2))
x_axis.set_major_formatter(mdates.DateFormatter('%b %Y'))

# Title, axis labels and legend.
plt.title('Testing the Model Prediction for ' + stock_name)
plt.xlabel('Month')
plt.ylabel('Stock Price (Rs.)')
plt.legend()

# Tilt the date labels so they do not overlap, then render.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()