Multistep LSTM: Why am I getting the same curve/pattern for each prediction?


I'm new to TensorFlow and LSTM models (and coding in general) and would really appreciate some help. I'm getting somewhere, but no matter what I try, I always seem to get the same pattern in every prediction I make. (Image of part of my predictions; not aligned on the x axis currently.)

You can see in the image that the model always makes the same guess: up, slight down, down, then linking into the start of the next prediction.

I'll attach the code below.

This is my first time posting here, so thanks so much in advance, and apologies if I've broken any of the site's norms.

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Bidirectional, Dropout, Activation, Dense, LSTM
from tensorflow.keras.models import Sequential
#new below
from tensorflow.keras.callbacks import EarlyStopping

%matplotlib inline

sns.set(style='whitegrid', palette='muted', font_scale=1.5)

#rcParams['figure.figsize'] = 14, 8

#RANDOM_SEED = 42

#np.random.seed(RANDOM_SEED)

# Data comes from:
# https://finance.yahoo.com/quote/BTC-USD/history?period1=1279314000&period2=1556053200&interval=1d&filter=history&frequency=1d

csv_path = r"/home/paddycuinne/files/BTC-2021min.csv"

# csv_path = "https://raw.githubusercontent.com/curiousily/Deep-Learning-For-Hackers/master/data/3.stock-prediction/AAPL.csv"

df = pd.read_csv(csv_path, parse_dates=['date'])

df = df.sort_values('date')

df.head()

df.tail()

df.shape

ax = df.plot(x='date', y='close');
ax.set_xlabel("Date")
ax.set_ylabel("Close Price (USD)")

# Normalization

df['date'] = pd.to_datetime(df['date'])

df.set_index('date', inplace=True)

df



timestamp = '2021-12-31 00:01:00'

# get_loc raises a KeyError if the timestamp is missing from the index
date_index = df.index.get_loc(timestamp)

data_train = df.loc[:'2021-12-31 00:01:00', :]['close']


# Slide a 30-step lookback window over the series, predicting 3 steps ahead
SEQ_LEN = 30
steps_ahead = 3


X_train, y_train = [], []


for i in range(SEQ_LEN, len(data_train) - steps_ahead):
    X_train.append(data_train[i - SEQ_LEN:i])        # Input sequence of length SEQ_LEN
    y_train.append(data_train[i:i + steps_ahead]) 
    
    
X_train, y_train = np.array(X_train), np.array(y_train)    
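# Resulting shapes: X_train (n_samples, SEQ_LEN), y_train (n_samples, steps_ahead)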

X_train.shape, y_train.shape

pd.DataFrame(X_train).head()


pd.DataFrame(y_train).head()

x_scaler = MinMaxScaler()
X_train = x_scaler.fit_transform(X_train)
#Normalising the dataset

y_scaler = MinMaxScaler()
y_train = y_scaler.fit_transform(y_train)
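# Note: MinMaxScaler treats each of the 30 window positions (and each of the
# 3 target steps) as an independent column and scales each column separately.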

pd.DataFrame(X_train).head()



X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_train.shape, y_train.shape

pd.DataFrame(y_train).head()



data_test = df.loc['2021-12-31 00:01:01':, 'close']
data_test.head()
data_test.shape



data_test = np.array(data_test)

X_test, y_test = [], []

for i in range(SEQ_LEN, len(data_test) - steps_ahead):
    X_test.append(data_test[i - SEQ_LEN:i])        # Input sequence of length SEQ_LEN
    y_test.append(data_test[i:i + steps_ahead]) 
    

X_test, y_test = np.array(X_test), np.array(y_test)
X_test.shape, y_test.shape

X_test = x_scaler.transform(X_test)
y_test = y_scaler.transform(y_test)
pd.DataFrame(X_test).head(), pd.DataFrame(y_test)

X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
X_test.shape, y_test.shape

y_test


early_stopping = EarlyStopping(monitor='val_loss', mode='auto', verbose=1, patience=20, restore_best_weights=True)
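# With patience=20, training stops once val_loss has not improved for 20 epochs,
# and the best weights seen so far are restored.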

DROPOUT = 0.1
WINDOW_SIZE = SEQ_LEN - 1
model = Sequential()
model.add(Bidirectional(LSTM(100, return_sequences=True, input_shape=(X_train.shape[1],X_train.shape[2]))))
model.add(Dropout(rate=DROPOUT))
model.add(Bidirectional(LSTM(100)))
model.add(Dropout(rate=DROPOUT))
model.add(Dense(units=steps_ahead))
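# A single Dense(steps_ahead) head predicts all 3 future steps at once
# (direct multi-step forecasting).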

model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1,validation_data=(X_test, y_test), callbacks=[early_stopping])

print("Shape of y_pred:", y_pred.shape)
print("Shape of y_pred_2d:", y_pred_2d.shape)
print("Shape of scaler min_:", y_scaler.min_.shape)
print("Shape of scaler scale_:", y_scaler.scale_.shape)
print(df.head())


import plotly.graph_objects as go

fig = go.Figure()

# predict on the test windows and undo the target scaling

y_pred = model.predict(X_test)

# sanity-check shapes before the inverse transform
print("Shape of y_pred:", y_pred.shape)
print("Shape of scaler min_:", y_scaler.min_.shape)
print("Shape of scaler scale_:", y_scaler.scale_.shape)

y_hat_inverse = y_scaler.inverse_transform(y_pred)

#realign for lookback and for forecast

#plot

fig.add_trace(go.Scatter(x=df.index, y=df['close'],
                         mode='lines',
                         name='Original Data'))



test_dates = df.loc['2021-12-31 00:01:01':].index



y_hat_inverse_flat = y_hat_inverse.flatten()
def extract_blocks(arr):
    blocks = []
    length = len(arr)
    for i in range(0, length, 9):
        blocks.extend(arr[i:i+3])
    return blocks
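# extract_blocks keeps the first 3 of every 9 flattened values, i.e. one full
# 3-step forecast from every third window, so consecutive forecasts don't
# overlap on the plot.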

y_hat_selected = extract_blocks(y_hat_inverse_flat)





fig.add_trace(go.Scatter(x=test_dates, y=y_hat_selected,
                         mode='lines+markers',
                         name='Predicted Values real'))


fig.update_layout(title='BTC Close Price with 3-Step Forecasts',
                  xaxis_title='Date',
                  yaxis_title='Close Price (USD)')


fig.show()

I'm aware Bitcoin prices are very hard to predict, and I understand I'll need to add numerous other inputs (volume, etc.) before I get anything with any serious predictive ability. I was just hoping to deal with this issue before I move on. My understanding is that, at the very least, on a trend it should be able to make a vague guess at a continuing downward or upward trajectory.

It almost feels like the model is making one fixed set of predictions (in terms of pattern) and trying to fit that to the data.
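For reference, here is a minimal diagnostic sketch, assuming the X_test, y_test, y_pred, x_scaler, and y_scaler defined above: it measures how much the forecasts actually vary across windows, and compares the model's error against a naive persistence baseline that simply repeats each window's last close.

# How much do the forecasts vary across windows? A collapsed model shows a
# much smaller spread than the targets do.
print("prediction std per step:", y_pred.std(axis=0))
print("target std per step:", y_test.std(axis=0))

# Persistence baseline in price space: repeat each window's last close 3 times.
y_true_inv = y_scaler.inverse_transform(y_test)
y_pred_inv = y_scaler.inverse_transform(y_pred)
last_close = x_scaler.inverse_transform(X_test[:, :, 0])[:, -1:]
naive = np.repeat(last_close, y_true_inv.shape[1], axis=1)

print("model MSE:", np.mean((y_pred_inv - y_true_inv) ** 2))
print("naive MSE:", np.mean((naive - y_true_inv) ** 2))

If the naive baseline beats the model, the network has effectively learned to output an average shape rather than anything input-specific.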

I'm running in a Jupyter notebook, in Anaconda, in WSL.

I've googled, looked all over Stack Overflow, and tried ChatGPT and the like, but I can't find much on this particular issue.

I feel like I may be missing something obvious. :/

For each graphed set of predictions, I'm hoping to get a unique 3-step forecast derived from the previous 30 values. For context, I have tried rewriting the test/train split in multiple ways, and I have tried changing the model in the following ways:

- neuron counts at various levels between 1 and 1024
- adding multiple layers
- different batch sizes
- various numbers of epochs, up into the thousands
- bidirectional and normal LSTM layers
- various other small changes

Answer by Golden Lion:
1. Your second LSTM layer should have return_sequences=False.
2. The y_test was not scaled; I normalized the y_test.
3. Run my code and the loss moves close to zero.
4. Print the prediction and the y_test.
5. Use the y_test data vs. the y_prediction data in the plotting.



SEQ_LEN = 30
steps_ahead = 3

X_train, y_train = [], []


data_train = df['Close']

for i in range(SEQ_LEN, len(data_train) - steps_ahead):
    X_train.append(data_train[i - SEQ_LEN:i])        # Input sequence of length SEQ_LEN
    y_train.append(data_train[i:i + steps_ahead]) 
    
    
X_train, y_train = np.array(X_train), np.array(y_train)    

X_train.shape, y_train.shape

pd.DataFrame(X_train).head()

x_scaler = MinMaxScaler()
X_train = x_scaler.fit_transform(X_train)
#Normalising the dataset

y_scaler = MinMaxScaler()
y_train = y_scaler.fit_transform(y_train)

pd.DataFrame(X_train).head()

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_train.shape, y_train.shape

data_test = df['Close']
data_test.head()
data_test.shape


data_test = np.array(data_test)


X_test, y_test = [], []

for i in range(SEQ_LEN, len(data_test) - steps_ahead):
    X_test.append(data_test[i - SEQ_LEN:i])        # Input sequence of length SEQ_LEN
    y_test.append(data_test[i:i + steps_ahead]) 
    
    
X_test, y_test = np.array(X_test), np.array(y_test)
X_test.shape, y_test.shape



x_scaler = MinMaxScaler()
X_test = x_scaler.fit_transform(X_test)
y_scaler = MinMaxScaler()
y_test = y_scaler.fit_transform(y_test)
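# Note: these scalers are fit on the test data itself, independently of the
# scalers fit on the training data.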

pd.DataFrame(X_test).head(), pd.DataFrame(y_test)

X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
X_test.shape, y_test.shape

avg_numbers=[]
for index, data in enumerate(y_test):
    avg_numbers.append(np.mean(data))

plt.plot(avg_numbers)    
plt.show()
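# DROPOUT and early_stopping carry over from the question's code above.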
model = Sequential()
model.add(Bidirectional(LSTM(100, return_sequences=True, input_shape=(X_train.shape[1],X_train.shape[2]))))
model.add(Dropout(rate=DROPOUT))
model.add(Bidirectional(LSTM(100,return_sequences=False)))
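# return_sequences=False makes the second LSTM emit only its final output,
# which feeds the Dense(steps_ahead) head.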
model.add(Dropout(rate=DROPOUT))
model.add(Dense(units=steps_ahead))

model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1,validation_data=(X_test, y_test), callbacks=[early_stopping])


model.evaluate(X_test, y_test)

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

prediction = model.predict(X_test)

avg_numbers1=[]
for index, data in enumerate(prediction):
    avg_numbers1.append(np.mean(data))

plt.plot(avg_numbers1,alpha=.6) 
    
avg_numbers2=[]
for index, data in enumerate(y_test):
    avg_numbers2.append(np.mean(data))

plt.plot(avg_numbers2) 
plt.show()

def extract_blocks(arr):
    blocks = []
    length = len(arr)
    for i in range(0, length, 9):
        blocks.extend(arr[i:i+3])
    return blocks

def inverse_flatten(y_scaler,y_array):
    y_pred_2d = y_array
    y_hat_inverse = y_scaler.inverse_transform(y_pred_2d)
    y_hat_inverse_flat = y_hat_inverse.flatten()
    y_hat_selected = extract_blocks(y_hat_inverse_flat)
    return y_hat_selected


fig = go.Figure()

y_hat_selected = inverse_flatten(y_scaler, prediction)
x_points = np.arange(len(y_hat_selected))
fig.add_trace(go.Scatter(x=x_points, y=y_hat_selected,
                         mode='lines+markers',
                         name='Predicted Values real'))

y_hat_selected = inverse_flatten(y_scaler, y_test)
x_points = np.arange(len(y_hat_selected))

fig.add_trace(go.Scatter(x=x_points, y=y_hat_selected,
                         mode='lines',
                         name='Test Data'))

fig.show()