Running Kalman filter on multiple variables in Python

373 Views Asked by At

I would like to predict the closing price ('Close') of a stock from multiple variables (Open, Low, High, Volume, Close) by feeding them into a Kalman filter. However, I get the error 'The shape of all parameters is not consistent. Please re-check their values.' and I am not sure what I am doing wrong.

To be clear: I don't want five different stock predictions based on each variable but rather one price based on all 5 variables.

Here is the code:


import numpy as np
import yfinance as yf
from pykalman import KalmanFilter

# Download daily SPY data for the given date range; the columns read below are
# 'Close', 'Open', 'Low', 'High', 'Volume' and (further down) 'Adj Close'.
spy_data = yf.download('SPY', start='2010-01-01', end='2023-03-17')

# Turn each series into an (n, 1) column so the five can be stacked side by
# side into one observation matrix with one column per variable.
price = spy_data['Close'].values.reshape(-1, 1)
open_price = spy_data['Open'].values.reshape(-1, 1)
low_price = spy_data['Low'].values.reshape(-1, 1)
high_price = spy_data['High'].values.reshape(-1, 1)
volume = spy_data['Volume'].values.reshape(-1, 1)

# Five-dimensional state: zero initial mean, variance 100 on every component.
initial_state = np.zeros(5)
initial_covariance = np.diag([100, 100, 100, 100, 100])
# Identity transition: each state component is carried over unchanged.
transition_matrix = np.array([[1, 0, 0, 0, 0], 
                              [0, 1, 0, 0, 0],
                              [0, 0, 1, 0, 0],
                              [0, 0, 0, 1, 0],
                              [0, 0, 0, 0, 1]])
# 5x5 observation matrix in which only entry [0, 0] is non-zero, i.e. only the
# first observation column is tied to the first state component.
observation_matrix = np.array([[1, 0, 0, 0, 0], 
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0]])

# 5x5 process-noise covariance.
process_noise = np.diag([0.001, 0.001, 0.001, 0.001, 0.001])
# 1x1 observation-noise covariance, while every other parameter above is sized
# for five dimensions.
# NOTE(review): this size mismatch looks like the cause of the
# "shape of all parameters is not consistent" error -- confirm.
observation_noise = np.diag([0.1])

kf = KalmanFilter(
    initial_state_mean=initial_state,
    initial_state_covariance=initial_covariance,
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    observation_covariance=observation_noise,
    transition_covariance=process_noise)

# Run the filter over the stacked (n, 5) observations:
# Close, Open, Low, High, Volume.
state_means, state_covariances = kf.filter(np.hstack([price, open_price, low_price, high_price, volume]))

# Predict next day's closing price
last_state_mean = state_means[-1]
last_state_covariance = state_covariances[-1]

# One more filter step starting from the last filtered state. The observation
# is rebuilt from the last row of the data, but its first entry is taken from
# 'Adj Close' whereas the filter above was fed 'Close'.
next_state_mean, next_state_covariance = kf.filter_update(
last_state_mean, last_state_covariance, observation= np.array([spy_data['Adj Close'][-1], spy_data['Open'][-1], spy_data['Low'][-1], spy_data['High'][-1], spy_data['Volume'][-1]])
)
# The first state component is the one reported as the closing price.
predicted_price = next_state_mean[0]


print(f"Today's SPY closing price: {price[-1][0]}")
print(f"Predicted SPY closing price for tomorrow: {predicted_price}")

1

There is 1 best solution below

0
Ori Yarden PhD On

The problem is the shapes of your inputs; unfortunately, pykalman's KalmanFilter does not report details such as which parameters have inconsistent shapes or what those shapes are. The issue is observation_noise's shape: you pass five observed variables per time step, so the observation covariance must be 5x5, e.g. np.diag([0.1, 0.1, 0.1, 0.1, 0.1]):

# Download daily SPY data for the given date range.
spy_data = yf.download('SPY', start='2010-01-01', end='2023-03-17')

# Turn each series into an (n, 1) column so they can be stacked side by side.
price = spy_data['Close'].values.reshape(-1, 1)
open_price = spy_data['Open'].values.reshape(-1, 1)
low_price = spy_data['Low'].values.reshape(-1, 1)
high_price = spy_data['High'].values.reshape(-1, 1)
volume = spy_data['Volume'].values.reshape(-1, 1)

# Build the (n, 5) observation matrix once (column order: Close, Open, Low,
# High, Volume) so the filtering pass and the final update step are guaranteed
# to use the same columns in the same order.
observations = np.hstack([price, open_price, low_price, high_price, volume])

# Five-dimensional state: zero initial mean, variance 100 on every component.
initial_state = np.zeros(5)
initial_covariance = np.diag([100, 100, 100, 100, 100])
# Identity transition: each state component is carried over unchanged.
transition_matrix = np.array([[1, 0, 0, 0, 0],
                              [0, 1, 0, 0, 0],
                              [0, 0, 1, 0, 0],
                              [0, 0, 0, 1, 0],
                              [0, 0, 0, 0, 1]])
# NOTE(review): only entry [0, 0] is non-zero, so rows 1-4 (Open, Low, High,
# Volume) are modelled as pure noise around zero. Combined with the diagonal
# covariances used here, those four columns never influence the state
# estimate: the filter is effectively a one-dimensional filter on Close.
# Making the other variables count requires non-zero entries in those rows.
observation_matrix = np.array([[1, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0],
                               [0, 0, 0, 0, 0]])

process_noise = np.diag([0.001, 0.001, 0.001, 0.001, 0.001])
# The observation covariance must be (n_dim_obs, n_dim_obs) = 5x5 to match the
# five observation columns; a 1x1 matrix here is what triggers pykalman's
# "shape of all parameters is not consistent" error.
observation_noise = np.diag([0.1, 0.1, 0.1, 0.1, 0.1])

kf = KalmanFilter(
    initial_state_mean=initial_state,
    initial_state_covariance=initial_covariance,
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    observation_covariance=observation_noise,
    transition_covariance=process_noise)

state_means, state_covariances = kf.filter(observations)

# Predict next day's closing price
last_state_mean = state_means[-1]
last_state_covariance = state_covariances[-1]

# Reuse the last row of the matrix that was filtered. The earlier version
# rebuilt this vector from the DataFrame with 'Adj Close' in the first slot
# (a different series from the 'Close' column the filter ran on) and with
# `series[-1]`, which relies on positional fallback indexing of a date-indexed
# Series.
# NOTE(review): this feeds the final day's data to the filter a second time as
# if it were a new observation. A pure one-step-ahead forecast would call
# filter_update without an observation -- confirm which is intended.
next_state_mean, next_state_covariance = kf.filter_update(
    last_state_mean,
    last_state_covariance,
    observation=observations[-1],
)
# The first state component is the one tied to the Close column.
predicted_price = next_state_mean[0]


print(f"Today's SPY closing price: {price[-1][0]}")
print(f"Predicted SPY closing price for tomorrow: {predicted_price}")

Outputs:

[*********************100%***********************]  1 of 1 completed
Today's SPY closing price: 396.1099853515625
Predicted SPY closing price for tomorrow: 396.3105625117946