How can I use features in statsforecast

141 Views Asked by At

How can I use features in statsforecast (e.g. moving average, lags, user defined function)?

# Build a StatsForecast object on the daily M4 training frame with a single
# auto_arima model. The 7 paired with the model is presumably the season
# length (weekly seasonality on daily data) -- confirm against the library
# version in use.
fcst = StatsForecast(
    m4_daily_train,
    models = [(auto_arima,7)],
    freq = 'D',
    # Use at most one worker per distinct index value, capped at the CPU count.
    n_jobs = min(len(m4_daily_train.index.unique()),cpu_count())
)

Or is it possible to create the features myself in a previous step in pandas and then use the full feature table for fitting, like this...

# Hand-made features: the previous row's target value and the day of the month.
# NOTE(review): shift(1) is applied over the whole column, not per series; if
# df holds several series the lag would cross series boundaries -- confirm.
df['lag1'] = df['y'].shift(1)
df['day'] = df['timestamp'].dt.day

# Same StatsForecast setup as before, but fed the frame that carries the
# extra feature columns.
fcst = StatsForecast(
    df,
    models = [(auto_arima,7)],
    freq = 'D',
    # NOTE(review): the worker count is still derived from m4_daily_train
    # rather than df -- verify this is intended.
    n_jobs = min(len(m4_daily_train.index.unique()),cpu_count())
)
1

There is 1 best solution below

0
On BEST ANSWER

You can use exogenous variables in statsforecast. Pass a training dataset that contains unique_id, ds, y, and the exogenous variables; then, in the forecast step, pass a future dataset that contains unique_id, ds, and the future values of the exogenous variables. For more information about exogenous regressors, see the official documentation.

from datasetsforecast.m5 import M5
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA

# Load the M5 data: Y_df holds the target series, X_df the exogenous variables.
Y_df, X_df, *_ = M5.load('./data')

# Filter data down to a single series to keep the example small.
series_id = 'FOODS_3_586_CA_3'
Y_ts = Y_df[Y_df['unique_id'] == series_id].reset_index(drop=True)
X_ts = X_df[X_df['unique_id'] == series_id].reset_index(drop=True)

# Keep only the identifier columns and the regressors used by the model.
# The explicit copy avoids pandas' SettingWithCopyWarning on the assignment
# that follows.
X_ts = X_ts[['unique_id', 'ds', 'sell_price', 'snap_CA']].copy()
X_ts['unique_id'] = X_ts['unique_id'].astype(str)

# Extract dates for train and test set.
# The dates are taken from the selected series itself and sorted, so that the
# last 28 entries really are the final 28 days of this series regardless of
# the row order or date coverage of the other series in Y_df.
dates = Y_ts['ds'].sort_values().unique()
dtrain = dates[:-28]
dtest = dates[-28:]

Y_train = Y_ts.query('ds in @dtrain')
Y_test = Y_ts.query('ds in @dtest')  # held out; useful for evaluating fcst

X_train = X_ts.query('ds in @dtrain')
X_test = X_ts.query('ds in @dtest')

# Add exogenous regressors: the training frame carries unique_id, ds, y and
# the regressor columns for the training dates only.
train = Y_train.merge(X_train, how='left', on=['unique_id', 'ds'])

models = [AutoARIMA(season_length=7)]
sf = StatsForecast(
    models=models,
    freq='D',
    n_jobs=-1,
)

# X_test supplies the future values of the regressors over the 28-step horizon.
fcst = sf.forecast(df=train, h=28, X_df=X_test, level=[95])
fcst = fcst.reset_index()
fcst.head()

dataset