Using pmdarima in Python

27 Views Asked by At

There is a problem when training on this dataset. The first year is 2017, but when I train the model the plotted points appear shifted by one year. The result should be a forecast for at least 3 years (2024, 2025, 2026).

The dataset is:

Year Turnover
2017 8297000
2018 8482000
2019 8496000
2020 4751000
2021 5642000
2022 5359900
2023 5091905
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Importing auto_arima
import pmdarima as pm
from pmdarima.arima import auto_arima

# Read the file
sales_data = pd.read_excel('data.xlsx')

sales_data.head()

# Make sure there are no null values at the end of the dataset
sales_data.tail()

# Check the datatypes
sales_data.dtypes

# Convert the Year column to datetime.
# The years are bare numbers (2017, 2018, ...). Without an explicit format,
# pd.to_datetime interprets integers as nanoseconds since the Unix epoch, so
# every row would end up in 1970. Parsing the year as text with format='%Y'
# gives 1 January of the intended year instead.
# NOTE(review): assumes the column is read as integers or 4-digit strings;
# if Excel delivers floats (2017.0), cast with .astype(int) first.
sales_data['Year'] = pd.to_datetime(sales_data['Year'].astype(str), format='%Y')

# Recheck the datatypes
sales_data.dtypes

# Use the Year column as the index
sales_data.set_index('Year', inplace=True)

sales_data.head()

# To understand the pattern
sales_data.plot()

# Testing for stationarity (Augmented Dickey-Fuller test at the 5% level)
from pmdarima.arima import ADFTest
adf_test = ADFTest(alpha=0.05)
adf_test.should_diff(sales_data)

# Split the dataset into train and test.
# The two slices must not overlap: with 7 rows, [:5] and [-3:] would both
# contain the fifth row, leaking a test point into training and shifting the
# forecast by one step relative to the test index. Everything after the
# training rows is held out instead.
n_train = 5
train = sales_data[:n_train]
test = sales_data[n_train:]

train.tail()
test.head()

plt.plot(train)
plt.plot(test)


# Fit the model on the training rows.
# The data is annual, so there is no seasonal cycle to estimate; a seasonal
# model with m=5 cannot be identified from a handful of yearly points.
# Use a non-seasonal search (m=1). `suppress_warnings` is the correct spelling
# of the keyword (the misspelled form is not the documented option).
arima_model = auto_arima(train, start_p=0, d=1, start_q=0,
                         max_p=5, max_d=5, max_q=5,
                         m=1, seasonal=False,
                         error_action='warn', trace=True,
                         suppress_warnings=True, stepwise=True,
                         random_state=20, n_fits=50)

# Forecast exactly one value per held-out row so the result lines up with
# test.index. Converting to a plain array avoids pandas re-aligning a
# forecast that carries its own index, which would yield NaNs.
forecast_values = np.asarray(arima_model.predict(n_periods=len(test)))
prediction = pd.DataFrame(forecast_values, index=test.index)
prediction.columns = ['predicted_sales']
prediction

# Overlay the training rows, the held-out rows and the forecast on one chart
fig, ax = plt.subplots(figsize=(8, 5))
for series, series_label in ((train, "Training"),
                             (test, "Test"),
                             (prediction, "Predicted")):
    ax.plot(series, label=series_label)
ax.legend(loc='best')
plt.show()

# Fitted-model report followed by the estimator's parameters
model_summary = arima_model.summary()
print(model_summary)

model_params = arima_model.get_params()
print(model_params)

# Echo the first rows of each split so their indices can be checked by eye
for split in (train, test):
    print(split.head())
0

There are 0 best solutions below