Add regressor for neuralprophet

64 Views Asked by At

I have a file excel with 2 worksheets.I'm using neuralprophet and I want to use a column of the second worksheet as my regressor to do crossvalidation.How can I do it? This is my code as for now:

from neuralprophet import NeuralProphet
import pandas as pd
import openpyxl
import matplotlib.pyplot as plt

# Read Excel file using openpyxl
dataframe = openpyxl.load_workbook("Dati inclinometrici e pluviometrici_ReCity.xlsx")
dataframe1 = dataframe.active

# Create a list to store rows
rows_list = []

# Iterate through rows and columns to extract data
for row in dataframe1.iter_rows(min_row=2, values_only=True):  
    row_dict = {'ds': row[0], 'y': row[1]}  
    rows_list.append(row_dict)

# Create a Pandas DataFrame
df = pd.DataFrame(rows_list)

# Convert 'ds' to datetime format
df['ds'] = pd.to_datetime(df['ds'], errors='coerce')

# Convert 'y' to numeric format
df['y'] = pd.to_numeric(df['y'], errors='coerce')

# Fill NaN values in the 'y' column with 0
df['y'] = df['y'].fillna(method='ffill')

# Fill NaN values in the 'ds' column with a specific timestamp
df['ds'] = df['ds'].fillna(value=df['ds'].min() - pd.Timedelta(days=1))
print(df.columns)

# Filter data between January 2 2016, and September 16 2017
start_date = '2016-01-02'
end_date = '2017-09-16'
df_filtered = df[(df['ds'] >= start_date) & (df['ds'] <= end_date)]

# Print the filtered DataFrame
print(df_filtered)

# Train NeuralProphet model with filtered data and yearly seasonality
model = NeuralProphet(yearly_seasonality=True)
model.fit(df_filtered, freq="D")  

# Make forecasts for the future period
future = model.make_future_dataframe(df_filtered, periods=365)  
forecast = model.predict(future)

# Print the available columns in the forecast DataFrame
print(forecast.columns)

# Print the forecasted values
print(forecast[['ds', 'yhat1', 'trend', 'season_yearly', 'season_weekly']])

# Plot the actual values
plt.plot(df_filtered['ds'], df_filtered['y'], label='Actual', color='blue')

# Plot the forecasted values
plt.plot(forecast['ds'], forecast['yhat1'], label='Forecast', color='red')

# Plot the trend, yearly seasonality, and weekly seasonality
plt.plot(forecast['ds'], forecast['trend'], label='Trend', linestyle='--', color='green')

# Check if 'season_yearly' is present in the DataFrame
if 'season_yearly' in forecast.columns:
    plt.plot(forecast['ds'], forecast['season_yearly'], label='Yearly Seasonality', linestyle='--', color='orange')

# Plot the weekly seasonality if present
if 'season_weekly' in forecast.columns:
    plt.plot(forecast['ds'], forecast['season_weekly'], label='Weekly Seasonality', linestyle='--', color='purple')

# Add labels and title
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Actual vs Forecasted Values with Trend and Seasonality')

# Show the legend
plt.legend()

# Display the plot
plt.show()

I tried using add_lagged_regressor and add_future_regressor but even though I checked my dataframe and I see that it has the correct dataset and columns it always give me the error that the column I want to use as regressor is not found

0

There are 0 best solutions below