I have a file excel with 2 worksheets.I'm using neuralprophet and I want to use a column of the second worksheet as my regressor to do crossvalidation.How can I do it? This is my code as for now:
from neuralprophet import NeuralProphet
import pandas as pd
import openpyxl
import matplotlib.pyplot as plt
# Read Excel file using openpyxl
dataframe = openpyxl.load_workbook("Dati inclinometrici e pluviometrici_ReCity.xlsx")
dataframe1 = dataframe.active
# Create a list to store rows
rows_list = []
# Iterate through rows and columns to extract data
for row in dataframe1.iter_rows(min_row=2, values_only=True):
row_dict = {'ds': row[0], 'y': row[1]}
rows_list.append(row_dict)
# Create a Pandas DataFrame
df = pd.DataFrame(rows_list)
# Convert 'ds' to datetime format
df['ds'] = pd.to_datetime(df['ds'], errors='coerce')
# Convert 'y' to numeric format
df['y'] = pd.to_numeric(df['y'], errors='coerce')
# Fill NaN values in the 'y' column with 0
df['y'] = df['y'].fillna(method='ffill')
# Fill NaN values in the 'ds' column with a specific timestamp
df['ds'] = df['ds'].fillna(value=df['ds'].min() - pd.Timedelta(days=1))
print(df.columns)
# Filter data between January 2 2016, and September 16 2017
start_date = '2016-01-02'
end_date = '2017-09-16'
df_filtered = df[(df['ds'] >= start_date) & (df['ds'] <= end_date)]
# Print the filtered DataFrame
print(df_filtered)
# Train NeuralProphet model with filtered data and yearly seasonality
model = NeuralProphet(yearly_seasonality=True)
model.fit(df_filtered, freq="D")
# Make forecasts for the future period
future = model.make_future_dataframe(df_filtered, periods=365)
forecast = model.predict(future)
# Print the available columns in the forecast DataFrame
print(forecast.columns)
# Print the forecasted values
print(forecast[['ds', 'yhat1', 'trend', 'season_yearly', 'season_weekly']])
# Plot the actual values
plt.plot(df_filtered['ds'], df_filtered['y'], label='Actual', color='blue')
# Plot the forecasted values
plt.plot(forecast['ds'], forecast['yhat1'], label='Forecast', color='red')
# Plot the trend, yearly seasonality, and weekly seasonality
plt.plot(forecast['ds'], forecast['trend'], label='Trend', linestyle='--', color='green')
# Check if 'season_yearly' is present in the DataFrame
if 'season_yearly' in forecast.columns:
plt.plot(forecast['ds'], forecast['season_yearly'], label='Yearly Seasonality', linestyle='--', color='orange')
# Plot the weekly seasonality if present
if 'season_weekly' in forecast.columns:
plt.plot(forecast['ds'], forecast['season_weekly'], label='Weekly Seasonality', linestyle='--', color='purple')
# Add labels and title
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Actual vs Forecasted Values with Trend and Seasonality')
# Show the legend
plt.legend()
# Display the plot
plt.show()
I tried using add_lagged_regressor and add_future_regressor but even though I checked my dataframe and I see that it has the correct dataset and columns it always give me the error that the column I want to use as regressor is not found