I have the following df:
head(forecastid)
ID 2015 2016 2017 2018 2019 2020 2021 2022
1 C1 420 423 481 421 393 419 415 440
2 C2 1325 1262 1376 1370 1484 1421 1287 1400
3 C3 547 541 547 550 570 576 556 587
4 C4 349 349 375 346 374 379 433 376
5 C5 721 714 758 716 833 735 711 731
6 C6 420 423 481 421 393 419 415 440
im using the following code which works with pandas even number of rows is small
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import warnings
# Suppress warnings
warnings.filterwarnings("ignore")
# Read the CSV file
testingR = pd.read_csv("final_dataset.csv")
# Check the structure of the dataset
print(testingR.head())
# Function to fit ARIMA models and generate forecasts
def fit_arima(series):
# Adjust the ARIMA order based on your data
order = (1, 0, 1)
model = ARIMA(series, order=order)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
fit_model = model.fit()
# Generate a one-step-ahead forecast
forecast_value = fit_model.forecast(steps=1).iloc[0]
# Set negative forecast values to 0
forecast_value = max(forecast_value, 0)
return forecast_value
# Apply the function to each row of the dataset (excluding the first column)
testingR['Forecast_2023'] = testingR.iloc[:, 1:].apply(fit_arima, axis=1)
testingR['Forecast_2023'] = testingR['Forecast_2023'].astype(int)
# Print or use the modified dataset as needed
testingR.to_csv('2023_predict.csv', index=False, encoding='utf-8-sig')
it works like a charm, but my original dataset is [1393096 rows x 9 columns]> so it never finishes task. I tried to use Cuml and cudf, but there's always error of user define function compilation or cudf doesn't support iterrrows() any help please?
this is code i'm trying
import numpy as np
import cudf
from cuml.tsa import ARIMA as cumlARIMA
import cupy as cp
# Read the CSV file using cudf
testingR = cudf.read_csv("final_dataset.csv")
# Check the structure of the dataset
print(testingR.head())
# Function to fit ARIMA models and generate forecasts for each row
def fit_arima(row):
# Adjust the ARIMA order based on your data
order = (1, 0, 1)
# Convert the row to a CuPy array
row_cupy = cp.array(row.to_array())
# Initialize cuml ARIMA model
model = cumlARIMA(order=order)
# Fit the cuml ARIMA model
model.fit(row_cupy)
# Generate a one-step-ahead forecast
forecast_value = model.forecast(steps=1).to_array()[0]
# Set negative forecast values to 0
forecast_value = max(forecast_value, 0)
return forecast_value
# Apply the function to each row of the dataset (excluding the first column)
forecast_values = testingR.apply(fit_arima, axis=1)
# Add the forecast values to the DataFrame
testingR['Forecast_2023'] = forecast_values.astype(int)
# Print or use the modified dataset as needed
testingR.to_csv('2023_predict.csv', index=False, encoding='utf-8-sig')