I have the following df:

head(forecastid)

           ID 2015 2016 2017 2018 2019 2020 2021 2022
1          C1   420   423   481   421   393   419   415   440
2          C2  1325  1262  1376  1370  1484  1421  1287  1400
3          C3   547   541   547   550   570   576   556   587
4          C4   349   349   375   346   374   379   433   376
5          C5   721   714   758   716   833   735   711   731
6          C6   420   423   481   421   393   419   415   440

im using the following code which works with pandas even number of rows is small

import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Read the CSV file
testingR = pd.read_csv("final_dataset.csv")

# Check the structure of the dataset
print(testingR.head())

# Function to fit ARIMA models and generate forecasts
def fit_arima(series):
    # Adjust the ARIMA order based on your data
    order = (1, 0, 1)
    
    model = ARIMA(series, order=order)
    
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fit_model = model.fit()
    
    # Generate a one-step-ahead forecast
    forecast_value = fit_model.forecast(steps=1).iloc[0]
    
    # Set negative forecast values to 0
    forecast_value = max(forecast_value, 0)
    
    return forecast_value

# Apply the function to each row of the dataset (excluding the first column)
testingR['Forecast_2023'] = testingR.iloc[:, 1:].apply(fit_arima, axis=1)
testingR['Forecast_2023'] = testingR['Forecast_2023'].astype(int)
# Print or use the modified dataset as needed
testingR.to_csv('2023_predict.csv', index=False, encoding='utf-8-sig')

it works like a charm, but my original dataset is [1393096 rows x 9 columns]> so it never finishes task. I tried to use Cuml and cudf, but there's always error of user define function compilation or cudf doesn't support iterrrows() any help please?

this is code i'm trying

import numpy as np
import cudf
from cuml.tsa import ARIMA as cumlARIMA
import cupy as cp

# Read the CSV file using cudf
testingR = cudf.read_csv("final_dataset.csv")

# Check the structure of the dataset
print(testingR.head())

# Function to fit ARIMA models and generate forecasts for each row
def fit_arima(row):
    # Adjust the ARIMA order based on your data
    order = (1, 0, 1)

    # Convert the row to a CuPy array
    row_cupy = cp.array(row.to_array())

    # Initialize cuml ARIMA model
    model = cumlARIMA(order=order)

    # Fit the cuml ARIMA model
    model.fit(row_cupy)

    # Generate a one-step-ahead forecast
    forecast_value = model.forecast(steps=1).to_array()[0]

    # Set negative forecast values to 0
    forecast_value = max(forecast_value, 0)

    return forecast_value

# Apply the function to each row of the dataset (excluding the first column)
forecast_values = testingR.apply(fit_arima, axis=1)

# Add the forecast values to the DataFrame
testingR['Forecast_2023'] = forecast_values.astype(int)

# Print or use the modified dataset as needed
testingR.to_csv('2023_predict.csv', index=False, encoding='utf-8-sig')
0

There are 0 best solutions below