I run the code below with python3.10 -m cudf.pandas my_py.py. I realized that cudf uses numba for cudf.apply on UDFs, even without the @numba.autojit(nopython=True) decorator, and hence the code below, which uses datetime objects, cannot be processed by cudf.
Is it possible to disable cudf for this part of the code? I tried df_tmp = my_df.to_pandas().apply(g_a,ax=1), but it failed, saying that my_df is a pd.DF and has no to_pandas() function. I then printed the type of the object my_df, and it returned <class 'pandas.core.frame.DataFrame'>
def gzpd_apply(my_df00):
# some code
t_prelude = '08:30:00'
t_prev = datetime.strptime(t_prelude, '%H:%M:%S')
# some code
# some code
print(type(my_df))
df_tmp = my_df.apply(gzpd_apply,axis=1)
# some code
Here are the package versions I'm using: numpy==1.24.3 pandas==1.5.3 Cython==3.0.6 swifter==1.3.4 numba==0.57.1
The dataframe my_df looks like:
index | YMH | name | time | <other fields>
000 | '2020-01-01' | 'Tom' | '09:05:00' | ...
001 | '2022-01-01' | 'John'| '09:05:00' | ...
002 | '2023-05-01' | 'Bob' | '08:05:00' | ...
Currently the code has this error:
numba.core.errors.TypingError: Failed in cuda mode pipeline (step: nopython frontend)
Untyped global name 'datetime': Cannot determine Numba type of <class 'type'>
I know why there is such an error. I wonder how I can bypass it.
To add an example (2024.1.5)
I was able to create a toy example to reproduce the error.
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import swifter
import sys, os
#import cudf
def gzpd_apply(my_df00):
    """Return a copy of one row, bumping ``time_1`` when it equals ``time``.

    Written for row-wise use such as ``my_df.apply(gzpd_apply, axis=1)``.

    Args:
        my_df00: One row as a pandas Series. Its index labels must be
            strings and must include ``'time'`` and ``'time_1'``, whose
            values are ``'%H:%M:%S'`` strings.

    Returns:
        ``np.nan`` when any field whose label contains ``'_1'`` is null.
        Otherwise a new Series with the same labels, in which ``time_1`` is
        advanced by five minutes if it was equal to ``time``.
    """
    lagged_labels = [label for label in my_df00.index if '_1' in label]
    if my_df00[lagged_labels].isnull().sum() > 0:
        return np.nan

    # Edit a copy rather than the row we were handed: pandas documents that
    # functions passed to apply() must not mutate their argument, and an
    # in-place write here could leak into the caller's data.
    row = my_df00.copy()
    if row['time'] == row['time_1']:
        bumped = datetime.strptime(row['time_1'], '%H:%M:%S') + timedelta(minutes=5)
        row['time_1'] = bumped.strftime('%H:%M:%S')
    return row
def main():
    """Build a shuffled toy frame with lagged columns and apply gzpd_apply.

    The frame is five stacked copies of a five-row table whose 'YMH' and
    'time' columns are randomly permuted, sorted, and then joined side by
    side with a one-row-shifted copy of itself (columns suffixed '_1').
    The per-row results are reassembled into a frame and printed.
    """
    base = pd.DataFrame(
        data={
            'YMH': ['2019-01-02', '2018-01-02', '2019-05-02', '2020-01-02', '2021-01-02'],
            'name': ['albert', 'alex', 'alice', 'ben', 'bob'],
            'time': ['09:00:00', '09:20:00', '08:00:00', '07:00:00', '09:30:00'],
        }
    )
    my_df = pd.concat([base for _ in range(5)], axis=0)
    my_df['YMH'] = np.random.permutation(my_df['YMH'].values)
    my_df['time'] = np.random.permutation(my_df['time'].values)
    my_df = my_df.sort_values(by=['YMH', 'time', 'name']).reset_index(drop=True)

    # Previous-row values, stored under '<column>_1'.
    lagged = my_df.shift(1).rename(columns={col: col + '_1' for col in my_df.columns})
    my_df = pd.concat([my_df, lagged], axis=1)
    # data generated ------------------------------------------------------

    # Without swifter, i.e. my_df.apply(gzpd_apply, axis=1), this step works
    # fine; with swifter (as below) the code returns the error.
    df_tmp = my_df.swifter.apply(gzpd_apply, axis=1)

    # no error below -------------------------------------------------------
    df_tmp = df_tmp.T.dropna().reset_index(drop=True)
    df_tmp = pd.concat(list(df_tmp), axis=1).reset_index(drop=True)
    df_tmp = df_tmp.transpose()
    # Map positional column labels back to the names used in my_df.
    df_tmp.rename(columns={pos: col for pos, col in enumerate(my_df.columns)}, inplace=True)
    pd.set_option('display.max_rows', 500)
    print(df_tmp)
# Run the reproduction only when this file is executed as a script.
if __name__ == '__main__':
    main()
In the above toy example, if I don't use swifter, the code works fine. When I use swifter, the code raises the aforementioned numba.core.errors.TypingError. Note that in my original project, the numba error occurs regardless of swifter.
The above toy example has the following error (the numba error comes in the middle):
Traceback (most recent call last): File "/usr/local/lib/python3.10/dist-packages/swifter/swifter.py", line 427, in apply
tmp_df = func(sample, *args, **kwds)
File "my_cudf.py", line 10, in gzpd_apply
col_df_1 = [x for x in my_df00.index if '_1' in x]
File "/usr/local/lib/python3.10/dist-packages/cudf/utils/utils.py", line 242, in __iter__
raise TypeError(
TypeError: Int64Index object is not iterable. Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` if you wish to iterate over the values.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/cudf/core/indexed_frame.py", line 2360, in _apply
kernel, retty = _compile_or_get(
File "/usr/local/lib/python3.10/dist-packages/nvtx/nvtx.py", line 115, in inner
result = func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/cudf/core/udf/utils.py", line 269, in _compile_or_get
kernel, scalar_return_type = kernel_getter(frame, func, args)
File "/usr/local/lib/python3.10/dist-packages/cudf/core/udf/row_function.py", line 143, in _get_row_kernel
scalar_return_type = _get_udf_return_type(row_type, func, args)
File "/usr/local/lib/python3.10/dist-packages/nvtx/nvtx.py", line 115, in inner
result = func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/cudf/core/udf/utils.py", line 89, in _get_udf_return_type
ptx, output_type = cudautils.compile_udf(func, compile_sig)
File "/usr/local/lib/python3.10/dist-packages/cudf/utils/cudautils.py", line 126, in compile_udf
ptx_code, return_type = cuda.compile_ptx_for_current_device(
File "/usr/local/lib/python3.10/dist-packages/numba/cuda/compiler.py", line 319, in compile_ptx_for_current_device
return compile_ptx(pyfunc, sig, debug=debug, lineinfo=lineinfo,
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/numba/cuda/compiler.py", line 289, in compile_ptx
cres = compile_cuda(pyfunc, return_type, args, debug=debug,
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/numba/cuda/compiler.py", line 230, in compile_cuda
cres = compiler.compile_extra(typingctx=typingctx,
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler.py", line 762, in compile_extra
return pipeline.compile_extra(func)
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler.py", line 460, in compile_extra
return self._compile_bytecode()
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler.py", line 528, in _compile_bytecode
return self._compile_core()
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler.py", line 507, in _compile_core
raise e
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler.py", line 494, in _compile_core
pm.run(self.state)
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_machinery.py", line 368, in run
raise patched_exception
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_machinery.py", line 356, in run
self._runPass(idx, pass_inst, state)
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_machinery.py", line 311, in _runPass
mutated |= check(pss.run_pass, internal_state)
File "/usr/local/lib/python3.10/dist-packages/numba/core/compiler_machinery.py", line 273, in check
mangled = func(compiler_state)
File "/usr/local/lib/python3.10/dist-packages/numba/core/typed_passes.py", line 110, in run_pass
typemap, return_type, calltypes, errs = type_inference_stage(
File "/usr/local/lib/python3.10/dist-packages/numba/core/typed_passes.py", line 86, in type_inference_stage
infer.build_constraint()
File "/usr/local/lib/python3.10/dist-packages/numba/core/typeinfer.py", line 1039, in build_constraint
self.constrain_statement(inst)
File "/usr/local/lib/python3.10/dist-packages/numba/core/typeinfer.py", line 1386, in constrain_statement
self.typeof_assign(inst)
File "/usr/local/lib/python3.10/dist-packages/numba/core/typeinfer.py", line 1461, in typeof_assign
self.typeof_global(inst, inst.target, value)
File "/usr/local/lib/python3.10/dist-packages/numba/core/typeinfer.py", line 1561, in typeof_global
typ = self.resolve_value_type(inst, gvar.value)
File "/usr/local/lib/python3.10/dist-packages/numba/core/typeinfer.py", line 1482, in resolve_value_type
raise TypingError(msg, loc=inst.loc)
numba.core.errors.TypingError: Failed in cuda mode pipeline (step: nopython frontend)
Untyped global name 'datetime': Cannot determine Numba type of <class 'type'>
File "my_cudf.py", line 14:
def gzpd_apply(my_df00):
<source elided>
if my_df00['time']==my_df00['time_1']:
my_df00['time_1'] = (datetime.strptime(my_df00['time_1'], '%H:%M:%S')+timedelta(minutes=5)).strftime('%H:%M:%S')
^
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.10/dist-packages/cudf/pandas/__main__.py", line 91, in <module>
main()
File "/usr/local/lib/python3.10/dist-packages/cudf/pandas/__main__.py", line 87, in main
runpy.run_path(args.args[0], run_name="__main__")
File "/usr/lib/python3.10/runpy.py", line 289, in run_path
return _run_module_code(code, init_globals, run_name,
File "/usr/lib/python3.10/runpy.py", line 96, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "my_cudf.py", line 37, in <module>
main()
File "my_cudf.py", line 27, in main
df_tmp = my_df.swifter.apply(gzpd_apply,axis=1) # apply
File "/usr/local/lib/python3.10/dist-packages/swifter/swifter.py", line 436, in apply
timed = timeit.timeit(wrapped, number=N_REPEATS)
File "/usr/lib/python3.10/timeit.py", line 234, in timeit
return Timer(stmt, setup, timer, globals).timeit(number)
File "/usr/lib/python3.10/timeit.py", line 178, in timeit
timing = self.inner(it, self.timer)
File "<timeit-src>", line 6, in inner
File "/usr/local/lib/python3.10/dist-packages/swifter/swifter.py", line 339, in wrapped
self._obj.iloc[self._SAMPLE_INDEX].apply(
File "/usr/local/lib/python3.10/dist-packages/nvtx/nvtx.py", line 115, in inner
result = func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/cudf/core/dataframe.py", line 4488, in apply
return self._apply(func, _get_row_kernel, *args, **kwargs)
File "/usr/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/lib/python3.10/dist-packages/nvtx/nvtx.py", line 115, in inner
result = func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/cudf/core/indexed_frame.py", line 2364, in _apply
raise ValueError(
ValueError: user defined function compilation failed.
Update (2024.1.8)
I modified the above code to (with proper import):
with disable_module_accelerator():
df_tmp = my_df.swifter.apply(gzpd_apply,axis=1) # !! w/ swifter, the code returns error.
Only this one line of code is inside the with-context, and it throws the same error. Note that if I don't include any cudf-related code/command, the above code snippet runs correctly regardless of swifter.