Hi everyone, I'm a beginner with OpenCL and I wrote some simple C code that sums two arrays. Here is part of the code:
// Create Kernel.
cl_kernel kernelSum = clCreateKernel( myProgram, "sum", &error );
// Set Input Array.
size_t arraySize = 1000;
char* a = ( char* ) malloc( sizeof( char ) * arraySize );
char* b = ( char* ) malloc( sizeof( char ) * arraySize );
char* c = ( char* ) malloc( sizeof( char ) * arraySize );
for (int i = 0; i < arraySize; i += 1)
{
a[ i ] = 1;
b[ i ] = 2;
c[ i ] = -1;
}
// Set Buffers.
cl_mem a_buffer = clCreateBuffer(
myContext,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
arraySize * sizeof( char ), a,
&error );
cl_mem b_buffer = clCreateBuffer(
myContext,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
arraySize * sizeof( char ), b,
&error );
cl_mem c_buffer = clCreateBuffer(
myContext,
CL_MEM_WRITE_ONLY,
arraySize * sizeof( char ), NULL,
&error );
printf( "Buffers created.\n" );
// Setting Kernel Arguments.
error = clSetKernelArg( kernelSum, 0, sizeof( cl_mem ), &a_buffer );
error |= clSetKernelArg( kernelSum, 1, sizeof( cl_mem ), &b_buffer );
error |= clSetKernelArg( kernelSum, 2, sizeof( cl_mem ), &c_buffer );
printf( "Arguments Set.\n" );
// Enqueue kernels to execute.
cl_event event;
size_t globalWorkOffset = 0;
size_t globalWorkSize[ 1 ] = { arraySize };
size_t localWorkSize[ 1 ] = { 1 };
clEnqueueNDRangeKernel(
myCommandQueue,
kernelSum,
1, // work_dim
0, // global work offset
globalWorkSize,
localWorkSize, // local work offset
0, NULL,
&event
);
printf( "Kernel Enqueued.\n" );
error = clEnqueueReadBuffer(
myCommandQueue,
c_buffer,
CL_TRUE, // blocking option
( size_t ) 0, arraySize * sizeof( char ), // offset, data_size
c, // host_ptr
0, NULL,
&event );
if ( error != CL_SUCCESS )
{
printf( "Buffer Reading Back Failed.\n" );
exit( 1 );
}
However, I get an incorrect result: all the numbers in the "c" array are zeros. I thought it might have something to do with clEnqueueReadBuffer, but perhaps not. Any ideas about this issue? I look forward to your suggestions! :-)
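Your call to clEnqueueReadBuffer will not wait for the kernel to finish. It will most likely execute simultaneously with the kernel. Change the call to:

    error = clEnqueueReadBuffer(
        myCommandQueue,
        c_buffer,
        CL_TRUE,                                  // blocking option
        ( size_t ) 0, arraySize * sizeof( char ), // offset, data_size
        c,                                        // host_ptr
        1, &event,                                // wait for the kernel's event
        NULL );

This will cause clEnqueueReadBuffer to wait for the kernel event to finish before starting to read the buffer.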