Why doesn't the OpenCL kernel execute even though no errors are reported? (C, NVIDIA, Kubuntu)

93 Views Asked by At

I'm learning OpenCL, and for some reason the kernel does nothing:

#include <stdlib.h>
#include <stdio.h>

#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>

/* Most recent OpenCL status code; written by the API calls in main(). */
int err = 0;

/* Print the current source line to stderr. __LINE__ is an int, hence %d. */
#define PRINTERR() fprintf(stderr, "Error at line %d.\n", __LINE__)

/*
 * CHECKERR(x):    if x is non-zero, report the line and return it from the
 *                 enclosing function.
 * CHECKNOTERR(x): same, but triggers when x is zero. The argument is
 *                 parenthesised so that expressions such as `a - b` or
 *                 `p && q` are negated as a whole rather than only their
 *                 first operand.
 *
 * Both expand to a bare `if` statement (no trailing semicolon required), so
 * do not place them directly in front of an `else`.
 */
#define CHECKERR(x) if(x){PRINTERR();return __LINE__;}
#define CHECKNOTERR(x) if(!(x)){PRINTERR();return __LINE__;}

/*
 * OpenCL C source for the "square" kernel, handed to
 * clCreateProgramWithSource() at run time.
 *
 * Each work-item reads its global id i; work-item 0 additionally prints
 * "test", and every work-item with i < count stores input[i] * input[i]
 * into output[i].
 *
 * Adjacent string literals are concatenated by the compiler, so no line
 * continuations are needed between them.
 */
const char *KernelSource =
    "__kernel void square(                                                  \n"
    "   __global float* input,                                              \n"
    "   __global float* output,                                             \n"
    "   const unsigned int count)                                           \n"
    "{                                                                      \n"
    "   int i = get_global_id(0);                                           \n"
    "   if(i == 0) printf(\"test\\n\");                                     \n"
    "   if(i < count)                                                       \n"
    "       output[i] = input[i] * input[i];                                \n"
    "}                                                                      \n";


#define DATA_SIZE 1024
int main(){
    float data[DATA_SIZE];
    float results[DATA_SIZE];

    size_t global;
    size_t local;

    cl_platform_id platform_id;
    cl_device_id device_id;

    cl_context context;
    cl_command_queue commands;
    cl_program program;
    cl_kernel kernel;

    cl_mem input;
    cl_mem output;

    unsigned int i = 0;
    unsigned int count = DATA_SIZE;
    for(i = 0; i < count; ++i)
        //data[i] = rand() / (float)RAND_MAX;
        data[i] = 2.f;


    int gpu = 1;
    err = clGetPlatformIDs (1, &platform_id, NULL); CHECKERR(err)
    err = clGetDeviceIDs(platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); CHECKERR(err)

    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); CHECKERR(!context)
    commands = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); CHECKERR(err)


    input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, &err); CHECKERR(err)
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
    CHECKERR(!input || !output)

    err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); CHECKERR(err)


    program = clCreateProgramWithSource(context, 1, &KernelSource, NULL, &err); CHECKERR(err)
    err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); CHECKERR(err)
    kernel = clCreateKernel(program, "square", &err); CHECKERR(err)

    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
    err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
    CHECKERR(err)


    err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); CHECKERR(err)
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); CHECKERR(err)
    err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); CHECKERR(err)

    clFlush(commands);
    clFinish(commands);


    unsigned int correct = 0;
    for(i = 0; i < count; ++i)
        printf("%f\n",results[i]);


    printf("Computed '%d/%d' correct values!\n", correct, count);


    // free
    clReleaseMemObject(input);
    clReleaseMemObject(output);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return 0;
}

I expect the kernel to run and write the squared values to the output buffer, but it doesn't.

I tried reading back the input buffer instead of the output buffer, and that works fine. The printf in the kernel prints nothing, and when I run the program, clEnqueueReadBuffer returns only zeros. I have an AMD CPU, so I can't test it on the CPU. I tried another example and it worked (the one here). Help appreciated.

1

There are 1 best solutions below

0
On BEST ANSWER

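`global` is never assigned a value before it is passed to clEnqueueNDRangeKernel as the global work size. It happens to be 0, so the kernel runs 0 times. Set it (for example, `global = count;`) before enqueueing the kernel.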