Atomic Add on Cuda not working..

1.3k Views Asked by At

My problem is to find out the number of integer points in n dimensional sphere using CUDA. I dont understand what is wrong with the below code but it is giving 0 output all the time. CUDA compute capability is 2.0 and tool kit version is 3.10. Thanks for all the help.

__global__ void count_in(int pow_rad, int ndim,int *digit,int w,unsigned int *count,double radius)
{


long int i,j;
int rem,idx,sq,num;
int iy=blockDim.y * blockIdx.y + threadIdx.y;
int ix=blockDim.x * blockIdx.x + threadIdx.x;
int width=gridDim.x*blockDim.x;
int h=2*w+1;
i=iy*width+ix;
if(i>pow_rad) return;

    sq=0;
    idx=0;
    num=i;
    for(j=0;j<ndim;j++)
        {digit[j]=0;}
    while(num!=0)
    {
        rem=num%w;
        num/=w;
        digit[idx]=rem;
        idx++;
    }
    for(j=0;j<ndim;j++)
        {sq+=(digit[j]-h)*(digit[j]-h);}
    if(sq<(radius*radius))
        atomicInc(count,(unsigned int)1);
    __syncthreads();
}

int main(int argc, char* argv[]) 
{
const long ntrials = 5;
int i;
for (int n = 0; n < ntrials; ++n) {
    int *digit;
    unsigned int *count;
    std::cout<<n<<std::endl;
    int pow_rad;
    unsigned int num;
    // Select radius and number of dimensions at random
    const double r = drand48() * (RMAX - RMIN) + RMIN;
    const int   nd = lrand48() % (MAXDIM - 1) + 1;
    cudaMalloc((void**) &digit,sizeof(int)*nd);
    cudaMalloc((void**) &count,sizeof(unsigned int));
    cudaMemset(count,0,sizeof(unsigned int));
    int h=(int)floor(r);
    int w=2*h+1;
    std::cout << "###"<< r <<" "<< nd<< std::endl;
    for(i=1;i<=nd;i++)
        pow_rad*=w;
    int width=(int)sqrt(pow_rad);
    // Call your function
    dim3 dimBlock(32,32);
    dim3 dimGrid((width/32)+1,(width/32)+1);

count_in<<<dimGrid,dimBlock>>>(pow_rad, nd,digit,w,count,r);
    cudaMemcpy(&num,count,sizeof(unsigned int),cudaMemcpyDeviceToHost);
std::cout << "-->"<<num << std::endl;
}
}
1

There are 1 best solutions below

1
On

I didn't look at all of your code, but the lines

    atomicInc(count,(unsigned int)1);

seems to show a common misunderstanding of the atomicInc function. The second argument is not the amount to increment, but the modulus; when the global variable reaches that amount, it resets to zero. With the value you specified, each time the statement executes the variable count is reset to 0.

If you change atomicInc to atomicAdd, or if you change the modulus to something large enough that it will never be reached, it should work better.