Passing Storable Vector to a C function allocating stored data on GPU

201 Views Asked by At

I have the following C code:

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>

double enorm(double* v1, int length){
    cublasHandle_t handle;
    double result = 0;
    double* vector;
    cudaMalloc((void**) &vector, length * sizeof(double));
    cublasSetVector(length, sizeof(double), v1, 1, vector,1);
    cublasCreate(&handle);
    cublasDnrm2(handle, length, vector, 1, &result);
    cudaFree(vector);
    return result;
}

double testnorm(double* v1, int len){
    double tmp = 0;
    for(int i = 0; i < len; i++){
        tmp += v1[i]*v1[i];
    }
    return sqrt(tmp);
}

/*
 * Demo driver: builds the vector (3, 4) on the heap and prints its norm
 * twice, first via the cuBLAS-backed enorm and then via the host-side
 * testnorm. Returns EXIT_FAILURE if the vector cannot be allocated.
 */
int main() {
    double* a = malloc(2 * sizeof(double));
    if (a == NULL) {
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }
    a[0] = 3;
    a[1] = 4;
    printf("%.f\n", enorm(a, 2));
    printf("%.f\n", testnorm(a,2));
    free(a);  /* release the buffer instead of leaking it at exit */
    return 0;
}

And the following Haskell code, which calls the functions above through the FFI:

import qualified Foreign.Ptr as P
import System.IO.Unsafe
import Foreign.C.Types
import qualified Data.Vector.Storable as SV
import Foreign.C.Types

-- | Binding to the C function @enorm@: takes a pointer to 'CDouble' elements
-- and an element count, and returns a 'CDouble'.
--
-- NOTE(review): the result type has no 'IO', so GHC treats the call as a pure
-- expression and may evaluate it lazily. The C @enorm@ allocates device
-- memory and calls cuBLAS, so consider whether it should be imported with an
-- 'IO' result instead.
foreign import ccall "enorm" c_enorm :: P.Ptr CDouble -> CInt -> CDouble

-- | Binding to the C function @testnorm@: takes a pointer to 'CDouble'
-- elements and an element count, and returns a 'CDouble'. Imported without
-- 'IO', so GHC treats the call as a pure expression.
foreign import ccall "testnorm" c_testnorm :: P.Ptr CDouble -> CInt -> CDouble

-- | Euclidean norm of a storable vector, computed by the C function @enorm@.
--
-- The foreign call is forced with '$!' inside the 'SV.unsafeWith' callback so
-- that it runs while the pointer to the vector's buffer is still in scope.
-- A plain @return (c_enorm ptr len)@ would hand back an unevaluated thunk and
-- defer the call until after 'SV.unsafeWith' has returned.
enorm :: SV.Vector CDouble -> CDouble
enorm v1 = unsafePerformIO $ do
  let len = fromIntegral $ SV.length v1
  SV.unsafeWith v1 $ \ptr -> return $! c_enorm ptr len

-- | Euclidean norm of a storable vector, computed by the C function
-- @testnorm@.
--
-- The foreign call is forced with '$!' inside the 'SV.unsafeWith' callback so
-- that the C code reads the buffer while the pointer is still in scope,
-- rather than when a lazily returned thunk is eventually demanded.
testnorm :: SV.Vector CDouble -> CDouble
testnorm v1 = unsafePerformIO $ do
  let len = fromIntegral $ SV.length v1
  SV.unsafeWith v1 $ \ptr -> return $! c_testnorm ptr len

-- | Demo driver: prints the norm of the vector (3, 4), first via 'enorm' and
-- then via 'testnorm'.
main :: IO ()
main = mapM_ (\norm -> print (norm sample)) [enorm, testnorm]
  where
    sample :: SV.Vector CDouble
    sample = SV.fromList [3, 4]

For some reason, even though the C program prints 5 and 5 as expected, the Haskell equivalent prints 0 and 5. This suggests that although the array is successfully passed to the C function, something goes wrong when copying the data to the GPU. What is the cause of this behavior, and how can I get the expected result?

C is compiled with:

gcc -o cmain blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm

Haskell:

gcc -c -fPIC -o testblas.o blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm
gcc -shared -o libtestblas.so  testblas.o
ghc -o main Main.hs -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm -L./ -ltestblas
export LD_LIBRARY_PATH=./; ./main 

EDIT: When called from Haskell, cudaMalloc returns cudaErrorMemoryAllocation (unable to allocate enough memory), while in the C program every call succeeds.

0

There are 0 best solutions below