I have the following C code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
double enorm(double* v1, int length){
cublasHandle_t handle;
double result = 0;
double* vector;
cudaMalloc((void**) &vector, length * sizeof(double));
cublasSetVector(length, sizeof(double), v1, 1, vector,1);
cublasCreate(&handle);
cublasDnrm2(handle, length, vector, 1, &result);
cudaFree(vector);
return result;
}
double testnorm(double* v1, int len){
double tmp = 0;
for(int i = 0; i < len; i++){
tmp += v1[i]*v1[i];
}
return sqrt(tmp);
}
int main() {
double* a = malloc(2 * sizeof(double));
a[0] = 3;
a[1] = 4;
printf("%.f\n", enorm(a, 2));
printf("%.f\n", testnorm(a,2));
return 0;
}
And following Haskell code, which borrows functions from above:
import qualified Foreign.Ptr as P
import System.IO.Unsafe
import Foreign.C.Types
import qualified Data.Vector.Storable as SV
import Foreign.C.Types
foreign import ccall "enorm" c_enorm :: P.Ptr CDouble -> CInt -> CDouble
foreign import ccall "testnorm" c_testnorm :: P.Ptr CDouble -> CInt -> CDouble
enorm :: SV.Vector CDouble -> CDouble
enorm v1 = unsafePerformIO $ do
let len = fromIntegral $ SV.length v1
SV.unsafeWith v1 $ \ptr -> return (c_enorm ptr len)
testnorm :: SV.Vector CDouble -> CDouble
testnorm v1 = unsafePerformIO $ do
let len = fromIntegral $ SV.length v1
SV.unsafeWith v1 $ \ptr -> return (c_testnorm ptr len)
main :: IO ()
main = do
let a = SV.fromList [3,4] :: SV.Vector CDouble
print $ enorm a
print $ testnorm a
For some reason even though C program returns, as expected 5 and 5, the Haskell equivalent returns 0 and 5 which means that even though array is successfully passed to C function, there is some problem with copying this data to GPU. What is the cause of this behavior and how to do it so that it would give expected result?
C is compiled with:
gcc -o cmain blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm
Haskell:
gcc -c -fPIC -o testblas.o blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm
gcc -shared -o libtestblas.so testblas.o
ghc -o main Main.hs -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm -L./ -ltestblas
export LD_LIBRARY_PATH=./; ./main
EDIT: In Haskell cudaMalloc returns cudaErrorMemoryAllocation (unable to allocate enough memory) while in C everything ends with success.