Using cblas_dgemm in C and returning the product matrix in python

172 Views Asked by At

I am fairly new to parallel computing and we have been assigned to implement a matrix algorithm in C for later use in python. The problem comes when my function mlsa from C is implemented in Python, because it returns a unique double value, when my objective is to return an array (a matrix). Since it doesn't return an array, when I try to save the image it comes to an error in the plt.imsave because of the dimentions.

The code is an implementation of a LSA algorithm:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>

double * mlsa(const double *A, double *W0, double *H0, const int niter, const int m, const int n, const int k) 
{
   double *WH, *W, *H, *WTW, *B, *C, *D, *E, *WHdef ;
   int    i, l;
   
   /* Reservamos memoria */
   WTW = (double*)calloc(k*k, sizeof(double));
   B = (double*)calloc(k*n, sizeof(double));
   C = (double*)calloc(k*n, sizeof(double));
   WH = (double*)calloc(m*n, sizeof(double));
   D = (double*)calloc(m*k, sizeof(double));
   E = (double*)calloc(m*k, sizeof(double));
   W = (double*)calloc(m*k, sizeof(double));
   H = (double*)calloc(k*n, sizeof(double));
   WHdef = (double*)calloc(m*n, sizeof(double));
   for(l = 0; l<niter; ++l){
    /* Producto W^T*W */
    cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W0, m, W0, k, 0.0, WTW, k);
    /* Producto de la matriz (W^T*W)*H  */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, WTW, k, H0, n, 0.0, B, n);
    /* Producto de la matriz W^T*A */
    cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W0, m, A, n, 0.0, C, n);
    /* Ahora hacemos el bucle para actualizar la matriz H0 elemento a elemento*/
    for(i=0; i<n*k;i++){
        H0[i] = (H0[i]*C[i])/B[i];
    }
    /* Producto de la matriz W*H */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W0, k, H0, n, 0.0, WH, n);
    /* Producto de la matriz (W*H)*H^T */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, WH, n, H0, n, 0.0, D, k);
    /* Producto de la matriz A*H^T */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, n, H0, n, 0.0, E, k);
    /* Actualizamos W0 elemento a elemento */
    for(i=0; i<m*k;i++){
        W0[i] = (W0[i]*E[i])/D[i];
    }
    
   }
   for(i=0;i<m*k;i++){
    W[i]=W0[i];
   }
   for(i=0;i<n*k;i++){
    H[i]=H0[i];
   }
   /* liberamos memoria */
   free(WTW);
   free(B);
   free(C);
   free(WH);
   free(D);
   free(E);
   
   /* Calculamos el producto W*H */
   
   cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, k, H, n, 0.0, WHdef, n);

   return WHdef;   
}

And this the implementation in Python:

import ctypes
import numpy as np
from numpy.ctypeslib import ndpointer
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF
import time

#Images and dimentions

im = np.array(plt.imread('Input/bilde.bmp'))
im = im.astype(np.float64)
im2 = np.array(plt.imread('Input/leonardo.bmp'))
im2 = im2.astype(np.float64)

m1, n1 = np.shape(im)
m2, n2 = np.shape(im2)
k1 = round(min(m1, n1)/10)
k2 = round(min(m2, n2)/10)

#lib and function from C

mlsa = lib.mlsa
mlsa.restype = ctypes.c_double
mlsa.argtypes = [ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
                 ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), ctypes.c_int, ctypes.c_int, ctypes.c_int,
                 ctypes.c_int]

# Implementation

time1_1 = time.time()
mlsa1 = mlsa(im, W01, H01, 50, m1, n1, k1)
print(mlsa1)
im_mia1 = np.ctypeslib.as_array(mlsa(im, W01, H01, 50, m1, n1, k1), shape=(m1,n1))
#im_mia1 = im_mia1.astype('uint8')
time1_2 = time.time() - time1_1
plt.imsave('Output/bilde_1_mio.bmp', im_mia1)

#mlsa.restype = ctypes.POINTER(ctypes.c_double * m2*n2)

time2_1 = time.time()
im_mia2 = np.ctypeslib.as_array(mlsa(im2, W02, H02, 50, m2, n2, k2), shape=(m2,n2))
#im_mia2 = im_mia2.astype('uint8')
time2_2 = time.time() - time2_1
plt.imsave('Output/leonardo_1_mio.bmp', im_mia2)

I hope you can help me, because i don't know what else to do.

0

There are 0 best solutions below