I am fairly new to parallel computing and we have been assigned to implement a matrix algorithm in C for later use in python. The problem comes when my function mlsa from C is implemented in Python, because it returns a unique double value, when my objective is to return an array (a matrix). Since it doesn't return an array, when I try to save the image it comes to an error in the plt.imsave because of the dimentions.
The code is an implementation of a LSA algorithm:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cblas.h>
double * mlsa(const double *A, double *W0, double *H0, const int niter, const int m, const int n, const int k)
{
double *WH, *W, *H, *WTW, *B, *C, *D, *E, *WHdef ;
int i, l;
/* Reservamos memoria */
WTW = (double*)calloc(k*k, sizeof(double));
B = (double*)calloc(k*n, sizeof(double));
C = (double*)calloc(k*n, sizeof(double));
WH = (double*)calloc(m*n, sizeof(double));
D = (double*)calloc(m*k, sizeof(double));
E = (double*)calloc(m*k, sizeof(double));
W = (double*)calloc(m*k, sizeof(double));
H = (double*)calloc(k*n, sizeof(double));
WHdef = (double*)calloc(m*n, sizeof(double));
for(l = 0; l<niter; ++l){
/* Producto W^T*W */
cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, k, k, m, 1.0, W0, m, W0, k, 0.0, WTW, k);
/* Producto de la matriz (W^T*W)*H */
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, k, n, k, 1.0, WTW, k, H0, n, 0.0, B, n);
/* Producto de la matriz W^T*A */
cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, k, n, m, 1.0, W0, m, A, n, 0.0, C, n);
/* Ahora hacemos el bucle para actualizar la matriz H0 elemento a elemento*/
for(i=0; i<n*k;i++){
H0[i] = (H0[i]*C[i])/B[i];
}
/* Producto de la matriz W*H */
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W0, k, H0, n, 0.0, WH, n);
/* Producto de la matriz (W*H)*H^T */
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, WH, n, H0, n, 0.0, D, k);
/* Producto de la matriz A*H^T */
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, k, n, 1.0, A, n, H0, n, 0.0, E, k);
/* Actualizamos W0 elemento a elemento */
for(i=0; i<m*k;i++){
W0[i] = (W0[i]*E[i])/D[i];
}
}
for(i=0;i<m*k;i++){
W[i]=W0[i];
}
for(i=0;i<n*k;i++){
H[i]=H0[i];
}
/* liberamos memoria */
free(WTW);
free(B);
free(C);
free(WH);
free(D);
free(E);
/* Calculamos el producto W*H */
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, W, k, H, n, 0.0, WHdef, n);
return WHdef;
}
And this the implementation in Python:
import ctypes
import numpy as np
from numpy.ctypeslib import ndpointer
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF
import time
#Images and dimentions
im = np.array(plt.imread('Input/bilde.bmp'))
im = im.astype(np.float64)
im2 = np.array(plt.imread('Input/leonardo.bmp'))
im2 = im2.astype(np.float64)
m1, n1 = np.shape(im)
m2, n2 = np.shape(im2)
k1 = round(min(m1, n1)/10)
k2 = round(min(m2, n2)/10)
#lib and function from C
mlsa = lib.mlsa
mlsa.restype = ctypes.c_double
mlsa.argtypes = [ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), ctypes.c_int, ctypes.c_int, ctypes.c_int,
ctypes.c_int]
# Implementation
time1_1 = time.time()
mlsa1 = mlsa(im, W01, H01, 50, m1, n1, k1)
print(mlsa1)
im_mia1 = np.ctypeslib.as_array(mlsa(im, W01, H01, 50, m1, n1, k1), shape=(m1,n1))
#im_mia1 = im_mia1.astype('uint8')
time1_2 = time.time() - time1_1
plt.imsave('Output/bilde_1_mio.bmp', im_mia1)
#mlsa.restype = ctypes.POINTER(ctypes.c_double * m2*n2)
time2_1 = time.time()
im_mia2 = np.ctypeslib.as_array(mlsa(im2, W02, H02, 50, m2, n2, k2), shape=(m2,n2))
#im_mia2 = im_mia2.astype('uint8')
time2_2 = time.time() - time2_1
plt.imsave('Output/leonardo_1_mio.bmp', im_mia2)
I hope you can help me, because i don't know what else to do.