Optimizing Gaussian Elimination using High Level Synthesis

46 Views Asked by At

I'm trying to implement a solver for the linear system AX = B over GF(256) using Gaussian elimination in Vitis HLS, and I'm looking to optimize my code/design for better latency and performance. I'm new to HLS, so I've tried using pragmas to unroll or pipeline loops, but the latency doesn't seem to change much. Can someone help me find out how I can parallelize certain computations and improve performance?

Here's my code:

/*
 * Solve the N x N linear system A * x = b over GF(256) by Gaussian
 * elimination on an augmented matrix [A | b].
 *
 *   A  coefficient matrix (only read by this function)
 *   b  right-hand side vector (only read by this function)
 *   x  receives the N solution values
 *
 * Field addition is XOR; multiplication and inversion are delegated to
 * gf256_mul() and gf256_inv(), which are defined outside this block.
 *
 * NOTE(review): the function returns void, so a singular matrix cannot be
 * reported to the caller. If no non-zero pivot can be produced for some
 * column, gf256_inv() is still called with 0; what it returns in that case
 * is not visible here -- confirm, or extend the interface with a status.
 */
void gaussian_elimination_solve(gal256 A[N][N], gal256 b[N], gal256 x[N]) {
    /* Working copy: columns 0..N-1 hold A, column N holds b. */
    gal256 augmented[N][N + 1];

augment:
    for (int i = 0; i < N; i++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
        /* Copy exactly N coefficients; A has no column N, so going one
         * further would read past the end of the row. */
        for (int j = 0; j < N; j++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
            augmented[i][j] = A[i][j];
        }
        augmented[i][N] = b[i];
    }

partial_pivot:
    for (int i = 0; i < N; i++) {
        if (augmented[i][i] == 0) {
            /* Repair a zero pivot by adding a lower row whose entry in
             * column i is non-zero. A row is added only while the pivot is
             * still zero: in GF(256) addition is XOR, so blindly adding
             * every lower row can cancel the pivot back to zero. Adding a
             * row (including its right-hand side, column N) keeps the
             * solution set unchanged. */
            for (int j = i + 1; j < N; j++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
                if (augmented[i][i] == 0 && augmented[j][i] != 0) {
                    for (int k = i; k <= N; k++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
                        augmented[i][k] ^= augmented[j][k];
                    }
                }
            }
        }

        /* Scale row i so that its pivot becomes 1. Columns left of i are
         * skipped, as in the elimination loop below. */
        gal256 pivotInverse = gf256_inv(augmented[i][i]);
pivot_row_normalize:
        for (int k = i; k <= N; k++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
            augmented[i][k] = gf256_mul(augmented[i][k], pivotInverse);
        }

row_elim:
        for (int j = i + 1; j < N; j++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
            /* Latch the multiplier first: augmented[j][i] is overwritten
             * by the k == i iteration of the inner loop. */
            gal256 factor = augmented[j][i];
            for (int k = i; k <= N; k++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
                augmented[j][k] ^= gf256_mul(factor, augmented[i][k]);
            }
        }
    }

    /* The matrix is now upper triangular with a unit diagonal, so only the
     * right-hand side column needs updating while substituting upwards. */
back_sub:
    for (int i = N - 1; i > 0; i--) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
        for (int j = 0; j < i; j++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
#pragma HLS LOOP_FLATTEN
            augmented[j][N] ^= gf256_mul(augmented[j][i], augmented[i][N]);
        }
    }

copy_solution:
    for (int i = 0; i < N; i++) {
#pragma HLS LOOP_TRIPCOUNT min=N max=N
        x[i] = augmented[i][N];
    }
}
0

There are 0 best solutions below