This is my original sequential code for the heat diffusion on the heat sink then I need to parallize it while using MPI and by sharing the domain with each processor in 2D:
#define DUMP_STEADY_STATE
const double L = 0.15; /* length (x) of the heatsink (m) */
const double l = 0.12; /* height (y) of the heatsink (m) */
const double E = 0.008; /* width (z) of the heatsink (m) */
const double watercooling_T = 20; /* temperature of the fluid for water-cooling, (°C) */
const double CPU_TDP = 280; /* power dissipated by the CPU (W) */
/* dl: "spatial step" for simulation (m) */
/* dt: "time step" for simulation (s) */
double dl = 0.004;
double dt = 0.004;
/* sink_heat_capacity: specific heat capacity of the heatsink (J / kg / K) */
/* sink_density: density of the heatsink (kg / m^3) */
/* sink_conductivity: thermal conductivity of the heatsink (W / m / K) */
/* euros_per_kg: price of the matter by kilogram */
double sink_heat_capacity = 897;
double sink_density = 2710;
double sink_conductivity = 237;
double euros_per_kg = 1.594;
const double Stefan_Boltzmann = 5.6703e-8; /* (W / m^2 / K^4), radiation of black body */
const double heat_transfer_coefficient = 10; /* coefficient of thermal convection (W / m^2 / K) */
double CPU_surface;
/*
* Return True if the CPU is in contact with the heatsink at the point (x,y).
* This describes an AMD EPYC "Rome".
*/
static inline bool CPU_shape(double x, double y)
{
x -= (L - 0.0754) / 2;
y -= (l - 0.0585) / 2;
bool small_y_ok = (y > 0.015 && y < 0.025) || (y > 0.0337 && y < 0.0437);
bool small_x_ok = (x > 0.0113 && x < 0.0186) || (x > 0.0193 && x < 0.0266)
|| (x > 0.0485 && x < 0.0558) || (x > 0.0566 && x < 0.0639);
bool big_ok = (x > 0.03 && x < 0.045 && y > 0.0155 && y < 0.0435);
return big_ok || (small_x_ok && small_y_ok);
}
/* returns the total area of the surface of contact between CPU and heatsink (in m^2) */
double CPU_contact_surface()
{
double S = 0;
for (double x = dl / 2; x < L; x += dl)
for (double y = dl / 2; y < l; y += dl)
if (CPU_shape(x, y))
S += dl * dl;
return S;
}
/* Returns the new temperature of the cell (i, j, k). For this, there is an access to neighbor
* cells (left, right, top, bottom, front, back), except if (i, j, k) is on the external surface. */
static inline double update_temperature(const double *T, int u, int n, int m, int o, int i, int j, int k)
{
/* quantity of thermal energy that must be brought to a cell to make it heat up by 1°C */
const double cell_heat_capacity = sink_heat_capacity * sink_density * dl * dl * dl; /* J.K */
const double dl2 = dl * dl;
double thermal_flux = 0;
if (i > 0)
thermal_flux += (T[u - 1] - T[u]) * sink_conductivity * dl; /* neighbor x-1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (i < n - 1)
thermal_flux += (T[u + 1] - T[u]) * sink_conductivity * dl; /* neighbor x+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (j > 0)
thermal_flux += (T[u - n] - T[u]) * sink_conductivity * dl; /* neighbor y-1 */
else {
/* Bottom cell: does it receive it from the CPU ? */
if (CPU_shape(i * dl, k * dl))
thermal_flux += CPU_TDP / CPU_surface * dl2;
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
}
if (j < m - 1)
thermal_flux += (T[u + n] - T[u]) * sink_conductivity * dl; /* neighbor y+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (k > 0)
thermal_flux += (T[u - n * m] - T[u]) * sink_conductivity * dl; /* neighbor z-1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
if (k < o - 1)
thermal_flux += (T[u + n * m] - T[u]) * sink_conductivity * dl; /* neighbor z+1 */
else {
thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
}
/* adjust temperature depending on the heat flux */
return T[u] + thermal_flux * dt / cell_heat_capacity;
}
/* Run the simulation on the k-th xy plane.
* v is the index of the start of the k-th xy plane in the arrays T and R. */
static inline void do_xy_plane(const double *T, double *R, int v, int n, int m, int o, int k)
{
if (k == 0)
// we do not modify the z = 0 plane: it is maintained at constant temperature via water-cooling
return;
for (int j = 0; j < m; j++) { // y
for (int i = 0; i < n; i++) { // x
int u = v + j * n + i;
R[u] = update_temperature(T, u, n, m, o, i, j, k);
}
}
}
int main()
{
double start = wallclock_time();
CPU_surface = CPU_contact_surface();
double V = L * l * E;
int n = ceil(L / dl);
int m = ceil(E / dl);
int o = ceil(l / dl);
/* temperature of each cell, in degree Kelvin. */
double *T = malloc(n * m * o * sizeof(*T));
double *R = malloc(n * m * o * sizeof(*R));
if (T == NULL || R == NULL) {
perror("T or R could not be allocated");
exit(1);
}
/* initially the heatsink is at the temperature of the water-cooling fluid */
for (int u = 0; u < n * m * o; u++)
R[u] = T[u] = watercooling_T + 273.15;
/* let's go! we switch the CPU on and launch the simulation until it reaches a stationary state. */
double t = 0;
int n_steps = 0;
int convergence = 0;
/* simulating time steps */
while (convergence == 0) {
/* Update all cells. xy planes are processed, for increasing values of z. */
for (int k = 0; k < o; k++) { // z
int v = k * n * m;
do_xy_plane(T, R, v, n, m, o, k);
}
/* each second, we test the convergence, and print a short progress report */
if (n_steps % ((int)(1 / dt)) == 0) {
double delta_T = 0;
double max = -INFINITY;
for (int u = 0; u < n * m * o; u++) {
delta_T += (R[u] - T[u]) * (R[u] - T[u]);
if (R[u] > max)
max = R[u];
}
delta_T = sqrt(delta_T) / dt;
fprintf(stderr, "t = %.1fs ; T_max = %.1f°C ; convergence = %g\n", t, max - 273.15, delta_T);
if (delta_T < 0.1)
convergence = 1;
}
/* the new temperatures are in R */
double *tmp = R;
R = T;
T = tmp;
t += dt;
n_steps += 1;
}
#ifdef DUMP_STEADY_STATE
printf("###### STEADY STATE; t = %.1f\n", t);
for (int k = 0; k < o; k++) { // z
printf("# z = %g\n", k * dl);
for (int j = 0; j < m; j++) { // y
for (int i = 0; i < n; i++) { // x
printf("%.1f ", T[k * n * m + j * n + i] - 273.15);
}
printf("\n");
}
}
double end = wallclock_time();
fprintf(stderr, "Total computing time: %g sec\n", end - start);
printf("\n");
fprintf(stderr, "For graphical rendering: python3 rendu_picture_steady.py [filename.txt] %d %d %d\n", n, m, o);
#endif
exit(EXIT_SUCCESS);
}
Now I'm trying to do a 2d heat diffusion using OpenMPI :
I started to write a code but when I a have problem with receiving the data and then using them to calcule the new temperature in the MPI_SendRecv for ex if i'm on the top-left i will need the data from the processors at my right and in front of me and the problem is how to calcule the temperature while using both of the date maybe i don't do it in the right way :
int main(){ MPI_Init(NULL, NULL); // Initialize the MPI environment
// same initialisation
double start = wallclock_time();
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int z_domain = o / size; // for each processor
int rest_line_z = o % size;
int start_z =0;
int last_z=0;
int x_domain = n / size; // for each processor
int rest_line_x = n % size;
int start_x =0;
int last_x=0;
// number of processor on each axis (x et z).
int num_processors = size;
int x_processors = 1; // Initialize x_processors
int z_processors;
while (x_processors < num_processors) {
if (num_processors % x_processors == 0) {
z_processors = num_processors / x_processors;
if (x_processors >= 2 && z_processors >= 2) {
break;
}
}
x_processors++;
}
// Distribution of the local_domain to each processor
int x_count = 0;
int z_count = 0;
for (int i = 0; i < size; i++) {
if (x_count == x_processors) {
x_count = 0;
z_count++;
}
if (i == rank) {
start_x = x_count * x_domain;
start_z = z_count * z_domain;
}
x_count++;
}
if (rank == size - 1) { // the rest lines on z will be for the last processor
end_z = start_z + z_domain + rest_line_z;
}
else {
end_z = end_z + z_domain;
}
if (rank == x_processors - 1){ // rest line on x will be treated by the last processor on each z_domain
end_x = start_x + x_domain + rest_line_x;
else {
end_x = end_x + x_tranche;
}
double t = 0;
int n_steps = 0;
int convergence = 0;
while (convergence == 0) {
// for each processor , we parcours the x axis and the z axis
for (int k = start_z; k <= end_z; k++) {
for (int i = start_x; i <= end_x; i++) {
int v = k * n * m + i * m; // v = indicate the firt cell
double *halo = malloc( n * m * sizeof(*bord));
if (rank > x_processors && k == start_z){
MPI_Sendrecv(&T[v],n * m, MPI_DOUBLE, rank - x_processors, 0, halo ,n * m, MPI_DOUBLE, rank, 1, MPI_COMM_WORLD, &status);
}
if (rank + x_processors < size - 1 && k == end_z){
MPI_Sendrecv(&T[v],n * m, MPI_DOUBLE, rank + x_processors, 1,halo,n * m, MPI_DOUBLE, rank, 0, MPI_COMM_WORLD, &status);
}
if (rank >0 && i == start_x){
double *haloright = malloc(m * z_domain * sizeof(*haloright));
if ((rank > x_processors && k == start_z ) || (rank + x_processors < size - 1 && k == end_z)){
MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank - 1, 2,haloright,z_domain * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
}
else {
MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank - 1, 2,haloright,z_tranche * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(haloright, u, x_domain, m, z_domain, i, j, k);
}
}
if (rank < size - 1 && (rank + 1) % x_processors != 0 && i == end_x){
double *haloleft= malloc( m * z_domain * sizeof(*haloright));
if ((rank > x_processors && k == start_z ) || (rank + x_processors < size - 1 && k == end_z)){
MPI_Sendrecv(&halo[v],z_domain * m, MPI_DOUBLE, rank+1, 3,haloright,z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
}
else {
MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank + 1, 3, haloright,z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(haloright, u, x_domain, m, z_domain, i, j, k);
}
}
for (int j = 0; j < m; j++) { // y
int u = v + j * n + k;
R[u] = update_temperature(bord, u, x_domain, m, z_domain, i, j, k);
}
}
}
double *tmp = R;
R = T;
T = tmp;
t += dt;
n_steps += 1;
}
#ifdef DUMP_STEADY_STATE
// same code
// Finalize the MPI environment
MPI_Finalize();
return 0;
}
Can someone help me please