Simulation of heat diffusion on a heat sink using MPI and a 2D decomposition of the data (3D)

103 Views Asked by At

This is my original sequential code for the heat diffusion on the heat sink then I need to parallize it while using MPI and by sharing the domain with each processor in 2D:

#define DUMP_STEADY_STATE
    
    const double L = 0.15;      /* length (x) of the heatsink (m) */
    const double l = 0.12;      /* height (y) of the heatsink (m) */
    const double E = 0.008;     /* width (z) of the heatsink (m) */
    const double watercooling_T = 20;   /* temperature of the fluid for water-cooling, (°C) */
    const double CPU_TDP = 280; /* power dissipated by the CPU (W) */
    
    /* dl: "spatial step" for simulation (m) */
    /* dt: "time step" for simulation (s) */
    
    double dl = 0.004;
    double dt = 0.004;
    
    
    /* sink_heat_capacity: specific heat capacity of the heatsink (J / kg / K) */
    /* sink_density: density of the heatsink (kg / m^3) */
    /* sink_conductivity: thermal conductivity of the heatsink (W / m / K) */
    /* euros_per_kg: price of the matter by kilogram */
    
    double sink_heat_capacity = 897;
    double sink_density = 2710;
    double sink_conductivity = 237;
    double euros_per_kg = 1.594;
    
    const double Stefan_Boltzmann = 5.6703e-8;  /* (W / m^2 / K^4), radiation of black body */
    const double heat_transfer_coefficient = 10;    /* coefficient of thermal convection (W / m^2 / K) */
    double CPU_surface;
    
    /* 
     * Return True if the CPU is in contact with the heatsink at the point (x,y).
     * This describes an AMD EPYC "Rome".
     */
    static inline bool CPU_shape(double x, double y)
    {
        x -= (L - 0.0754) / 2;
        y -= (l - 0.0585) / 2;
        bool small_y_ok = (y > 0.015 && y < 0.025) || (y > 0.0337 && y < 0.0437);
        bool small_x_ok = (x > 0.0113 && x < 0.0186) || (x > 0.0193 && x < 0.0266)
            || (x > 0.0485 && x < 0.0558) || (x > 0.0566 && x < 0.0639);
        bool big_ok = (x > 0.03 && x < 0.045 && y > 0.0155 && y < 0.0435);
        return big_ok || (small_x_ok && small_y_ok);
    }
    
    /* returns the total area of the surface of contact between CPU and heatsink (in m^2) */
    double CPU_contact_surface()
    {
        double S = 0;
        for (double x = dl / 2; x < L; x += dl)
            for (double y = dl / 2; y < l; y += dl)
                if (CPU_shape(x, y))
                    S += dl * dl;
        return S;
    }
    
    /* Returns the new temperature of the cell (i, j, k). For this, there is an access to neighbor
     * cells (left, right, top, bottom, front, back), except if (i, j, k) is on the external surface. */
    static inline double update_temperature(const double *T, int u, int n, int m, int o, int i, int j, int k)
    {
            /* quantity of thermal energy that must be brought to a cell to make it heat up by 1°C */
        const double cell_heat_capacity = sink_heat_capacity * sink_density * dl * dl * dl; /* J.K */
        const double dl2 = dl * dl;
        double thermal_flux = 0;
    
        if (i > 0)
            thermal_flux += (T[u - 1] - T[u]) * sink_conductivity * dl; /* neighbor x-1 */
        else {
            thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
            thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
        }
    
        if (i < n - 1)
            thermal_flux += (T[u + 1] - T[u]) * sink_conductivity * dl; /* neighbor x+1 */
        else {
            thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
            thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
        }
    
        if (j > 0)
            thermal_flux += (T[u - n] - T[u]) * sink_conductivity * dl; /* neighbor y-1 */
        else {
            /* Bottom cell: does it receive it from the CPU ? */
            if (CPU_shape(i * dl, k * dl))
                thermal_flux += CPU_TDP / CPU_surface * dl2;
            else {
                thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
                thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
            }
        }
    
        if (j < m - 1)
            thermal_flux += (T[u + n] - T[u]) * sink_conductivity * dl; /* neighbor y+1 */
        else {
            thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
            thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
        }
    
        if (k > 0)
            thermal_flux += (T[u - n * m] - T[u]) * sink_conductivity * dl; /* neighbor z-1 */
        else {
            thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
            thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
        }
    
        if (k < o - 1)
            thermal_flux += (T[u + n * m] - T[u]) * sink_conductivity * dl; /* neighbor z+1 */
        else {
            thermal_flux -= Stefan_Boltzmann * dl2 * pow(T[u], 4);
            thermal_flux -= heat_transfer_coefficient * dl2 * (T[u] - watercooling_T);
        }
    
        /* adjust temperature depending on the heat flux */
        return T[u] + thermal_flux * dt / cell_heat_capacity;
    }
    
    /* Run the simulation on the k-th xy plane.
     * v is the index of the start of the k-th xy plane in the arrays T and R. */
    static inline void do_xy_plane(const double *T, double *R, int v, int n, int m, int o, int k)
    {
        if (k == 0)
                    // we do not modify the z = 0 plane: it is maintained at constant temperature via water-cooling
            return;
    
        for (int j = 0; j < m; j++) {   // y
            for (int i = 0; i < n; i++) {   // x
                int u = v + j * n + i;
                R[u] = update_temperature(T, u, n, m, o, i, j, k);
            }
        }
    }
    
    int main()
    {
        double start = wallclock_time();
        CPU_surface = CPU_contact_surface();
        double V = L * l * E;
        int n = ceil(L / dl);
        int m = ceil(E / dl);
        int o = ceil(l / dl);
    
        /* temperature of each cell, in degree Kelvin. */
        double *T = malloc(n * m * o * sizeof(*T));
        double *R = malloc(n * m * o * sizeof(*R));
        if (T == NULL || R == NULL) {
            perror("T or R could not be allocated");
            exit(1);
        }
    
        /* initially the heatsink is at the temperature of the water-cooling fluid */
        for (int u = 0; u < n * m * o; u++)
            R[u] = T[u] = watercooling_T + 273.15;
    
        /* let's go! we switch the CPU on and launch the simulation until it reaches a stationary state. */
        double t = 0;
        int n_steps = 0;
        int convergence = 0;
    
        /* simulating time steps */
        while (convergence == 0) {
            /* Update all cells. xy planes are processed, for increasing values of z. */
            for (int k = 0; k < o; k++) {   // z
                int v = k * n * m;
                do_xy_plane(T, R, v, n, m, o, k);
            }
    
            /* each second, we test the convergence, and print a short progress report */
            if (n_steps % ((int)(1 / dt)) == 0) {
                double delta_T = 0;
                double max = -INFINITY;
                for (int u = 0; u < n * m * o; u++) {
                    delta_T += (R[u] - T[u]) * (R[u] - T[u]);
                    if (R[u] > max)
                        max = R[u];
                }
                delta_T = sqrt(delta_T) / dt;
                fprintf(stderr, "t = %.1fs ; T_max = %.1f°C ; convergence = %g\n", t, max - 273.15, delta_T);
                if (delta_T < 0.1)
                    convergence = 1;
            }
    
            /* the new temperatures are in R */
            double *tmp = R;
            R = T;
            T = tmp;
    
            t += dt;
            n_steps += 1;
        }
    
    #ifdef DUMP_STEADY_STATE
        printf("###### STEADY STATE; t = %.1f\n", t);
        for (int k = 0; k < o; k++) {   // z
            printf("# z = %g\n", k * dl);
            for (int j = 0; j < m; j++) {   // y
                for (int i = 0; i < n; i++) {   // x
                    printf("%.1f ", T[k * n * m + j * n + i] - 273.15);
                }
                printf("\n");
            }
        }
        double end = wallclock_time();
        fprintf(stderr, "Total computing time: %g sec\n", end - start);
        printf("\n");
        fprintf(stderr, "For graphical rendering: python3 rendu_picture_steady.py [filename.txt] %d %d %d\n", n, m, o);
    #endif
        exit(EXIT_SUCCESS);  
}

Now I'm trying to do a 2d heat diffusion using OpenMPI :

I started to write a code but when I a have problem with receiving the data and then using them to calcule the new temperature in the MPI_SendRecv for ex if i'm on the top-left i will need the data from the processors at my right and in front of me and the problem is how to calcule the temperature while using both of the date maybe i don't do it in the right way :

int main(){ MPI_Init(NULL, NULL); // Initialize the MPI environment    
    
    // same initialisation 
    
    double start = wallclock_time();
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
  
    int z_domain = o / size; // for each processor
    int rest_line_z = o % size;  
    int start_z =0;
    int last_z=0;

    int x_domain = n / size; // for each processor
    int rest_line_x = n % size;  
    int start_x =0;
    int last_x=0; 

    // number of processor on each axis (x et z). 
    int num_processors = size; 
    int x_processors = 1; // Initialize x_processors
    int z_processors;

    while (x_processors < num_processors) {
        if (num_processors % x_processors == 0) {
            z_processors = num_processors / x_processors;
            if (x_processors >= 2 && z_processors >= 2) {
                break;
            }
        }
        x_processors++;
    }

    // Distribution of the local_domain to each processor
int x_count = 0;
int z_count = 0;
for (int i = 0; i < size; i++) {
    if (x_count == x_processors) {
        x_count = 0;
        z_count++;
    }
    if (i == rank) {
        start_x = x_count * x_domain;
        start_z = z_count * z_domain;
    }
    x_count++;
}

if (rank == size - 1) { // the rest lines on z will be for the last processor
    end_z = start_z + z_domain + rest_line_z;
}
else {
    end_z = end_z + z_domain;
}
if (rank == x_processors - 1){ // rest line on x will be treated by the last processor on each z_domain
    end_x = start_x + x_domain + rest_line_x; 
else {
    end_x = end_x + x_tranche;
   }
   
    double t = 0;
    int n_steps = 0;
    int convergence = 0;


    while (convergence == 0) {
    // for each processor , we parcours the x axis and the z axis
    for (int k = start_z; k <= end_z; k++) {
        for (int i = start_x; i <= end_x; i++) {
            int v = k * n * m + i * m; // v = indicate the firt cell
            double *halo = malloc( n * m * sizeof(*bord));
             if (rank > x_processors && k == start_z){
                MPI_Sendrecv(&T[v],n * m, MPI_DOUBLE, rank - x_processors, 0, halo ,n * m, MPI_DOUBLE, rank, 1, MPI_COMM_WORLD, &status);
            }

            if (rank + x_processors < size - 1 && k == end_z){
                MPI_Sendrecv(&T[v],n * m, MPI_DOUBLE, rank + x_processors, 1,halo,n * m, MPI_DOUBLE, rank, 0, MPI_COMM_WORLD, &status);
            }

            if (rank >0 && i == start_x){
                double *haloright = malloc(m * z_domain * sizeof(*haloright));
                if ((rank > x_processors && k == start_z ) || (rank + x_processors < size - 1 && k == end_z)){
                    MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank - 1, 2,haloright,z_domain * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
                }
                else {
                    MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank - 1, 2,haloright,z_tranche * m, MPI_DOUBLE, rank, 3, MPI_COMM_WORLD, &status);
                }
                for (int j = 0; j < m; j++) {   // y
                    int u = v + j * n + k;
                    R[u] = update_temperature(haloright, u, x_domain, m, z_domain, i, j, k);
                    }
                }

            if (rank < size - 1 && (rank + 1) % x_processors != 0 && i == end_x){
                double *haloleft= malloc( m * z_domain * sizeof(*haloright));
                if ((rank > x_processors && k == start_z ) || (rank + x_processors < size - 1 && k == end_z)){
                MPI_Sendrecv(&halo[v],z_domain * m, MPI_DOUBLE, rank+1, 3,haloright,z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
                }
                else {
                MPI_Sendrecv(&T[v],z_domain * m, MPI_DOUBLE, rank + 1, 3, haloright,z_domain * m, MPI_DOUBLE, rank, 2, MPI_COMM_WORLD, &status);
                }
                 for (int j = 0; j < m; j++) {   // y
                    int u = v + j * n + k;
                    R[u] = update_temperature(haloright, u, x_domain, m, z_domain, i, j, k);
                }
            }

             for (int j = 0; j < m; j++) {   // y
                    int u = v + j * n + k;
                    R[u] = update_temperature(bord, u, x_domain, m, z_domain, i, j, k);
                }
            }
        
        }

        double *tmp = R;
        R = T;
        T = tmp;

        t += dt;
        n_steps += 1;
    
    }
     #ifdef DUMP_STEADY_STATE
            
           // same code 

        // Finalize the MPI environment
        MPI_Finalize();
        return 0;
}

Can someone help me please

0

There are 0 best solutions below