project.exe ended prematurely and may have crashed. exit code 0xc0000005

55 Views Asked by At

I have the TSP (Traveling Salesman Problem), which I am trying to solve in parallel using MPI (Message Passing Interface). I am attempting to apply a work-stealing algorithm to it. This is the relevant part of the main function:

    // Prozess 0 initialisiert die Seeds
    if (MyRank == 0) {
        push_first_work_local(&local_work, num_cities);

        init_thread_route(&local_work, &num_cities, distance, nProcs, &best_route, MyRank);
    }
    int array_size = local_work.top_index;
    int sub_array_size = array_size / nProcs;
    stack_to_array(&local_work, &local_work_array, array_size);

    MPI_Bcast(&array_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&sub_array_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

    int mod = array_size % nProcs;

        a_size[i] = sub_array_size;
        //a_offset[i] = i * sub_array_size;
    }

    for (int i = 0; i <= mod; i++) {
        a_size[i] = a_size[i] + 1;
    }

    a_offset[0] = 0;
    for (int i = 1; i < nProcs; i++) {
        a_offset[i] = a_offset[i - 1] + a_size[i - 1];
    }

    MPI_Scatterv(&local_work_array, a_size, a_offset, newtype, &temp_array, sub_array_size + 1, newtype, 0, MPI_COMM_WORLD);

    array_to_stack(&temp_stack, &temp_array, sub_array_size + 1);
    int* work_array = (int*)malloc(nProcs * sizeof(int)); 
    for (int i = 0; i < nProcs; i++) {
        work_array[i] = 0;
    }
    // [...] Arbeitsspeicherstruktur (temp_stack) des jeweiligen Prozesses verbleiben.
    work_array[MyRank] = temp_stack.top_index;
    double t_count = MPI_Wtime();
    MPI_Request req_length, req_status;
    struct route buf;

    while (work_to_do(work_array, nProcs)) {
    
        if (MPI_Wtime() - t_count > x_time) {
            work_array[MyRank] = temp_stack.top_index;
            for (int i = 0; i < nProcs; i++) {
                MPI_Bcast(&work_array[i], 1, MPI_INT, i, MPI_COMM_WORLD); 
            }

            for (int i = 0; i < nProcs; i++) { 
                buf = best_route; 
                MPI_Bcast(&buf, 1, newtype, i, MPI_COMM_WORLD); 
                update_best(&best_route, buf); 

                steal_work(&temp_stack, num_cities, &best_route, distance, MyRank, nProcs);
            }
            t_count = MPI_Wtime();
        }
        else
        {
            expand_top_route(&temp_stack, &num_cities, &best_route, distance, MyRank);
        }
    }

    MPI_Gather(&best_route, 1, newtype, best_array, 1, newtype, 0, MPI_COMM_WORLD);

    if (MyRank == 0) { 
        for (int i = 0; i < nProcs; i++) {
            update_best(&best_route, best_array[i]); 
        }

This is the function I implemented, which should allow work to be exchanged between processes (work stealing):

int steal_work(struct local_stack* plocal_work, int* num_cities, struct route* best_route, double* distance, int MyRank, int nProcs) {
    // Finde den Prozess mit der meisten Arbeit (meisten Routen im Stapel)
    int victim = MyRank;
    int max_work = plocal_work[MyRank].top_index;
    for (int i = 0; i < nProcs; i++) {
        if (plocal_work[i].top_index > max_work && i != MyRank) {
            victim = i;
            max_work = plocal_work[i].top_index;
        }
    }
    int request = 1;
    MPI_Request send_request, recv_request;
    MPI_Isend(&request, 1, MPI_INT, victim, 0, MPI_COMM_WORLD, &send_request);

    int response = 0;
    MPI_Irecv(&response, 1, MPI_INT, victim, 0, MPI_COMM_WORLD, &recv_request);

    MPI_Wait(&send_request, MPI_STATUS_IGNORE);
    MPI_Wait(&recv_request, MPI_STATUS_IGNORE);

    if (response == 1) {
        // Das Opfer hat Arbeit gestohlen
        MPI_Datatype newtype;
        init_mpi_new_type(&newtype);

        struct route stolen_route;
        MPI_Irecv(&stolen_route, 1, newtype, victim, 0, MPI_COMM_WORLD, &recv_request);
        MPI_Wait(&recv_request, MPI_STATUS_IGNORE);

        //expand_top_route(&stolen_route, num_cities, best_route, distance, MyRank);

        pop(&plocal_work[victim], &stolen_route);
    
        push(&plocal_work[MyRank], &stolen_route);

        return 1; 
    }
    return 0; 
}

Running it produced this error:

job aborted:
[ranks] message

[0] terminated

[1] process exited without calling finalize

[2-6] terminated

[7] process exited without calling finalize

[8-10] terminated

[11] process exited without calling finalize

[12-15] terminated

---- error analysis -----

[1,7,11] on DESKTOP-xxxxxx
project.exe ended prematurely and may have crashed. exit code 0xc0000005

---- error analysis -----

Can anyone please give me tips about this issue?

0

There are 0 best solutions below