I am attempting to run an `MPI_Allgather` across multiple nodes of a cluster (Open MPI). I ran into an issue with the memory space, which I attempted to work around by splitting the world communicator into node-local shared-memory communicators with `MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, shmcomm, ierr)`. However, the program does not progress beyond this stage when the number of processes per node is greater than 4.

The command being used to run this is `mpiexec -n 16 --N 8 --mca btl_openib_allow_ib true --mca orte_base_help_aggregate 0 ./main`. What are the potential causes for this, or are there any obvious mistakes in my understanding?
I tried various configurations of the mpiexec command, but they all seemed to yield the same issue.
```
program mpi_allgather_array_example
   use mpi
   implicit none

   integer :: ierr, my_rank, num_procs, i, shmcomm, my_rank_scm, num_procs_scm
   integer, parameter :: n = 2
   integer, allocatable :: send_buf(:), recv_buf(:), total_buff(:)

   ! Initialize MPI and split MPI_COMM_WORLD into node-local (shared-memory) communicators
   call MPI_Init(ierr)
   call MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 1, &
                            MPI_INFO_NULL, shmcomm, ierr)

   call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, ierr)
   call MPI_Comm_size(MPI_COMM_WORLD, num_procs, ierr)
   call MPI_Comm_rank(shmcomm, my_rank_scm, ierr)
   call MPI_Comm_size(shmcomm, num_procs_scm, ierr)

   ! Allocate memory for send and receive buffers
   print *, "starting allocation"
   allocate(send_buf(n), recv_buf(n*num_procs_scm), total_buff(n*num_procs))

   ! Initialize send buffer
   send_buf = (my_rank + 1) * (/ (i, i = 1, n) /)

   ! Gather data from all processes to all processes in shmcomm
   call MPI_Barrier(shmcomm, ierr)
   call MPI_Allgather(send_buf, n, MPI_INTEGER, &
                      recv_buf, n, MPI_INTEGER, &
                      shmcomm, ierr)

   ! Output the received data
   print *, 'Process', my_rank, 'received:', recv_buf

   call MPI_Finalize(ierr)
end program mpi_allgather_array_example
```
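
For context, `total_buff` is meant to eventually hold the data from every rank in `MPI_COMM_WORLD`. Below is a minimal sketch (not part of the failing run) of how I imagine combining the node-local results across nodes, assuming an equal number of processes per node and a hypothetical `leadercomm` built from rank 0 of each `shmcomm`; these lines would slot in before `MPI_Finalize` above, with the two extra integer declarations placed alongside the others.

```
   ! Sketch only: combine the node-local recv_buf blocks into total_buff.
   ! Assumes every node has the same num_procs_scm; "leadercomm" is a
   ! hypothetical communicator containing rank 0 of each shmcomm.
   integer :: leadercomm, color

   if (my_rank_scm == 0) then
      color = 0                ! node leaders join leadercomm
   else
      color = MPI_UNDEFINED    ! everyone else receives MPI_COMM_NULL
   end if
   call MPI_Comm_split(MPI_COMM_WORLD, color, 0, leadercomm, ierr)

   ! Leaders exchange the per-node blocks, then share the result on-node.
   if (leadercomm /= MPI_COMM_NULL) then
      call MPI_Allgather(recv_buf, n*num_procs_scm, MPI_INTEGER, &
                         total_buff, n*num_procs_scm, MPI_INTEGER, &
                         leadercomm, ierr)
      call MPI_Comm_free(leadercomm, ierr)
   end if
   call MPI_Bcast(total_buff, n*num_procs, MPI_INTEGER, 0, shmcomm, ierr)
```

The idea is to restrict the inter-node exchange to one leader per node, but the allgather within `shmcomm` hangs before any of this would be reached.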