I tried to wrap RDMA in a helper class. I had some issues with send operations not generating a CQE on the remote side, and I fixed them by replacing this snippet of code:
// Returns one work completion taken from m_cq.
//
// Every call first waits for one event on m_comp_channel, acknowledges it and
// re-requests notification on the CQ; only afterwards is the CQ polled. The
// event wait is unconditional, i.e. it happens before any poll, whatever the
// CQ currently holds.
//
// ignore_errors: when false, a completion whose status is not IBV_WC_SUCCESS
//                is reported through FATAL_ERROR; when true, the completion is
//                returned as-is.
ibv_wc RdmaBase::wait_event(bool ignore_errors)
{
    // Step 1: consume exactly one completion-channel event and re-arm the CQ.
    void* event_context = nullptr;
    HENSURE_ERRNO(ibv_get_cq_event(m_comp_channel, &m_cq, &event_context) == 0);
    ibv_ack_cq_events(m_cq, 1);
    HENSURE_ERRNO(ibv_req_notify_cq(m_cq, 0) == 0);

    // Step 2: poll until a single completion has been retrieved. An empty poll
    // is retried immediately (busy wait); the channel is not consulted again.
    ibv_wc completion{};
    for(;;)
    {
        const int polled = ibv_poll_cq(m_cq, 1, &completion);
        HENSURE_ERRNO(polled >= 0);
        if(polled != 0)
        {
            break;
        }
    }

    // Step 3: unless the caller opted out, a failed completion is fatal.
    if(completion.status != IBV_WC_SUCCESS && !ignore_errors)
    {
        FATAL_ERROR("Failed status %s (%d) for wr_id %d\n",
                    ibv_wc_status_str(completion.status),
                    completion.status,
                    static_cast<int>(completion.wr_id));
    }
    return completion;
}
with this one:
// Returns the next work completion from m_cq, waiting on the completion
// channel whenever a poll finds the queue empty.
//
// The function behaves like a small state machine whose state (m_polling)
// survives between calls, so no separate polling thread is needed:
//   - m_polling == false: wait for one event on m_comp_channel, acknowledge
//     it, re-request notification on the CQ, then switch to polling.
//   - m_polling == true: poll the CQ directly. An empty poll clears the flag,
//     so the next step waits on the channel again instead of spinning.
//
// ignore_errors: when false, a completion whose status is not IBV_WC_SUCCESS
//                is reported through FATAL_ERROR; when true, the completion is
//                returned as-is.
//
// NOTE(review): the initial value of m_polling and the first
// ibv_req_notify_cq() call are not visible here. If m_polling starts out
// false, the first call waits for an event before polling, so the CQ must have
// been armed before any completion could be generated -- confirm in the setup
// code.
ibv_wc RdmaBase::wait_event(bool ignore_errors)
{
    ibv_wc ret{};
    while(true)
    {
        if(!m_polling)
        {
            void* ctxt = nullptr;
            HENSURE_ERRNO(ibv_get_cq_event(m_comp_channel, &m_cq, &ctxt) == 0);
            ibv_ack_cq_events(m_cq, 1);
            // Re-arm before polling: a completion that lands after this call
            // raises a new event, and one that landed earlier is still in the
            // CQ for the poll below to find.
            HENSURE_ERRNO(ibv_req_notify_cq(m_cq, 0) == 0);
            m_polling = true;
        }

        const int num_completions = ibv_poll_cq(m_cq, 1, &ret);
        HENSURE_ERRNO(num_completions >= 0);
        if(num_completions == 0)
        {
            // CQ drained: go back to waiting on the completion channel.
            m_polling = false;
            continue;
        }

        if(!ignore_errors && ret.status != IBV_WC_SUCCESS)
        {
            // wr_id is a 64-bit value; print it in full instead of truncating
            // it to int.
            FATAL_ERROR("Failed status %s (%d) for wr_id %llu\n",
                        ibv_wc_status_str(ret.status),
                        ret.status,
                        static_cast<unsigned long long>(ret.wr_id));
        }
        break;
    }
    return ret;
}
wait_event() should get the next WC on the CQ. If the CQ is not empty, it should pop one WC. If the CQ is empty, the function should block until a WC is generated.
Is there a functional difference between these two versions of the code? I am using SoftRoCE on Ubuntu 22.04.