I have a file storing a 2D float array of shape M×128, where M is a very large number. I want to read N = 8,000,000 rows of this array into memory; the indices of the rows to read are picked at random and stored in a 1D array idx. I can do this with the following code (using either pread or mmap):
#include <fcntl.h>
#include <omp.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
namespace {

// Deleter that lets std::unique_ptr own malloc()-allocated buffers, so every
// early return below releases them automatically.
struct FreeDeleter {
  void operator()(void* p) const { free(p); }
};

// Reads exactly `count` bytes from `fd` into `dst`.
// Retries on EINTR and on short reads (a single read() may legally return
// fewer bytes than requested). Returns false on I/O error or if end-of-file
// is reached before `count` bytes were read.
bool read_fully(int fd, void* dst, size_t count) {
  char* out = static_cast<char*>(dst);
  while (count > 0) {
    const ssize_t got = read(fd, out, count);
    if (got < 0) {
      if (errno == EINTR) {
        continue;
      }
      return false;
    }
    if (got == 0) {
      return false;  // premature EOF: the file is shorter than expected
    }
    out += got;
    count -= static_cast<size_t>(got);
  }
  return true;
}

}  // namespace

// Gathers N randomly chosen rows (128 floats each) of "2dArray.bin" into a
// contiguous buffer and prints how long the gather took.
// The row numbers are read as N int64 values from "idx8M.bin".
// Returns 0 on success and 1 on any allocation, I/O, or validation failure.
int main() {
  using ms = std::chrono::duration<float, std::milli>;
  constexpr int N = 8'000'000;
  constexpr int dim = 128;
  constexpr size_t size = dim * sizeof(float);  // bytes per row

  std::unique_ptr<int64_t, FreeDeleter> idx_owner(
      static_cast<int64_t*>(malloc(N * sizeof(int64_t))));
  std::unique_ptr<char, FreeDeleter> data_owner(
      static_cast<char*>(malloc(N * size)));
  if (!idx_owner || !data_owner) {
    fprintf(stderr, "out of memory\n");
    return 1;
  }
  // Raw views for use inside the OpenMP loop.
  const int64_t* idx = idx_owner.get();
  char* data = data_owner.get();

  // reading indices from file
  int fd = open("idx8M.bin", O_RDONLY);
  if (fd == -1) {
    perror("open idx8M.bin");
    return 1;
  }
  const bool idx_ok = read_fully(fd, idx_owner.get(), N * sizeof(int64_t));
  close(fd);
  if (!idx_ok) {
    fprintf(stderr, "idx8M.bin: could not read %d indices\n", N);
    return 1;
  }

  fd = open("2dArray.bin", O_RDONLY);
  if (fd == -1) {
    perror("open 2dArray.bin");
    return 1;
  }
  const off_t end = lseek(fd, 0, SEEK_END);
  if (end <= 0) {
    // -1 is an lseek failure; 0 is an empty file, which mmap would reject.
    fprintf(stderr, "2dArray.bin: cannot determine size or file is empty\n");
    close(fd);
    return 1;
  }
  const size_t len = static_cast<size_t>(end);
  void* mapping = mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd, 0);
  if (mapping == MAP_FAILED) {
    perror("mmap 2dArray.bin");
    close(fd);
    return 1;
  }
  const char* addr = static_cast<const char*>(mapping);

  // Rows are visited in random order, so kernel readahead would mostly fetch
  // pages that are never used. The hint is advisory; failure is harmless.
  (void)posix_madvise(mapping, len, POSIX_MADV_RANDOM);

  // Reject indices that would read outside the mapping; otherwise the copy
  // loop would fault (SIGBUS/SIGSEGV) in the middle of the parallel region.
  const int64_t rows = static_cast<int64_t>(len / size);
  for (int i = 0; i < N; ++i) {
    if (idx[i] < 0 || idx[i] >= rows) {
      fprintf(stderr, "index %d (%lld) is outside [0, %lld)\n", i,
              static_cast<long long>(idx[i]), static_cast<long long>(rows));
      munmap(mapping, len);
      close(fd);
      return 1;
    }
  }

  const auto start = std::chrono::high_resolution_clock::now();
#pragma omp parallel for num_threads(32)
  for (int i = 0; i < N; ++i) {
    // pread(fd, data + i * size, size, idx[i] * size);
    memcpy(data + static_cast<size_t>(i) * size,
           addr + static_cast<size_t>(idx[i]) * size, size);
  }
  const auto stop = std::chrono::high_resolution_clock::now();
  printf("Cost %f ms\n", std::chrono::duration_cast<ms>(stop - start).count());

  munmap(mapping, len);
  close(fd);
  return 0;
}
The problem is the reading speed. I ran the code on a server with an SSD (advertised as having a maximum throughput of 180 MB/s). With either pread or mmap, the read took over 600 s to finish. Is there any way to speed up the reads?