Consider two independent processes, where Process A handles business and writes logs to a file, while Process B polls the log file and does other tasks (similar to tools like Telegraf, Vector, etc.). When Process A writing new log, I want Process B can read the log in a very low latency, such as 1ms. However in our production environment, delays exceeding 300ms frequently occur.
I think that this involves only the kernel's page cache and doesn't involve physical disk I/O, so it should be very fast, much shorter than 1ms. Why such a significant delay occurs and how it can be improved?
===
I created 2 minimal programs to reproduce this issue. For unknown reasons, I couldn't reproduce the 300ms delay in the test environment, but after running for a few minutes, I still encountered several instances of 3ms delays:
Run the process A to generate log:
./makelog tmp1.log
Run the process B to poll the log (the 1st column is the delay in usec):
./tailog tmp1.log 3000
delay:3225, now:290152, last_read:282002, last_sleep:286233, line:286927 read_len:37
delay:6376, now:813159, last_read:806386, last_sleep:806387, line:806783 read_len:111
delay:3773, now:302125, last_read:293886, last_sleep:298138, line:298352 read_len:37
delay:5931, now:218568, last_read:213272, last_sleep:217525, line:212637 read_len:8
The source code of Process A:
#include <sys/time.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
int get_now_usec() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec;
}
int main(int argc, char **argv)
{
if (argc != 2) {
printf("invalid argument\n");
printf("Usage: %s filename\n", argv[0]);
return 1;
}
int fd = open(argv[1], O_CREAT | O_WRONLY, 0644);
if (fd < 0) {
perror("open file");
return 1;
}
char buf[300];
while (1) {
int slen = sprintf(buf, "linestart: %06d log blahblah hello\n", get_now_usec());
int wlen = write(fd, buf, slen);
if (wlen != slen) {
perror("write file");
return 1;
}
usleep(random() % 10000);
}
return 0;
}
The source code of Process B:
#include <sys/time.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
int get_now_usec() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec;
}
int main(int argc, char **argv)
{
if (argc != 3) {
printf("invalid argument\n");
printf("Usage: %s filename timeout\n", argv[0]);
return 1;
}
char *filename = argv[1];
int threlhold = atoi(argv[2]);
int fd = open(filename, O_RDONLY);
lseek(fd, 0, SEEK_END);
char buf[1024*1024];
int last_sleep = 0;
int last_read = 0;
while (1) {
int len = read(fd, buf, sizeof(buf));
if (len < 0) {
perror("read file");
break;
}
if (len == 0) {
last_sleep = get_now_usec();
usleep(1000); // 1ms
continue;
}
if (memcmp(buf, "linestart: ", 11) != 0) {
continue;
}
buf[17] = '\0';
int log_us = atoi(&buf[11]);
int now = get_now_usec();
int delay = now - log_us;
if (delay > threlhold) {
printf("delay:%d, now:%d, last_read:%d, last_sleep:%d, line:%s read_len:%d\n",
delay, now, last_read, last_sleep, buf+11, len);
}
last_read = now;
}
return 1;
}