Significant delay in file updates between processes on Linux

49 Views Asked by At

Consider two independent processes, where Process A handles business and writes logs to a file, while Process B polls the log file and does other tasks (similar to tools like Telegraf, Vector, etc.). When Process A writing new log, I want Process B can read the log in a very low latency, such as 1ms. However in our production environment, delays exceeding 300ms frequently occur.

I think that this involves only the kernel's page cache and doesn't involve physical disk I/O, so it should be very fast, much shorter than 1ms. Why such a significant delay occurs and how it can be improved?

===

I created 2 minimal programs to reproduce this issue. For unknown reasons, I couldn't reproduce the 300ms delay in the test environment, but after running for a few minutes, I still encountered several instances of 3ms delays:

Run the process A to generate log:

./makelog tmp1.log

Run the process B to poll the log (the 1st column is the delay in usec):

./tailog tmp1.log 3000
delay:3225, now:290152, last_read:282002, last_sleep:286233, line:286927 read_len:37
delay:6376, now:813159, last_read:806386, last_sleep:806387, line:806783 read_len:111
delay:3773, now:302125, last_read:293886, last_sleep:298138, line:298352 read_len:37
delay:5931, now:218568, last_read:213272, last_sleep:217525, line:212637 read_len:8

The source code of Process A:

#include <sys/time.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int get_now_usec() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_usec;
}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("invalid argument\n");
        printf("Usage: %s filename\n", argv[0]);
        return 1;
    }

    int fd = open(argv[1], O_CREAT | O_WRONLY, 0644);
    if (fd < 0) {
        perror("open file");
        return 1;
    }

    char buf[300];
    while (1) {
        int slen = sprintf(buf, "linestart: %06d log blahblah hello\n", get_now_usec());
        int wlen = write(fd, buf, slen);
        if (wlen != slen) {
            perror("write file");
            return 1;
        }
        usleep(random() % 10000);
    }
    return 0;
}

The source code of Process B:

#include <sys/time.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>

int get_now_usec() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_usec;
}

int main(int argc, char **argv)
{
    if (argc != 3) {
        printf("invalid argument\n");
        printf("Usage: %s filename timeout\n", argv[0]);
        return 1;
    }

    char *filename = argv[1];
    int threlhold = atoi(argv[2]);

    int fd = open(filename, O_RDONLY);
    lseek(fd, 0, SEEK_END);
    char buf[1024*1024];
    int last_sleep = 0;
    int last_read = 0;
    while (1) {
        int len = read(fd, buf, sizeof(buf));
        if (len < 0) {
            perror("read file");
            break;
        }
        if (len == 0) {
            last_sleep = get_now_usec();
            usleep(1000); // 1ms
            continue;
        }
        if (memcmp(buf, "linestart: ", 11) != 0) {
            continue;
        }

        buf[17] = '\0';
        int log_us = atoi(&buf[11]);

        int now = get_now_usec();
        int delay = now - log_us;
        if (delay > threlhold) {
            printf("delay:%d, now:%d, last_read:%d, last_sleep:%d, line:%s read_len:%d\n",
                    delay, now, last_read, last_sleep, buf+11, len);
        }

        last_read = now;
    }
    return 1;
}
0

There are 0 best solutions below