Questions about recording cpu time using BCC/eBPF

442 Views Asked by At

I am a rookie in BCC/eBPF. I am now trying to calculate the process's CPU utilization using BCC. I thought I can do this by recording the on-cpu time and off-cpu time of the process and then calculate the utilization by on-cpu time/(on-cpu time+off-cpu time). And there is also a sample named cpudist which providing me the idea.

However, I run my program below and found that the sum of the on-cpu time and off-cpu time has a great gap between the interval I set to output.

That's the eBPF C part:

#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

    
typedef struct process_cpu_time{
    u64 oncpu_time;
    u64 offcpu_time;
}process_cpu_time;

typedef struct process_info{
    u32 pid;
    char comm[TASK_COMM_LEN];
}process_info;

BPF_HASH(oncpu_start, u32, u64, MAX_PID);    // BPF_HASH(name [, key_type [, leaf_type [, size]]])
BPF_HASH(offcpu_start, u32, u64, MAX_PID);   // BPF_HASH(name [, key_type [, leaf_type [, size]]])
BPF_HASH(cpu_time, u32, process_cpu_time, MAX_PID);

static inline bool str_cmp(const char* str1, const char* str2, const int max_len)
{
    for(int i = 0; i < max_len; i++)
    {
        if(str1[i] != str2[i])
            return false;
        else if(str1[i] == 0 && str2[i] == 0)
            return true;
    }
    return true;
}

static inline void str_cpy(char* Dest, const char *Src)
{
     while((*Dest = *Src)!=0)
     {
         Dest++;
         Src++;
     }
}

static inline void store_oncpu_start(u32 tgid, u32 pid, u64 ts)
{
    oncpu_start.update(&pid, &ts);
}

static inline void store_offcpu_start(u32 tgid, u32 pid, u64 ts)
{
    offcpu_start.update(&pid, &ts);
}

static inline void update_oncpu_time(u32 tgid, u32 pid, u64 ts)
{
    u64 *tsp = oncpu_start.lookup(&pid);
    if (tsp == 0)
        return;
    
    if(ts < *tsp)
        return;
    
    u64 delta = ts - *tsp;
    process_cpu_time* p = cpu_time.lookup(&pid);

    if(p != NULL)
        p->oncpu_time+=delta;
    else
    {
        process_cpu_time init = {0, 0};
        cpu_time.update(&pid, &init);
    }

}

static inline void update_offcpu_time(u32 tgid, u32 pid, u64 ts)
{
    u64 *tsp = offcpu_start.lookup(&pid);

    if (tsp == 0)
        return;
    
    if(ts < *tsp)
        return;
    
    u64 delta = ts - *tsp;
    process_cpu_time* p = cpu_time.lookup(&pid);
    if(p != NULL)
        p->offcpu_time+=delta;
    else
    {
        process_cpu_time init = {0, 0};
        cpu_time.update(&pid, &init);
    }
}

int sched_switch(struct pt_regs *ctx, struct task_struct *prev)
{
    u64 ts = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = pid_tgid >> 32, pid = pid_tgid;

    u32 prev_pid = prev->pid;
    u32 prev_tgid = prev->tgid;

    
    PREV_PID_FILTER
    {
        update_oncpu_time(prev_tgid, prev_pid, ts);
        store_offcpu_start(prev_tgid, prev_pid, ts);
    }

BAIL:
    PID_FILTER
    {    
        update_offcpu_time(tgid, pid, ts);
        store_oncpu_start(tgid, pid, ts);
    }

    return 0;
}

That's the Python part.

def main_loop(self, interval, process_cpu_time):
        while True:
            try:
                sleep(interval)
            except KeyboardInterrupt:
                print("receive KeyBoardInterrupt")
                if not self.output_file.closed:
                    self.output_file.flush()
                    self.output_file.close()
                exit()  
            cur_time = time.time()
            self.record(process_cpu_time, (cur_time-prev_time)*1e3, cur_time)
            prev_time = cur_time

def record(self, process_cpu_time, period, time_stamp):
        for k, v in sorted(self.bpf['cpu_time'].items_lookup_and_delete_batch(), key=lambda kv: (kv[1].oncpu_time), reverse=True):
            comm = pid_to_comm(k).strip('\n')
            oncpu_time_ms = v.oncpu_time / 1e6
            offcpu_time_ms = v.offcpu_time / 1e6
            total_time_ms = oncpu_time_ms + offcpu_time_ms
         
            self.output_file.write("%.2f,%12d,%20s,%2f,%.2f,%.2f\n" % (
                                    time_stamp,
                                    k, 
                                    comm,
                                    oncpu_time_ms, 
                                    offcpu_time_ms,
                                    utilization * 100,
                                    )
            )
            self.output_file.flush()

And I use it to snoop a dead loop program and waht I get is below:

TICKS           PID COMM    ON CPU(ms)      OFF CPU(ms)
1651389333.51   260271      cpu 223.990950  0.01
1651389334.57   260271      cpu 351.987700  0.01
1651389335.63   260271      cpu 511.988248  0.01
1651389336.68   260271      cpu 671.987550  0.01
1651389337.73   260271      cpu 767.989400  0.02
1651389338.79   260271      cpu 991.900899  0.07
1651389339.84   260271      cpu 959.964844  0.04
1651389340.90   260271      cpu 959.984888  0.02
1651389341.95   260271      cpu 927.996113  0.01
1651389343.00   260271      cpu 967.971981  0.03
1651389344.06   260271      cpu 575.989917  0.03

I set the interval to be one second for this example, and the sum of the on-cpu time and off-cpu time is not one second and also unstable.

Can someone tell my the reason for this? And how to fix such problem? Or is there any better approachs to calculate the CPU utilization using BCC?

0

There are 0 best solutions below