I am a rookie in BCC/eBPF. I am now trying to calculate the process's CPU utilization using BCC. I thought I can do this by recording the on-cpu time and off-cpu time of the process and then calculate the utilization by on-cpu time/(on-cpu time+off-cpu time). And there is also a sample named cpudist which providing me the idea.
However, I run my program below and found that the sum of the on-cpu time and off-cpu time has a great gap between the interval I set to output.
That's the eBPF C part:
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
typedef struct process_cpu_time{
u64 oncpu_time;
u64 offcpu_time;
}process_cpu_time;
typedef struct process_info{
u32 pid;
char comm[TASK_COMM_LEN];
}process_info;
BPF_HASH(oncpu_start, u32, u64, MAX_PID); // BPF_HASH(name [, key_type [, leaf_type [, size]]])
BPF_HASH(offcpu_start, u32, u64, MAX_PID); // BPF_HASH(name [, key_type [, leaf_type [, size]]])
BPF_HASH(cpu_time, u32, process_cpu_time, MAX_PID);
static inline bool str_cmp(const char* str1, const char* str2, const int max_len)
{
for(int i = 0; i < max_len; i++)
{
if(str1[i] != str2[i])
return false;
else if(str1[i] == 0 && str2[i] == 0)
return true;
}
return true;
}
static inline void str_cpy(char* Dest, const char *Src)
{
while((*Dest = *Src)!=0)
{
Dest++;
Src++;
}
}
static inline void store_oncpu_start(u32 tgid, u32 pid, u64 ts)
{
oncpu_start.update(&pid, &ts);
}
static inline void store_offcpu_start(u32 tgid, u32 pid, u64 ts)
{
offcpu_start.update(&pid, &ts);
}
static inline void update_oncpu_time(u32 tgid, u32 pid, u64 ts)
{
u64 *tsp = oncpu_start.lookup(&pid);
if (tsp == 0)
return;
if(ts < *tsp)
return;
u64 delta = ts - *tsp;
process_cpu_time* p = cpu_time.lookup(&pid);
if(p != NULL)
p->oncpu_time+=delta;
else
{
process_cpu_time init = {0, 0};
cpu_time.update(&pid, &init);
}
}
static inline void update_offcpu_time(u32 tgid, u32 pid, u64 ts)
{
u64 *tsp = offcpu_start.lookup(&pid);
if (tsp == 0)
return;
if(ts < *tsp)
return;
u64 delta = ts - *tsp;
process_cpu_time* p = cpu_time.lookup(&pid);
if(p != NULL)
p->offcpu_time+=delta;
else
{
process_cpu_time init = {0, 0};
cpu_time.update(&pid, &init);
}
}
int sched_switch(struct pt_regs *ctx, struct task_struct *prev)
{
u64 ts = bpf_ktime_get_ns();
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 tgid = pid_tgid >> 32, pid = pid_tgid;
u32 prev_pid = prev->pid;
u32 prev_tgid = prev->tgid;
PREV_PID_FILTER
{
update_oncpu_time(prev_tgid, prev_pid, ts);
store_offcpu_start(prev_tgid, prev_pid, ts);
}
BAIL:
PID_FILTER
{
update_offcpu_time(tgid, pid, ts);
store_oncpu_start(tgid, pid, ts);
}
return 0;
}
That's the Python part.
def main_loop(self, interval, process_cpu_time):
while True:
try:
sleep(interval)
except KeyboardInterrupt:
print("receive KeyBoardInterrupt")
if not self.output_file.closed:
self.output_file.flush()
self.output_file.close()
exit()
cur_time = time.time()
self.record(process_cpu_time, (cur_time-prev_time)*1e3, cur_time)
prev_time = cur_time
def record(self, process_cpu_time, period, time_stamp):
for k, v in sorted(self.bpf['cpu_time'].items_lookup_and_delete_batch(), key=lambda kv: (kv[1].oncpu_time), reverse=True):
comm = pid_to_comm(k).strip('\n')
oncpu_time_ms = v.oncpu_time / 1e6
offcpu_time_ms = v.offcpu_time / 1e6
total_time_ms = oncpu_time_ms + offcpu_time_ms
self.output_file.write("%.2f,%12d,%20s,%2f,%.2f,%.2f\n" % (
time_stamp,
k,
comm,
oncpu_time_ms,
offcpu_time_ms,
utilization * 100,
)
)
self.output_file.flush()
And I use it to snoop a dead loop program and waht I get is below:
TICKS PID COMM ON CPU(ms) OFF CPU(ms)
1651389333.51 260271 cpu 223.990950 0.01
1651389334.57 260271 cpu 351.987700 0.01
1651389335.63 260271 cpu 511.988248 0.01
1651389336.68 260271 cpu 671.987550 0.01
1651389337.73 260271 cpu 767.989400 0.02
1651389338.79 260271 cpu 991.900899 0.07
1651389339.84 260271 cpu 959.964844 0.04
1651389340.90 260271 cpu 959.984888 0.02
1651389341.95 260271 cpu 927.996113 0.01
1651389343.00 260271 cpu 967.971981 0.03
1651389344.06 260271 cpu 575.989917 0.03
I set the interval to be one second for this example, and the sum of the on-cpu time and off-cpu time is not one second and also unstable.
Can someone tell my the reason for this? And how to fix such problem? Or is there any better approachs to calculate the CPU utilization using BCC?