Reading sk_buff with ebpf inside dev_queue_xmit yields questionable data

917 Views Asked by At

I'm trying to capture outgoing ethernet frames on the local host before they are sent by inserting a kprobe into __dev_queue_xmit(). However, the bytes I extract from the sk_buff structure do not match the subsequently captured packets.

I only attempted it for linear skbs up to now, because I already get unexpected results there. For example, my kprobe reported the following information during a call to __dev_queue_xmit():

COMM            PID      TGID     LEN        DATALEN
chronyd         1058     1058     90         0
3431c4b06a8b3c7c3f2023bd08006500d0a57f040f7f0000000000000000000000000000000000006018d11a0f7f00000100000000000000000000000000000060a67f040f7f0000000000000000000000000000000000004001

COMM is the name of the process which called the function, PID is the calling thread's id and TGID its thread group id. LEN is the value of (skb->len - skb->data_len) and DATA_LEN is skb->data_len.

Next, the program has copied LEN (in this case 90) bytes starting at skb->data. Since DATALEN is zero, this is a linear skb. Thus, those bytes should contain exactly the frame which is about to be sent, shouldn't they?

Well, Wireshark subsequently recorded this frame:

0000   34 31 c4 b0 6a 8b 3c 7c 3f 20 23 bd 08 00 45 00
0010   00 4c 83 93 40 00 40 11 d1 a2 c0 a8 b2 18 c0 a8
0020   b2 01 c8 07 00 7b 00 38 e5 b4 23 00 06 20 00 00
0030   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0040   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0050   00 00 38 bc 17 13 12 4a 4c c0

The first 14 bytes, which are forming the ethernet header, match up perfectly as expected. Everything else doesn't match up at all. The question now is: Why do the bytes not match up?

(Yes, I am certain the frame from Wireshark is indeed the one caused by this call to __dev_queue_xmit(). This is because only background programs using the network were running at the time, so the amount of outgoing traffic was rather small. Additionally, the captured frame contains, as expected, 90 bytes. Also, this frame holds an NTP payload, which is just what you'd expect from chronyd.)

My kernel version is 5.12.6-200.fc33.x86_64.

If you want to try it out yourself or have a closer look at my program, here it is:

from bcc import BPF

from ctypes import cast, POINTER, c_char

prog = """
#include <linux/sched.h>
#include <linux/skbuff.h>

struct xmit_event {
    u64 ts;
    u32 pid;
    u32 tgid;
    u32 len;
    u32 datalen;
    u32 packet_buf_ptr;
    char comm[TASK_COMM_LEN];
    
    u64 head;
    u64 data;
    u64 tail;
    u64 end;
};
BPF_PERF_OUTPUT(xmits);

#define PACKET_BUF_SIZE 32768
# define PACKET_BUFS_PER_CPU 15

struct packet_buf {
    char data[PACKET_BUF_SIZE];
};
BPF_PERCPU_ARRAY(packet_buf, struct packet_buf, PACKET_BUFS_PER_CPU);
BPF_PERCPU_ARRAY(packet_buf_head, u32, 1);

int kprobe____dev_queue_xmit(struct pt_regs *ctx, struct sk_buff *skb, void *accel_priv) {
    if (skb == NULL || skb->data == NULL)
        return 0;
    struct xmit_event data = { };
    u64 both = bpf_get_current_pid_tgid();

    data.pid = both;
    if (data.pid == 0)
        return 0;
    data.tgid = both >> 32;
    data.ts = bpf_ktime_get_ns();
    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.len = skb->len;
    
    // Copy packet contents
    int slot = 0;
    u32 *packet_buf_ptr = packet_buf_head.lookup(&slot);
    if (packet_buf_ptr == NULL)
        return 0;
    u32 buf_head = *packet_buf_ptr;
    u32 next_buf_head = (buf_head + 1) % PACKET_BUFS_PER_CPU;
    packet_buf_head.update(&slot, &next_buf_head);
    
    struct packet_buf *ringbuf = packet_buf.lookup(&buf_head);
    if (ringbuf == NULL)
        return 0;
    
    u32 skb_data_len = skb->data_len;
    u32 headlen = data.len - skb_data_len;
    headlen &= 0xffffff; // Useless, but validator demands it because "this unsigned(!) variable could otherwise be negative"
    bpf_probe_read_kernel(ringbuf->data, headlen < PACKET_BUF_SIZE ? headlen : PACKET_BUF_SIZE, skb->data);
    data.packet_buf_ptr = buf_head;
    
    data.len = headlen;
    data.datalen = skb_data_len;
    
    data.head = (u64) skb->head;
    data.data = (u64) skb->data;
    data.tail = (u64) skb->tail;
    data.end = (u64) skb->end;
    
    xmits.perf_submit(ctx, &data, sizeof(data));

    return 0;
}

"""

global b

def xmit_received(cpu, data, size):
    global b
    global py_packet_buf
    ev = b["xmits"].event(data)
    print("%-18d %-25s %-8d %-8d %-10d %-10d %-12d %-12d %-12d %-12d" % (ev.ts, ev.comm.decode(), ev.pid, ev.tgid, ev.len, ev.datalen, ev.head, ev.data, ev.tail, ev.end))
    bs = cast(py_packet_buf[ev.packet_buf_ptr][cpu].data, POINTER(c_char))[:ev.len]
    c = bytes(bs)
    print(c.hex())


def observe_kernel():
    # load BPF program
    global b
    b = BPF(text=prog)

    print("%-18s %-25s %-8s %-8s %-10s %-10s %-12s %-12s %-12s %-12s" % ("TS", "COMM", "PID", "TGID", "LEN", "DATALEN", "HEAD", "DATA", "TAIL", "END"))

    b["xmits"].open_perf_buffer(xmit_received)
    global py_packet_buf
    py_packet_buf = b["packet_buf"]

    try:
        while True:
            b.perf_buffer_poll()
    except KeyboardInterrupt:
        print("Kernel observer thread stopped.")

observe_kernel()
1

There are 1 best solutions below

0
On

Found the issue. I needed to replace

struct packet_buf {
    char data[PACKET_BUF_SIZE];
};

with

struct packet_buf {
    unsigned char data[PACKET_BUF_SIZE];
};

I, however, do not understand how signedness makes a difference when I am not performing comparisons or arithmetic operations with this data.