Trying to write XDP for redirecting packets in veth inside kubernetes

74 Views Asked by At

I want to use XDP for redirecting packets sent from pint-1 to ping-3 pod redirect to ping-2 pod inside Kubernetes, and testing this using ICMP(ping) inside the pod.

What I was expecting is that I can receivee modified ping packet in ping-2 pod.

I used hard-coded IP and MAC address in the code, and used return bpf_redirect()XDP_TXXDP_REDIRECT, but none of them worked.

I used perf to trace the XDP event and I got

➜  ebpf sudo perf trace --event 'xdp:*'
# bpf_redirect(ifindex2, 0)
     0.000 ping/936614 xdp:xdp_redirect_err:prog_id=1426 action=REDIRECT ifindex=66 to_ifindex=67 err=-6
# XDP_TX
 15562.756 ping/936800 xdp:xdp_bulk_tx:ifindex=66 action=TX sent=0 drops=1 err=-6
# XDP_REDIRECT
 27807.646 ping/936940 xdp:xdp_redirect_err:prog_id=1428 action=REDIRECT ifindex=66 to_ifindex=0 err=-22

My ENV setup is in fowlling

OS: Ubuntu 20.04(VM on Proxmox VE)
Kernel: 5.4.0-144-generic
Kubernetes: 1.23.17
CNI: flannel
clang: Ubuntu clang version 11.0.0-2~ubuntu20.04.1

Pod YAML

apiVersion: v1
kind: Pod
metadata:
  name: ping-1-privileged
spec:
  containers:
  - name: ping-1-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true

---

apiVersion: v1
kind: Pod
metadata:
  name: ping-2-privileged
spec:
  containers:
  - name: ping-2-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true

---

apiVersion: v1
kind: Pod
metadata:
  name: ping-3-privileged
spec:
  containers:
  - name: ping-3-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true

and my code

#include <linux/bpf.h>  // XDP_PASS, ...
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
// #include <netinet/in.h>
#include <bpf/bpf_helpers.h>  // SEC, bpf_printk
// #include <stdio.h>
// #include <stdlib.h>
#include <string.h>


/*
ping-1                                      1/1     Running   0          16d    10.244.0.59   ubuntu-pve   <none>           <none>
ping-1-privileged                           1/1     Running   0          13m    10.244.0.64   ubuntu-pve   <none>           <none>
ping-2                                      1/1     Running   0          16d    10.244.0.60   ubuntu-pve   <none>           <none>
ping-2-privileged                           1/1     Running   0          13m    10.244.0.63   ubuntu-pve   <none>           <none>
ping-3                                      1/1     Running   0          16d    10.244.0.61   ubuntu-pve   <none>           <none>
ping-3-privileged                           1/1     Running   0          13m    10.244.0.62   ubuntu-pve   <none>           
*/

const unsigned char ping_1_mac[6] = {0x92, 0xd3, 0x27, 0xf3, 0x2b, 0x11};
const unsigned char ping_2_mac[6] = {0xba, 0x0f, 0x32, 0xb0, 0xc6, 0x20};
const unsigned char ping_3_mac[6] = {0x72, 0x2f, 0xc1, 0xcd, 0x63, 0xa9};

const __u32 ping_1_ip = 10 | (244 << 8) | (0 << 16) | (64 << 24);
const __u32 ping_2_ip = 10 | (244 << 8) | (0 << 16) | (63 << 24);
const __be32 ping_3_ip = 10 | (244 << 8) | (0 << 16) | (62 << 24);

SEC("xdpprogram")
int myxdpprogram(struct xdp_md *ctx) {
    void * data = (void *)(long)ctx->data;
    void * data_end = (void *)(long)ctx->data_end;

    struct ethhdr * eth = data;
    if ((void*)eth + sizeof(*eth) <= data_end) {  // MAC ram size check
        struct iphdr * ip = data + sizeof(*eth);
        if ((void*)ip + sizeof(*ip) <= data_end) {  // IP ram size check
            if (ip->protocol == 1) { // check ICMP protocol
                struct icmphdr * icmp = data + sizeof(*eth) + sizeof(*ip);
                if ((void*)icmp + sizeof(*icmp) <= data_end) {  // ICMP ram size check
                    __u32 ifindex2 = 67;
                    bpf_printk("[Debug] ifindex2: %d\n", ifindex2);
                    bpf_printk("==========\n");

                    _Bool check_result = 1;
                    for (int i=0; i<6; i++) { // check mac
                        if (eth->h_source[i] != ping_3_mac[i] || eth->h_dest[i] != ping_1_mac[i]) {
                            bpf_printk("MAC ERROR\n");
                            check_result = 0;
                            break;
                        }
                    }
                    if (ip->saddr != ping_3_ip || ip->daddr != ping_1_ip) { // check ip
                        bpf_printk("IP ERROR\n");
                        check_result = 0;
                    }

                    if (check_result == 1) {
                        bpf_printk("[ Start of REWRITE! ]\n");

                        for (int i=0; i<6; i++) {
                            bpf_printk("[PRI][MAC][SRC][ORI] %x\n", eth->h_source[i]);
                        }
                        for (int i=0; i<6; i++) {
                            bpf_printk("[PRI][MAC][DEST][ORI] %x\n", eth->h_dest[i]);
                        }

                        bpf_printk("  ---  ETH ADDR CHANGED! --- \n");
                        for (int i=0; i<6; i++) {
                            eth->h_dest[i] = ping_2_mac[i];
                        }
                        for (int i=0; i<6; i++) {
                            bpf_printk("[PRI][MAC][SRC][MDF] %x\n", eth->h_source[i]);
                        }
                        for (int i=0; i<6; i++) {
                            bpf_printk("[PRI][MAC][DEST][MDF] %x\n", eth->h_dest[i]);
                        }
                            bpf_printk("  ------------  Dividers ------------ \n");
                        __u8 sipv4_0 = ip->saddr & 0xFF;
                        __u8 sipv4_1 = (ip->saddr >> 8) & 0xFF;
                        __u8 sipv4_2 = (ip->saddr >> 16) & 0xFF;
                        __u8 sipv4_3 = (ip->saddr >> 24) & 0xFF;
                        bpf_printk("[PRI][IP][SRC][ORI][0]: %d\n", sipv4_0);
                        bpf_printk("[PRI][IP][SRC][ORI][0]: %d\n", sipv4_1);
                        bpf_printk("[PRI][IP][SRC][ORI][0]: %d\n", sipv4_2);
                        bpf_printk("[PRI][IP][SRC][ORI][0]: %d\n", sipv4_3);
                        __u8 dipv4_0 = ip->daddr & 0xFF;
                        __u8 dipv4_1 = (ip->daddr >> 8) & 0xFF;
                        __u8 dipv4_2 = (ip->daddr >> 16) & 0xFF;
                        __u8 dipv4_3 = (ip->daddr >> 24) & 0xFF;
                        bpf_printk("[PRI][IP][DST][ORI][0]: %d\n", dipv4_0);
                        bpf_printk("[PRI][IP][DST][ORI][1]: %d\n", dipv4_1);
                        bpf_printk("[PRI][IP][DST][ORI][2]: %d\n", dipv4_2);
                        bpf_printk("[PRI][IP][DST][ORI][3]: %d\n", dipv4_3);
                        bpf_printk("  ---  IP SRC ADDR CHANGED! --- \n");
                        ip->saddr = ping_2_ip;
                        ip->daddr = ping_2_ip;
                        __u8 sipv4_0_1 = ip->saddr & 0xFF;
                        __u8 sipv4_1_1 = (ip->saddr >> 8) & 0xFF;
                        __u8 sipv4_2_1 = (ip->saddr >> 16) & 0xFF;
                        __u8 sipv4_3_1 = (ip->saddr >> 24) & 0xFF;
                        bpf_printk("[PRI][IP][SRC][MDF][0]: %d\n", sipv4_0_1);
                        bpf_printk("[PRI][IP][SRC][MDF][0]: %d\n", sipv4_1_1);
                        bpf_printk("[PRI][IP][SRC][MDF][0]: %d\n", sipv4_2_1);
                        bpf_printk("[PRI][IP][SRC][MDF][0]: %d\n", sipv4_3_1);
                        __u8 dipv4_0_1 = ip->daddr & 0xFF;
                        __u8 dipv4_1_1 = (ip->daddr >> 8) & 0xFF;
                        __u8 dipv4_2_1 = (ip->daddr >> 16) & 0xFF;
                        __u8 dipv4_3_1 = (ip->daddr >> 24) & 0xFF;
                        bpf_printk("[PRI][IP][DST][MDF][0]: %d\n", dipv4_0_1);
                        bpf_printk("[PRI][IP][DST][MDF][1]: %d\n", dipv4_1_1);
                        bpf_printk("[PRI][IP][DST][MDF][2]: %d\n", dipv4_2_1);
                        bpf_printk("[PRI][IP][DST][MDF][3]: %d\n", dipv4_3_1);
                        bpf_printk("[ End of REWRITE! ]\n");

                        // return bpf_redirect(ifindex2, 0);
                        // return XDP_TX;
                        return XDP_REDIRECT;
                    
                    }
                    bpf_printk("ping-3\n");
                }
            }
        }
    }
    return XDP_PASS;
}

char _license[] SEC("license") = "GPL v2";

pod veth

➜  ebpf bash getpodveth.sh ping-1-privileged ping-2-privileged ping-3-privileged
Pod: ping-1-privileged
PodName: ping-1-privileged
ContainerID: "6560f8dff79d4c7473a33df510e630f10967229fb7363c493cc23a9898c93e4f"
ContainerPID: "414426"
IfNum: "68"
VethName: "veth7443cccc"

Pod: ping-2-privileged
PodName: ping-2-privileged
ContainerID: "7731cb692c775a3c81e264880da98016b5041e2edb68013f3c5906a6332b0044"
ContainerPID: "414323"
IfNum: "67"
VethName: "vethf362cb52"

Pod: ping-3-privileged
PodName: ping-3-privileged
ContainerID: "4868a00c9ffcb3bceb2aa9b8c6994b924eb55e2b0155ea74c2554d3f1fc55616"
ContainerPID: "414271"
IfNum: "66"
VethName: "veth12d73784"

How i set XDP

clang -target bpf -c ping3-privileged.c -o ping3-privileged.o -O2 && \
sudo ip link set dev veth12d73784 xdp off && \
sudo ip link set dev veth12d73784 xdp obj ping3-privileged.o sec xdpprogram && \
ip a | grep veth12d73784 | grep xdp && \
kubectl exec -it ping-1-privileged -- ping -c 1 10.244.0.62

Can anyone point out what I was wrong or missing?

I am expecting to have icmp packet when using tcpdump capturing veth of pod ping-2.

0

There are 0 best solutions below