I want to use XDP to redirect packets that are sent from the ping-1 pod to the ping-3 pod so that they are delivered to the ping-2 pod instead, inside Kubernetes, and I am testing this with ICMP (ping) from inside the pods.
What I expect is to receive the modified ping packet in the ping-2 pod.
I hard-coded the IP and MAC addresses in the code and tried returning bpf_redirect(), XDP_TX, and XDP_REDIRECT, but none of them worked.
I used perf to trace the XDP events and got the following:
➜ ebpf sudo perf trace --event 'xdp:*'
# bpf_redirect(ifindex2, 0)
0.000 ping/936614 xdp:xdp_redirect_err:prog_id=1426 action=REDIRECT ifindex=66 to_ifindex=67 err=-6
# XDP_TX
15562.756 ping/936800 xdp:xdp_bulk_tx:ifindex=66 action=TX sent=0 drops=1 err=-6
# XDP_REDIRECT
27807.646 ping/936940 xdp:xdp_redirect_err:prog_id=1428 action=REDIRECT ifindex=66 to_ifindex=0 err=-22
My environment setup is as follows:
OS: Ubuntu 20.04(VM on Proxmox VE)
Kernel: 5.4.0-144-generic
Kubernetes: 1.23.17
CNI: flannel
clang: Ubuntu clang version 11.0.0-2~ubuntu20.04.1
Pod YAML
# Three identical privileged Pods that just sleep, used as ping endpoints.
# Indentation restored so that each key nests under its intended parent.
apiVersion: v1
kind: Pod
metadata:
  name: ping-1-privileged
spec:
  containers:
  - name: ping-1-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true
---
apiVersion: v1
kind: Pod
metadata:
  name: ping-2-privileged
spec:
  containers:
  - name: ping-2-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true
---
apiVersion: v1
kind: Pod
metadata:
  name: ping-3-privileged
spec:
  containers:
  - name: ping-3-privileged
    image: ubuntu20.04
    command: ["sleep", "infinity"]
    securityContext:
      privileged: true
and my code
#include <linux/bpf.h> // XDP_PASS, ...
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/icmp.h>
// #include <netinet/in.h>
#include <bpf/bpf_helpers.h> // SEC, bpf_printk
// #include <stdio.h>
// #include <stdlib.h>
#include <string.h>
/*
ping-1 1/1 Running 0 16d 10.244.0.59 ubuntu-pve <none> <none>
ping-1-privileged 1/1 Running 0 13m 10.244.0.64 ubuntu-pve <none> <none>
ping-2 1/1 Running 0 16d 10.244.0.60 ubuntu-pve <none> <none>
ping-2-privileged 1/1 Running 0 13m 10.244.0.63 ubuntu-pve <none> <none>
ping-3 1/1 Running 0 16d 10.244.0.61 ubuntu-pve <none> <none>
ping-3-privileged 1/1 Running 0 13m 10.244.0.62 ubuntu-pve <none>
*/
/*
 * Hard-coded link-layer and IPv4 addresses of the three privileged test pods
 * (ping-1-privileged = 10.244.0.64, ping-2-privileged = 10.244.0.63,
 * ping-3-privileged = 10.244.0.62, per the pod listing above).
 *
 * The IPv4 values are stored in network byte order so they can be compared
 * directly against iphdr.saddr / iphdr.daddr; all three therefore use __be32.
 */
const unsigned char ping_1_mac[6] = {0x92, 0xd3, 0x27, 0xf3, 0x2b, 0x11};
const unsigned char ping_2_mac[6] = {0xba, 0x0f, 0x32, 0xb0, 0xc6, 0x20};
const unsigned char ping_3_mac[6] = {0x72, 0x2f, 0xc1, 0xcd, 0x63, 0xa9};

/*
 * Pack dotted-quad octets a.b.c.d into a network-byte-order 32-bit value.
 * On little-endian targets octet `a` must land in the least significant byte
 * so that it is first in memory; on big-endian targets it is the most
 * significant byte.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define POD_IPV4(a, b, c, d)                                            \
    ((__be32)(((__u32)(a) << 24) | ((__u32)(b) << 16) |                 \
              ((__u32)(c) << 8) | (__u32)(d)))
#else
#define POD_IPV4(a, b, c, d)                                            \
    ((__be32)((__u32)(a) | ((__u32)(b) << 8) |                          \
              ((__u32)(c) << 16) | ((__u32)(d) << 24)))
#endif

const __be32 ping_1_ip = POD_IPV4(10, 244, 0, 64);
const __be32 ping_2_ip = POD_IPV4(10, 244, 0, 63);
const __be32 ping_3_ip = POD_IPV4(10, 244, 0, 62);
/*
 * Incrementally patch a 16-bit ones-complement checksum after one 32-bit
 * field it covers changes from `from` to `to` (RFC 1624, equation 3:
 * HC' = ~(~HC + ~m + m')).
 *
 * All three values are used exactly as they sit in the packet; ones-complement
 * addition is byte-order independent, so no byte swapping is required.
 */
static inline __attribute__((always_inline))
__u16 csum_replace32(__u16 check, __u32 from, __u32 to)
{
    __u32 sum = (__u16)~check;

    sum += (__u16)~(from & 0xFFFF);
    sum += (__u16)~(from >> 16);
    sum += to & 0xFFFF;
    sum += to >> 16;

    /* Fold the carries back in; two rounds are enough for five 16-bit terms. */
    sum = (sum & 0xFFFF) + (sum >> 16);
    sum = (sum & 0xFFFF) + (sum >> 16);

    return (__u16)~sum;
}

/*
 * XDP entry point: steer ICMP packets travelling ping-3 -> ping-1 to ping-2.
 *
 * A frame is rewritten only when it is IPv4/ICMP, its source MAC/IP are
 * ping-3's and its destination MAC/IP are ping-1's.  For such a frame the
 * destination MAC becomes ping-2's, both IPv4 addresses are set to ping-2's
 * address, the IPv4 header checksum is patched to match, and the frame is
 * redirected to the hard-coded interface index 67.  Everything else is
 * passed to the normal stack.
 *
 * Returns XDP_PASS for non-matching traffic, otherwise the result of
 * bpf_redirect() (XDP_REDIRECT on success).
 */
SEC("xdpprogram")
int myxdpprogram(struct xdp_md *ctx) {
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;

    /* Ethernet header must be fully inside the packet. */
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) > data_end) {
        return XDP_PASS;
    }

    /* Only IPv4 frames carry an iphdr; ARP, IPv6, etc. must not be parsed as one. */
    if (eth->h_proto != __constant_htons(ETH_P_IP)) {
        return XDP_PASS;
    }

    /* IPv4 header bounds check. */
    struct iphdr *ip = data + sizeof(*eth);
    if ((void *)ip + sizeof(*ip) > data_end) {
        return XDP_PASS;
    }

    /* Protocol number 1 is ICMP. */
    if (ip->protocol != 1) {
        return XDP_PASS;
    }

    /*
     * ICMP header bounds check.  The offset assumes an option-less IPv4
     * header (ihl == 5); the ICMP header itself is never modified here.
     */
    struct icmphdr *icmp = data + sizeof(*eth) + sizeof(*ip);
    if ((void *)icmp + sizeof(*icmp) > data_end) {
        return XDP_PASS;
    }

    /* Interface index the matching frames are redirected to (hard-coded). */
    __u32 ifindex2 = 67;
    bpf_printk("[Debug] ifindex2: %d\n", ifindex2);
    bpf_printk("==========\n");

    /* The frame must come from ping-3 and be addressed to ping-1. */
    _Bool matches = 1;
    for (int i = 0; i < 6; i++) {
        if (eth->h_source[i] != ping_3_mac[i] || eth->h_dest[i] != ping_1_mac[i]) {
            bpf_printk("MAC ERROR\n");
            matches = 0;
            break;
        }
    }
    if (ip->saddr != ping_3_ip || ip->daddr != ping_1_ip) {
        bpf_printk("IP ERROR\n");
        matches = 0;
    }
    if (!matches) {
        bpf_printk("ping-3\n");
        return XDP_PASS;
    }

    bpf_printk("[ Start of REWRITE! ]\n");
    for (int i = 0; i < 6; i++) {
        bpf_printk("[PRI][MAC][SRC][ORI] %x\n", eth->h_source[i]);
    }
    for (int i = 0; i < 6; i++) {
        bpf_printk("[PRI][MAC][DEST][ORI] %x\n", eth->h_dest[i]);
    }

    bpf_printk(" --- ETH ADDR CHANGED! --- \n");
    for (int i = 0; i < 6; i++) {
        eth->h_dest[i] = ping_2_mac[i];
    }
    for (int i = 0; i < 6; i++) {
        bpf_printk("[PRI][MAC][SRC][MDF] %x\n", eth->h_source[i]);
    }
    for (int i = 0; i < 6; i++) {
        bpf_printk("[PRI][MAC][DEST][MDF] %x\n", eth->h_dest[i]);
    }

    bpf_printk(" ------------ Dividers ------------ \n");

    /* Octet extraction below reads the address as laid out on a little-endian target. */
    __u8 sipv4_0 = ip->saddr & 0xFF;
    __u8 sipv4_1 = (ip->saddr >> 8) & 0xFF;
    __u8 sipv4_2 = (ip->saddr >> 16) & 0xFF;
    __u8 sipv4_3 = (ip->saddr >> 24) & 0xFF;
    bpf_printk("[PRI][IP][SRC][ORI][0]: %d\n", sipv4_0);
    bpf_printk("[PRI][IP][SRC][ORI][1]: %d\n", sipv4_1);
    bpf_printk("[PRI][IP][SRC][ORI][2]: %d\n", sipv4_2);
    bpf_printk("[PRI][IP][SRC][ORI][3]: %d\n", sipv4_3);

    __u8 dipv4_0 = ip->daddr & 0xFF;
    __u8 dipv4_1 = (ip->daddr >> 8) & 0xFF;
    __u8 dipv4_2 = (ip->daddr >> 16) & 0xFF;
    __u8 dipv4_3 = (ip->daddr >> 24) & 0xFF;
    bpf_printk("[PRI][IP][DST][ORI][0]: %d\n", dipv4_0);
    bpf_printk("[PRI][IP][DST][ORI][1]: %d\n", dipv4_1);
    bpf_printk("[PRI][IP][DST][ORI][2]: %d\n", dipv4_2);
    bpf_printk("[PRI][IP][DST][ORI][3]: %d\n", dipv4_3);

    bpf_printk(" --- IP SRC ADDR CHANGED! --- \n");

    /*
     * Patch the header checksum for both address changes before writing
     * them; a stale checksum makes the receiving stack discard the packet.
     * The ICMP checksum does not cover the IP addresses and stays valid.
     */
    __u16 csum = ip->check;
    csum = csum_replace32(csum, ip->saddr, ping_2_ip);
    csum = csum_replace32(csum, ip->daddr, ping_2_ip);

    /*
     * NOTE(review): the source address is also set to ping-2's address, as
     * in the original code.  A packet whose source equals the receiver's own
     * address may be rejected by the receiver's source validation — confirm
     * whether saddr was meant to stay untouched.
     */
    ip->saddr = ping_2_ip;
    ip->daddr = ping_2_ip;
    ip->check = csum;

    __u8 sipv4_0_1 = ip->saddr & 0xFF;
    __u8 sipv4_1_1 = (ip->saddr >> 8) & 0xFF;
    __u8 sipv4_2_1 = (ip->saddr >> 16) & 0xFF;
    __u8 sipv4_3_1 = (ip->saddr >> 24) & 0xFF;
    bpf_printk("[PRI][IP][SRC][MDF][0]: %d\n", sipv4_0_1);
    bpf_printk("[PRI][IP][SRC][MDF][1]: %d\n", sipv4_1_1);
    bpf_printk("[PRI][IP][SRC][MDF][2]: %d\n", sipv4_2_1);
    bpf_printk("[PRI][IP][SRC][MDF][3]: %d\n", sipv4_3_1);

    __u8 dipv4_0_1 = ip->daddr & 0xFF;
    __u8 dipv4_1_1 = (ip->daddr >> 8) & 0xFF;
    __u8 dipv4_2_1 = (ip->daddr >> 16) & 0xFF;
    __u8 dipv4_3_1 = (ip->daddr >> 24) & 0xFF;
    bpf_printk("[PRI][IP][DST][MDF][0]: %d\n", dipv4_0_1);
    bpf_printk("[PRI][IP][DST][MDF][1]: %d\n", dipv4_1_1);
    bpf_printk("[PRI][IP][DST][MDF][2]: %d\n", dipv4_2_1);
    bpf_printk("[PRI][IP][DST][MDF][3]: %d\n", dipv4_3_1);
    bpf_printk("[ End of REWRITE! ]\n");

    /*
     * The redirect target must be registered through bpf_redirect(); a bare
     * `return XDP_REDIRECT` leaves the target ifindex at 0, which the kernel
     * rejects with -EINVAL (the "to_ifindex=0 err=-22" trace line).
     *
     * NOTE(review): on 5.4-era kernels the veth driver appears to refuse
     * XDP_TX / redirected frames with -ENXIO (err=-6) unless the peer end of
     * the veth pair (the pod-side interface) also has an XDP program
     * attached; a trivial XDP_PASS program on the target pod's interface
     * should satisfy it.  Verify against drivers/net/veth.c for the running
     * kernel.
     */
    return bpf_redirect(ifindex2, 0);
}
char _license[] SEC("license") = "GPL v2";
pod veth
➜ ebpf bash getpodveth.sh ping-1-privileged ping-2-privileged ping-3-privileged
Pod: ping-1-privileged
PodName: ping-1-privileged
ContainerID: "6560f8dff79d4c7473a33df510e630f10967229fb7363c493cc23a9898c93e4f"
ContainerPID: "414426"
IfNum: "68"
VethName: "veth7443cccc"
Pod: ping-2-privileged
PodName: ping-2-privileged
ContainerID: "7731cb692c775a3c81e264880da98016b5041e2edb68013f3c5906a6332b0044"
ContainerPID: "414323"
IfNum: "67"
VethName: "vethf362cb52"
Pod: ping-3-privileged
PodName: ping-3-privileged
ContainerID: "4868a00c9ffcb3bceb2aa9b8c6994b924eb55e2b0155ea74c2554d3f1fc55616"
ContainerPID: "414271"
IfNum: "66"
VethName: "veth12d73784"
How I build and attach the XDP program:
# Build-and-test chain; each step runs only if the previous one succeeded:
#   1. compile ping3-privileged.c to a BPF object with -O2,
#   2. detach any XDP program currently on veth12d73784,
#   3. attach section "xdpprogram" from the new object to veth12d73784,
#   4. confirm via `ip a` that the interface reports an xdp program,
#   5. send one ICMP echo from pod ping-1-privileged to 10.244.0.62.
clang -target bpf -c ping3-privileged.c -o ping3-privileged.o -O2 && \
sudo ip link set dev veth12d73784 xdp off && \
sudo ip link set dev veth12d73784 xdp obj ping3-privileged.o sec xdpprogram && \
ip a | grep veth12d73784 | grep xdp && \
kubectl exec -it ping-1-privileged -- ping -c 1 10.244.0.62
Can anyone point out what I did wrong or what I am missing?
I expect to see the ICMP packet when capturing on the veth of the ping-2 pod with tcpdump.