Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xdp: Sample xdp program implementing ip forward

Implements port-to-port forwarding of IPv4 packets. The next hop is
resolved with route-table and ARP-table lookups kept in BPF maps (an
lpm_trie map plus hash maps), and the packet is transmitted with the
bpf_redirect_map helper function.

Signed-off-by: Christina Jacob <Christina.Jacob@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Christina Jacob and committed by
David S. Miller
3e29cd0e 4ad1ceec

+849
+4
samples/bpf/Makefile
··· 29 29 hostprogs-y += test_cgrp2_sock2 30 30 hostprogs-y += xdp1 31 31 hostprogs-y += xdp2 32 + hostprogs-y += xdp_router_ipv4 32 33 hostprogs-y += test_current_task_under_cgroup 33 34 hostprogs-y += trace_event 34 35 hostprogs-y += sampleip ··· 77 76 xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o 78 77 # reuse xdp1 source intentionally 79 78 xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o 79 + xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o 80 80 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ 81 81 test_current_task_under_cgroup_user.o 82 82 trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o ··· 120 118 always += test_cgrp2_tc_kern.o 121 119 always += xdp1_kern.o 122 120 always += xdp2_kern.o 121 + always += xdp_router_ipv4_kern.o 123 122 always += test_current_task_under_cgroup_kern.o 124 123 always += trace_event_kern.o 125 124 always += sampleip_kern.o ··· 169 166 HOSTLOADLIBES_test_overhead += -lelf -lrt 170 167 HOSTLOADLIBES_xdp1 += -lelf 171 168 HOSTLOADLIBES_xdp2 += -lelf 169 + HOSTLOADLIBES_xdp_router_ipv4 += -lelf 172 170 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf 173 171 HOSTLOADLIBES_trace_event += -lelf 174 172 HOSTLOADLIBES_sampleip += -lelf
+186
samples/bpf/xdp_router_ipv4_kern.c
··· 1 + /* Copyright (C) 2017 Cavium, Inc. 2 + * 3 + * This program is free software; you can redistribute it and/or modify it 4 + * under the terms of version 2 of the GNU General Public License 5 + * as published by the Free Software Foundation. 6 + */ 7 + #define KBUILD_MODNAME "foo" 8 + #include <uapi/linux/bpf.h> 9 + #include <linux/in.h> 10 + #include <linux/if_ether.h> 11 + #include <linux/if_packet.h> 12 + #include <linux/if_vlan.h> 13 + #include <linux/ip.h> 14 + #include <linux/ipv6.h> 15 + #include "bpf_helpers.h" 16 + #include <linux/slab.h> 17 + #include <net/ip_fib.h> 18 + 19 + struct trie_value { 20 + __u8 prefix[4]; 21 + __be64 value; 22 + int ifindex; 23 + int metric; 24 + __be32 gw; 25 + }; 26 + 27 + /* Key for lpm_trie*/ 28 + union key_4 { 29 + u32 b32[2]; 30 + u8 b8[8]; 31 + }; 32 + 33 + struct arp_entry { 34 + __be64 mac; 35 + __be32 dst; 36 + }; 37 + 38 + struct direct_map { 39 + struct arp_entry arp; 40 + int ifindex; 41 + __be64 mac; 42 + }; 43 + 44 + /* Map for trie implementation*/ 45 + struct bpf_map_def SEC("maps") lpm_map = { 46 + .type = BPF_MAP_TYPE_LPM_TRIE, 47 + .key_size = 8, 48 + .value_size = sizeof(struct trie_value), 49 + .max_entries = 50, 50 + .map_flags = BPF_F_NO_PREALLOC, 51 + }; 52 + 53 + /* Map for counter*/ 54 + struct bpf_map_def SEC("maps") rxcnt = { 55 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 56 + .key_size = sizeof(u32), 57 + .value_size = sizeof(u64), 58 + .max_entries = 256, 59 + }; 60 + 61 + /* Map for ARP table*/ 62 + struct bpf_map_def SEC("maps") arp_table = { 63 + .type = BPF_MAP_TYPE_HASH, 64 + .key_size = sizeof(__be32), 65 + .value_size = sizeof(__be64), 66 + .max_entries = 50, 67 + }; 68 + 69 + /* Map to keep the exact match entries in the route table*/ 70 + struct bpf_map_def SEC("maps") exact_match = { 71 + .type = BPF_MAP_TYPE_HASH, 72 + .key_size = sizeof(__be32), 73 + .value_size = sizeof(struct direct_map), 74 + .max_entries = 50, 75 + }; 76 + 77 + struct bpf_map_def SEC("maps") tx_port = { 78 + .type = 
BPF_MAP_TYPE_DEVMAP, 79 + .key_size = sizeof(int), 80 + .value_size = sizeof(int), 81 + .max_entries = 100, 82 + }; 83 + 84 + /* Function to set source and destination mac of the packet */ 85 + static inline void set_src_dst_mac(void *data, void *src, void *dst) 86 + { 87 + unsigned short *source = src; 88 + unsigned short *dest = dst; 89 + unsigned short *p = data; 90 + 91 + __builtin_memcpy(p, dest, 6); 92 + __builtin_memcpy(p + 3, source, 6); 93 + } 94 + 95 + /* Parse IPV4 packet to get SRC, DST IP and protocol */ 96 + static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, 97 + __be32 *src, __be32 *dest) 98 + { 99 + struct iphdr *iph = data + nh_off; 100 + 101 + if (iph + 1 > data_end) 102 + return 0; 103 + *src = iph->saddr; 104 + *dest = iph->daddr; 105 + return iph->protocol; 106 + } 107 + 108 + SEC("xdp_router_ipv4") 109 + int xdp_router_ipv4_prog(struct xdp_md *ctx) 110 + { 111 + void *data_end = (void *)(long)ctx->data_end; 112 + __be64 *dest_mac = NULL, *src_mac = NULL; 113 + void *data = (void *)(long)ctx->data; 114 + struct trie_value *prefix_value; 115 + int rc = XDP_DROP, forward_to; 116 + struct ethhdr *eth = data; 117 + union key_4 key4; 118 + long *value; 119 + u16 h_proto; 120 + u32 ipproto; 121 + u64 nh_off; 122 + 123 + nh_off = sizeof(*eth); 124 + if (data + nh_off > data_end) 125 + return rc; 126 + 127 + h_proto = eth->h_proto; 128 + 129 + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 130 + struct vlan_hdr *vhdr; 131 + 132 + vhdr = data + nh_off; 133 + nh_off += sizeof(struct vlan_hdr); 134 + if (data + nh_off > data_end) 135 + return rc; 136 + h_proto = vhdr->h_vlan_encapsulated_proto; 137 + } 138 + if (h_proto == htons(ETH_P_ARP)) { 139 + return XDP_PASS; 140 + } else if (h_proto == htons(ETH_P_IP)) { 141 + struct direct_map *direct_entry; 142 + __be32 src_ip = 0, dest_ip = 0; 143 + 144 + ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip); 145 + direct_entry = 
bpf_map_lookup_elem(&exact_match, &dest_ip); 146 + /* Check for exact match, this would give a faster lookup*/ 147 + if (direct_entry && direct_entry->mac && direct_entry->arp.mac) { 148 + src_mac = &direct_entry->mac; 149 + dest_mac = &direct_entry->arp.mac; 150 + forward_to = direct_entry->ifindex; 151 + } else { 152 + /* Look up in the trie for lpm*/ 153 + key4.b32[0] = 32; 154 + key4.b8[4] = dest_ip & 0xff; 155 + key4.b8[5] = (dest_ip >> 8) & 0xff; 156 + key4.b8[6] = (dest_ip >> 16) & 0xff; 157 + key4.b8[7] = (dest_ip >> 24) & 0xff; 158 + prefix_value = bpf_map_lookup_elem(&lpm_map, &key4); 159 + if (!prefix_value) 160 + return XDP_DROP; 161 + src_mac = &prefix_value->value; 162 + if (!src_mac) 163 + return XDP_DROP; 164 + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); 165 + if (!dest_mac) { 166 + if (!prefix_value->gw) 167 + return XDP_DROP; 168 + dest_ip = prefix_value->gw; 169 + dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); 170 + } 171 + forward_to = prefix_value->ifindex; 172 + } 173 + } else { 174 + ipproto = 0; 175 + } 176 + if (src_mac && dest_mac) { 177 + set_src_dst_mac(data, src_mac, dest_mac); 178 + value = bpf_map_lookup_elem(&rxcnt, &ipproto); 179 + if (value) 180 + *value += 1; 181 + return bpf_redirect_map(&tx_port, forward_to, 0); 182 + } 183 + return rc; 184 + } 185 + 186 + char _license[] SEC("license") = "GPL";
+659
samples/bpf/xdp_router_ipv4_user.c
··· 1 + /* Copyright (C) 2017 Cavium, Inc. 2 + * 3 + * This program is free software; you can redistribute it and/or modify it 4 + * under the terms of version 2 of the GNU General Public License 5 + * as published by the Free Software Foundation. 6 + */ 7 + #include <linux/bpf.h> 8 + #include <linux/netlink.h> 9 + #include <linux/rtnetlink.h> 10 + #include <assert.h> 11 + #include <errno.h> 12 + #include <signal.h> 13 + #include <stdio.h> 14 + #include <stdlib.h> 15 + #include <string.h> 16 + #include <sys/socket.h> 17 + #include <unistd.h> 18 + #include "bpf_load.h" 19 + #include "libbpf.h" 20 + #include <arpa/inet.h> 21 + #include <fcntl.h> 22 + #include <poll.h> 23 + #include <net/if.h> 24 + #include <netdb.h> 25 + #include <sys/ioctl.h> 26 + #include <sys/syscall.h> 27 + #include "bpf_util.h" 28 + 29 + int sock, sock_arp, flags = 0; 30 + static int total_ifindex; 31 + int *ifindex_list; 32 + char buf[8192]; 33 + 34 + static int get_route_table(int rtm_family); 35 + static void int_exit(int sig) 36 + { 37 + int i = 0; 38 + 39 + for (i = 0; i < total_ifindex; i++) 40 + set_link_xdp_fd(ifindex_list[i], -1, flags); 41 + exit(0); 42 + } 43 + 44 + static void close_and_exit(int sig) 45 + { 46 + int i = 0; 47 + 48 + close(sock); 49 + close(sock_arp); 50 + 51 + for (i = 0; i < total_ifindex; i++) 52 + set_link_xdp_fd(ifindex_list[i], -1, flags); 53 + exit(0); 54 + } 55 + 56 + /* Get the mac address of the interface given interface name */ 57 + static __be64 getmac(char *iface) 58 + { 59 + struct ifreq ifr; 60 + __be64 mac = 0; 61 + int fd, i; 62 + 63 + fd = socket(AF_INET, SOCK_DGRAM, 0); 64 + ifr.ifr_addr.sa_family = AF_INET; 65 + strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1); 66 + if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { 67 + printf("ioctl failed leaving....\n"); 68 + return -1; 69 + } 70 + for (i = 0; i < 6 ; i++) 71 + *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i]; 72 + close(fd); 73 + return mac; 74 + } 75 + 76 + static int recv_msg(struct sockaddr_nl 
sock_addr, int sock) 77 + { 78 + struct nlmsghdr *nh; 79 + int len, nll = 0; 80 + char *buf_ptr; 81 + 82 + buf_ptr = buf; 83 + while (1) { 84 + len = recv(sock, buf_ptr, sizeof(buf) - nll, 0); 85 + if (len < 0) 86 + return len; 87 + 88 + nh = (struct nlmsghdr *)buf_ptr; 89 + 90 + if (nh->nlmsg_type == NLMSG_DONE) 91 + break; 92 + buf_ptr += len; 93 + nll += len; 94 + if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH) 95 + break; 96 + 97 + if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE) 98 + break; 99 + } 100 + return nll; 101 + } 102 + 103 + /* Function to parse the route entry returned by netlink 104 + * Updates the route entry related map entries 105 + */ 106 + static void read_route(struct nlmsghdr *nh, int nll) 107 + { 108 + char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; 109 + struct bpf_lpm_trie_key *prefix_key; 110 + struct rtattr *rt_attr; 111 + struct rtmsg *rt_msg; 112 + int rtm_family; 113 + int rtl; 114 + int i; 115 + struct route_table { 116 + int dst_len, iface, metric; 117 + char *iface_name; 118 + __be32 dst, gw; 119 + __be64 mac; 120 + } route; 121 + struct arp_table { 122 + __be64 mac; 123 + __be32 dst; 124 + }; 125 + 126 + struct direct_map { 127 + struct arp_table arp; 128 + int ifindex; 129 + __be64 mac; 130 + } direct_entry; 131 + 132 + if (nh->nlmsg_type == RTM_DELROUTE) 133 + printf("DELETING Route entry\n"); 134 + else if (nh->nlmsg_type == RTM_GETROUTE) 135 + printf("READING Route entry\n"); 136 + else if (nh->nlmsg_type == RTM_NEWROUTE) 137 + printf("NEW Route entry\n"); 138 + else 139 + printf("%d\n", nh->nlmsg_type); 140 + 141 + memset(&route, 0, sizeof(route)); 142 + printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n"); 143 + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { 144 + rt_msg = (struct rtmsg *)NLMSG_DATA(nh); 145 + rtm_family = rt_msg->rtm_family; 146 + if (rtm_family == AF_INET) 147 + if (rt_msg->rtm_table != RT_TABLE_MAIN) 148 + continue; 149 + rt_attr = (struct 
rtattr *)RTM_RTA(rt_msg); 150 + rtl = RTM_PAYLOAD(nh); 151 + 152 + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { 153 + switch (rt_attr->rta_type) { 154 + case NDA_DST: 155 + sprintf(dsts, "%u", 156 + (*((__be32 *)RTA_DATA(rt_attr)))); 157 + break; 158 + case RTA_GATEWAY: 159 + sprintf(gws, "%u", 160 + *((__be32 *)RTA_DATA(rt_attr))); 161 + break; 162 + case RTA_OIF: 163 + sprintf(ifs, "%u", 164 + *((int *)RTA_DATA(rt_attr))); 165 + break; 166 + case RTA_METRICS: 167 + sprintf(metrics, "%u", 168 + *((int *)RTA_DATA(rt_attr))); 169 + default: 170 + break; 171 + } 172 + } 173 + sprintf(dsts_len, "%d", rt_msg->rtm_dst_len); 174 + route.dst = atoi(dsts); 175 + route.dst_len = atoi(dsts_len); 176 + route.gw = atoi(gws); 177 + route.iface = atoi(ifs); 178 + route.metric = atoi(metrics); 179 + route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); 180 + route.iface_name = if_indextoname(route.iface, route.iface_name); 181 + route.mac = getmac(route.iface_name); 182 + if (route.mac == -1) { 183 + int i = 0; 184 + 185 + for (i = 0; i < total_ifindex; i++) 186 + set_link_xdp_fd(ifindex_list[i], -1, flags); 187 + exit(0); 188 + } 189 + assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); 190 + if (rtm_family == AF_INET) { 191 + struct trie_value { 192 + __u8 prefix[4]; 193 + __be64 value; 194 + int ifindex; 195 + int metric; 196 + __be32 gw; 197 + } *prefix_value; 198 + 199 + prefix_key = alloca(sizeof(*prefix_key) + 3); 200 + prefix_value = alloca(sizeof(*prefix_value)); 201 + 202 + prefix_key->prefixlen = 32; 203 + prefix_key->prefixlen = route.dst_len; 204 + direct_entry.mac = route.mac & 0xffffffffffff; 205 + direct_entry.ifindex = route.iface; 206 + direct_entry.arp.mac = 0; 207 + direct_entry.arp.dst = 0; 208 + if (route.dst_len == 32) { 209 + if (nh->nlmsg_type == RTM_DELROUTE) 210 + assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); 211 + else 212 + if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 
0) 213 + direct_entry.arp.dst = route.dst; 214 + assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); 215 + } 216 + for (i = 0; i < 4; i++) 217 + prefix_key->data[i] = (route.dst >> i * 8) & 0xff; 218 + 219 + printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n", 220 + (int)prefix_key->data[0], 221 + (int)prefix_key->data[1], 222 + (int)prefix_key->data[2], 223 + (int)prefix_key->data[3], 224 + route.gw, route.dst_len, 225 + route.metric, 226 + route.iface_name); 227 + if (bpf_map_lookup_elem(map_fd[0], prefix_key, 228 + prefix_value) < 0) { 229 + for (i = 0; i < 4; i++) 230 + prefix_value->prefix[i] = prefix_key->data[i]; 231 + prefix_value->value = route.mac & 0xffffffffffff; 232 + prefix_value->ifindex = route.iface; 233 + prefix_value->gw = route.gw; 234 + prefix_value->metric = route.metric; 235 + 236 + assert(bpf_map_update_elem(map_fd[0], 237 + prefix_key, 238 + prefix_value, 0 239 + ) == 0); 240 + } else { 241 + if (nh->nlmsg_type == RTM_DELROUTE) { 242 + printf("deleting entry\n"); 243 + printf("prefix key=%d.%d.%d.%d/%d", 244 + prefix_key->data[0], 245 + prefix_key->data[1], 246 + prefix_key->data[2], 247 + prefix_key->data[3], 248 + prefix_key->prefixlen); 249 + assert(bpf_map_delete_elem(map_fd[0], 250 + prefix_key 251 + ) == 0); 252 + /* Rereading the route table to check if 253 + * there is an entry with the same 254 + * prefix but a different metric as the 255 + * deleted enty. 
256 + */ 257 + get_route_table(AF_INET); 258 + } else if (prefix_key->data[0] == 259 + prefix_value->prefix[0] && 260 + prefix_key->data[1] == 261 + prefix_value->prefix[1] && 262 + prefix_key->data[2] == 263 + prefix_value->prefix[2] && 264 + prefix_key->data[3] == 265 + prefix_value->prefix[3] && 266 + route.metric >= prefix_value->metric) { 267 + continue; 268 + } else { 269 + for (i = 0; i < 4; i++) 270 + prefix_value->prefix[i] = 271 + prefix_key->data[i]; 272 + prefix_value->value = 273 + route.mac & 0xffffffffffff; 274 + prefix_value->ifindex = route.iface; 275 + prefix_value->gw = route.gw; 276 + prefix_value->metric = route.metric; 277 + assert(bpf_map_update_elem( 278 + map_fd[0], 279 + prefix_key, 280 + prefix_value, 281 + 0) == 0); 282 + } 283 + } 284 + } 285 + memset(&route, 0, sizeof(route)); 286 + memset(dsts, 0, sizeof(dsts)); 287 + memset(dsts_len, 0, sizeof(dsts_len)); 288 + memset(gws, 0, sizeof(gws)); 289 + memset(ifs, 0, sizeof(ifs)); 290 + memset(&route, 0, sizeof(route)); 291 + } 292 + } 293 + 294 + /* Function to read the existing route table when the process is launched*/ 295 + static int get_route_table(int rtm_family) 296 + { 297 + struct sockaddr_nl sa; 298 + struct nlmsghdr *nh; 299 + int sock, seq = 0; 300 + struct msghdr msg; 301 + struct iovec iov; 302 + int ret = 0; 303 + int nll; 304 + 305 + struct { 306 + struct nlmsghdr nl; 307 + struct rtmsg rt; 308 + char buf[8192]; 309 + } req; 310 + 311 + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 312 + if (sock < 0) { 313 + printf("open netlink socket: %s\n", strerror(errno)); 314 + return -1; 315 + } 316 + memset(&sa, 0, sizeof(sa)); 317 + sa.nl_family = AF_NETLINK; 318 + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { 319 + printf("bind to netlink: %s\n", strerror(errno)); 320 + ret = -1; 321 + goto cleanup; 322 + } 323 + memset(&req, 0, sizeof(req)); 324 + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 325 + req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; 326 + 
req.nl.nlmsg_type = RTM_GETROUTE; 327 + 328 + req.rt.rtm_family = rtm_family; 329 + req.rt.rtm_table = RT_TABLE_MAIN; 330 + req.nl.nlmsg_pid = 0; 331 + req.nl.nlmsg_seq = ++seq; 332 + memset(&msg, 0, sizeof(msg)); 333 + iov.iov_base = (void *)&req.nl; 334 + iov.iov_len = req.nl.nlmsg_len; 335 + msg.msg_iov = &iov; 336 + msg.msg_iovlen = 1; 337 + ret = sendmsg(sock, &msg, 0); 338 + if (ret < 0) { 339 + printf("send to netlink: %s\n", strerror(errno)); 340 + ret = -1; 341 + goto cleanup; 342 + } 343 + memset(buf, 0, sizeof(buf)); 344 + nll = recv_msg(sa, sock); 345 + if (nll < 0) { 346 + printf("recv from netlink: %s\n", strerror(nll)); 347 + ret = -1; 348 + goto cleanup; 349 + } 350 + nh = (struct nlmsghdr *)buf; 351 + read_route(nh, nll); 352 + cleanup: 353 + close(sock); 354 + return ret; 355 + } 356 + 357 + /* Function to parse the arp entry returned by netlink 358 + * Updates the arp entry related map entries 359 + */ 360 + static void read_arp(struct nlmsghdr *nh, int nll) 361 + { 362 + struct rtattr *rt_attr; 363 + char dsts[24], mac[24]; 364 + struct ndmsg *rt_msg; 365 + int rtl, ndm_family; 366 + 367 + struct arp_table { 368 + __be64 mac; 369 + __be32 dst; 370 + } arp_entry; 371 + struct direct_map { 372 + struct arp_table arp; 373 + int ifindex; 374 + __be64 mac; 375 + } direct_entry; 376 + 377 + if (nh->nlmsg_type == RTM_GETNEIGH) 378 + printf("READING arp entry\n"); 379 + printf("Address\tHwAddress\n"); 380 + for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { 381 + rt_msg = (struct ndmsg *)NLMSG_DATA(nh); 382 + rt_attr = (struct rtattr *)RTM_RTA(rt_msg); 383 + ndm_family = rt_msg->ndm_family; 384 + rtl = RTM_PAYLOAD(nh); 385 + for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { 386 + switch (rt_attr->rta_type) { 387 + case NDA_DST: 388 + sprintf(dsts, "%u", 389 + *((__be32 *)RTA_DATA(rt_attr))); 390 + break; 391 + case NDA_LLADDR: 392 + sprintf(mac, "%lld", 393 + *((__be64 *)RTA_DATA(rt_attr))); 394 + break; 395 + default: 396 + break; 397 
+ } 398 + } 399 + arp_entry.dst = atoi(dsts); 400 + arp_entry.mac = atol(mac); 401 + printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); 402 + if (ndm_family == AF_INET) { 403 + if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, 404 + &direct_entry) == 0) { 405 + if (nh->nlmsg_type == RTM_DELNEIGH) { 406 + direct_entry.arp.dst = 0; 407 + direct_entry.arp.mac = 0; 408 + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { 409 + direct_entry.arp.dst = arp_entry.dst; 410 + direct_entry.arp.mac = arp_entry.mac; 411 + } 412 + assert(bpf_map_update_elem(map_fd[3], 413 + &arp_entry.dst, 414 + &direct_entry, 0 415 + ) == 0); 416 + memset(&direct_entry, 0, sizeof(direct_entry)); 417 + } 418 + if (nh->nlmsg_type == RTM_DELNEIGH) { 419 + assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); 420 + } else if (nh->nlmsg_type == RTM_NEWNEIGH) { 421 + assert(bpf_map_update_elem(map_fd[2], 422 + &arp_entry.dst, 423 + &arp_entry.mac, 0 424 + ) == 0); 425 + } 426 + } 427 + memset(&arp_entry, 0, sizeof(arp_entry)); 428 + memset(dsts, 0, sizeof(dsts)); 429 + } 430 + } 431 + 432 + /* Function to read the existing arp table when the process is launched*/ 433 + static int get_arp_table(int rtm_family) 434 + { 435 + struct sockaddr_nl sa; 436 + struct nlmsghdr *nh; 437 + int sock, seq = 0; 438 + struct msghdr msg; 439 + struct iovec iov; 440 + int ret = 0; 441 + int nll; 442 + struct { 443 + struct nlmsghdr nl; 444 + struct ndmsg rt; 445 + char buf[8192]; 446 + } req; 447 + 448 + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 449 + if (sock < 0) { 450 + printf("open netlink socket: %s\n", strerror(errno)); 451 + return -1; 452 + } 453 + memset(&sa, 0, sizeof(sa)); 454 + sa.nl_family = AF_NETLINK; 455 + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { 456 + printf("bind to netlink: %s\n", strerror(errno)); 457 + ret = -1; 458 + goto cleanup; 459 + } 460 + memset(&req, 0, sizeof(req)); 461 + req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 462 + req.nl.nlmsg_flags = 
NLM_F_REQUEST | NLM_F_DUMP; 463 + req.nl.nlmsg_type = RTM_GETNEIGH; 464 + req.rt.ndm_state = NUD_REACHABLE; 465 + req.rt.ndm_family = rtm_family; 466 + req.nl.nlmsg_pid = 0; 467 + req.nl.nlmsg_seq = ++seq; 468 + memset(&msg, 0, sizeof(msg)); 469 + iov.iov_base = (void *)&req.nl; 470 + iov.iov_len = req.nl.nlmsg_len; 471 + msg.msg_iov = &iov; 472 + msg.msg_iovlen = 1; 473 + ret = sendmsg(sock, &msg, 0); 474 + if (ret < 0) { 475 + printf("send to netlink: %s\n", strerror(errno)); 476 + ret = -1; 477 + goto cleanup; 478 + } 479 + memset(buf, 0, sizeof(buf)); 480 + nll = recv_msg(sa, sock); 481 + if (nll < 0) { 482 + printf("recv from netlink: %s\n", strerror(nll)); 483 + ret = -1; 484 + goto cleanup; 485 + } 486 + nh = (struct nlmsghdr *)buf; 487 + read_arp(nh, nll); 488 + cleanup: 489 + close(sock); 490 + return ret; 491 + } 492 + 493 + /* Function to keep track and update changes in route and arp table 494 + * Give regular statistics of packets forwarded 495 + */ 496 + static int monitor_route(void) 497 + { 498 + unsigned int nr_cpus = bpf_num_possible_cpus(); 499 + const unsigned int nr_keys = 256; 500 + struct pollfd fds_route, fds_arp; 501 + __u64 prev[nr_keys][nr_cpus]; 502 + struct sockaddr_nl la, lr; 503 + __u64 values[nr_cpus]; 504 + struct nlmsghdr *nh; 505 + int nll, ret = 0; 506 + int interval = 5; 507 + __u32 key; 508 + int i; 509 + 510 + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 511 + if (sock < 0) { 512 + printf("open netlink socket: %s\n", strerror(errno)); 513 + return -1; 514 + } 515 + 516 + fcntl(sock, F_SETFL, O_NONBLOCK); 517 + memset(&lr, 0, sizeof(lr)); 518 + lr.nl_family = AF_NETLINK; 519 + lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; 520 + if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) { 521 + printf("bind to netlink: %s\n", strerror(errno)); 522 + ret = -1; 523 + goto cleanup; 524 + } 525 + fds_route.fd = sock; 526 + fds_route.events = POLL_IN; 527 + 528 + sock_arp = socket(AF_NETLINK, SOCK_RAW, 
NETLINK_ROUTE); 529 + if (sock_arp < 0) { 530 + printf("open netlink socket: %s\n", strerror(errno)); 531 + return -1; 532 + } 533 + 534 + fcntl(sock_arp, F_SETFL, O_NONBLOCK); 535 + memset(&la, 0, sizeof(la)); 536 + la.nl_family = AF_NETLINK; 537 + la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; 538 + if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) { 539 + printf("bind to netlink: %s\n", strerror(errno)); 540 + ret = -1; 541 + goto cleanup; 542 + } 543 + fds_arp.fd = sock_arp; 544 + fds_arp.events = POLL_IN; 545 + 546 + memset(prev, 0, sizeof(prev)); 547 + do { 548 + signal(SIGINT, close_and_exit); 549 + signal(SIGTERM, close_and_exit); 550 + 551 + sleep(interval); 552 + for (key = 0; key < nr_keys; key++) { 553 + __u64 sum = 0; 554 + 555 + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); 556 + for (i = 0; i < nr_cpus; i++) 557 + sum += (values[i] - prev[key][i]); 558 + if (sum) 559 + printf("proto %u: %10llu pkt/s\n", 560 + key, sum / interval); 561 + memcpy(prev[key], values, sizeof(values)); 562 + } 563 + 564 + memset(buf, 0, sizeof(buf)); 565 + if (poll(&fds_route, 1, 3) == POLL_IN) { 566 + nll = recv_msg(lr, sock); 567 + if (nll < 0) { 568 + printf("recv from netlink: %s\n", strerror(nll)); 569 + ret = -1; 570 + goto cleanup; 571 + } 572 + 573 + nh = (struct nlmsghdr *)buf; 574 + printf("Routing table updated.\n"); 575 + read_route(nh, nll); 576 + } 577 + memset(buf, 0, sizeof(buf)); 578 + if (poll(&fds_arp, 1, 3) == POLL_IN) { 579 + nll = recv_msg(la, sock_arp); 580 + if (nll < 0) { 581 + printf("recv from netlink: %s\n", strerror(nll)); 582 + ret = -1; 583 + goto cleanup; 584 + } 585 + 586 + nh = (struct nlmsghdr *)buf; 587 + read_arp(nh, nll); 588 + } 589 + 590 + } while (1); 591 + cleanup: 592 + close(sock); 593 + return ret; 594 + } 595 + 596 + int main(int ac, char **argv) 597 + { 598 + char filename[256]; 599 + char **ifname_list; 600 + int i = 1; 601 + 602 + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 603 + if (ac < 
2) { 604 + printf("usage: %s [-S] Interface name list\n", argv[0]); 605 + return 1; 606 + } 607 + if (!strcmp(argv[1], "-S")) { 608 + flags = XDP_FLAGS_SKB_MODE; 609 + total_ifindex = ac - 2; 610 + ifname_list = (argv + 2); 611 + } else { 612 + flags = 0; 613 + total_ifindex = ac - 1; 614 + ifname_list = (argv + 1); 615 + } 616 + if (load_bpf_file(filename)) { 617 + printf("%s", bpf_log_buf); 618 + return 1; 619 + } 620 + printf("\n**************loading bpf file*********************\n\n\n"); 621 + if (!prog_fd[0]) { 622 + printf("load_bpf_file: %s\n", strerror(errno)); 623 + return 1; 624 + } 625 + ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); 626 + for (i = 0; i < total_ifindex; i++) { 627 + ifindex_list[i] = if_nametoindex(ifname_list[i]); 628 + if (!ifindex_list[i]) { 629 + printf("Couldn't translate interface name: %s", 630 + strerror(errno)); 631 + return 1; 632 + } 633 + } 634 + for (i = 0; i < total_ifindex; i++) { 635 + if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { 636 + printf("link set xdp fd failed\n"); 637 + int recovery_index = i; 638 + 639 + for (i = 0; i < recovery_index; i++) 640 + set_link_xdp_fd(ifindex_list[i], -1, flags); 641 + 642 + return 1; 643 + } 644 + printf("Attached to %d\n", ifindex_list[i]); 645 + } 646 + signal(SIGINT, int_exit); 647 + signal(SIGTERM, int_exit); 648 + 649 + printf("*******************ROUTE TABLE*************************\n\n\n"); 650 + get_route_table(AF_INET); 651 + printf("*******************ARP TABLE***************************\n\n\n"); 652 + get_arp_table(AF_INET); 653 + if (monitor_route() < 0) { 654 + printf("Error in receiving route update"); 655 + return 1; 656 + } 657 + 658 + return 0; 659 + }