Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'af-xdp-sock-diag'

Björn Töpel says:

====================
This series adds an AF_XDP sock_diag interface for querying sockets
from user-space. Tools like iproute2 ss(8) can use this interface to
list open AF_XDP sockets.

The diagnostic provides information about the Rx/Tx/fill/completion
rings, umem, memory usage and such. For a complete list, please refer
to the xsk_diag.c file.

The AF_XDP sock_diag interface is optional, and can be built as a
module. A separate patch series, adding ss(8) iproute2 support, will
follow.

v1->v2:
* Removed extra newline
* Zero-out all user-space facing structures prior to setting the
members
* Added explicit "pad" member in _msg struct
* Removed unused variable "req" in xsk_diag_handler_dump()

Thanks to Daniel for reviewing the series!
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

+346 -5
+4
include/net/net_namespace.h
··· 31 31 #include <net/netns/xfrm.h> 32 32 #include <net/netns/mpls.h> 33 33 #include <net/netns/can.h> 34 + #include <net/netns/xdp.h> 34 35 #include <linux/ns_common.h> 35 36 #include <linux/idr.h> 36 37 #include <linux/skbuff.h> ··· 161 160 #endif 162 161 #if IS_ENABLED(CONFIG_CAN) 163 162 struct netns_can can; 163 + #endif 164 + #ifdef CONFIG_XDP_SOCKETS 165 + struct netns_xdp xdp; 164 166 #endif 165 167 struct sock *diag_nlsk; 166 168 atomic_t fnhe_genid;
+13
include/net/netns/xdp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __NETNS_XDP_H__ 3 + #define __NETNS_XDP_H__ 4 + 5 + #include <linux/rculist.h> 6 + #include <linux/mutex.h> 7 + 8 + struct netns_xdp { 9 + struct mutex lock; 10 + struct hlist_head list; 11 + }; 12 + 13 + #endif /* __NETNS_XDP_H__ */
+1
include/net/xdp_sock.h
··· 42 42 struct work_struct work; 43 43 struct page **pgs; 44 44 u32 npgs; 45 + int id; 45 46 struct net_device *dev; 46 47 struct xdp_umem_fq_reuse *fq_reuse; 47 48 u16 queue_id;
+72
include/uapi/linux/xdp_diag.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * xdp_diag: interface for query/monitor XDP sockets
 * Copyright(c) 2019 Intel Corporation.
 */

#ifndef _LINUX_XDP_DIAG_H
#define _LINUX_XDP_DIAG_H

#include <linux/types.h>

/*
 * Dump request sent by user space as the netlink message payload.
 * The kernel handler rejects messages shorter than this structure.
 */
struct xdp_diag_req {
	__u8	sdiag_family;
	__u8	sdiag_protocol;
	__u16	pad;
	__u32	xdiag_ino;
	__u32	xdiag_show;		/* Bitmask of XDP_SHOW_* flags */
	__u32	xdiag_cookie[2];
};

/*
 * Fixed header of every reply message; netlink attributes
 * (XDP_DIAG_*) selected through xdiag_show follow it.
 */
struct xdp_diag_msg {
	__u8	xdiag_family;		/* Always AF_XDP */
	__u8	xdiag_type;		/* Socket type (sk_type) */
	__u16	pad;
	__u32	xdiag_ino;		/* Socket inode number */
	__u32	xdiag_cookie[2];	/* Socket cookie */
};

/* Values for xdp_diag_req.xdiag_show */
#define XDP_SHOW_INFO		(1 << 0) /* Basic information: XDP_DIAG_INFO + XDP_DIAG_UID */
#define XDP_SHOW_RING_CFG	(1 << 1) /* XDP_DIAG_RX_RING / XDP_DIAG_TX_RING */
#define XDP_SHOW_UMEM		(1 << 2) /* XDP_DIAG_UMEM and its fill/completion rings */
#define XDP_SHOW_MEMINFO	(1 << 3) /* XDP_DIAG_MEMINFO */

/* Netlink attribute types carried in a reply */
enum {
	XDP_DIAG_NONE,
	XDP_DIAG_INFO,			/* struct xdp_diag_info */
	XDP_DIAG_UID,			/* __u32 uid of the socket owner */
	XDP_DIAG_RX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_TX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_UMEM,			/* struct xdp_diag_umem */
	XDP_DIAG_UMEM_FILL_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_UMEM_COMPLETION_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_MEMINFO,		/* Socket memory info */
	__XDP_DIAG_MAX,
};

#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)

/* Payload of XDP_DIAG_INFO */
struct xdp_diag_info {
	__u32	ifindex;	/* 0 when the socket has no device */
	__u32	queue_id;
};

/* Payload of the ring attributes */
struct xdp_diag_ring {
	__u32	entries;	/* Number of descriptors */
};

/* Bits for xdp_diag_umem.flags */
#define XDP_DU_F_ZEROCOPY (1 << 0)

/* Payload of XDP_DIAG_UMEM */
struct xdp_diag_umem {
	__u64	size;
	__u32	id;
	__u32	num_pages;
	__u32	chunk_size;
	__u32	headroom;
	__u32	ifindex;	/* 0 when the umem has no device */
	__u32	queue_id;
	__u32	flags;		/* XDP_DU_F_* */
	__u32	refs;		/* Current umem user refcount */
};

#endif /* _LINUX_XDP_DIAG_H */
+8
net/xdp/Kconfig
··· 5 5 help 6 6 XDP sockets allows a channel between XDP programs and 7 7 userspace applications. 8 + 9 + config XDP_SOCKETS_DIAG 10 + tristate "XDP sockets: monitoring interface" 11 + depends on XDP_SOCKETS 12 + default n 13 + help 14 + Support for PF_XDP sockets monitoring interface used by the ss tool. 15 + If unsure, say Y.
+1
net/xdp/Makefile
··· 1 1 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o 2 + obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
+13
net/xdp/xdp_umem.c
··· 13 13 #include <linux/mm.h> 14 14 #include <linux/netdevice.h> 15 15 #include <linux/rtnetlink.h> 16 + #include <linux/idr.h> 16 17 17 18 #include "xdp_umem.h" 18 19 #include "xsk_queue.h" 19 20 20 21 #define XDP_UMEM_MIN_CHUNK_SIZE 2048 22 + 23 + static DEFINE_IDA(umem_ida); 21 24 22 25 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) 23 26 { ··· 185 182 struct mm_struct *mm; 186 183 187 184 xdp_umem_clear_dev(umem); 185 + 186 + ida_simple_remove(&umem_ida, umem->id); 188 187 189 188 if (umem->fq) { 190 189 xskq_destroy(umem->fq); ··· 394 389 if (!umem) 395 390 return ERR_PTR(-ENOMEM); 396 391 392 + err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); 393 + if (err < 0) { 394 + kfree(umem); 395 + return ERR_PTR(err); 396 + } 397 + umem->id = err; 398 + 397 399 err = xdp_umem_reg(umem, mr); 398 400 if (err) { 401 + ida_simple_remove(&umem_ida, umem->id); 399 402 kfree(umem); 400 403 return ERR_PTR(err); 401 404 }
+31 -5
net/xdp/xsk.c
··· 27 27 28 28 #include "xsk_queue.h" 29 29 #include "xdp_umem.h" 30 + #include "xsk.h" 30 31 31 32 #define TX_BATCH_SIZE 16 32 - 33 - static struct xdp_sock *xdp_sk(struct sock *sk) 34 - { 35 - return (struct xdp_sock *)sk; 36 - } 37 33 38 34 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) 39 35 { ··· 345 349 return 0; 346 350 347 351 net = sock_net(sk); 352 + 353 + mutex_lock(&net->xdp.lock); 354 + sk_del_node_init_rcu(sk); 355 + mutex_unlock(&net->xdp.lock); 348 356 349 357 local_bh_disable(); 350 358 sock_prot_inuse_add(net, sk->sk_prot, -1); ··· 746 746 mutex_init(&xs->mutex); 747 747 spin_lock_init(&xs->tx_completion_lock); 748 748 749 + mutex_lock(&net->xdp.lock); 750 + sk_add_node_rcu(sk, &net->xdp.list); 751 + mutex_unlock(&net->xdp.lock); 752 + 749 753 local_bh_disable(); 750 754 sock_prot_inuse_add(net, &xsk_proto, 1); 751 755 local_bh_enable(); ··· 761 757 .family = PF_XDP, 762 758 .create = xsk_create, 763 759 .owner = THIS_MODULE, 760 + }; 761 + 762 + static int __net_init xsk_net_init(struct net *net) 763 + { 764 + mutex_init(&net->xdp.lock); 765 + INIT_HLIST_HEAD(&net->xdp.list); 766 + return 0; 767 + } 768 + 769 + static void __net_exit xsk_net_exit(struct net *net) 770 + { 771 + WARN_ON_ONCE(!hlist_empty(&net->xdp.list)); 772 + } 773 + 774 + static struct pernet_operations xsk_net_ops = { 775 + .init = xsk_net_init, 776 + .exit = xsk_net_exit, 764 777 }; 765 778 766 779 static int __init xsk_init(void) ··· 792 771 if (err) 793 772 goto out_proto; 794 773 774 + err = register_pernet_subsys(&xsk_net_ops); 775 + if (err) 776 + goto out_sk; 795 777 return 0; 796 778 779 + out_sk: 780 + sock_unregister(PF_XDP); 797 781 out_proto: 798 782 proto_unregister(&xsk_proto); 799 783 out:
+12
net/xdp/xsk.h
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2019 Intel Corporation. */

#ifndef XSK_H_
#define XSK_H_

/*
 * View a generic socket as an AF_XDP socket.
 *
 * This is a plain pointer cast with no type check, so @sk must really
 * be an AF_XDP socket.
 * NOTE(review): presumably relies on struct sock being the first member
 * of struct xdp_sock -- confirm against net/xdp_sock.h.
 */
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
	struct xdp_sock *xs = (struct xdp_sock *)sk;

	return xs;
}

#endif /* XSK_H_ */
+191
net/xdp/xsk_diag.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* XDP sockets monitoring support 3 + * 4 + * Copyright(c) 2019 Intel Corporation. 5 + * 6 + * Author: Björn Töpel <bjorn.topel@intel.com> 7 + */ 8 + 9 + #include <linux/module.h> 10 + #include <net/xdp_sock.h> 11 + #include <linux/xdp_diag.h> 12 + #include <linux/sock_diag.h> 13 + 14 + #include "xsk_queue.h" 15 + #include "xsk.h" 16 + 17 + static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) 18 + { 19 + struct xdp_diag_info di = {}; 20 + 21 + di.ifindex = xs->dev ? xs->dev->ifindex : 0; 22 + di.queue_id = xs->queue_id; 23 + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); 24 + } 25 + 26 + static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, 27 + struct sk_buff *nlskb) 28 + { 29 + struct xdp_diag_ring dr = {}; 30 + 31 + dr.entries = queue->nentries; 32 + return nla_put(nlskb, nl_type, sizeof(dr), &dr); 33 + } 34 + 35 + static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, 36 + struct sk_buff *nlskb) 37 + { 38 + int err = 0; 39 + 40 + if (xs->rx) 41 + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); 42 + if (!err && xs->tx) 43 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); 44 + return err; 45 + } 46 + 47 + static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) 48 + { 49 + struct xdp_umem *umem = xs->umem; 50 + struct xdp_diag_umem du = {}; 51 + int err; 52 + 53 + if (!umem) 54 + return 0; 55 + 56 + du.id = umem->id; 57 + du.size = umem->size; 58 + du.num_pages = umem->npgs; 59 + du.chunk_size = (__u32)(~umem->chunk_mask + 1); 60 + du.headroom = umem->headroom; 61 + du.ifindex = umem->dev ? 
umem->dev->ifindex : 0; 62 + du.queue_id = umem->queue_id; 63 + du.flags = 0; 64 + if (umem->zc) 65 + du.flags |= XDP_DU_F_ZEROCOPY; 66 + du.refs = refcount_read(&umem->users); 67 + 68 + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); 69 + 70 + if (!err && umem->fq) 71 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); 72 + if (!err && umem->cq) { 73 + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, 74 + nlskb); 75 + } 76 + return err; 77 + } 78 + 79 + static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, 80 + struct xdp_diag_req *req, 81 + struct user_namespace *user_ns, 82 + u32 portid, u32 seq, u32 flags, int sk_ino) 83 + { 84 + struct xdp_sock *xs = xdp_sk(sk); 85 + struct xdp_diag_msg *msg; 86 + struct nlmsghdr *nlh; 87 + 88 + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), 89 + flags); 90 + if (!nlh) 91 + return -EMSGSIZE; 92 + 93 + msg = nlmsg_data(nlh); 94 + memset(msg, 0, sizeof(*msg)); 95 + msg->xdiag_family = AF_XDP; 96 + msg->xdiag_type = sk->sk_type; 97 + msg->xdiag_ino = sk_ino; 98 + sock_diag_save_cookie(sk, msg->xdiag_cookie); 99 + 100 + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) 101 + goto out_nlmsg_trim; 102 + 103 + if ((req->xdiag_show & XDP_SHOW_INFO) && 104 + nla_put_u32(nlskb, XDP_DIAG_UID, 105 + from_kuid_munged(user_ns, sock_i_uid(sk)))) 106 + goto out_nlmsg_trim; 107 + 108 + if ((req->xdiag_show & XDP_SHOW_RING_CFG) && 109 + xsk_diag_put_rings_cfg(xs, nlskb)) 110 + goto out_nlmsg_trim; 111 + 112 + if ((req->xdiag_show & XDP_SHOW_UMEM) && 113 + xsk_diag_put_umem(xs, nlskb)) 114 + goto out_nlmsg_trim; 115 + 116 + if ((req->xdiag_show & XDP_SHOW_MEMINFO) && 117 + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) 118 + goto out_nlmsg_trim; 119 + 120 + nlmsg_end(nlskb, nlh); 121 + return 0; 122 + 123 + out_nlmsg_trim: 124 + nlmsg_cancel(nlskb, nlh); 125 + return -EMSGSIZE; 126 + } 127 + 128 + static int xsk_diag_dump(struct sk_buff *nlskb, struct 
netlink_callback *cb) 129 + { 130 + struct xdp_diag_req *req = nlmsg_data(cb->nlh); 131 + struct net *net = sock_net(nlskb->sk); 132 + int num = 0, s_num = cb->args[0]; 133 + struct sock *sk; 134 + 135 + mutex_lock(&net->xdp.lock); 136 + 137 + sk_for_each(sk, &net->xdp.list) { 138 + if (!net_eq(sock_net(sk), net)) 139 + continue; 140 + if (num++ < s_num) 141 + continue; 142 + 143 + if (xsk_diag_fill(sk, nlskb, req, 144 + sk_user_ns(NETLINK_CB(cb->skb).sk), 145 + NETLINK_CB(cb->skb).portid, 146 + cb->nlh->nlmsg_seq, NLM_F_MULTI, 147 + sock_i_ino(sk)) < 0) { 148 + num--; 149 + break; 150 + } 151 + } 152 + 153 + mutex_unlock(&net->xdp.lock); 154 + cb->args[0] = num; 155 + return nlskb->len; 156 + } 157 + 158 + static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) 159 + { 160 + struct netlink_dump_control c = { .dump = xsk_diag_dump }; 161 + int hdrlen = sizeof(struct xdp_diag_req); 162 + struct net *net = sock_net(nlskb->sk); 163 + 164 + if (nlmsg_len(hdr) < hdrlen) 165 + return -EINVAL; 166 + 167 + if (!(hdr->nlmsg_flags & NLM_F_DUMP)) 168 + return -EOPNOTSUPP; 169 + 170 + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); 171 + } 172 + 173 + static const struct sock_diag_handler xsk_diag_handler = { 174 + .family = AF_XDP, 175 + .dump = xsk_diag_handler_dump, 176 + }; 177 + 178 + static int __init xsk_diag_init(void) 179 + { 180 + return sock_diag_register(&xsk_diag_handler); 181 + } 182 + 183 + static void __exit xsk_diag_exit(void) 184 + { 185 + sock_diag_unregister(&xsk_diag_handler); 186 + } 187 + 188 + module_init(xsk_diag_init); 189 + module_exit(xsk_diag_exit); 190 + MODULE_LICENSE("GPL"); 191 + MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);