Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xsk: support mbuf on ZC RX

Given that skb_shared_info relies on skb_frag_t, in order to support
xskb chaining, introduce xdp_buff_xsk::xskb_list_node and
xsk_buff_pool::xskb_list.

This is needed so ZC drivers can add frags as xskb nodes which will make
it possible to handle it both when producing AF_XDP Rx descriptors as
well as freeing/recycling all the frags that a single frame carries.

Speaking of the latter, update xsk_buff_free() to take care of list nodes.
For the former (adding as frags), introduce xsk_buff_add_frag() for ZC
drivers to use when adding a frag to the xskb list from the pool.

xsk_buff_get_frag() will be utilized by XDP_TX and, in contrast, will
return an xdp_buff.

One of the previous patches added a wrapper for ZC Rx so implement xskb
list walk and production of Rx descriptors there.

On bind() path, bail out if socket wants to use ZC multi-buffer but
underlying netdev does not support it.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230719132421.584801-12-maciej.fijalkowski@intel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Maciej Fijalkowski and committed by
Alexei Starovoitov
24ea5012 13ce2daa

+78 -1
+44
include/net/xdp_sock_drv.h
··· 108 108 static inline void xsk_buff_free(struct xdp_buff *xdp) 109 109 { 110 110 struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); 111 + struct list_head *xskb_list = &xskb->pool->xskb_list; 112 + struct xdp_buff_xsk *pos, *tmp; 111 113 114 + if (likely(!xdp_buff_has_frags(xdp))) 115 + goto out; 116 + 117 + list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { 118 + list_del(&pos->xskb_list_node); 119 + xp_free(pos); 120 + } 121 + 122 + xdp_get_shared_info_from_buff(xdp)->nr_frags = 0; 123 + out: 112 124 xp_free(xskb); 125 + } 126 + 127 + static inline void xsk_buff_add_frag(struct xdp_buff *xdp) 128 + { 129 + struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); 130 + 131 + list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list); 132 + } 133 + 134 + static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) 135 + { 136 + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); 137 + struct xdp_buff *ret = NULL; 138 + struct xdp_buff_xsk *frag; 139 + 140 + frag = list_first_entry_or_null(&xskb->pool->xskb_list, 141 + struct xdp_buff_xsk, xskb_list_node); 142 + if (frag) { 143 + list_del(&frag->xskb_list_node); 144 + ret = &frag->xdp; 145 + } 146 + 147 + return ret; 113 148 } 114 149 115 150 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) ··· 298 263 299 264 static inline void xsk_buff_free(struct xdp_buff *xdp) 300 265 { 266 + } 267 + 268 + static inline void xsk_buff_add_frag(struct xdp_buff *xdp) 269 + { 270 + } 271 + 272 + static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) 273 + { 274 + return NULL; 301 275 } 302 276 303 277 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
+2
include/net/xsk_buff_pool.h
··· 29 29 struct xsk_buff_pool *pool; 30 30 u64 orig_addr; 31 31 struct list_head free_list_node; 32 + struct list_head xskb_list_node; 32 33 }; 33 34 34 35 #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) ··· 55 54 struct xdp_umem *umem; 56 55 struct work_struct work; 57 56 struct list_head free_list; 57 + struct list_head xskb_list; 58 58 u32 heads_cnt; 59 59 u16 queue_id; 60 60
+25 -1
net/xdp/xsk.c
··· 155 155 static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) 156 156 { 157 157 struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); 158 + u32 frags = xdp_buff_has_frags(xdp); 159 + struct xdp_buff_xsk *pos, *tmp; 160 + struct list_head *xskb_list; 161 + u32 contd = 0; 162 + int err; 158 163 159 - return __xsk_rcv_zc(xs, xskb, len, 0); 164 + if (frags) 165 + contd = XDP_PKT_CONTD; 166 + 167 + err = __xsk_rcv_zc(xs, xskb, len, contd); 168 + if (err || likely(!frags)) 169 + goto out; 170 + 171 + xskb_list = &xskb->pool->xskb_list; 172 + list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { 173 + if (list_is_singular(xskb_list)) 174 + contd = 0; 175 + len = pos->xdp.data_end - pos->xdp.data; 176 + err = __xsk_rcv_zc(xs, pos, len, contd); 177 + if (err) 178 + return err; 179 + list_del(&pos->xskb_list_node); 180 + } 181 + 182 + out: 183 + return err; 160 184 } 161 185 162 186 static void *xsk_copy_xdp_start(struct xdp_buff *from)
+7
net/xdp/xsk_buff_pool.c
··· 86 86 pool->umem = umem; 87 87 pool->addrs = umem->addrs; 88 88 INIT_LIST_HEAD(&pool->free_list); 89 + INIT_LIST_HEAD(&pool->xskb_list); 89 90 INIT_LIST_HEAD(&pool->xsk_tx_list); 90 91 spin_lock_init(&pool->xsk_tx_list_lock); 91 92 spin_lock_init(&pool->cq_lock); ··· 100 99 xskb->pool = pool; 101 100 xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; 102 101 INIT_LIST_HEAD(&xskb->free_list_node); 102 + INIT_LIST_HEAD(&xskb->xskb_list_node); 103 103 if (pool->unaligned) 104 104 pool->free_heads[i] = xskb; 105 105 else ··· 185 183 return 0; 186 184 187 185 if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) { 186 + err = -EOPNOTSUPP; 187 + goto err_unreg_pool; 188 + } 189 + 190 + if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) { 188 191 err = -EOPNOTSUPP; 189 192 goto err_unreg_pool; 190 193 }