Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'shrink-struct-ubuf_info'

Pavel Begunkov says:

====================
shrink struct ubuf_info

struct ubuf_info is large but not all fields are needed for all
cases. We have limited space in io_uring for it and large ubuf_info
prevents some struct embedding, even though we use only a subset
of the fields. It's also not very clean trying to use this typeless
extra space.

Shrink struct ubuf_info to only necessary fields used in generic paths,
namely ->callback, ->refcnt and ->flags, which take only 16 bytes. And
make MSG_ZEROCOPY and some other users embed it into a larger struct
ubuf_info_msgzc mimicking the former ubuf_info.

Note: xen/vhost may also get some cleanup on top by creating
new structs containing ubuf_info but with proper types.
====================

Link: https://lore.kernel.org/r/cover.1663892211.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+48 -35
+1 -1
drivers/net/xen-netback/common.h
··· 62 62 * ubuf_to_vif is a helper which finds the struct xenvif from a pointer 63 63 * to this field. 64 64 */ 65 - struct ubuf_info callback_struct; 65 + struct ubuf_info_msgzc callback_struct; 66 66 }; 67 67 68 68 #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
+2 -2
drivers/net/xen-netback/interface.c
··· 591 591 } 592 592 593 593 for (i = 0; i < MAX_PENDING_REQS; i++) { 594 - queue->pending_tx_info[i].callback_struct = (struct ubuf_info) 595 - { .callback = xenvif_zerocopy_callback, 594 + queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc) 595 + { { .callback = xenvif_zerocopy_callback }, 596 596 { { .ctx = NULL, 597 597 .desc = i } } }; 598 598 queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+4 -3
drivers/net/xen-netback/netback.c
··· 133 133 134 134 /* Find the containing VIF's structure from a pointer in pending_tx_info array 135 135 */ 136 - static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf) 136 + static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf) 137 137 { 138 138 u16 pending_idx = ubuf->desc; 139 139 struct pending_tx_info *temp = ··· 1228 1228 return work_done; 1229 1229 } 1230 1230 1231 - void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, 1231 + void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base, 1232 1232 bool zerocopy_success) 1233 1233 { 1234 1234 unsigned long flags; 1235 1235 pending_ring_idx_t index; 1236 + struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); 1236 1237 struct xenvif_queue *queue = ubuf_to_queue(ubuf); 1237 1238 1238 1239 /* This is the only place where we grab this lock, to protect callbacks ··· 1242 1241 spin_lock_irqsave(&queue->callback_lock, flags); 1243 1242 do { 1244 1243 u16 pending_idx = ubuf->desc; 1245 - ubuf = (struct ubuf_info *) ubuf->ctx; 1244 + ubuf = (struct ubuf_info_msgzc *) ubuf->ctx; 1246 1245 BUG_ON(queue->dealloc_prod - queue->dealloc_cons >= 1247 1246 MAX_PENDING_REQS); 1248 1247 index = pending_index(queue->dealloc_prod);
+8 -7
drivers/vhost/net.c
··· 118 118 /* Number of XDP frames batched */ 119 119 int batched_xdp; 120 120 /* an array of userspace buffers info */ 121 - struct ubuf_info *ubuf_info; 121 + struct ubuf_info_msgzc *ubuf_info; 122 122 /* Reference counting for outstanding ubufs. 123 123 * Protected by vq mutex. Writers must also take device mutex. */ 124 124 struct vhost_net_ubuf_ref *ubufs; ··· 382 382 } 383 383 384 384 static void vhost_zerocopy_callback(struct sk_buff *skb, 385 - struct ubuf_info *ubuf, bool success) 385 + struct ubuf_info *ubuf_base, bool success) 386 386 { 387 + struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); 387 388 struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; 388 389 struct vhost_virtqueue *vq = ubufs->vq; 389 390 int cnt; ··· 872 871 size_t len, total_len = 0; 873 872 int err; 874 873 struct vhost_net_ubuf_ref *ubufs; 875 - struct ubuf_info *ubuf; 874 + struct ubuf_info_msgzc *ubuf; 876 875 bool zcopy_used; 877 876 int sent_pkts = 0; 878 877 ··· 908 907 ubuf = nvq->ubuf_info + nvq->upend_idx; 909 908 vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); 910 909 vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; 911 - ubuf->callback = vhost_zerocopy_callback; 912 910 ubuf->ctx = nvq->ubufs; 913 911 ubuf->desc = nvq->upend_idx; 914 - ubuf->flags = SKBFL_ZEROCOPY_FRAG; 915 - refcount_set(&ubuf->refcnt, 1); 912 + ubuf->ubuf.callback = vhost_zerocopy_callback; 913 + ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG; 914 + refcount_set(&ubuf->ubuf.refcnt, 1); 916 915 msg.msg_control = &ctl; 917 916 ctl.type = TUN_MSG_UBUF; 918 - ctl.ptr = ubuf; 917 + ctl.ptr = &ubuf->ubuf; 919 918 msg.msg_controllen = sizeof(ctl); 920 919 ubufs = nvq->ubufs; 921 920 atomic_inc(&ubufs->refcount);
+9 -2
include/linux/skbuff.h
··· 533 533 struct ubuf_info { 534 534 void (*callback)(struct sk_buff *, struct ubuf_info *, 535 535 bool zerocopy_success); 536 + refcount_t refcnt; 537 + u8 flags; 538 + }; 539 + 540 + struct ubuf_info_msgzc { 541 + struct ubuf_info ubuf; 542 + 536 543 union { 537 544 struct { 538 545 unsigned long desc; ··· 552 545 u32 bytelen; 553 546 }; 554 547 }; 555 - refcount_t refcnt; 556 - u8 flags; 557 548 558 549 struct mmpin { 559 550 struct user_struct *user; ··· 560 555 }; 561 556 562 557 #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) 558 + #define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \ 559 + ubuf) 563 560 564 561 int mm_account_pinned_pages(struct mmpin *mmp, size_t size); 565 562 void mm_unaccount_pinned_pages(struct mmpin *mmp);
+21 -17
net/core/skbuff.c
··· 1188 1188 1189 1189 static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) 1190 1190 { 1191 - struct ubuf_info *uarg; 1191 + struct ubuf_info_msgzc *uarg; 1192 1192 struct sk_buff *skb; 1193 1193 1194 1194 WARN_ON_ONCE(!in_task()); ··· 1206 1206 return NULL; 1207 1207 } 1208 1208 1209 - uarg->callback = msg_zerocopy_callback; 1209 + uarg->ubuf.callback = msg_zerocopy_callback; 1210 1210 uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; 1211 1211 uarg->len = 1; 1212 1212 uarg->bytelen = size; 1213 1213 uarg->zerocopy = 1; 1214 - uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; 1215 - refcount_set(&uarg->refcnt, 1); 1214 + uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; 1215 + refcount_set(&uarg->ubuf.refcnt, 1); 1216 1216 sock_hold(sk); 1217 1217 1218 - return uarg; 1218 + return &uarg->ubuf; 1219 1219 } 1220 1220 1221 - static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) 1221 + static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) 1222 1222 { 1223 1223 return container_of((void *)uarg, struct sk_buff, cb); 1224 1224 } ··· 1227 1227 struct ubuf_info *uarg) 1228 1228 { 1229 1229 if (uarg) { 1230 + struct ubuf_info_msgzc *uarg_zc; 1230 1231 const u32 byte_limit = 1 << 19; /* limit to a few TSO */ 1231 1232 u32 bytelen, next; 1232 1233 ··· 1243 1242 return NULL; 1244 1243 } 1245 1244 1246 - bytelen = uarg->bytelen + size; 1247 - if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { 1245 + uarg_zc = uarg_to_msgzc(uarg); 1246 + bytelen = uarg_zc->bytelen + size; 1247 + if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) { 1248 1248 /* TCP can create new skb to attach new uarg */ 1249 1249 if (sk->sk_type == SOCK_STREAM) 1250 1250 goto new_alloc; ··· 1253 1251 } 1254 1252 1255 1253 next = (u32)atomic_read(&sk->sk_zckey); 1256 - if ((u32)(uarg->id + uarg->len) == next) { 1257 - if (mm_account_pinned_pages(&uarg->mmp, size)) 1254 + if ((u32)(uarg_zc->id + uarg_zc->len) == next) { 1255 + if (mm_account_pinned_pages(&uarg_zc->mmp, size)) 1258 1256 return NULL; 1259 - uarg->len++; 1260 - uarg->bytelen = bytelen; 1257 + uarg_zc->len++; 1258 + uarg_zc->bytelen = bytelen; 1261 1259 atomic_set(&sk->sk_zckey, ++next); 1262 1260 1263 1261 /* no extra ref when appending to datagram (MSG_MORE) */ ··· 1293 1291 return true; 1294 1292 } 1295 1293 1296 - static void __msg_zerocopy_callback(struct ubuf_info *uarg) 1294 + static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg) 1297 1295 { 1298 1296 struct sk_buff *tail, *skb = skb_from_uarg(uarg); 1299 1297 struct sock_exterr_skb *serr; ··· 1346 1344 void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, 1347 1345 bool success) 1348 1346 { 1349 - uarg->zerocopy = uarg->zerocopy & success; 1347 + struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg); 1348 + 1349 + uarg_zc->zerocopy = uarg_zc->zerocopy & success; 1350 1350 1351 1351 if (refcount_dec_and_test(&uarg->refcnt)) 1352 - __msg_zerocopy_callback(uarg); 1352 + __msg_zerocopy_callback(uarg_zc); 1353 1353 } 1354 1354 EXPORT_SYMBOL_GPL(msg_zerocopy_callback); 1355 1355 1356 1356 void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) 1357 1357 { 1358 - struct sock *sk = skb_from_uarg(uarg)->sk; 1358 + struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk; 1359 1359 1360 1360 atomic_dec(&sk->sk_zckey); 1361 - uarg->len--; 1361 + uarg_to_msgzc(uarg)->len--; 1362 1362 1363 1363 if (have_uref) 1364 1364 msg_zerocopy_callback(NULL, uarg, true);
+1 -1
net/ipv4/ip_output.c
··· 1043 1043 paged = true; 1044 1044 zc = true; 1045 1045 } else { 1046 - uarg->zerocopy = 0; 1046 + uarg_to_msgzc(uarg)->zerocopy = 0; 1047 1047 skb_zcopy_set(skb, uarg, &extra_uref); 1048 1048 } 1049 1049 }
+1 -1
net/ipv4/tcp.c
··· 1239 1239 } 1240 1240 zc = sk->sk_route_caps & NETIF_F_SG; 1241 1241 if (!zc) 1242 - uarg->zerocopy = 0; 1242 + uarg_to_msgzc(uarg)->zerocopy = 0; 1243 1243 } 1244 1244 } 1245 1245
+1 -1
net/ipv6/ip6_output.c
··· 1567 1567 paged = true; 1568 1568 zc = true; 1569 1569 } else { 1570 - uarg->zerocopy = 0; 1570 + uarg_to_msgzc(uarg)->zerocopy = 0; 1571 1571 skb_zcopy_set(skb, uarg, &extra_uref); 1572 1572 } 1573 1573 }