Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-uring-ubufops' of git://git.kernel.org/pub/scm/linux/kernel/git/kuba/linux into for-6.10/io_uring

Merge net changes required for the upcoming send zerocopy improvements.

* 'for-uring-ubufops' of git://git.kernel.org/pub/scm/linux/kernel/git/kuba/linux:
  net: add callback for setting a ubuf_info to skb
  net: extend ubuf_info callback to ops structure

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+62 -33
+1 -1
drivers/net/tap.c
···
754 754 skb_zcopy_init(skb, msg_control);
755 755 } else if (msg_control) {
756 756 struct ubuf_info *uarg = msg_control;
757 - uarg->callback(NULL, uarg, false);
757 + uarg->ops->complete(NULL, uarg, false);
758 758 }
759 759
760 760 dev_queue_xmit(skb);
+1 -1
drivers/net/tun.c
···
1906 1906 skb_zcopy_init(skb, msg_control);
1907 1907 } else if (msg_control) {
1908 1908 struct ubuf_info *uarg = msg_control;
1909 - uarg->callback(NULL, uarg, false);
1909 + uarg->ops->complete(NULL, uarg, false);
1910 1910 }
1911 1911
1912 1912 skb_reset_network_header(skb);
+2 -3
drivers/net/xen-netback/common.h
···
390 390
391 391 void xenvif_carrier_on(struct xenvif *vif);
392 392
393 - /* Callback from stack when TX packet can be released */
394 - void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
395 - bool zerocopy_success);
393 + /* Callbacks from stack when TX packet can be released */
394 + extern const struct ubuf_info_ops xenvif_ubuf_ops;
396 395
397 396 static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
398 397 {
+1 -1
drivers/net/xen-netback/interface.c
···
593 593
594 594 for (i = 0; i < MAX_PENDING_REQS; i++) {
595 595 queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc)
596 - { { .callback = xenvif_zerocopy_callback },
596 + { { .ops = &xenvif_ubuf_ops },
597 597 { { .ctx = NULL,
598 598 .desc = i } } };
599 599 queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+8 -3
drivers/net/xen-netback/netback.c
···
1156 1156 uarg = skb_shinfo(skb)->destructor_arg;
1157 1157 /* increase inflight counter to offset decrement in callback */
1158 1158 atomic_inc(&queue->inflight_packets);
1159 - uarg->callback(NULL, uarg, true);
1159 + uarg->ops->complete(NULL, uarg, true);
1160 1160 skb_shinfo(skb)->destructor_arg = NULL;
1161 1161
1162 1162 /* Fill the skb with the new (local) frags. */
···
1278 1278 return work_done;
1279 1279 }
1280 1280
1281 - void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base,
1282 - bool zerocopy_success)
1281 + static void xenvif_zerocopy_callback(struct sk_buff *skb,
1282 + struct ubuf_info *ubuf_base,
1283 + bool zerocopy_success)
1283 1284 {
1284 1285 unsigned long flags;
1285 1286 pending_ring_idx_t index;
···
1312 1311 queue->stats.tx_zerocopy_fail++;
1313 1312 xenvif_skb_zerocopy_complete(queue);
1314 1313 }
1314 +
1315 + const struct ubuf_info_ops xenvif_ubuf_ops = {
1316 + .complete = xenvif_zerocopy_callback,
1317 + };
1315 1318
1316 1319 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1317 1320 {
+6 -2
drivers/vhost/net.c
···
380 380 }
381 381 }
382 382
383 - static void vhost_zerocopy_callback(struct sk_buff *skb,
383 + static void vhost_zerocopy_complete(struct sk_buff *skb,
384 384 struct ubuf_info *ubuf_base, bool success)
385 385 {
386 386 struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
···
407 407
408 408 rcu_read_unlock_bh();
409 409 }
410 +
411 + static const struct ubuf_info_ops vhost_ubuf_ops = {
412 + .complete = vhost_zerocopy_complete,
413 + };
410 414
411 415 static inline unsigned long busy_clock(void)
412 416 {
···
883 879 vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
884 880 ubuf->ctx = nvq->ubufs;
885 881 ubuf->desc = nvq->upend_idx;
886 - ubuf->ubuf.callback = vhost_zerocopy_callback;
882 + ubuf->ubuf.ops = &vhost_ubuf_ops;
887 883 ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
888 884 refcount_set(&ubuf->ubuf.refcnt, 1);
889 885 msg.msg_control = &ctl;
+13 -8
include/linux/skbuff.h
···
527 527 #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
528 528 SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)
529 529
530 + struct ubuf_info_ops {
531 + void (*complete)(struct sk_buff *, struct ubuf_info *,
532 + bool zerocopy_success);
533 + /* has to be compatible with skb_zcopy_set() */
534 + int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg);
535 + };
536 +
530 537 /*
531 538 * The callback notifies userspace to release buffers when skb DMA is done in
532 539 * lower device, the skb last reference should be 0 when calling this.
···
543 536 * The desc field is used to track userspace buffer index.
544 537 */
545 538 struct ubuf_info {
546 - void (*callback)(struct sk_buff *, struct ubuf_info *,
547 - bool zerocopy_success);
539 + const struct ubuf_info_ops *ops;
548 540 refcount_t refcnt;
549 541 u8 flags;
550 542 };
···
1668 1662 }
1669 1663 #endif
1670 1664
1665 + extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops;
1666 +
1671 1667 struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
1672 1668 struct ubuf_info *uarg);
1673 1669
1674 1670 void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
1675 -
1676 - void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
1677 - bool success);
1678 1671
1679 1672 int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
1680 1673 struct sk_buff *skb, struct iov_iter *from,
···
1762 1757 static inline void net_zcopy_put(struct ubuf_info *uarg)
1763 1758 {
1764 1759 if (uarg)
1765 - uarg->callback(NULL, uarg, true);
1760 + uarg->ops->complete(NULL, uarg, true);
1766 1761 }
1767 1762
1768 1763 static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref)
1769 1764 {
1770 1765 if (uarg) {
1771 - if (uarg->callback == msg_zerocopy_callback)
1766 + if (uarg->ops == &msg_zerocopy_ubuf_ops)
1772 1767 msg_zerocopy_put_abort(uarg, have_uref);
1773 1768 else if (have_uref)
1774 1769 net_zcopy_put(uarg);
···
1782 1777
1783 1778 if (uarg) {
1784 1779 if (!skb_zcopy_is_nouarg(skb))
1785 - uarg->callback(skb, uarg, zerocopy_success);
1780 + uarg->ops->complete(skb, uarg, zerocopy_success);
1786 1781
1787 1782 skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
1788 1783 }
+6 -2
io_uring/notif.c
···
23 23 io_req_task_complete(notif, ts);
24 24 }
25 25
26 - static void io_tx_ubuf_callback(struct sk_buff *skb, struct ubuf_info *uarg,
26 + static void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
27 27 bool success)
28 28 {
29 29 struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
···
42 42 notif->io_task_work.func = io_notif_tw_complete;
43 43 __io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
44 44 }
45 +
46 + static const struct ubuf_info_ops io_ubuf_ops = {
47 + .complete = io_tx_ubuf_complete,
48 + };
45 49
46 50 struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
47 51 __must_hold(&ctx->uring_lock)
···
66 62 nd->zc_report = false;
67 63 nd->account_pages = 0;
68 64 nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
69 - nd->uarg.callback = io_tx_ubuf_callback;
65 + nd->uarg.ops = &io_ubuf_ops;
70 66 refcount_set(&nd->uarg.refcnt, 1);
71 67 return notif;
72 68 }
+24 -12
net/core/skbuff.c
···
1708 1708 return NULL;
1709 1709 }
1710 1710
1711 - uarg->ubuf.callback = msg_zerocopy_callback;
1711 + uarg->ubuf.ops = &msg_zerocopy_ubuf_ops;
1712 1712 uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
1713 1713 uarg->len = 1;
1714 1714 uarg->bytelen = size;
···
1734 1734 u32 bytelen, next;
1735 1735
1736 1736 /* there might be non MSG_ZEROCOPY users */
1737 - if (uarg->callback != msg_zerocopy_callback)
1737 + if (uarg->ops != &msg_zerocopy_ubuf_ops)
1738 1738 return NULL;
1739 1739
1740 1740 /* realloc only when socket is locked (TCP, UDP cork),
···
1845 1845 sock_put(sk);
1846 1846 }
1847 1847
1848 - void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
1849 - bool success)
1848 + static void msg_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *uarg,
1849 + bool success)
1850 1850 {
1851 1851 struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
1852 1852
···
1855 1855 if (refcount_dec_and_test(&uarg->refcnt))
1856 1856 __msg_zerocopy_callback(uarg_zc);
1857 1857 }
1858 - EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
1859 1858
1860 1859 void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
1861 1860 {
···
1864 1865 uarg_to_msgzc(uarg)->len--;
1865 1866
1866 1867 if (have_uref)
1867 - msg_zerocopy_callback(NULL, uarg, true);
1868 + msg_zerocopy_complete(NULL, uarg, true);
1868 1869 }
1869 1870 EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
1871 +
1872 + const struct ubuf_info_ops msg_zerocopy_ubuf_ops = {
1873 + .complete = msg_zerocopy_complete,
1874 + };
1875 + EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
1870 1876
1871 1877 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
1872 1878 struct msghdr *msg, int len,
···
1880 1876 struct ubuf_info *orig_uarg = skb_zcopy(skb);
1881 1877 int err, orig_len = skb->len;
1882 1878
1883 - /* An skb can only point to one uarg. This edge case happens when
1884 - * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
1885 - */
1886 - if (orig_uarg && uarg != orig_uarg)
1887 - return -EEXIST;
1879 + if (uarg->ops->link_skb) {
1880 + err = uarg->ops->link_skb(skb, uarg);
1881 + if (err)
1882 + return err;
1883 + } else {
1884 + /* An skb can only point to one uarg. This edge case happens
1885 + * when TCP appends to an skb, but zerocopy_realloc triggered
1886 + * a new alloc.
1887 + */
1888 + if (orig_uarg && uarg != orig_uarg)
1889 + return -EEXIST;
1890 + }
1888 1891
1889 1892 err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
1890 1893 if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
···
1905 1894 return err;
1906 1895 }
1907 1896
1908 - skb_zcopy_set(skb, uarg, NULL);
1897 + if (!uarg->ops->link_skb)
1898 + skb_zcopy_set(skb, uarg, NULL);
1909 1899 return skb->len - orig_len;
1910 1900 }
1911 1901 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);