Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

io_uring: cache nodes and mapped buffers

Frequent alloc/free cycles of resource nodes and mapped buffers are pretty
costly. Use an io cache to reuse these objects more efficiently.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20250227223916.143006-7-kbusch@meta.com
[axboe: fix imu leak]
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Keith Busch and committed by
Jens Axboe
ed9f3112 1f6540e2

+65 -16
+2
include/linux/io_uring_types.h
··· 292 292 293 293 struct io_file_table file_table; 294 294 struct io_rsrc_data buf_table; 295 + struct io_alloc_cache node_cache; 296 + struct io_alloc_cache imu_cache; 295 297 296 298 struct io_submit_state submit_state; 297 299
+1 -1
io_uring/filetable.c
··· 68 68 if (slot_index >= ctx->file_table.data.nr) 69 69 return -EINVAL; 70 70 71 - node = io_rsrc_node_alloc(IORING_RSRC_FILE); 71 + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 72 72 if (!node) 73 73 return -ENOMEM; 74 74
+2
io_uring/io_uring.c
··· 291 291 io_alloc_cache_free(&ctx->uring_cache, kfree); 292 292 io_alloc_cache_free(&ctx->msg_cache, kfree); 293 293 io_futex_cache_free(ctx); 294 + io_rsrc_cache_free(ctx); 294 295 } 295 296 296 297 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ··· 339 338 ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, 340 339 sizeof(struct io_kiocb), 0); 341 340 ret |= io_futex_cache_init(ctx); 341 + ret |= io_rsrc_cache_init(ctx); 342 342 if (ret) 343 343 goto free_ref; 344 344 init_completion(&ctx->ref_comp);
+57 -14
io_uring/rsrc.c
··· 33 33 #define IORING_MAX_FIXED_FILES (1U << 20) 34 34 #define IORING_MAX_REG_BUFFERS (1U << 14) 35 35 36 + #define IO_CACHED_BVECS_SEGS 32 37 + 36 38 int __io_account_mem(struct user_struct *user, unsigned long nr_pages) 37 39 { 38 40 unsigned long page_limit, cur_pages, new_pages; ··· 113 111 unpin_user_page(imu->bvec[i].bv_page); 114 112 } 115 113 114 + static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, 115 + int nr_bvecs) 116 + { 117 + if (nr_bvecs <= IO_CACHED_BVECS_SEGS) 118 + return io_cache_alloc(&ctx->imu_cache, GFP_KERNEL); 119 + return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs), 120 + GFP_KERNEL); 121 + } 122 + 123 + static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu) 124 + { 125 + if (imu->nr_bvecs > IO_CACHED_BVECS_SEGS || 126 + !io_alloc_cache_put(&ctx->imu_cache, imu)) 127 + kvfree(imu); 128 + } 129 + 116 130 static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu) 117 131 { 118 132 if (!refcount_dec_and_test(&imu->refs)) ··· 137 119 if (imu->acct_pages) 138 120 io_unaccount_mem(ctx, imu->acct_pages); 139 121 imu->release(imu->priv); 140 - kvfree(imu); 122 + io_free_imu(ctx, imu); 141 123 } 142 124 143 - struct io_rsrc_node *io_rsrc_node_alloc(int type) 125 + struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) 144 126 { 145 127 struct io_rsrc_node *node; 146 128 147 - node = kzalloc(sizeof(*node), GFP_KERNEL); 129 + node = io_cache_alloc(&ctx->node_cache, GFP_KERNEL); 148 130 if (node) { 149 131 node->type = type; 150 132 node->refs = 1; 133 + node->tag = 0; 134 + node->file_ptr = 0; 151 135 } 152 136 return node; 153 137 } 154 138 155 - __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data) 139 + bool io_rsrc_cache_init(struct io_ring_ctx *ctx) 140 + { 141 + const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec, 142 + IO_CACHED_BVECS_SEGS); 143 + const int node_size = sizeof(struct io_rsrc_node); 
144 + bool ret; 145 + 146 + ret = io_alloc_cache_init(&ctx->node_cache, IO_ALLOC_CACHE_MAX, 147 + node_size, 0); 148 + ret |= io_alloc_cache_init(&ctx->imu_cache, IO_ALLOC_CACHE_MAX, 149 + imu_cache_size, 0); 150 + return ret; 151 + } 152 + 153 + void io_rsrc_cache_free(struct io_ring_ctx *ctx) 154 + { 155 + io_alloc_cache_free(&ctx->node_cache, kfree); 156 + io_alloc_cache_free(&ctx->imu_cache, kfree); 157 + } 158 + 159 + __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, 160 + struct io_rsrc_data *data) 156 161 { 157 162 if (!data->nr) 158 163 return; ··· 248 207 err = -EBADF; 249 208 break; 250 209 } 251 - node = io_rsrc_node_alloc(IORING_RSRC_FILE); 210 + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 252 211 if (!node) { 253 212 err = -ENOMEM; 254 213 fput(file); ··· 506 465 break; 507 466 } 508 467 509 - kfree(node); 468 + if (!io_alloc_cache_put(&ctx->node_cache, node)) 469 + kvfree(node); 510 470 } 511 471 512 472 int io_sqe_files_unregister(struct io_ring_ctx *ctx) ··· 569 527 goto fail; 570 528 } 571 529 ret = -ENOMEM; 572 - node = io_rsrc_node_alloc(IORING_RSRC_FILE); 530 + node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); 573 531 if (!node) { 574 532 fput(file); 575 533 goto fail; ··· 774 732 if (!iov->iov_base) 775 733 return NULL; 776 734 777 - node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); 735 + node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); 778 736 if (!node) 779 737 return ERR_PTR(-ENOMEM); 780 738 node->buf = NULL; ··· 794 752 coalesced = io_coalesce_buffer(&pages, &nr_pages, &data); 795 753 } 796 754 797 - imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); 755 + imu = io_alloc_imu(ctx, nr_pages); 798 756 if (!imu) 799 757 goto done; 800 758 759 + imu->nr_bvecs = nr_pages; 801 760 ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); 802 761 if (ret) { 803 762 unpin_user_pages(pages, nr_pages); ··· 809 766 /* store original address for later verification */ 810 767 imu->ubuf = (unsigned long) iov->iov_base; 811 
768 imu->len = iov->iov_len; 812 - imu->nr_bvecs = nr_pages; 813 769 imu->folio_shift = PAGE_SHIFT; 814 770 imu->release = io_release_ubuf; 815 771 imu->priv = imu; ··· 831 789 } 832 790 done: 833 791 if (ret) { 834 - kvfree(imu); 792 + if (imu) 793 + io_free_imu(ctx, imu); 835 794 if (node) 836 795 io_put_rsrc_node(ctx, node); 837 796 node = ERR_PTR(ret); ··· 936 893 goto unlock; 937 894 } 938 895 939 - node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); 896 + node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); 940 897 if (!node) { 941 898 ret = -ENOMEM; 942 899 goto unlock; 943 900 } 944 901 945 902 nr_bvecs = blk_rq_nr_phys_segments(rq); 946 - imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL); 903 + imu = io_alloc_imu(ctx, nr_bvecs); 947 904 if (!imu) { 948 905 kfree(node); 949 906 ret = -ENOMEM; ··· 1180 1137 if (!src_node) { 1181 1138 dst_node = NULL; 1182 1139 } else { 1183 - dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); 1140 + dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); 1184 1141 if (!dst_node) { 1185 1142 ret = -ENOMEM; 1186 1143 goto out_free;
+3 -1
io_uring/rsrc.h
··· 48 48 unsigned int nr_folios; 49 49 }; 50 50 51 - struct io_rsrc_node *io_rsrc_node_alloc(int type); 51 + bool io_rsrc_cache_init(struct io_ring_ctx *ctx); 52 + void io_rsrc_cache_free(struct io_ring_ctx *ctx); 53 + struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); 52 54 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); 53 55 void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); 54 56 int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);