Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'bpf-af-xdp-cleanups'

Björn Töpel says:

====================
This is the second follow-up set. The first four patches are uapi
changes:

* Removing rebind support
* Getting rid of structure hole
* Removing explicit cache line alignment
* Stricter bind checks

The remaining patches are cleanups; the umem and refcount_t changes
were suggested by Daniel.

* Add a missing write-barrier and use READ_ONCE for data-dependencies
* Clean up umem and do proper locking
* Convert atomic_t to refcount_t
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
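
The write-barrier fix mentioned above pairs smp_wmb() on the publishing side with READ_ONCE() on the consuming side: the object is fully initialised before the pointer to it is stored, so any reader that observes the pointer also observes initialised contents. Below is a minimal user-space sketch of the same publish/consume idiom, using C11 atomics in place of the kernel primitives (queue_publish(), queue_peek() and published_q are illustrative names, not from the patches; an acquire load stands in for READ_ONCE()'s data-dependency ordering):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct queue {
	unsigned int entries;
	unsigned int *ring;
};

static _Atomic(struct queue *) published_q;

/* Publisher: initialise everything, then release-store the pointer.
 * The release store plays the role of smp_wmb() + assignment. */
static int queue_publish(unsigned int entries)
{
	struct queue *q = malloc(sizeof(*q));

	if (!q)
		return -1;
	q->entries = entries;
	q->ring = calloc(entries, sizeof(*q->ring));
	if (!q->ring) {
		free(q);
		return -1;
	}
	atomic_store_explicit(&published_q, q, memory_order_release);
	return 0;
}

/* Consumer: load the pointer once, then do data-dependent accesses. */
static void queue_peek(void)
{
	struct queue *q = atomic_load_explicit(&published_q,
					       memory_order_acquire);

	if (!q)
		return; /* not published yet */
	printf("queue with %u entries\n", q->entries);
}

int main(void)
{
	queue_publish(64);
	queue_peek();
	return 0;
}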

+226 -157
+23 -23
include/uapi/linux/if_xdp.h
···
 
 struct sockaddr_xdp {
 	__u16 sxdp_family;
+	__u16 sxdp_flags;
 	__u32 sxdp_ifindex;
 	__u32 sxdp_queue_id;
 	__u32 sxdp_shared_umem_fd;
-	__u16 sxdp_flags;
+};
+
+struct xdp_ring_offset {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+struct xdp_mmap_offsets {
+	struct xdp_ring_offset rx;
+	struct xdp_ring_offset tx;
+	struct xdp_ring_offset fr; /* Fill */
+	struct xdp_ring_offset cr; /* Completion */
 };
 
 /* XDP socket options */
-#define XDP_RX_RING			1
-#define XDP_TX_RING			2
-#define XDP_UMEM_REG			3
-#define XDP_UMEM_FILL_RING		4
-#define XDP_UMEM_COMPLETION_RING	5
-#define XDP_STATISTICS			6
+#define XDP_MMAP_OFFSETS		1
+#define XDP_RX_RING			2
+#define XDP_TX_RING			3
+#define XDP_UMEM_REG			4
+#define XDP_UMEM_FILL_RING		5
+#define XDP_UMEM_COMPLETION_RING	6
+#define XDP_STATISTICS			7
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
···
 #define XDP_UMEM_PGOFF_FILL_RING	0x100000000
 #define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000
 
+/* Rx/Tx descriptor */
 struct xdp_desc {
 	__u32 idx;
 	__u32 len;
···
 	__u8 padding[5];
 };
 
-struct xdp_ring {
-	__u32 producer __attribute__((aligned(64)));
-	__u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
-	struct xdp_ring ptrs;
-	struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
-	struct xdp_ring ptrs;
-	__u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
 
 #endif /* _LINUX_IF_XDP_H */
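
With the fixed ring layout gone from the uapi, user space queries the ring offsets at run time via the new XDP_MMAP_OFFSETS socket option before mmap()ing a ring; the full conversion is in the samples/bpf/xdpsock_user.c diff below. A condensed sketch of the sequence (error handling omitted; map_rx_ring(), xsk_fd and ndescs are placeholders, and the SOL_XDP fallback define mirrors the sample's convention):

#include <linux/if_xdp.h>
#include <sys/mman.h>
#include <sys/socket.h>

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

static struct xdp_desc *map_rx_ring(int xsk_fd, unsigned int ndescs,
				    __u32 **prod, __u32 **cons)
{
	struct xdp_mmap_offsets off;
	socklen_t optlen = sizeof(off);
	void *map;

	/* Ask the kernel where producer/consumer/descriptors live. */
	getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);

	map = mmap(NULL, off.rx.desc + ndescs * sizeof(struct xdp_desc),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		   xsk_fd, XDP_PGOFF_RX_RING);

	*prod = map + off.rx.producer;
	*cons = map + off.rx.consumer;
	return map + off.rx.desc;
}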
+41 -44
net/xdp/xdp_umem.c
···
 
 #define XDP_UMEM_MIN_FRAME_SIZE 2048
 
-int xdp_umem_create(struct xdp_umem **umem)
-{
-	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
-
-	if (!*umem)
-		return -ENOMEM;
-
-	return 0;
-}
-
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
 	unsigned int i;
 
-	if (umem->pgs) {
-		for (i = 0; i < umem->npgs; i++) {
-			struct page *page = umem->pgs[i];
+	for (i = 0; i < umem->npgs; i++) {
+		struct page *page = umem->pgs[i];
 
-			set_page_dirty_lock(page);
-			put_page(page);
-		}
-
-		kfree(umem->pgs);
-		umem->pgs = NULL;
+		set_page_dirty_lock(page);
+		put_page(page);
 	}
+
+	kfree(umem->pgs);
+	umem->pgs = NULL;
 }
 
 static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 {
-	if (umem->user) {
-		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
-		free_uid(umem->user);
-	}
+	atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+	free_uid(umem->user);
 }
 
 static void xdp_umem_release(struct xdp_umem *umem)
···
 		umem->cq = NULL;
 	}
 
-	if (umem->pgs) {
-		xdp_umem_unpin_pages(umem);
+	xdp_umem_unpin_pages(umem);
 
-		task = get_pid_task(umem->pid, PIDTYPE_PID);
-		put_pid(umem->pid);
-		if (!task)
-			goto out;
-		mm = get_task_mm(task);
-		put_task_struct(task);
-		if (!mm)
-			goto out;
+	task = get_pid_task(umem->pid, PIDTYPE_PID);
+	put_pid(umem->pid);
+	if (!task)
+		goto out;
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm)
+		goto out;
 
-		mmput(mm);
-		umem->pgs = NULL;
-	}
-
+	mmput(mm);
 	xdp_umem_unaccount_pages(umem);
 out:
 	kfree(umem);
···
 
 void xdp_get_umem(struct xdp_umem *umem)
 {
-	atomic_inc(&umem->users);
+	refcount_inc(&umem->users);
 }
 
 void xdp_put_umem(struct xdp_umem *umem)
···
 	if (!umem)
 		return;
 
-	if (atomic_dec_and_test(&umem->users)) {
+	if (refcount_dec_and_test(&umem->users)) {
 		INIT_WORK(&umem->work, xdp_umem_release_deferred);
 		schedule_work(&umem->work);
 	}
···
 	return 0;
 }
 
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
 	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
 	u64 addr = mr->addr, size = mr->len;
 	unsigned int nframes, nfpp;
 	int size_chk, err;
-
-	if (!umem)
-		return -EINVAL;
 
 	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
···
 	umem->frame_size_log2 = ilog2(frame_size);
 	umem->nfpp_mask = nfpp - 1;
 	umem->nfpplog2 = ilog2(nfpp);
-	atomic_set(&umem->users, 1);
+	refcount_set(&umem->users, 1);
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
···
 out:
 	put_pid(umem->pid);
 	return err;
+}
+
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
+{
+	struct xdp_umem *umem;
+	int err;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	err = xdp_umem_reg(umem, mr);
+	if (err) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+
+	return umem;
 }
 
 bool xdp_umem_validate_queues(struct xdp_umem *umem)
+2 -3
net/xdp/xdp_umem.h
···
 	struct pid *pid;
 	unsigned long address;
 	size_t size;
-	atomic_t users;
+	refcount_t users;
 	struct work_struct work;
 };
···
 }
 
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
-int xdp_umem_create(struct xdp_umem **umem);
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
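
refcount_t supports the same get/put pattern as atomic_t but saturates and warns on overflow/underflow instead of silently wrapping, which is why it is preferred for object lifetimes. A stand-alone user-space sketch of the xdp_get_umem()/xdp_put_umem() lifetime pattern, with a plain C11 atomic standing in for refcount_t (umem_create(), umem_get() and umem_put() are illustrative names; the saturation behaviour itself is not reproduced here):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct umem {
	atomic_uint users;
	/* ... pinned pages, accounting, queues ... */
};

static struct umem *umem_create(void)
{
	struct umem *um = calloc(1, sizeof(*um));

	if (um)
		atomic_init(&um->users, 1); /* creator holds one ref */
	return um;
}

static void umem_get(struct umem *um)
{
	atomic_fetch_add(&um->users, 1);
}

static void umem_put(struct umem *um)
{
	/* fetch_sub returns the old value; old == 1 means last ref */
	if (um && atomic_fetch_sub(&um->users, 1) == 1) {
		puts("last reference dropped, releasing");
		free(um);
	}
}

int main(void)
{
	struct umem *um = umem_create();

	umem_get(um); /* e.g. a second socket sharing the umem */
	umem_put(um);
	umem_put(um); /* releases */
	return 0;
}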
+67 -40
net/xdp/xsk.c
···
 		goto out;
 	}
 
+	if (xs->queue_id >= xs->dev->real_num_tx_queues) {
+		err = -ENXIO;
+		goto out;
+	}
+
 	skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
 	if (unlikely(!skb)) {
 		err = -EAGAIN;
···
 	if (!q)
 		return -ENOMEM;
 
+	/* Make sure queue is ready before it can be seen by others */
+	smp_wmb();
 	*queue = q;
 	return 0;
-}
-
-static void __xsk_release(struct xdp_sock *xs)
-{
-	/* Wait for driver to stop using the xdp socket. */
-	synchronize_net();
-
-	dev_put(xs->dev);
 }
 
 static int xsk_release(struct socket *sock)
···
 	local_bh_enable();
 
 	if (xs->dev) {
-		__xsk_release(xs);
+		/* Wait for driver to stop using the xdp socket. */
+		synchronize_net();
+		dev_put(xs->dev);
 		xs->dev = NULL;
 	}
···
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
 	struct sock *sk = sock->sk;
-	struct net_device *dev, *dev_curr;
 	struct xdp_sock *xs = xdp_sk(sk);
-	struct xdp_umem *old_umem = NULL;
+	struct net_device *dev;
 	int err = 0;
 
 	if (addr_len < sizeof(struct sockaddr_xdp))
···
 		return -EINVAL;
 
 	mutex_lock(&xs->mutex);
-	dev_curr = xs->dev;
+	if (xs->dev) {
+		err = -EBUSY;
+		goto out_release;
+	}
+
 	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
 	if (!dev) {
 		err = -ENODEV;
···
 		goto out_unlock;
 	}
 
-	if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+	if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
+	    (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
 		err = -EINVAL;
 		goto out_unlock;
 	}
···
 		}
 
 		xdp_get_umem(umem_xs->umem);
-		old_umem = xs->umem;
 		xs->umem = umem_xs->umem;
 		sockfd_put(sock);
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
···
 		/* This xsk has its own umem. */
 		xskq_set_umem(xs->umem->fq, &xs->umem->props);
 		xskq_set_umem(xs->umem->cq, &xs->umem->props);
-	}
-
-	/* Rebind? */
-	if (dev_curr && (dev_curr != dev ||
-			 xs->queue_id != sxdp->sxdp_queue_id)) {
-		__xsk_release(xs);
-		if (old_umem)
-			xdp_put_umem(old_umem);
 	}
 
 	xs->dev = dev;
···
 		struct xdp_umem_reg mr;
 		struct xdp_umem *umem;
 
-		if (xs->umem)
-			return -EBUSY;
-
 		if (copy_from_user(&mr, optval, sizeof(mr)))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
-		err = xdp_umem_create(&umem);
-
-		err = xdp_umem_reg(umem, &mr);
-		if (err) {
-			kfree(umem);
+		if (xs->umem) {
 			mutex_unlock(&xs->mutex);
-			return err;
+			return -EBUSY;
+		}
+
+		umem = xdp_umem_create(&mr);
+		if (IS_ERR(umem)) {
+			mutex_unlock(&xs->mutex);
+			return PTR_ERR(umem);
 		}
 
 		/* Make sure umem is ready before it can be seen by others */
 		smp_wmb();
-
 		xs->umem = umem;
 		mutex_unlock(&xs->mutex);
 		return 0;
···
 		struct xsk_queue **q;
 		int entries;
 
-		if (!xs->umem)
-			return -EINVAL;
-
 		if (copy_from_user(&entries, optval, sizeof(entries)))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
+		if (!xs->umem) {
+			mutex_unlock(&xs->mutex);
+			return -EINVAL;
+		}
+
 		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
 			&xs->umem->cq;
 		err = xsk_init_queue(entries, q, true);
···
 
 		return 0;
 	}
+	case XDP_MMAP_OFFSETS:
+	{
+		struct xdp_mmap_offsets off;
+
+		if (len < sizeof(off))
+			return -EINVAL;
+
+		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+		off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
+		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+		off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
+
+		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+		off.fr.desc = offsetof(struct xdp_umem_ring, desc);
+		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+		off.cr.desc = offsetof(struct xdp_umem_ring, desc);
+
+		len = sizeof(off);
+		if (copy_to_user(optval, &off, len))
+			return -EFAULT;
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		break;
 	}
···
 	unsigned long size = vma->vm_end - vma->vm_start;
 	struct xdp_sock *xs = xdp_sk(sock->sk);
 	struct xsk_queue *q = NULL;
+	struct xdp_umem *umem;
 	unsigned long pfn;
 	struct page *qpg;
 
 	if (offset == XDP_PGOFF_RX_RING) {
-		q = xs->rx;
+		q = READ_ONCE(xs->rx);
 	} else if (offset == XDP_PGOFF_TX_RING) {
-		q = xs->tx;
+		q = READ_ONCE(xs->tx);
 	} else {
-		if (!xs->umem)
+		umem = READ_ONCE(xs->umem);
+		if (!umem)
 			return -EINVAL;
 
 		if (offset == XDP_UMEM_PGOFF_FILL_RING)
-			q = xs->umem->fq;
+			q = READ_ONCE(umem->fq);
 		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
-			q = xs->umem->cq;
+			q = READ_ONCE(umem->cq);
 	}
 
 	if (!q)
+17
net/xdp/xsk_queue.h
···
 
 #define RX_BATCH_SIZE 16
 
+struct xdp_ring {
+	u32 producer ____cacheline_aligned_in_smp;
+	u32 consumer ____cacheline_aligned_in_smp;
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+	struct xdp_ring ptrs;
+	struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+	struct xdp_ring ptrs;
+	u32 desc[0] ____cacheline_aligned_in_smp;
+};
+
 struct xsk_queue {
 	struct xdp_umem_props umem_props;
 	u32 ring_mask;
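
Moving these structs out of the uapi makes the cache-line placement a kernel-internal detail. The producer and consumer indices are written by different sides of the ring (kernel vs. user space), so ____cacheline_aligned_in_smp keeps each index on its own cache line to avoid false sharing. A small stand-alone illustration of the layout effect, assuming a 64-byte cache line (struct ring_ptrs is a made-up name):

#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

/* Each index gets its own 64-byte cache line, so concurrent updates
 * by producer and consumer do not bounce the same line between CPUs. */
struct ring_ptrs {
	alignas(64) uint32_t producer;
	alignas(64) uint32_t consumer;
};

int main(void)
{
	printf("sizeof(struct ring_ptrs) = %zu\n", sizeof(struct ring_ptrs));
	/* prints 128: producer at offset 0, consumer at offset 64 */
	return 0;
}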
+76 -47
samples/bpf/xdpsock_user.c
···
 	u32 cached_cons;
 	u32 mask;
 	u32 size;
-	struct xdp_umem_ring *ring;
+	u32 *producer;
+	u32 *consumer;
+	u32 *ring;
+	void *map;
 };
 
 struct xdp_umem {
···
 	u32 cached_cons;
 	u32 mask;
 	u32 size;
-	struct xdp_rxtx_ring *ring;
+	u32 *producer;
+	u32 *consumer;
+	struct xdp_desc *ring;
+	void *map;
 };
 
 struct xdpsock {
···
 		return free_entries;
 
 	/* Refresh the local tail pointer */
-	q->cached_cons = q->ring->ptrs.consumer;
+	q->cached_cons = *q->consumer;
 
 	return q->size - (q->cached_prod - q->cached_cons);
 }
···
 		return free_entries;
 
 	/* Refresh the local tail pointer */
-	q->cached_cons = q->ring->ptrs.consumer + q->size;
+	q->cached_cons = *q->consumer + q->size;
 	return q->cached_cons - q->cached_prod;
 }
···
 	u32 entries = q->cached_prod - q->cached_cons;
 
 	if (entries == 0) {
-		q->cached_prod = q->ring->ptrs.producer;
+		q->cached_prod = *q->producer;
 		entries = q->cached_prod - q->cached_cons;
 	}
···
 	u32 entries = q->cached_prod - q->cached_cons;
 
 	if (entries == 0) {
-		q->cached_prod = q->ring->ptrs.producer;
+		q->cached_prod = *q->producer;
 		entries = q->cached_prod - q->cached_cons;
 	}
···
 	for (i = 0; i < nb; i++) {
 		u32 idx = fq->cached_prod++ & fq->mask;
 
-		fq->ring->desc[idx] = d[i].idx;
+		fq->ring[idx] = d[i].idx;
 	}
 
 	u_smp_wmb();
 
-	fq->ring->ptrs.producer = fq->cached_prod;
+	*fq->producer = fq->cached_prod;
 
 	return 0;
 }
···
 	for (i = 0; i < nb; i++) {
 		u32 idx = fq->cached_prod++ & fq->mask;
 
-		fq->ring->desc[idx] = d[i];
+		fq->ring[idx] = d[i];
 	}
 
 	u_smp_wmb();
 
-	fq->ring->ptrs.producer = fq->cached_prod;
+	*fq->producer = fq->cached_prod;
 
 	return 0;
 }
···
 
 	for (i = 0; i < entries; i++) {
 		idx = cq->cached_cons++ & cq->mask;
-		d[i] = cq->ring->desc[idx];
+		d[i] = cq->ring[idx];
 	}
 
 	if (entries > 0) {
 		u_smp_wmb();
 
-		cq->ring->ptrs.consumer = cq->cached_cons;
+		*cq->consumer = cq->cached_cons;
 	}
 
 	return entries;
···
 			 const struct xdp_desc *descs,
 			 unsigned int ndescs)
 {
-	struct xdp_rxtx_ring *r = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int i;
 
 	if (xq_nb_free(uq, ndescs) < ndescs)
···
 	for (i = 0; i < ndescs; i++) {
 		u32 idx = uq->cached_prod++ & uq->mask;
 
-		r->desc[idx].idx = descs[i].idx;
-		r->desc[idx].len = descs[i].len;
-		r->desc[idx].offset = descs[i].offset;
+		r[idx].idx = descs[i].idx;
+		r[idx].len = descs[i].len;
+		r[idx].offset = descs[i].offset;
 	}
 
 	u_smp_wmb();
 
-	r->ptrs.producer = uq->cached_prod;
+	*uq->producer = uq->cached_prod;
 	return 0;
 }
 
 static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
 				 __u32 idx, unsigned int ndescs)
 {
-	struct xdp_rxtx_ring *q = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int i;
 
 	if (xq_nb_free(uq, ndescs) < ndescs)
···
 	for (i = 0; i < ndescs; i++) {
 		u32 idx = uq->cached_prod++ & uq->mask;
 
-		q->desc[idx].idx = idx + i;
-		q->desc[idx].len = sizeof(pkt_data) - 1;
-		q->desc[idx].offset = 0;
+		r[idx].idx = idx + i;
+		r[idx].len = sizeof(pkt_data) - 1;
+		r[idx].offset = 0;
 	}
 
 	u_smp_wmb();
 
-	q->ptrs.producer = uq->cached_prod;
+	*uq->producer = uq->cached_prod;
 	return 0;
 }
···
 		     struct xdp_desc *descs,
 		     int ndescs)
 {
-	struct xdp_rxtx_ring *r = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int idx;
 	int i, entries;
···
 
 	for (i = 0; i < entries; i++) {
 		idx = uq->cached_cons++ & uq->mask;
-		descs[i] = r->desc[idx];
+		descs[i] = r[idx];
 	}
 
 	if (entries > 0) {
 		u_smp_wmb();
 
-		r->ptrs.consumer = uq->cached_cons;
+		*uq->consumer = uq->cached_cons;
 	}
 
 	return entries;
···
 static struct xdp_umem *xdp_umem_configure(int sfd)
 {
 	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+	struct xdp_mmap_offsets off;
 	struct xdp_umem_reg mr;
 	struct xdp_umem *umem;
+	socklen_t optlen;
 	void *bufs;
 
 	umem = calloc(1, sizeof(*umem));
···
 	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
 			   sizeof(int)) == 0);
 
-	umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
-			     FQ_NUM_DESCS * sizeof(u32),
-			     PROT_READ | PROT_WRITE,
-			     MAP_SHARED | MAP_POPULATE, sfd,
-			     XDP_UMEM_PGOFF_FILL_RING);
-	lassert(umem->fq.ring != MAP_FAILED);
+	optlen = sizeof(off);
+	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+			   &optlen) == 0);
+
+	umem->fq.map = mmap(0, off.fr.desc +
+			    FQ_NUM_DESCS * sizeof(u32),
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE, sfd,
+			    XDP_UMEM_PGOFF_FILL_RING);
+	lassert(umem->fq.map != MAP_FAILED);
 
 	umem->fq.mask = FQ_NUM_DESCS - 1;
 	umem->fq.size = FQ_NUM_DESCS;
+	umem->fq.producer = umem->fq.map + off.fr.producer;
+	umem->fq.consumer = umem->fq.map + off.fr.consumer;
+	umem->fq.ring = umem->fq.map + off.fr.desc;
 
-	umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+	umem->cq.map = mmap(0, off.cr.desc +
 			    CQ_NUM_DESCS * sizeof(u32),
 			    PROT_READ | PROT_WRITE,
 			    MAP_SHARED | MAP_POPULATE, sfd,
 			    XDP_UMEM_PGOFF_COMPLETION_RING);
-	lassert(umem->cq.ring != MAP_FAILED);
+	lassert(umem->cq.map != MAP_FAILED);
 
 	umem->cq.mask = CQ_NUM_DESCS - 1;
 	umem->cq.size = CQ_NUM_DESCS;
+	umem->cq.producer = umem->cq.map + off.cr.producer;
+	umem->cq.consumer = umem->cq.map + off.cr.consumer;
+	umem->cq.ring = umem->cq.map + off.cr.desc;
 
 	umem->frames = (char (*)[FRAME_SIZE])bufs;
 	umem->fd = sfd;
···
 static struct xdpsock *xsk_configure(struct xdp_umem *umem)
 {
 	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
 	int sfd, ndescs = NUM_DESCS;
 	struct xdpsock *xsk;
 	bool shared = true;
+	socklen_t optlen;
 	u32 i;
 
 	sfd = socket(PF_XDP, SOCK_RAW, 0);
···
 			   &ndescs, sizeof(int)) == 0);
 	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
 			   &ndescs, sizeof(int)) == 0);
+	optlen = sizeof(off);
+	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+			   &optlen) == 0);
 
 	/* Rx */
-	xsk->rx.ring = mmap(NULL,
-			    sizeof(struct xdp_ring) +
-			    NUM_DESCS * sizeof(struct xdp_desc),
-			    PROT_READ | PROT_WRITE,
-			    MAP_SHARED | MAP_POPULATE, sfd,
-			    XDP_PGOFF_RX_RING);
-	lassert(xsk->rx.ring != MAP_FAILED);
+	xsk->rx.map = mmap(NULL,
+			   off.rx.desc +
+			   NUM_DESCS * sizeof(struct xdp_desc),
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, sfd,
+			   XDP_PGOFF_RX_RING);
+	lassert(xsk->rx.map != MAP_FAILED);
 
 	if (!shared) {
 		for (i = 0; i < NUM_DESCS / 2; i++)
···
 	}
 
 	/* Tx */
-	xsk->tx.ring = mmap(NULL,
-			    sizeof(struct xdp_ring) +
-			    NUM_DESCS * sizeof(struct xdp_desc),
-			    PROT_READ | PROT_WRITE,
-			    MAP_SHARED | MAP_POPULATE, sfd,
-			    XDP_PGOFF_TX_RING);
-	lassert(xsk->tx.ring != MAP_FAILED);
+	xsk->tx.map = mmap(NULL,
+			   off.tx.desc +
+			   NUM_DESCS * sizeof(struct xdp_desc),
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, sfd,
+			   XDP_PGOFF_TX_RING);
+	lassert(xsk->tx.map != MAP_FAILED);
 
 	xsk->rx.mask = NUM_DESCS - 1;
 	xsk->rx.size = NUM_DESCS;
+	xsk->rx.producer = xsk->rx.map + off.rx.producer;
+	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
+	xsk->rx.ring = xsk->rx.map + off.rx.desc;
 
 	xsk->tx.mask = NUM_DESCS - 1;
 	xsk->tx.size = NUM_DESCS;
+	xsk->tx.producer = xsk->tx.map + off.tx.producer;
+	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
+	xsk->tx.ring = xsk->tx.map + off.tx.desc;
 
 	sxdp.sxdp_family = PF_XDP;
 	sxdp.sxdp_ifindex = opt_ifindex;