Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'Introduce-XDP-to-ena'

Sameeh Jubran says:

====================
Introduce XDP to ena

This patchset includes 3 patches:
* XDP_DROP implementation
* XDP_TX implementation
* A fix for an issue that might occur due to the XDP_TX patch. I saw fit
to place it as a standalone patch for clarity.

Difference from v2:
* Fixed the usage of rx headroom (XDP_PACKET_HEADROOM)
* Aligned the page_offset of the packet when passing it to the stack
* Switched to using xdp_frame in xdp xmit queue
* Dropped the print for unsupported commands
* Cosmetic changes

Difference from RFC v1 (XDP_DROP patch):
* Initialized xdp.rxq pointer
* Updated max_mtu on attachment of xdp and removed the check from
ena_change_mtu()
* Moved the xdp execution from ena_rx_skb() to ena_clean_rx_irq()
* Moved xdp buff (struct xdp_buff) from rx_ring to the local stack
* Started using netlink's extack mechanism to deliver error messages to
the user
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
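
As context for readers, and not part of the patchset itself: a minimal sketch of the
kind of XDP program the ena driver can now run natively. It exercises the verdicts
wired up below, returning XDP_TX for UDP packets to port 9 (which the new XDP TX
queues then retransmit) and XDP_PASS for everything else. The program name and port
are illustrative only, and a real reflector would also rewrite the Ethernet/IP
headers before transmitting.

/* Illustrative sketch, not from this patchset: exercise the XDP_TX and
 * XDP_PASS verdicts that ena_xdp_execute() handles. Build with
 * clang -O2 -target bpf and attach in native (driver) mode.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_udp9_reflect(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;
        struct iphdr *iph;
        struct udphdr *udp;

        if ((void *)(eth + 1) > data_end)
                return XDP_DROP;
        if (eth->h_proto != bpf_htons(ETH_P_IP))
                return XDP_PASS;

        iph = (void *)(eth + 1);
        if ((void *)(iph + 1) > data_end || iph->protocol != IPPROTO_UDP)
                return XDP_PASS;

        /* assume no IP options, for brevity */
        udp = (void *)(iph + 1);
        if ((void *)(udp + 1) > data_end)
                return XDP_PASS;

        /* a real reflector would swap MAC/IP addresses here */
        if (udp->dest == bpf_htons(9))
                return XDP_TX;

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";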

+890 -149

drivers/net/ethernet/amazon/ena/ena_ethtool.c  (+3 -1)
···
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	u32 count = channels->combined_count;
 	/* The check for max value is already done in ethtool */
-	if (count < ENA_MIN_NUM_IO_QUEUES)
+	if (count < ENA_MIN_NUM_IO_QUEUES ||
+	    (ena_xdp_present(adapter) &&
+	     !ena_xdp_legal_queue_count(adapter, channels->combined_count)))
 		return -EINVAL;

 	return ena_update_queue_count(adapter, count);
drivers/net/ethernet/amazon/ena/ena_netdev.c  (+814 -148)
···
 #include <linux/cpu_rmap.h>
 #endif /* CONFIG_RFS_ACCEL */
 #include <linux/ethtool.h>
-#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/numa.h>
···
 #include <net/ip.h>

 #include "ena_netdev.h"
+#include <linux/bpf_trace.h>
 #include "ena_pci_id_tbl.h"

 static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
···
 static void check_for_admin_com_state(struct ena_adapter *adapter);
 static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
 static int ena_restore_device(struct ena_adapter *adapter);
+
+static void ena_init_io_rings(struct ena_adapter *adapter,
+                              int first_index, int count);
+static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
+                                   int count);
+static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
+                                  int count);
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+                                           int first_index,
+                                           int count);
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+                                      int first_index, int count);
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+                                     int first_index, int count);
+static int ena_up(struct ena_adapter *adapter);
+static void ena_down(struct ena_adapter *adapter);
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+                                 struct ena_ring *rx_ring);
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+                                      struct ena_ring *rx_ring);
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+                              struct ena_tx_buffer *tx_info);
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+                                            int first_index, int count);

 static void ena_tx_timeout(struct net_device *dev)
 {
···
 	return ret;
 }

+static int ena_xmit_common(struct net_device *dev,
+                           struct ena_ring *ring,
+                           struct ena_tx_buffer *tx_info,
+                           struct ena_com_tx_ctx *ena_tx_ctx,
+                           u16 next_to_use,
+                           u32 bytes)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	int rc, nb_hw_desc;
+
+	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
+	                                        ena_tx_ctx))) {
+		netif_dbg(adapter, tx_queued, dev,
+		          "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+		          ring->qid);
+		ena_com_write_sq_doorbell(ring->ena_com_io_sq);
+	}
+
+	/* prepare the packet's descriptors to dma engine */
+	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
+	                        &nb_hw_desc);
+
+	/* In case there isn't enough space in the queue for the packet,
+	 * we simply drop it. All other failure reasons of
+	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
+	 */
+	if (unlikely(rc)) {
+		netif_err(adapter, tx_queued, dev,
+		          "failed to prepare tx bufs\n");
+		u64_stats_update_begin(&ring->syncp);
+		ring->tx_stats.prepare_ctx_err++;
+		u64_stats_update_end(&ring->syncp);
+		if (rc != -ENOMEM) {
+			adapter->reset_reason =
+				ENA_REGS_RESET_DRIVER_INVALID_STATE;
+			set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+		}
+		return rc;
+	}
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->tx_stats.cnt++;
+	ring->tx_stats.bytes += bytes;
+	u64_stats_update_end(&ring->syncp);
+
+	tx_info->tx_descs = nb_hw_desc;
+	tx_info->last_jiffies = jiffies;
+	tx_info->print_once = 0;
+
+	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+	                                         ring->ring_size);
+	return 0;
+}
+
+/* This is the XDP napi callback. XDP queues use a separate napi callback
+ * than Rx/Tx queues.
+ */
+static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+{
+	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+	u32 xdp_work_done, xdp_budget;
+	struct ena_ring *xdp_ring;
+	int napi_comp_call = 0;
+	int ret;
+
+	xdp_ring = ena_napi->xdp_ring;
+	xdp_ring->first_interrupt = ena_napi->first_interrupt;
+
+	xdp_budget = budget;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
+	    test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
+		napi_complete_done(napi, 0);
+		return 0;
+	}
+
+	xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
+
+	/* If the device is about to reset or down, avoid unmask
+	 * the interrupt and return 0 so NAPI won't reschedule
+	 */
+	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
+		napi_complete_done(napi, 0);
+		ret = 0;
+	} else if (xdp_budget > xdp_work_done) {
+		napi_comp_call = 1;
+		if (napi_complete_done(napi, xdp_work_done))
+			ena_unmask_interrupt(xdp_ring, NULL);
+		ena_update_ring_numa_node(xdp_ring, NULL);
+		ret = xdp_work_done;
+	} else {
+		ret = xdp_budget;
+	}
+
+	u64_stats_update_begin(&xdp_ring->syncp);
+	xdp_ring->tx_stats.napi_comp += napi_comp_call;
+	xdp_ring->tx_stats.tx_poll++;
+	u64_stats_update_end(&xdp_ring->syncp);
+
+	return ret;
+}
+
+static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
+                               struct ena_tx_buffer *tx_info,
+                               struct xdp_buff *xdp,
+                               void **push_hdr,
+                               u32 *push_len)
+{
+	struct ena_adapter *adapter = xdp_ring->adapter;
+	struct ena_com_buf *ena_buf;
+	dma_addr_t dma = 0;
+	u32 size;
+
+	tx_info->xdpf = convert_to_xdp_frame(xdp);
+	size = tx_info->xdpf->len;
+	ena_buf = tx_info->bufs;
+
+	/* llq push buffer */
+	*push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+	*push_hdr = tx_info->xdpf->data;
+
+	if (size - *push_len > 0) {
+		dma = dma_map_single(xdp_ring->dev,
+		                     *push_hdr + *push_len,
+		                     size - *push_len,
+		                     DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+			goto error_report_dma_error;
+
+		tx_info->map_linear_data = 1;
+		tx_info->num_of_bufs = 1;
+	}
+
+	ena_buf->paddr = dma;
+	ena_buf->len = size;
+
+	return 0;
+
+error_report_dma_error:
+	u64_stats_update_begin(&xdp_ring->syncp);
+	xdp_ring->tx_stats.dma_mapping_err++;
+	u64_stats_update_end(&xdp_ring->syncp);
+	netdev_warn(adapter->netdev, "failed to map xdp buff\n");
+
+	xdp_return_frame_rx_napi(tx_info->xdpf);
+	tx_info->xdpf = NULL;
+	tx_info->num_of_bufs = 0;
+
+	return -EINVAL;
+}
+
+static int ena_xdp_xmit_buff(struct net_device *dev,
+                             struct xdp_buff *xdp,
+                             int qid,
+                             struct ena_rx_buffer *rx_info)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	struct ena_com_tx_ctx ena_tx_ctx = {0};
+	struct ena_tx_buffer *tx_info;
+	struct ena_ring *xdp_ring;
+	struct ena_ring *rx_ring;
+	u16 next_to_use, req_id;
+	int rc;
+	void *push_hdr;
+	u32 push_len;
+
+	xdp_ring = &adapter->tx_ring[qid];
+	next_to_use = xdp_ring->next_to_use;
+	req_id = xdp_ring->free_ids[next_to_use];
+	tx_info = &xdp_ring->tx_buffer_info[req_id];
+	tx_info->num_of_bufs = 0;
+	rx_ring = &xdp_ring->adapter->rx_ring[qid -
+		  xdp_ring->adapter->xdp_first_ring];
+	page_ref_inc(rx_info->page);
+	tx_info->xdp_rx_page = rx_info->page;
+
+	rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
+	if (unlikely(rc))
+		goto error_drop_packet;
+
+	ena_tx_ctx.ena_bufs = tx_info->bufs;
+	ena_tx_ctx.push_header = push_hdr;
+	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+	ena_tx_ctx.req_id = req_id;
+	ena_tx_ctx.header_len = push_len;
+
+	rc = ena_xmit_common(dev,
+	                     xdp_ring,
+	                     tx_info,
+	                     &ena_tx_ctx,
+	                     next_to_use,
+	                     xdp->data_end - xdp->data);
+	if (rc)
+		goto error_unmap_dma;
+	/* trigger the dma engine. ena_com_write_sq_doorbell()
+	 * has a mb
+	 */
+	ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
+	u64_stats_update_begin(&xdp_ring->syncp);
+	xdp_ring->tx_stats.doorbells++;
+	u64_stats_update_end(&xdp_ring->syncp);
+
+	return NETDEV_TX_OK;
+
+error_unmap_dma:
+	ena_unmap_tx_buff(xdp_ring, tx_info);
+	tx_info->xdpf = NULL;
+error_drop_packet:
+
+	return NETDEV_TX_OK;
+}
+
+static int ena_xdp_execute(struct ena_ring *rx_ring,
+                           struct xdp_buff *xdp,
+                           struct ena_rx_buffer *rx_info)
+{
+	struct bpf_prog *xdp_prog;
+	u32 verdict = XDP_PASS;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
+
+	if (!xdp_prog)
+		goto out;
+
+	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (verdict == XDP_TX)
+		ena_xdp_xmit_buff(rx_ring->netdev,
+		                  xdp,
+		                  rx_ring->qid + rx_ring->adapter->num_io_queues,
+		                  rx_info);
+	else if (unlikely(verdict == XDP_ABORTED))
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+	else if (unlikely(verdict > XDP_TX))
+		bpf_warn_invalid_xdp_action(verdict);
+out:
+	rcu_read_unlock();
+	return verdict;
+}
+
+static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+{
+	adapter->xdp_first_ring = adapter->num_io_queues;
+	adapter->xdp_num_queues = adapter->num_io_queues;
+
+	ena_init_io_rings(adapter,
+	                  adapter->xdp_first_ring,
+	                  adapter->xdp_num_queues);
+}
+
+static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+{
+	int rc = 0;
+
+	rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
+	                                     adapter->xdp_num_queues);
+	if (rc)
+		goto setup_err;
+
+	rc = ena_create_io_tx_queues_in_range(adapter,
+	                                      adapter->xdp_first_ring,
+	                                      adapter->xdp_num_queues);
+	if (rc)
+		goto create_err;
+
+	return 0;
+
+create_err:
+	ena_free_all_io_tx_resources(adapter);
+setup_err:
+	return rc;
+}
+
+/* Provides a way for both kernel and bpf-prog to know
+ * more about the RX-queue a given XDP frame arrived on.
+ */
+static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
+{
+	int rc;
+
+	rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
+
+	if (rc) {
+		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+		          "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
+		          rx_ring->qid, rc);
+		goto err;
+	}
+
+	rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+	                                NULL);
+
+	if (rc) {
+		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+		          "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
+		          rx_ring->qid, rc);
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	}
+
+err:
+	return rc;
+}
+
+static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
+{
+	xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+}
+
+void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+                                          struct bpf_prog *prog,
+                                          int first,
+                                          int count)
+{
+	struct ena_ring *rx_ring;
+	int i = 0;
+
+	for (i = first; i < count; i++) {
+		rx_ring = &adapter->rx_ring[i];
+		xchg(&rx_ring->xdp_bpf_prog, prog);
+		if (prog) {
+			ena_xdp_register_rxq_info(rx_ring);
+			rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+		} else {
+			ena_xdp_unregister_rxq_info(rx_ring);
+			rx_ring->rx_headroom = 0;
+		}
+	}
+}
+
+void ena_xdp_exchange_program(struct ena_adapter *adapter,
+                              struct bpf_prog *prog)
+{
+	struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+	ena_xdp_exchange_program_rx_in_range(adapter,
+	                                     prog,
+	                                     0,
+	                                     adapter->num_io_queues);
+
+	if (old_bpf_prog)
+		bpf_prog_put(old_bpf_prog);
+}
+
+static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
+{
+	bool was_up;
+	int rc;
+
+	was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+	if (was_up)
+		ena_down(adapter);
+
+	adapter->xdp_first_ring = 0;
+	adapter->xdp_num_queues = 0;
+	ena_xdp_exchange_program(adapter, NULL);
+	if (was_up) {
+		rc = ena_up(adapter);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct bpf_prog *prog = bpf->prog;
+	struct bpf_prog *old_bpf_prog;
+	int rc, prev_mtu;
+	bool is_up;
+
+	is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	rc = ena_xdp_allowed(adapter);
+	if (rc == ENA_XDP_ALLOWED) {
+		old_bpf_prog = adapter->xdp_bpf_prog;
+		if (prog) {
+			if (!is_up) {
+				ena_init_all_xdp_queues(adapter);
+			} else if (!old_bpf_prog) {
+				ena_down(adapter);
+				ena_init_all_xdp_queues(adapter);
+			}
+			ena_xdp_exchange_program(adapter, prog);
+
+			if (is_up && !old_bpf_prog) {
+				rc = ena_up(adapter);
+				if (rc)
+					return rc;
+			}
+		} else if (old_bpf_prog) {
+			rc = ena_destroy_and_free_all_xdp_queues(adapter);
+			if (rc)
+				return rc;
+		}
+
+		prev_mtu = netdev->max_mtu;
+		netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
+
+		if (!old_bpf_prog)
+			netif_info(adapter, drv, adapter->netdev,
+			           "xdp program set, changing the max_mtu from %d to %d",
+			           prev_mtu, netdev->max_mtu);
+
+	} else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
+		netif_err(adapter, drv, adapter->netdev,
+		          "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
+		          netdev->mtu, ENA_XDP_MAX_MTU);
+		NL_SET_ERR_MSG_MOD(bpf->extack,
+		                   "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
+		return -EINVAL;
+	} else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
+		netif_err(adapter, drv, adapter->netdev,
+		          "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
+		          adapter->num_io_queues, adapter->max_num_io_queues);
+		NL_SET_ERR_MSG_MOD(bpf->extack,
+		                   "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return ena_xdp_set(netdev, bpf);
+	case XDP_QUERY_PROG:
+		bpf->prog_id = adapter->xdp_bpf_prog ?
+			adapter->xdp_bpf_prog->aux->id : 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 {
 #ifdef CONFIG_RFS_ACCEL
···
 	u64_stats_init(&ring->syncp);
 }

-static void ena_init_io_rings(struct ena_adapter *adapter)
+static void ena_init_io_rings(struct ena_adapter *adapter,
+                              int first_index, int count)
 {
 	struct ena_com_dev *ena_dev;
 	struct ena_ring *txr, *rxr;
···
 	ena_dev = adapter->ena_dev;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = first_index; i < first_index + count; i++) {
 		txr = &adapter->tx_ring[i];
 		rxr = &adapter->rx_ring[i];

-		/* TX/RX common ring state */
+		/* TX common ring state */
 		ena_init_io_rings_common(adapter, txr, i);
-		ena_init_io_rings_common(adapter, rxr, i);

 		/* TX specific ring state */
 		txr->ring_size = adapter->requested_tx_ring_size;
···
 		txr->smoothed_interval =
 			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);

-		/* RX specific ring state */
-		rxr->ring_size = adapter->requested_rx_ring_size;
-		rxr->rx_copybreak = adapter->rx_copybreak;
-		rxr->sgl_size = adapter->max_rx_sgl_size;
-		rxr->smoothed_interval =
-			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
-		rxr->empty_rx_queue = 0;
-		adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+		/* Don't init RX queues for xdp queues */
+		if (!ENA_IS_XDP_INDEX(adapter, i)) {
+			/* RX common ring state */
+			ena_init_io_rings_common(adapter, rxr, i);
+
+			/* RX specific ring state */
+			rxr->ring_size = adapter->requested_rx_ring_size;
+			rxr->rx_copybreak = adapter->rx_copybreak;
+			rxr->sgl_size = adapter->max_rx_sgl_size;
+			rxr->smoothed_interval =
+				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+			rxr->empty_rx_queue = 0;
+			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+		}
 	}
 }
···
 	tx_ring->push_buf_intermediate_buf = NULL;
 }

-/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
- * @adapter: private structure
- *
- * Return 0 on success, negative on failure
- */
-static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+                                           int first_index,
+                                           int count)
 {
 	int i, rc = 0;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = first_index; i < first_index + count; i++) {
 		rc = ena_setup_tx_resources(adapter, i);
 		if (rc)
 			goto err_setup_tx;
···
 		  "Tx queue %d: allocation failed\n", i);

 	/* rewind the index freeing the rings as we go */
-	while (i--)
+	while (first_index < i--)
 		ena_free_tx_resources(adapter, i);
 	return rc;
+}
+
+static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+                                                  int first_index, int count)
+{
+	int i;
+
+	for (i = first_index; i < first_index + count; i++)
+		ena_free_tx_resources(adapter, i);
 }

 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
···
  */
 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 {
-	int i;
-
-	for (i = 0; i < adapter->num_io_queues; i++)
-		ena_free_tx_resources(adapter, i);
+	ena_free_all_io_tx_resources_in_range(adapter,
+	                                      0,
+	                                      adapter->xdp_num_queues +
+	                                      adapter->num_io_queues);
 }

 static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
···
 	rx_info->page = page;
 	rx_info->page_offset = 0;
 	ena_buf = &rx_info->ena_buf;
-	ena_buf->paddr = dma;
-	ena_buf->len = ENA_PAGE_SIZE;
+	ena_buf->paddr = dma + rx_ring->rx_headroom;
+	ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom;

 	return 0;
 }
···
 		return;
 	}

-	dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE,
+	dma_unmap_page(rx_ring->dev,
+	               ena_buf->paddr - rx_ring->rx_headroom,
+	               ENA_PAGE_SIZE,
 	               DMA_FROM_DEVICE);

 	__free_page(page);
···
 		ena_free_rx_bufs(adapter, i);
 }

-static void ena_unmap_tx_skb(struct ena_ring *tx_ring,
-                             struct ena_tx_buffer *tx_info)
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+                              struct ena_tx_buffer *tx_info)
 {
 	struct ena_com_buf *ena_buf;
 	u32 cnt;
···
 			          tx_ring->qid, i);
 		}

-		ena_unmap_tx_skb(tx_ring, tx_info);
+		ena_unmap_tx_buff(tx_ring, tx_info);

 		dev_kfree_skb_any(tx_info->skb);
 	}
···
 	struct ena_ring *tx_ring;
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
 		tx_ring = &adapter->tx_ring[i];
 		ena_free_tx_bufs(tx_ring);
 	}
···
 	u16 ena_qid;
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
 		ena_qid = ENA_IO_TXQ_IDX(i);
 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 	}
···
 	ena_destroy_all_rx_queues(adapter);
 }

+static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+                                 struct ena_tx_buffer *tx_info, bool is_xdp)
+{
+	if (tx_info)
+		netif_err(ring->adapter,
+		          tx_done,
+		          ring->netdev,
+		          "tx_info doesn't have valid %s",
+		          is_xdp ? "xdp frame" : "skb");
+	else
+		netif_err(ring->adapter,
+		          tx_done,
+		          ring->netdev,
+		          "Invalid req_id: %hu\n",
+		          req_id);
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->tx_stats.bad_req_id++;
+	u64_stats_update_end(&ring->syncp);
+
+	/* Trigger device reset */
+	ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
+	set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
+	return -EFAULT;
+}
+
 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
 {
 	struct ena_tx_buffer *tx_info = NULL;
···
 		return 0;
 	}

-	if (tx_info)
-		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
-		          "tx_info doesn't have valid skb\n");
-	else
-		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
-		          "Invalid req_id: %hu\n", req_id);
+	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
+}

-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->tx_stats.bad_req_id++;
-	u64_stats_update_end(&tx_ring->syncp);
+static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
+{
+	struct ena_tx_buffer *tx_info = NULL;

-	/* Trigger device reset */
-	tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
-	set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
-	return -EFAULT;
+	if (likely(req_id < xdp_ring->ring_size)) {
+		tx_info = &xdp_ring->tx_buffer_info[req_id];
+		if (likely(tx_info->xdpf))
+			return 0;
+	}
+
+	return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
 }

 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
···
 		tx_info->skb = NULL;
 		tx_info->last_jiffies = 0;

-		ena_unmap_tx_skb(tx_ring, tx_info);
+		ena_unmap_tx_buff(tx_ring, tx_info);

 		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 		          "tx_poll: q %d skb %p completed\n", tx_ring->qid,
···
 	}
 }

+int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+	struct ena_rx_buffer *rx_info;
+	int ret;
+
+	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+	xdp->data = page_address(rx_info->page) +
+		rx_info->page_offset + rx_ring->rx_headroom;
+	xdp_set_data_meta_invalid(xdp);
+	xdp->data_hard_start = page_address(rx_info->page);
+	xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
+	/* If for some reason we received a bigger packet than
+	 * we expect, then we simply drop it
+	 */
+	if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
+		return XDP_DROP;
+
+	ret = ena_xdp_execute(rx_ring, xdp, rx_info);
+
+	/* The xdp program might expand the headers */
+	if (ret == XDP_PASS) {
+		rx_info->page_offset = xdp->data - xdp->data_hard_start;
+		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
+	}
+
+	return ret;
+}
 /* ena_clean_rx_irq - Cleanup RX irq
  * @rx_ring: RX ring to clean
  * @napi: napi handler
···
                             u32 budget)
 {
 	u16 next_to_clean = rx_ring->next_to_clean;
-	u32 res_budget, work_done;
-
 	struct ena_com_rx_ctx ena_rx_ctx;
 	struct ena_adapter *adapter;
+	u32 res_budget, work_done;
+	int rx_copybreak_pkt = 0;
+	int refill_threshold;
 	struct sk_buff *skb;
 	int refill_required;
-	int refill_threshold;
-	int rc = 0;
+	struct xdp_buff xdp;
 	int total_len = 0;
-	int rx_copybreak_pkt = 0;
+	int xdp_verdict;
+	int rc = 0;
 	int i;

 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 	          "%s qid %d\n", __func__, rx_ring->qid);
 	res_budget = budget;
+	xdp.rxq = &rx_ring->xdp_rxq;

 	do {
+		xdp_verdict = XDP_PASS;
+		skb = NULL;
 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
 		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
 		ena_rx_ctx.descs = 0;
···
 		          rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
 		          ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

-		/* allocate skb and fill it */
-		skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
-		                 &next_to_clean);
+		if (ena_xdp_present_ring(rx_ring))
+			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);

-		/* exit if we failed to retrieve a buffer */
+		/* allocate skb and fill it */
+		if (xdp_verdict == XDP_PASS)
+			skb = ena_rx_skb(rx_ring,
+			                 rx_ring->ena_bufs,
+			                 ena_rx_ctx.descs,
+			                 &next_to_clean);
+
 		if (unlikely(!skb)) {
+			if (xdp_verdict == XDP_TX) {
+				ena_free_rx_page(rx_ring,
+				                 &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
+				res_budget--;
+			}
 			for (i = 0; i < ena_rx_ctx.descs; i++) {
 				rx_ring->free_ids[next_to_clean] =
 					rx_ring->ena_bufs[i].req_id;
···
 					ENA_RX_RING_IDX_NEXT(next_to_clean,
 					                     rx_ring->ring_size);
 			}
+			if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP)
+				continue;
 			break;
 		}
···
                                  struct ena_ring *rx_ring)
 {
 	struct ena_eth_io_intr_reg intr_reg;
-	u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
-		rx_ring->smoothed_interval :
-		ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
+	u32 rx_interval = 0;
+	/* Rx ring can be NULL when for XDP tx queues which don't have an
+	 * accompanying rx_ring pair.
+	 */
+	if (rx_ring)
+		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
+			rx_ring->smoothed_interval :
+			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);

 	/* Update intr register: rx intr delay,
 	 * tx intr delay and interrupt unmask
···
 	/* It is a shared MSI-X.
 	 * Tx and Rx CQ have pointer to it.
 	 * So we use one of them to reach the intr reg
+	 * The Tx ring is used because the rx_ring is NULL for XDP queues
 	 */
-	ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
 }

 static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
···
 	if (numa_node != NUMA_NO_NODE) {
 		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
-		ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
+		if (rx_ring)
+			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
+			                         numa_node);
 	}

 	tx_ring->cpu = cpu;
-	rx_ring->cpu = cpu;
+	if (rx_ring)
+		rx_ring->cpu = cpu;

 	return;
 out:
 	put_cpu();
 }

+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+{
+	u32 total_done = 0;
+	u16 next_to_clean;
+	u32 tx_bytes = 0;
+	int tx_pkts = 0;
+	u16 req_id;
+	int rc;
+
+	if (unlikely(!xdp_ring))
+		return 0;
+	next_to_clean = xdp_ring->next_to_clean;
+
+	while (tx_pkts < budget) {
+		struct ena_tx_buffer *tx_info;
+		struct xdp_frame *xdpf;
+
+		rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
+		                                &req_id);
+		if (rc)
+			break;
+
+		rc = validate_xdp_req_id(xdp_ring, req_id);
+		if (rc)
+			break;
+
+		tx_info = &xdp_ring->tx_buffer_info[req_id];
+		xdpf = tx_info->xdpf;
+
+		tx_info->xdpf = NULL;
+		tx_info->last_jiffies = 0;
+		ena_unmap_tx_buff(xdp_ring, tx_info);
+
+		netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+		          "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
+		          xdpf);
+
+		tx_bytes += xdpf->len;
+		tx_pkts++;
+		total_done += tx_info->tx_descs;
+
+		__free_page(tx_info->xdp_rx_page);
+		xdp_ring->free_ids[next_to_clean] = req_id;
+		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+		                                     xdp_ring->ring_size);
+	}
+
+	xdp_ring->next_to_clean = next_to_clean;
+	ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
+	ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
+
+	netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+	          "tx_poll: q %d done. total pkts: %d\n",
+	          xdp_ring->qid, tx_pkts);
+
+	return tx_pkts;
+}
+
 static int ena_io_poll(struct napi_struct *napi, int budget)
 {
 	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
 	struct ena_ring *tx_ring, *rx_ring;
-
 	u32 tx_work_done;
 	u32 rx_work_done;
 	int tx_budget;
···
 	tx_ring = ena_napi->tx_ring;
 	rx_ring = ena_napi->rx_ring;
+
+	tx_ring->first_interrupt = ena_napi->first_interrupt;
+	rx_ring->first_interrupt = ena_napi->first_interrupt;

 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
···
 {
 	struct ena_napi *ena_napi = data;

-	ena_napi->tx_ring->first_interrupt = true;
-	ena_napi->rx_ring->first_interrupt = true;
+	ena_napi->first_interrupt = true;

 	napi_schedule_irqoff(&ena_napi->napi);
···
 {
 	struct net_device *netdev;
 	int irq_idx, i, cpu;
+	int io_queue_count;

 	netdev = adapter->netdev;
+	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = 0; i < io_queue_count; i++) {
 		irq_idx = ENA_IO_IRQ_IDX(i);
 		cpu = i % num_online_cpus();
···
 		synchronize_irq(adapter->irq_tbl[i].vector);
 }

-static void ena_del_napi(struct ena_adapter *adapter)
+static void ena_del_napi_in_range(struct ena_adapter *adapter,
+                                  int first_index,
+                                  int count)
 {
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++)
-		netif_napi_del(&adapter->ena_napi[i].napi);
+	for (i = first_index; i < first_index + count; i++) {
+		/* Check if napi was initialized before */
+		if (!ENA_IS_XDP_INDEX(adapter, i) ||
+		    adapter->ena_napi[i].xdp_ring)
+			netif_napi_del(&adapter->ena_napi[i].napi);
+		else
+			WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
+				adapter->ena_napi[i].xdp_ring);
+	}
 }

-static void ena_init_napi(struct ena_adapter *adapter)
+static void ena_init_napi_in_range(struct ena_adapter *adapter,
+                                   int first_index, int count)
 {
-	struct ena_napi *napi;
+	struct ena_napi *napi = {0};
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = first_index; i < first_index + count; i++) {
 		napi = &adapter->ena_napi[i];

 		netif_napi_add(adapter->netdev,
 		               &adapter->ena_napi[i].napi,
-		               ena_io_poll,
+		               ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
 		               ENA_NAPI_BUDGET);
-		napi->rx_ring = &adapter->rx_ring[i];
-		napi->tx_ring = &adapter->tx_ring[i];
+
+		if (!ENA_IS_XDP_INDEX(adapter, i)) {
+			napi->rx_ring = &adapter->rx_ring[i];
+			napi->tx_ring = &adapter->tx_ring[i];
+		} else {
+			napi->xdp_ring = &adapter->tx_ring[i];
+		}
 		napi->qid = i;
 	}
 }

-static void ena_napi_disable_all(struct ena_adapter *adapter)
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+                                      int first_index,
+                                      int count)
 {
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++)
+	for (i = first_index; i < first_index + count; i++)
 		napi_disable(&adapter->ena_napi[i].napi);
 }

-static void ena_napi_enable_all(struct ena_adapter *adapter)
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+                                     int first_index,
+                                     int count)
 {
 	int i;

-	for (i = 0; i < adapter->num_io_queues; i++)
+	for (i = first_index; i < first_index + count; i++)
 		napi_enable(&adapter->ena_napi[i].napi);
 }
···
 		rc = ena_rss_init_default(adapter);
 		if (rc && (rc != -EOPNOTSUPP)) {
 			netif_err(adapter, ifup, adapter->netdev,
-				"Failed to init RSS rc: %d\n", rc);
+				  "Failed to init RSS rc: %d\n", rc);
 			return rc;
 		}
 	}
···
 	/* enable transmits */
 	netif_tx_start_all_queues(adapter->netdev);

-	ena_napi_enable_all(adapter);
+	ena_napi_enable_in_range(adapter,
+	                         0,
+	                         adapter->xdp_num_queues + adapter->num_io_queues);

 	return 0;
 }
···
 	if (rc) {
 		netif_err(adapter, ifup, adapter->netdev,
 		          "Failed to create I/O TX queue num %d rc: %d\n",
-		         qid, rc);
+		          qid, rc);
 		return rc;
 	}
···
 	return rc;
 }

-static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+                                            int first_index, int count)
 {
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	int rc, i;

-	for (i = 0; i < adapter->num_io_queues; i++) {
+	for (i = first_index; i < first_index + count; i++) {
 		rc = ena_create_io_tx_queue(adapter, i);
 		if (rc)
 			goto create_err;
···
 	return 0;

 create_err:
-	while (i--)
+	while (i-- > first_index)
 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));

 	return rc;
 }
···
 		netif_err(adapter, ifup, adapter->netdev,
 		          "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
 		          qid, rc);
-		ena_com_destroy_io_queue(ena_dev, ena_qid);
-		return rc;
+		goto err;
 	}

 	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);

+	return rc;
+err:
+	ena_com_destroy_io_queue(ena_dev, ena_qid);
 	return rc;
 }
···
 }

 static void set_io_rings_size(struct ena_adapter *adapter,
-                              int new_tx_size, int new_rx_size)
+                              int new_tx_size,
+                              int new_rx_size)
 {
 	int i;
···
 	 * ones due to past queue allocation failures.
 	 */
 	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
-	                adapter->requested_rx_ring_size);
+	                  adapter->requested_rx_ring_size);

 	while (1) {
-		rc = ena_setup_all_tx_resources(adapter);
+		if (ena_xdp_present(adapter)) {
+			rc = ena_setup_and_create_all_xdp_queues(adapter);
+
+			if (rc)
+				goto err_setup_tx;
+		}
+		rc = ena_setup_tx_resources_in_range(adapter,
+		                                     0,
+		                                     adapter->num_io_queues);
 		if (rc)
 			goto err_setup_tx;

-		rc = ena_create_all_io_tx_queues(adapter);
+		rc = ena_create_io_tx_queues_in_range(adapter,
+		                                      0,
+		                                      adapter->num_io_queues);
 		if (rc)
 			goto err_create_tx_queues;
···
 		if (rc != -ENOMEM) {
 			netif_err(adapter, ifup, adapter->netdev,
 			          "Queue creation failed with error code %d\n",
-			         rc);
+			          rc);
 			return rc;
 		}
···
 		new_rx_ring_size = cur_rx_ring_size / 2;

 		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
-			new_rx_ring_size < ENA_MIN_RING_SIZE) {
+		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
 			netif_err(adapter, ifup, adapter->netdev,
 			          "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
 			          ENA_MIN_RING_SIZE);
···
 static int ena_up(struct ena_adapter *adapter)
 {
-	int rc, i;
+	int io_queue_count, rc, i;

 	netdev_dbg(adapter->netdev, "%s\n", __func__);

+	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
 	ena_setup_io_intr(adapter);

 	/* napi poll functions should be initialized before running
···
 	 * interrupt, causing the ISR to fire immediately while the poll
 	 * function wasn't set yet, causing a null dereference
 	 */
-	ena_init_napi(adapter);
+	ena_init_napi_in_range(adapter, 0, io_queue_count);

 	rc = ena_request_io_irq(adapter);
 	if (rc)
···
 	/* schedule napi in case we had pending packets
 	 * from the last time we disable napi
 	 */
-	for (i = 0; i < adapter->num_io_queues; i++)
+	for (i = 0; i < io_queue_count; i++)
 		napi_schedule(&adapter->ena_napi[i].napi);

 	return rc;
···
 err_create_queues_with_backoff:
 	ena_free_io_irq(adapter);
 err_req_irq:
-	ena_del_napi(adapter);
+	ena_del_napi_in_range(adapter, 0, io_queue_count);

 	return rc;
 }

 static void ena_down(struct ena_adapter *adapter)
 {
+	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+
 	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);

 	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
···
 	netif_tx_disable(adapter->netdev);

 	/* After this point the napi handler won't enable the tx queue */
-	ena_napi_disable_all(adapter);
+	ena_napi_disable_in_range(adapter, 0, io_queue_count);

 	/* After destroy the queue there won't be any new interrupts */
···
 	ena_disable_io_intr_sync(adapter);
 	ena_free_io_irq(adapter);
-	ena_del_napi(adapter);
+	ena_del_napi_in_range(adapter, 0, io_queue_count);

 	ena_free_all_tx_bufs(adapter);
 	ena_free_all_rx_bufs(adapter);
···
 	ena_close(adapter->netdev);
 	adapter->requested_tx_ring_size = new_tx_size;
 	adapter->requested_rx_ring_size = new_rx_size;
-	ena_init_io_rings(adapter);
+	ena_init_io_rings(adapter,
+	                  0,
+	                  adapter->xdp_num_queues +
+	                  adapter->num_io_queues);
 	return dev_was_up ? ena_up(adapter) : 0;
 }

 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
 {
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int prev_channel_count;
 	bool dev_was_up;

 	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 	ena_close(adapter->netdev);
+	prev_channel_count = adapter->num_io_queues;
 	adapter->num_io_queues = new_channel_count;
+	if (ena_xdp_present(adapter) &&
+	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
+		adapter->xdp_first_ring = new_channel_count;
+		adapter->xdp_num_queues = new_channel_count;
+		if (prev_channel_count > new_channel_count)
+			ena_xdp_exchange_program_rx_in_range(adapter,
+			                                     NULL,
+			                                     new_channel_count,
+			                                     prev_channel_count);
+		else
+			ena_xdp_exchange_program_rx_in_range(adapter,
+			                                     adapter->xdp_bpf_prog,
+			                                     prev_channel_count,
+			                                     new_channel_count);
+	}
+
 	/* We need to destroy the rss table so that the indirection
 	 * table will be reinitialized by ena_up()
 	 */
 	ena_com_rss_destroy(ena_dev);
-	ena_init_io_rings(adapter);
+	ena_init_io_rings(adapter,
+	                  0,
+	                  adapter->xdp_num_queues +
+	                  adapter->num_io_queues);
 	return dev_was_up ? ena_open(adapter->netdev) : 0;
 }
···
 	tx_info->skb = NULL;

 	tx_info->num_of_bufs += i;
-	ena_unmap_tx_skb(tx_ring, tx_info);
+	ena_unmap_tx_buff(tx_ring, tx_info);

 	return -EINVAL;
 }
···
 	struct netdev_queue *txq;
 	void *push_hdr;
 	u16 next_to_use, req_id, header_len;
-	int qid, rc, nb_hw_desc;
+	int qid, rc;

 	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
 	/* Determine which tx ring we will be placed on */
···
 	/* set flags and meta data */
 	ena_tx_csum(&ena_tx_ctx, skb);

-	if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) {
-		netif_dbg(adapter, tx_queued, dev,
-		          "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
-		          qid);
-		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
-	}
-
-	/* prepare the packet's descriptors to dma engine */
-	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
-	                        &nb_hw_desc);
-
-	/* ena_com_prepare_tx() can't fail due to overflow of tx queue,
-	 * since the number of free descriptors in the queue is checked
-	 * after sending the previous packet. In case there isn't enough
-	 * space in the queue for the next packet, it is stopped
-	 * until there is again enough available space in the queue.
-	 * All other failure reasons of ena_com_prepare_tx() are fatal
-	 * and therefore require a device reset.
-	 */
-	if (unlikely(rc)) {
-		netif_err(adapter, tx_queued, dev,
-		          "failed to prepare tx bufs\n");
-		u64_stats_update_begin(&tx_ring->syncp);
-		tx_ring->tx_stats.prepare_ctx_err++;
-		u64_stats_update_end(&tx_ring->syncp);
-		adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
-		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	rc = ena_xmit_common(dev,
+	                     tx_ring,
+	                     tx_info,
+	                     &ena_tx_ctx,
+	                     next_to_use,
+	                     skb->len);
+	if (rc)
 		goto error_unmap_dma;
-	}

 	netdev_tx_sent_queue(txq, skb->len);
-
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->tx_stats.cnt++;
-	tx_ring->tx_stats.bytes += skb->len;
-	u64_stats_update_end(&tx_ring->syncp);
-
-	tx_info->tx_descs = nb_hw_desc;
-	tx_info->last_jiffies = jiffies;
-	tx_info->print_once = 0;
-
-	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
-	                                            tx_ring->ring_size);

 	/* stop the queue when no more space available, the packet can have up
 	 * to sgl_size + 2. one for the meta descriptor and one for header
···
 	return NETDEV_TX_OK;

 error_unmap_dma:
-	ena_unmap_tx_skb(tx_ring, tx_info);
+	ena_unmap_tx_buff(tx_ring, tx_info);
 	tx_info->skb = NULL;

 error_drop_packet:
···
 	.ndo_change_mtu		= ena_change_mtu,
 	.ndo_set_mac_address	= NULL,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_bpf		= ena_xdp,
 };

 static int ena_device_validate_params(struct ena_adapter *adapter,
···
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
 	int i, budget, rc;
+	int io_queue_count;

+	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
 	/* Make sure the driver doesn't turn the device in other process */
 	smp_rmb();
···
 	budget = ENA_MONITORED_TX_QUEUES;

-	for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) {
+	for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
 		tx_ring = &adapter->tx_ring[i];
 		rx_ring = &adapter->rx_ring[i];
···
 		if (unlikely(rc))
 			return;

-		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
+		rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
 		if (unlikely(rc))
 			return;
···
 			break;
 	}

-	adapter->last_monitored_tx_qid = i % adapter->num_io_queues;
+	adapter->last_monitored_tx_qid = i % io_queue_count;
 }

 /* trigger napi schedule after 2 consecutive detections */
···
 	adapter->num_io_queues = max_num_io_queues;
 	adapter->max_num_io_queues = max_num_io_queues;

+	adapter->xdp_first_ring = 0;
+	adapter->xdp_num_queues = 0;
+
 	adapter->last_monitored_tx_qid = 0;

 	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
···
 		          "Failed to query interrupt moderation feature\n");
 		goto err_netdev_destroy;
 	}
-	ena_init_io_rings(adapter);
+	ena_init_io_rings(adapter,
+	                  0,
+	                  adapter->xdp_num_queues +
+	                  adapter->num_io_queues);

 	netdev->netdev_ops = &ena_netdev_ops;
 	netdev->watchdog_timeo = TX_TIMEOUT;
drivers/net/ethernet/amazon/ena/ena_netdev.h  (+73 -0)
···
 #include <linux/bitops.h>
 #include <linux/dim.h>
 #include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
···
 #define ENA_MMIO_DISABLE_REG_READ	BIT(0)

+/* The max MTU size is configured to be the ethernet frame size without
+ * the overhead of the ethernet header, which can have a VLAN header, and
+ * a frame check sequence (FCS).
+ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
+ */
+
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
+			 VLAN_HLEN - XDP_PACKET_HEADROOM)
+
+#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
+	((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
+
 struct ena_irq {
 	irq_handler_t handler;
 	void *data;
···
 	struct napi_struct napi ____cacheline_aligned;
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
+	struct ena_ring *xdp_ring;
+	bool first_interrupt;
 	u32 qid;
 	struct dim dim;
 };
···
 	u32 tx_descs;
 	/* num of buffers used by this skb */
 	u32 num_of_bufs;
+
+	/* XDP buffer structure which is used for sending packets in
+	 * the xdp queues
+	 */
+	struct xdp_frame *xdpf;
+	/* The rx page for the rx buffer that was received in rx and
+	 * re transmitted on xdp tx queues as a result of XDP_TX action.
+	 * We need to free the page once we finished cleaning the buffer in
+	 * clean_xdp_irq()
+	 */
+	struct page *xdp_rx_page;

 	/* Indicate if bufs[0] map the linear data of the skb.
 	 */
 	u8 map_linear_data;
···
 	struct ena_adapter *adapter;
 	struct ena_com_io_cq *ena_com_io_cq;
 	struct ena_com_io_sq *ena_com_io_sq;
+	struct bpf_prog *xdp_bpf_prog;
+	struct xdp_rxq_info xdp_rxq;

 	u16 next_to_use;
 	u16 next_to_clean;
 	u16 rx_copybreak;
+	u16 rx_headroom;
 	u16 qid;
 	u16 mtu;
 	u16 sgl_size;
···
 	u32 last_monitored_tx_qid;

 	enum ena_regs_reset_reason_types reset_reason;
+
+	struct bpf_prog *xdp_bpf_prog;
+	u32 xdp_first_ring;
+	u32 xdp_num_queues;
 };

 void ena_set_ethtool_ops(struct net_device *netdev);
···
 int ena_update_queue_sizes(struct ena_adapter *adapter,
                            u32 new_tx_size,
                            u32 new_rx_size);
+
 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);

 int ena_get_sset_count(struct net_device *netdev, int sset);
+
+enum ena_xdp_errors_t {
+	ENA_XDP_ALLOWED = 0,
+	ENA_XDP_CURRENT_MTU_TOO_LARGE,
+	ENA_XDP_NO_ENOUGH_QUEUES,
+};
+
+static inline bool ena_xdp_queues_present(struct ena_adapter *adapter)
+{
+	return adapter->xdp_first_ring != 0;
+}
+
+static inline bool ena_xdp_present(struct ena_adapter *adapter)
+{
+	return !!adapter->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+{
+	return !!ring->xdp_bpf_prog;
+}
+
+static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter,
+                                            u32 queues)
+{
+	return 2 * queues <= adapter->max_num_io_queues;
+}
+
+static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
+{
+	enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
+
+	if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
+		rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
+	else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+		rc = ENA_XDP_NO_ENOUGH_QUEUES;
+
+	return rc;
+}

 #endif /* !(ENA_H) */
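
For completeness, a hypothetical user-space loader (again, not part of this series)
that exercises the new .ndo_bpf callback: it loads a compiled XDP object and attaches
it in native driver mode, which reaches ena_xdp() with XDP_SETUP_PROG. This sketch
assumes libbpf >= 1.0 semantics (NULL on error) and its bpf_xdp_attach() helper;
rejections such as the MTU or queue-count checks in ena_xdp_set() come back over
netlink along with the extack messages set there.

/* Hypothetical loader sketch, assuming libbpf >= 1.0. */
#include <bpf/libbpf.h>
#include <linux/if_link.h>	/* XDP_FLAGS_DRV_MODE */
#include <net/if.h>
#include <stdio.h>

int main(int argc, char **argv)
{
        struct bpf_object *obj;
        struct bpf_program *prog;
        int ifindex, err;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <iface> <prog.o>\n", argv[0]);
                return 1;
        }

        ifindex = if_nametoindex(argv[1]);
        if (!ifindex) {
                perror("if_nametoindex");
                return 1;
        }

        obj = bpf_object__open_file(argv[2], NULL);
        if (!obj)
                return 1;
        if (bpf_object__load(obj))
                return 1;

        prog = bpf_object__next_program(obj, NULL);
        if (!prog)
                return 1;

        /* XDP_FLAGS_DRV_MODE requests native XDP, which is what this
         * patchset implements for ena; driver rejections surface here
         * via netlink extack.
         */
        err = bpf_xdp_attach(ifindex, bpf_program__fd(prog),
                             XDP_FLAGS_DRV_MODE, NULL);
        if (err) {
                fprintf(stderr, "attach failed: %d\n", err);
                return 1;
        }
        return 0;
}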