Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'gve-jumbo-frame'

Jeroen de Borst says:

====================
gve: Add jumbo-frame support for GQ

This patchset introduces jumbo-frame support for the GQ queue format.
The device already supports jumbo-frames on TX. This introduces
multi-descriptor RX packets using a packet continuation bit.

A widely deployed driver has a bug which causes it to fail to load
when an MTU greater than 2048 bytes is configured. A jumbo-frame device
option is introduced to pass a jumbo-frame MTU only to drivers that
support it.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+402 -165
+18 -3
drivers/net/ethernet/google/gve/gve.h
··· 142 142 s16 tail; 143 143 }; 144 144 145 + /* A single received packet split across multiple buffers may be 146 + * reconstructed using the information in this structure. 147 + */ 148 + struct gve_rx_ctx { 149 + /* head and tail of skb chain for the current packet or NULL if none */ 150 + struct sk_buff *skb_head; 151 + struct sk_buff *skb_tail; 152 + u16 total_expected_size; 153 + u8 expected_frag_cnt; 154 + u8 curr_frag_cnt; 155 + u8 reuse_frags; 156 + }; 157 + 145 158 /* Contains datapath state used to represent an RX queue. */ 146 159 struct gve_rx_ring { 147 160 struct gve_priv *gve; ··· 166 153 167 154 /* threshold for posting new buffs and descs */ 168 155 u32 db_threshold; 156 + u16 packet_buffer_size; 169 157 }; 170 158 171 159 /* DQO fields. */ ··· 214 200 u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ 215 201 u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ 216 202 u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ 203 + u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ 204 + u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ 205 + u64 rx_frag_copy_cnt; /* free-running count of rx segments copied into skb linear portion */ 217 206 u32 q_num; /* queue index */ 218 207 u32 ntfy_id; /* notification block index */ 219 208 struct gve_queue_resources *q_resources; /* head and tail pointer idx */ 220 209 dma_addr_t q_resources_bus; /* dma address for the queue resources */ 221 210 struct u64_stats_sync statss; /* sync stats for 32bit archs */ 222 211 223 - /* head and tail of skb chain for the current packet or NULL if none */ 224 - struct sk_buff *skb_head; 225 - struct sk_buff *skb_tail; 212 + struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */ 226 213 }; 227 214 228 215 /* A TX desc ring entry */
+55 -4
drivers/net/ethernet/google/gve/gve_adminq.c
··· 38 38 struct gve_device_option *option, 39 39 struct gve_device_option_gqi_rda **dev_op_gqi_rda, 40 40 struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, 41 - struct gve_device_option_dqo_rda **dev_op_dqo_rda) 41 + struct gve_device_option_dqo_rda **dev_op_dqo_rda, 42 + struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) 42 43 { 43 44 u32 req_feat_mask = be32_to_cpu(option->required_features_mask); 44 45 u16 option_length = be16_to_cpu(option->option_length); ··· 112 111 } 113 112 *dev_op_dqo_rda = (void *)(option + 1); 114 113 break; 114 + case GVE_DEV_OPT_ID_JUMBO_FRAMES: 115 + if (option_length < sizeof(**dev_op_jumbo_frames) || 116 + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) { 117 + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, 118 + "Jumbo Frames", 119 + (int)sizeof(**dev_op_jumbo_frames), 120 + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES, 121 + option_length, req_feat_mask); 122 + break; 123 + } 124 + 125 + if (option_length > sizeof(**dev_op_jumbo_frames)) { 126 + dev_warn(&priv->pdev->dev, 127 + GVE_DEVICE_OPTION_TOO_BIG_FMT, 128 + "Jumbo Frames"); 129 + } 130 + *dev_op_jumbo_frames = (void *)(option + 1); 131 + break; 115 132 default: 116 133 /* If we don't recognize the option just continue 117 134 * without doing anything. 
··· 145 126 struct gve_device_descriptor *descriptor, 146 127 struct gve_device_option_gqi_rda **dev_op_gqi_rda, 147 128 struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, 148 - struct gve_device_option_dqo_rda **dev_op_dqo_rda) 129 + struct gve_device_option_dqo_rda **dev_op_dqo_rda, 130 + struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) 149 131 { 150 132 const int num_options = be16_to_cpu(descriptor->num_device_options); 151 133 struct gve_device_option *dev_opt; ··· 166 146 167 147 gve_parse_device_option(priv, descriptor, dev_opt, 168 148 dev_op_gqi_rda, dev_op_gqi_qpl, 169 - dev_op_dqo_rda); 149 + dev_op_dqo_rda, dev_op_jumbo_frames); 170 150 dev_opt = next_opt; 171 151 } 172 152 ··· 550 530 cpu_to_be64(rx->data.data_bus), 551 531 cmd.create_rx_queue.index = cpu_to_be32(queue_index); 552 532 cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); 533 + cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); 553 534 } else { 554 535 cmd.create_rx_queue.rx_ring_size = 555 536 cpu_to_be16(priv->rx_desc_cnt); ··· 681 660 return 0; 682 661 } 683 662 663 + static void gve_enable_supported_features(struct gve_priv *priv, 664 + u32 supported_features_mask, 665 + const struct gve_device_option_jumbo_frames 666 + *dev_op_jumbo_frames) 667 + { 668 + /* Before control reaches this point, the page-size-capped max MTU from 669 + * the gve_device_descriptor field has already been stored in 670 + * priv->dev->max_mtu. We overwrite it with the true max MTU below. 
671 + */ 672 + if (dev_op_jumbo_frames && 673 + (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) { 674 + dev_info(&priv->pdev->dev, 675 + "JUMBO FRAMES device option enabled.\n"); 676 + priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu); 677 + } 678 + } 679 + 684 680 int gve_adminq_describe_device(struct gve_priv *priv) 685 681 { 682 + struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; 686 683 struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; 687 684 struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; 688 685 struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; 689 686 struct gve_device_descriptor *descriptor; 687 + u32 supported_features_mask = 0; 690 688 union gve_adminq_command cmd; 691 689 dma_addr_t descriptor_bus; 692 690 int err = 0; ··· 729 689 goto free_device_descriptor; 730 690 731 691 err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, 732 - &dev_op_gqi_qpl, &dev_op_dqo_rda); 692 + &dev_op_gqi_qpl, &dev_op_dqo_rda, 693 + &dev_op_jumbo_frames); 733 694 if (err) 734 695 goto free_device_descriptor; 735 696 ··· 745 704 priv->queue_format = GVE_DQO_RDA_FORMAT; 746 705 dev_info(&priv->pdev->dev, 747 706 "Driver is running with DQO RDA queue format.\n"); 707 + supported_features_mask = 708 + be32_to_cpu(dev_op_dqo_rda->supported_features_mask); 748 709 } else if (dev_op_gqi_rda) { 749 710 priv->queue_format = GVE_GQI_RDA_FORMAT; 750 711 dev_info(&priv->pdev->dev, 751 712 "Driver is running with GQI RDA queue format.\n"); 713 + supported_features_mask = 714 + be32_to_cpu(dev_op_gqi_rda->supported_features_mask); 752 715 } else { 753 716 priv->queue_format = GVE_GQI_QPL_FORMAT; 717 + if (dev_op_gqi_qpl) 718 + supported_features_mask = 719 + be32_to_cpu(dev_op_gqi_qpl->supported_features_mask); 754 720 dev_info(&priv->pdev->dev, 755 721 "Driver is running with GQI QPL queue format.\n"); 756 722 } ··· 793 745 priv->rx_desc_cnt = priv->rx_data_slot_cnt; 794 746 } 795 747 priv->default_num_queues = 
be16_to_cpu(descriptor->default_num_queues); 748 + 749 + gve_enable_supported_features(priv, supported_features_mask, 750 + dev_op_jumbo_frames); 796 751 797 752 free_device_descriptor: 798 753 dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
+14
drivers/net/ethernet/google/gve/gve_adminq.h
··· 108 108 109 109 static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); 110 110 111 + struct gve_device_option_jumbo_frames { 112 + __be32 supported_features_mask; 113 + __be16 max_mtu; 114 + u8 padding[2]; 115 + }; 116 + 117 + static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); 118 + 111 119 /* Terminology: 112 120 * 113 121 * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA ··· 129 121 GVE_DEV_OPT_ID_GQI_RDA = 0x2, 130 122 GVE_DEV_OPT_ID_GQI_QPL = 0x3, 131 123 GVE_DEV_OPT_ID_DQO_RDA = 0x4, 124 + GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, 132 125 }; 133 126 134 127 enum gve_dev_opt_req_feat_mask { ··· 137 128 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, 138 129 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, 139 130 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, 131 + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, 132 + }; 133 + 134 + enum gve_sup_feature_mask { 135 + GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, 140 136 }; 141 137 142 138 #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
+7 -6
drivers/net/ethernet/google/gve/gve_desc.h
··· 90 90 91 91 /* GVE Recive Packet Descriptor Flags */ 92 92 #define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x))) 93 - #define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */ 94 - #define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */ 95 - #define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */ 96 - #define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */ 97 - #define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */ 98 - #define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */ 93 + #define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */ 94 + #define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */ 95 + #define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */ 96 + #define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */ 97 + #define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */ 98 + #define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */ 99 + #define GVE_RXF_PKT_CONT GVE_RXFLG(10) /* Multi Fragment RX packet */ 99 100 100 101 /* GVE IRQ */ 101 102 #define GVE_IRQ_ACK BIT(31)
+4
drivers/net/ethernet/google/gve/gve_ethtool.c
··· 43 43 44 44 static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { 45 45 "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]", 46 + "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", 46 47 "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", 47 48 "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", 48 49 "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", ··· 266 265 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 267 266 start)); 268 267 data[i++] = tmp_rx_bytes; 268 + data[i++] = rx->rx_cont_packet_cnt; 269 + data[i++] = rx->rx_frag_flip_cnt; 270 + data[i++] = rx->rx_frag_copy_cnt; 269 271 /* rx dropped packets */ 270 272 data[i++] = tmp_rx_skb_alloc_fail + 271 273 tmp_rx_buf_alloc_fail +
-8
drivers/net/ethernet/google/gve/gve_main.c
··· 1371 1371 "Could not get device information: err=%d\n", err); 1372 1372 goto err; 1373 1373 } 1374 - if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) { 1375 - priv->dev->max_mtu = PAGE_SIZE; 1376 - err = gve_adminq_set_mtu(priv, priv->dev->mtu); 1377 - if (err) { 1378 - dev_err(&priv->pdev->dev, "Could not set mtu"); 1379 - goto err; 1380 - } 1381 - } 1382 1374 priv->dev->mtu = priv->dev->max_mtu; 1383 1375 num_ntfy = pci_msix_vec_count(priv->pdev); 1384 1376 if (num_ntfy <= 0) {
+249 -100
drivers/net/ethernet/google/gve/gve_rx.c
··· 143 143 return err; 144 144 } 145 145 146 + static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) 147 + { 148 + ctx->curr_frag_cnt = 0; 149 + ctx->total_expected_size = 0; 150 + ctx->expected_frag_cnt = 0; 151 + ctx->skb_head = NULL; 152 + ctx->skb_tail = NULL; 153 + ctx->reuse_frags = false; 154 + } 155 + 146 156 static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) 147 157 { 148 158 struct gve_rx_ring *rx = &priv->rx[idx]; ··· 219 209 rx->cnt = 0; 220 210 rx->db_threshold = priv->rx_desc_cnt / 2; 221 211 rx->desc.seqno = 1; 212 + 213 + /* Allocating half-page buffers allows page-flipping which is faster 214 + * than copying or allocating new pages. 215 + */ 216 + rx->packet_buffer_size = PAGE_SIZE / 2; 217 + gve_rx_ctx_clear(&rx->ctx); 222 218 gve_rx_add_to_block(priv, idx); 223 219 224 220 return 0; ··· 291 275 return PKT_HASH_TYPE_L2; 292 276 } 293 277 278 + static u16 gve_rx_ctx_padding(struct gve_rx_ctx *ctx) 279 + { 280 + return (ctx->curr_frag_cnt == 0) ? GVE_RX_PAD : 0; 281 + } 282 + 294 283 static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, 295 284 struct gve_rx_slot_page_info *page_info, 296 - u16 len) 285 + u16 packet_buffer_size, u16 len, 286 + struct gve_rx_ctx *ctx) 297 287 { 298 - struct sk_buff *skb = napi_get_frags(napi); 288 + u32 offset = page_info->page_offset + gve_rx_ctx_padding(ctx); 289 + struct sk_buff *skb; 299 290 300 - if (unlikely(!skb)) 291 + if (!ctx->skb_head) 292 + ctx->skb_head = napi_get_frags(napi); 293 + 294 + if (unlikely(!ctx->skb_head)) 301 295 return NULL; 302 296 303 - skb_add_rx_frag(skb, 0, page_info->page, 304 - page_info->page_offset + 305 - GVE_RX_PAD, len, PAGE_SIZE / 2); 297 + skb = ctx->skb_head; 298 + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page, 299 + offset, len, packet_buffer_size); 306 300 307 301 return skb; 308 302 } ··· 324 298 /* "flip" to other packet buffer on this page */ 325 299 page_info->page_offset ^= PAGE_SIZE / 2; 326 300 *(slot_addr) ^= offset; 327 - } 
328 - 329 - static bool gve_rx_can_flip_buffers(struct net_device *netdev) 330 - { 331 - return PAGE_SIZE >= 4096 332 - ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false; 333 301 } 334 302 335 303 static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info) ··· 345 325 gve_rx_raw_addressing(struct device *dev, struct net_device *netdev, 346 326 struct gve_rx_slot_page_info *page_info, u16 len, 347 327 struct napi_struct *napi, 348 - union gve_rx_data_slot *data_slot) 328 + union gve_rx_data_slot *data_slot, 329 + u16 packet_buffer_size, struct gve_rx_ctx *ctx) 349 330 { 350 - struct sk_buff *skb; 331 + struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx); 351 332 352 - skb = gve_rx_add_frags(napi, page_info, len); 353 333 if (!skb) 354 334 return NULL; 355 335 ··· 368 348 u16 len, struct napi_struct *napi, 369 349 union gve_rx_data_slot *data_slot) 370 350 { 351 + struct gve_rx_ctx *ctx = &rx->ctx; 371 352 struct sk_buff *skb; 372 353 373 354 /* if raw_addressing mode is not enabled gvnic can only receive into ··· 376 355 * choice is to copy the data out of it so that we can return it to the 377 356 * device. 378 357 */ 379 - if (page_info->can_flip) { 380 - skb = gve_rx_add_frags(napi, page_info, len); 358 + if (ctx->reuse_frags) { 359 + skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx); 381 360 /* No point in recycling if we didn't get the skb */ 382 361 if (skb) { 383 362 /* Make sure that the page isn't freed. 
*/ ··· 385 364 gve_rx_flip_buff(page_info, &data_slot->qpl_offset); 386 365 } 387 366 } else { 388 - skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD); 367 + const u16 padding = gve_rx_ctx_padding(ctx); 368 + 369 + skb = gve_rx_copy(netdev, napi, page_info, len, padding, ctx); 389 370 if (skb) { 390 371 u64_stats_update_begin(&rx->statss); 391 - rx->rx_copied_pkt++; 372 + rx->rx_frag_copy_cnt++; 392 373 u64_stats_update_end(&rx->statss); 393 374 } 394 375 } 395 376 return skb; 396 377 } 397 378 398 - static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, 399 - netdev_features_t feat, u32 idx) 379 + #define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x)) 380 + static u16 gve_rx_get_fragment_size(struct gve_rx_ctx *ctx, struct gve_rx_desc *desc) 400 381 { 382 + return be16_to_cpu(desc->len) - gve_rx_ctx_padding(ctx); 383 + } 384 + 385 + static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx) 386 + { 387 + bool qpl_mode = !rx->data.raw_addressing, packet_size_error = false; 388 + bool buffer_error = false, desc_error = false, seqno_error = false; 401 389 struct gve_rx_slot_page_info *page_info; 402 390 struct gve_priv *priv = rx->gve; 403 - struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 404 - struct net_device *dev = priv->dev; 405 - union gve_rx_data_slot *data_slot; 406 - struct sk_buff *skb = NULL; 407 - dma_addr_t page_bus; 408 - void *va; 409 - u16 len; 391 + u32 idx = rx->cnt & rx->mask; 392 + bool reuse_frags, can_flip; 393 + struct gve_rx_desc *desc; 394 + u16 packet_size = 0; 395 + u16 n_frags = 0; 396 + int recycle; 410 397 411 - /* Prefetch two packet pages ahead, we will need it soon. */ 412 - page_info = &rx->data.page_info[(idx + 2) & rx->mask]; 413 - va = page_info->page_address + GVE_RX_PAD + 414 - page_info->page_offset; 398 + /** In QPL mode, we only flip buffers when all buffers containing the packet 399 + * can be flipped. RDA can_flip decisions will be made later, per frag. 
400 + */ 401 + can_flip = qpl_mode; 402 + reuse_frags = can_flip; 403 + do { 404 + u16 frag_size; 415 405 416 - prefetch(page_info->page); /* Kernel page struct. */ 417 - prefetch(va); /* Packet header. */ 418 - prefetch(va + 64); /* Next cacheline too. */ 406 + n_frags++; 407 + desc = &rx->desc.desc_ring[idx]; 408 + desc_error = unlikely(desc->flags_seq & GVE_RXF_ERR) || desc_error; 409 + if (GVE_SEQNO(desc->flags_seq) != rx->desc.seqno) { 410 + seqno_error = true; 411 + netdev_warn(priv->dev, 412 + "RX seqno error: want=%d, got=%d, dropping packet and scheduling reset.", 413 + rx->desc.seqno, GVE_SEQNO(desc->flags_seq)); 414 + } 415 + frag_size = be16_to_cpu(desc->len); 416 + packet_size += frag_size; 417 + if (frag_size > rx->packet_buffer_size) { 418 + packet_size_error = true; 419 + netdev_warn(priv->dev, 420 + "RX fragment error: packet_buffer_size=%d, frag_size=%d, droping packet.", 421 + rx->packet_buffer_size, be16_to_cpu(desc->len)); 422 + } 423 + page_info = &rx->data.page_info[idx]; 424 + if (can_flip) { 425 + recycle = gve_rx_can_recycle_buffer(page_info); 426 + reuse_frags = reuse_frags && recycle > 0; 427 + buffer_error = buffer_error || unlikely(recycle < 0); 428 + } 429 + idx = (idx + 1) & rx->mask; 430 + rx->desc.seqno = gve_next_seqno(rx->desc.seqno); 431 + } while (GVE_PKTCONT_BIT_IS_SET(desc->flags_seq)); 419 432 420 - /* drop this packet */ 421 - if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) { 433 + prefetch(rx->desc.desc_ring + idx); 434 + 435 + ctx->curr_frag_cnt = 0; 436 + ctx->total_expected_size = packet_size - GVE_RX_PAD; 437 + ctx->expected_frag_cnt = n_frags; 438 + ctx->skb_head = NULL; 439 + ctx->reuse_frags = reuse_frags; 440 + 441 + if (ctx->expected_frag_cnt > 1) { 442 + u64_stats_update_begin(&rx->statss); 443 + rx->rx_cont_packet_cnt++; 444 + u64_stats_update_end(&rx->statss); 445 + } 446 + if (ctx->total_expected_size > priv->rx_copybreak && !ctx->reuse_frags && qpl_mode) { 447 + u64_stats_update_begin(&rx->statss); 448 + 
rx->rx_copied_pkt++; 449 + u64_stats_update_end(&rx->statss); 450 + } 451 + 452 + if (unlikely(buffer_error || seqno_error || packet_size_error)) { 453 + gve_schedule_reset(priv); 454 + return false; 455 + } 456 + 457 + if (unlikely(desc_error)) { 422 458 u64_stats_update_begin(&rx->statss); 423 459 rx->rx_desc_err_dropped_pkt++; 424 460 u64_stats_update_end(&rx->statss); 425 461 return false; 426 462 } 463 + return true; 464 + } 427 465 428 - len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; 429 - page_info = &rx->data.page_info[idx]; 466 + static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, 467 + struct gve_rx_slot_page_info *page_info, struct napi_struct *napi, 468 + u16 len, union gve_rx_data_slot *data_slot) 469 + { 470 + struct net_device *netdev = priv->dev; 471 + struct gve_rx_ctx *ctx = &rx->ctx; 472 + struct sk_buff *skb = NULL; 430 473 431 - data_slot = &rx->data.data_ring[idx]; 432 - page_bus = (rx->data.raw_addressing) ? 433 - be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK : 434 - rx->data.qpl->page_buses[idx]; 435 - dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, 436 - PAGE_SIZE, DMA_FROM_DEVICE); 437 - 438 - if (len <= priv->rx_copybreak) { 474 + if (len <= priv->rx_copybreak && ctx->expected_frag_cnt == 1) { 439 475 /* Just copy small packets */ 440 - skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD); 441 - u64_stats_update_begin(&rx->statss); 442 - rx->rx_copied_pkt++; 443 - rx->rx_copybreak_pkt++; 444 - u64_stats_update_end(&rx->statss); 476 + skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD, ctx); 477 + if (skb) { 478 + u64_stats_update_begin(&rx->statss); 479 + rx->rx_copied_pkt++; 480 + rx->rx_frag_copy_cnt++; 481 + rx->rx_copybreak_pkt++; 482 + } u64_stats_update_end(&rx->statss); 445 483 } else { 446 - u8 can_flip = gve_rx_can_flip_buffers(dev); 447 - int recycle = 0; 448 - 449 - if (can_flip) { 450 - recycle = gve_rx_can_recycle_buffer(page_info); 451 - if (recycle < 0) { 452 - if 
(!rx->data.raw_addressing) 453 - gve_schedule_reset(priv); 454 - return false; 455 - } 456 - } 457 - 458 - page_info->can_flip = can_flip && recycle; 459 484 if (rx->data.raw_addressing) { 460 - skb = gve_rx_raw_addressing(&priv->pdev->dev, dev, 485 + int recycle = gve_rx_can_recycle_buffer(page_info); 486 + 487 + if (unlikely(recycle < 0)) { 488 + gve_schedule_reset(priv); 489 + return NULL; 490 + } 491 + page_info->can_flip = recycle; 492 + if (page_info->can_flip) { 493 + u64_stats_update_begin(&rx->statss); 494 + rx->rx_frag_flip_cnt++; 495 + u64_stats_update_end(&rx->statss); 496 + } 497 + skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev, 461 498 page_info, len, napi, 462 - data_slot); 499 + data_slot, 500 + rx->packet_buffer_size, ctx); 463 501 } else { 464 - skb = gve_rx_qpl(&priv->pdev->dev, dev, rx, 502 + if (ctx->reuse_frags) { 503 + u64_stats_update_begin(&rx->statss); 504 + rx->rx_frag_flip_cnt++; 505 + u64_stats_update_end(&rx->statss); 506 + } 507 + skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx, 465 508 page_info, len, napi, data_slot); 466 509 } 467 510 } 511 + return skb; 512 + } 468 513 469 - if (!skb) { 470 - u64_stats_update_begin(&rx->statss); 471 - rx->rx_skb_alloc_fail++; 472 - u64_stats_update_end(&rx->statss); 473 - return false; 514 + static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, 515 + u64 *packet_size_bytes, u32 *work_done) 516 + { 517 + struct gve_rx_slot_page_info *page_info; 518 + struct gve_rx_ctx *ctx = &rx->ctx; 519 + union gve_rx_data_slot *data_slot; 520 + struct gve_priv *priv = rx->gve; 521 + struct gve_rx_desc *first_desc; 522 + struct sk_buff *skb = NULL; 523 + struct gve_rx_desc *desc; 524 + struct napi_struct *napi; 525 + dma_addr_t page_bus; 526 + u32 work_cnt = 0; 527 + void *va; 528 + u32 idx; 529 + u16 len; 530 + 531 + idx = rx->cnt & rx->mask; 532 + first_desc = &rx->desc.desc_ring[idx]; 533 + desc = first_desc; 534 + napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 535 + 536 + if 
(unlikely(!gve_rx_ctx_init(ctx, rx))) 537 + goto skb_alloc_fail; 538 + 539 + while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) { 540 + /* Prefetch two packet buffers ahead, we will need it soon. */ 541 + page_info = &rx->data.page_info[(idx + 2) & rx->mask]; 542 + va = page_info->page_address + page_info->page_offset; 543 + 544 + prefetch(page_info->page); /* Kernel page struct. */ 545 + prefetch(va); /* Packet header. */ 546 + prefetch(va + 64); /* Next cacheline too. */ 547 + 548 + len = gve_rx_get_fragment_size(ctx, desc); 549 + 550 + page_info = &rx->data.page_info[idx]; 551 + data_slot = &rx->data.data_ring[idx]; 552 + page_bus = rx->data.raw_addressing ? 553 + be64_to_cpu(data_slot->addr) - page_info->page_offset : 554 + rx->data.qpl->page_buses[idx]; 555 + dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE); 556 + 557 + skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot); 558 + if (!skb) { 559 + u64_stats_update_begin(&rx->statss); 560 + rx->rx_skb_alloc_fail++; 561 + u64_stats_update_end(&rx->statss); 562 + goto skb_alloc_fail; 563 + } 564 + 565 + ctx->curr_frag_cnt++; 566 + rx->cnt++; 567 + idx = rx->cnt & rx->mask; 568 + work_cnt++; 569 + desc = &rx->desc.desc_ring[idx]; 474 570 } 475 571 476 572 if (likely(feat & NETIF_F_RXCSUM)) { 477 573 /* NIC passes up the partial sum */ 478 - if (rx_desc->csum) 574 + if (first_desc->csum) 479 575 skb->ip_summed = CHECKSUM_COMPLETE; 480 576 else 481 577 skb->ip_summed = CHECKSUM_NONE; 482 - skb->csum = csum_unfold(rx_desc->csum); 578 + skb->csum = csum_unfold(first_desc->csum); 483 579 } 484 580 485 581 /* parse flags & pass relevant info up */ 486 582 if (likely(feat & NETIF_F_RXHASH) && 487 - gve_needs_rss(rx_desc->flags_seq)) 488 - skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash), 489 - gve_rss_type(rx_desc->flags_seq)); 583 + gve_needs_rss(first_desc->flags_seq)) 584 + skb_set_hash(skb, be32_to_cpu(first_desc->rss_hash), 585 + gve_rss_type(first_desc->flags_seq)); 490 586 587 
+ *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0); 588 + *work_done = work_cnt; 491 589 if (skb_is_nonlinear(skb)) 492 590 napi_gro_frags(napi); 493 591 else 494 592 napi_gro_receive(napi, skb); 593 + 594 + gve_rx_ctx_clear(ctx); 495 595 return true; 596 + 597 + skb_alloc_fail: 598 + if (napi->skb) 599 + napi_free_frags(napi); 600 + *packet_size_bytes = 0; 601 + *work_done = ctx->expected_frag_cnt; 602 + while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) { 603 + rx->cnt++; 604 + ctx->curr_frag_cnt++; 605 + } 606 + gve_rx_ctx_clear(ctx); 607 + return false; 496 608 } 497 609 498 610 bool gve_rx_work_pending(struct gve_rx_ring *rx) ··· 683 529 union gve_rx_data_slot *data_slot = 684 530 &rx->data.data_ring[idx]; 685 531 struct device *dev = &priv->pdev->dev; 686 - 687 532 gve_rx_free_buffer(dev, page_info, data_slot); 688 533 page_info->page = NULL; 689 534 if (gve_rx_alloc_buffer(priv, dev, page_info, ··· 703 550 static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, 704 551 netdev_features_t feat) 705 552 { 553 + u32 work_done = 0, total_packet_cnt = 0, ok_packet_cnt = 0; 706 554 struct gve_priv *priv = rx->gve; 707 - u32 work_done = 0, packets = 0; 555 + u32 idx = rx->cnt & rx->mask; 708 556 struct gve_rx_desc *desc; 709 - u32 cnt = rx->cnt; 710 - u32 idx = cnt & rx->mask; 711 557 u64 bytes = 0; 712 558 713 - desc = rx->desc.desc_ring + idx; 559 + desc = &rx->desc.desc_ring[idx]; 714 560 while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) && 715 561 work_done < budget) { 562 + u64 packet_size_bytes = 0; 563 + u32 work_cnt = 0; 716 564 bool dropped; 717 565 718 566 netif_info(priv, rx_status, priv->dev, ··· 724 570 rx->q_num, GVE_SEQNO(desc->flags_seq), 725 571 rx->desc.seqno); 726 572 727 - /* prefetch two descriptors ahead */ 728 - prefetch(rx->desc.desc_ring + ((cnt + 2) & rx->mask)); 729 - 730 - dropped = !gve_rx(rx, desc, feat, idx); 573 + dropped = !gve_rx(rx, feat, &packet_size_bytes, &work_cnt); 731 574 if (!dropped) { 732 - 
bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; 733 - packets++; 575 + bytes += packet_size_bytes; 576 + ok_packet_cnt++; 734 577 } 735 - cnt++; 736 - idx = cnt & rx->mask; 737 - desc = rx->desc.desc_ring + idx; 738 - rx->desc.seqno = gve_next_seqno(rx->desc.seqno); 739 - work_done++; 578 + total_packet_cnt++; 579 + idx = rx->cnt & rx->mask; 580 + desc = &rx->desc.desc_ring[idx]; 581 + work_done += work_cnt; 740 582 } 741 583 742 - if (!work_done && rx->fill_cnt - cnt > rx->db_threshold) 584 + if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold) 743 585 return 0; 744 586 745 587 if (work_done) { 746 588 u64_stats_update_begin(&rx->statss); 747 - rx->rpackets += packets; 589 + rx->rpackets += ok_packet_cnt; 748 590 rx->rbytes += bytes; 749 591 u64_stats_update_end(&rx->statss); 750 - rx->cnt = cnt; 751 592 } 752 593 753 594 /* restock ring slots */ 754 595 if (!rx->data.raw_addressing) { 755 596 /* In QPL mode buffs are refilled as the desc are processed */ 756 597 rx->fill_cnt += work_done; 757 - } else if (rx->fill_cnt - cnt <= rx->db_threshold) { 598 + } else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) { 758 599 /* In raw addressing mode buffs are only refilled if the avail 759 600 * falls below a threshold. 760 601 */ ··· 759 610 /* If we were not able to completely refill buffers, we'll want 760 611 * to schedule this queue for work again to refill buffers. 761 612 */ 762 - if (rx->fill_cnt - cnt <= rx->db_threshold) { 613 + if (rx->fill_cnt - rx->cnt <= rx->db_threshold) { 763 614 gve_rx_write_doorbell(priv, rx); 764 615 return budget; 765 616 } 766 617 } 767 618 768 619 gve_rx_write_doorbell(priv, rx); 769 - return work_done; 620 + return total_packet_cnt; 770 621 } 771 622 772 623 int gve_rx_poll(struct gve_notify_block *block, int budget)
+34 -34
drivers/net/ethernet/google/gve/gve_rx_dqo.c
··· 240 240 rx->dqo.bufq.mask = buffer_queue_slots - 1; 241 241 rx->dqo.complq.num_free_slots = completion_queue_slots; 242 242 rx->dqo.complq.mask = completion_queue_slots - 1; 243 - rx->skb_head = NULL; 244 - rx->skb_tail = NULL; 243 + rx->ctx.skb_head = NULL; 244 + rx->ctx.skb_tail = NULL; 245 245 246 246 rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4); 247 247 rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, ··· 467 467 468 468 static void gve_rx_free_skb(struct gve_rx_ring *rx) 469 469 { 470 - if (!rx->skb_head) 470 + if (!rx->ctx.skb_head) 471 471 return; 472 472 473 - dev_kfree_skb_any(rx->skb_head); 474 - rx->skb_head = NULL; 475 - rx->skb_tail = NULL; 473 + dev_kfree_skb_any(rx->ctx.skb_head); 474 + rx->ctx.skb_head = NULL; 475 + rx->ctx.skb_tail = NULL; 476 476 } 477 477 478 478 /* Chains multi skbs for single rx packet. ··· 483 483 u16 buf_len, struct gve_rx_ring *rx, 484 484 struct gve_priv *priv) 485 485 { 486 - int num_frags = skb_shinfo(rx->skb_tail)->nr_frags; 486 + int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags; 487 487 488 488 if (unlikely(num_frags == MAX_SKB_FRAGS)) { 489 489 struct sk_buff *skb; ··· 492 492 if (!skb) 493 493 return -1; 494 494 495 - skb_shinfo(rx->skb_tail)->frag_list = skb; 496 - rx->skb_tail = skb; 495 + skb_shinfo(rx->ctx.skb_tail)->frag_list = skb; 496 + rx->ctx.skb_tail = skb; 497 497 num_frags = 0; 498 498 } 499 - if (rx->skb_tail != rx->skb_head) { 500 - rx->skb_head->len += buf_len; 501 - rx->skb_head->data_len += buf_len; 502 - rx->skb_head->truesize += priv->data_buffer_size_dqo; 499 + if (rx->ctx.skb_tail != rx->ctx.skb_head) { 500 + rx->ctx.skb_head->len += buf_len; 501 + rx->ctx.skb_head->data_len += buf_len; 502 + rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; 503 503 } 504 504 505 - skb_add_rx_frag(rx->skb_tail, num_frags, 505 + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, 506 506 buf_state->page_info.page, 507 507 buf_state->page_info.page_offset, 508 508 buf_len, 
priv->data_buffer_size_dqo); ··· 556 556 buf_len, DMA_FROM_DEVICE); 557 557 558 558 /* Append to current skb if one exists. */ 559 - if (rx->skb_head) { 559 + if (rx->ctx.skb_head) { 560 560 if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx, 561 561 priv)) != 0) { 562 562 goto error; ··· 567 567 } 568 568 569 569 if (eop && buf_len <= priv->rx_copybreak) { 570 - rx->skb_head = gve_rx_copy(priv->dev, napi, 571 - &buf_state->page_info, buf_len, 0); 572 - if (unlikely(!rx->skb_head)) 570 + rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, 571 + &buf_state->page_info, buf_len, 0, NULL); 572 + if (unlikely(!rx->ctx.skb_head)) 573 573 goto error; 574 - rx->skb_tail = rx->skb_head; 574 + rx->ctx.skb_tail = rx->ctx.skb_head; 575 575 576 576 u64_stats_update_begin(&rx->statss); 577 577 rx->rx_copied_pkt++; ··· 583 583 return 0; 584 584 } 585 585 586 - rx->skb_head = napi_get_frags(napi); 587 - if (unlikely(!rx->skb_head)) 586 + rx->ctx.skb_head = napi_get_frags(napi); 587 + if (unlikely(!rx->ctx.skb_head)) 588 588 goto error; 589 - rx->skb_tail = rx->skb_head; 589 + rx->ctx.skb_tail = rx->ctx.skb_head; 590 590 591 - skb_add_rx_frag(rx->skb_head, 0, buf_state->page_info.page, 591 + skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, 592 592 buf_state->page_info.page_offset, buf_len, 593 593 priv->data_buffer_size_dqo); 594 594 gve_dec_pagecnt_bias(&buf_state->page_info); ··· 635 635 rx->gve->ptype_lut_dqo->ptypes[desc->packet_type]; 636 636 int err; 637 637 638 - skb_record_rx_queue(rx->skb_head, rx->q_num); 638 + skb_record_rx_queue(rx->ctx.skb_head, rx->q_num); 639 639 640 640 if (feat & NETIF_F_RXHASH) 641 - gve_rx_skb_hash(rx->skb_head, desc, ptype); 641 + gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype); 642 642 643 643 if (feat & NETIF_F_RXCSUM) 644 - gve_rx_skb_csum(rx->skb_head, desc, ptype); 644 + gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype); 645 645 646 646 /* RSC packets must set gso_size otherwise the TCP stack will complain 647 647 * that 
packets are larger than MTU. 648 648 */ 649 649 if (desc->rsc) { 650 - err = gve_rx_complete_rsc(rx->skb_head, desc, ptype); 650 + err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype); 651 651 if (err < 0) 652 652 return err; 653 653 } 654 654 655 - if (skb_headlen(rx->skb_head) == 0) 655 + if (skb_headlen(rx->ctx.skb_head) == 0) 656 656 napi_gro_frags(napi); 657 657 else 658 - napi_gro_receive(napi, rx->skb_head); 658 + napi_gro_receive(napi, rx->ctx.skb_head); 659 659 660 660 return 0; 661 661 } ··· 717 717 /* Free running counter of completed descriptors */ 718 718 rx->cnt++; 719 719 720 - if (!rx->skb_head) 720 + if (!rx->ctx.skb_head) 721 721 continue; 722 722 723 723 if (!compl_desc->end_of_packet) 724 724 continue; 725 725 726 726 work_done++; 727 - pkt_bytes = rx->skb_head->len; 727 + pkt_bytes = rx->ctx.skb_head->len; 728 728 /* The ethernet header (first ETH_HLEN bytes) is snipped off 729 729 * by eth_type_trans. 730 730 */ 731 - if (skb_headlen(rx->skb_head)) 731 + if (skb_headlen(rx->ctx.skb_head)) 732 732 pkt_bytes += ETH_HLEN; 733 733 734 734 /* gve_rx_complete_skb() will consume skb if successful */ ··· 741 741 } 742 742 743 743 bytes += pkt_bytes; 744 - rx->skb_head = NULL; 745 - rx->skb_tail = NULL; 744 + rx->ctx.skb_head = NULL; 745 + rx->ctx.skb_tail = NULL; 746 746 } 747 747 748 748 gve_rx_post_buffers_dqo(rx);
+20 -9
drivers/net/ethernet/google/gve/gve_utils.c
··· 50 50 51 51 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, 52 52 struct gve_rx_slot_page_info *page_info, u16 len, 53 - u16 pad) 53 + u16 padding, struct gve_rx_ctx *ctx) 54 54 { 55 - struct sk_buff *skb = napi_alloc_skb(napi, len); 56 - void *va = page_info->page_address + pad + 57 - page_info->page_offset; 55 + void *va = page_info->page_address + padding + page_info->page_offset; 56 + int skb_linear_offset = 0; 57 + bool set_protocol = false; 58 + struct sk_buff *skb; 58 59 59 - if (unlikely(!skb)) 60 - return NULL; 60 + if (ctx) { 61 + if (!ctx->skb_head) 62 + ctx->skb_head = napi_alloc_skb(napi, ctx->total_expected_size); 61 63 64 + if (unlikely(!ctx->skb_head)) 65 + return NULL; 66 + skb = ctx->skb_head; 67 + skb_linear_offset = skb->len; 68 + set_protocol = ctx->curr_frag_cnt == ctx->expected_frag_cnt - 1; 69 + } else { 70 + skb = napi_alloc_skb(napi, len); 71 + set_protocol = true; 72 + } 62 73 __skb_put(skb, len); 74 + skb_copy_to_linear_data_offset(skb, skb_linear_offset, va, len); 63 75 64 - skb_copy_to_linear_data(skb, va, len); 65 - 66 - skb->protocol = eth_type_trans(skb, dev); 76 + if (set_protocol) 77 + skb->protocol = eth_type_trans(skb, dev); 67 78 68 79 return skb; 69 80 }
+1 -1
drivers/net/ethernet/google/gve/gve_utils.h
··· 19 19 20 20 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, 21 21 struct gve_rx_slot_page_info *page_info, u16 len, 22 - u16 pad); 22 + u16 pad, struct gve_rx_ctx *ctx); 23 23 24 24 /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */ 25 25 void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);