Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ibmvnic: Increase max subcrq indirect entries with fallback

POWER8 supports a maximum of 16 subcrq indirect descriptor entries per
H_SEND_SUB_CRQ_INDIRECT call, while POWER9 and newer hypervisors
support up to 128 entries. Increasing the max number of indirect
descriptor entries improves batching efficiency and reduces
hcall overhead, which enhances throughput under large workloads on POWER9+.

Currently, ibmvnic driver always uses a fixed number of max indirect
descriptor entries (16). send_subcrq_indirect() treats all hypervisor
errors the same:
- Cleanup and Drop the entire batch of descriptors.
- Return an error to the caller.
- Rely on TCP/IP retransmissions to recover.
- If the hypervisor returns H_PARAMETER (e.g., because 128
entries are not supported on POWER8), the driver will continue
to drop batches, resulting in unnecessary packet loss.

In this patch:
Raise the default maximum indirect entries to 128 to improve ibmvnic
batching on modern platforms, while also gracefully falling back to
16 entries on POWER8 systems.

Since there is no VIO interface to query the hypervisor’s supported
limit, vnic handles send_subcrq_indirect() H_PARAMETER errors:
- On first H_PARAMETER failure, log the failure context
- Reduce max_indirect_entries to 16 and allow the single batch to drop.
- Subsequent calls automatically use the correct lower limit,
avoiding repeated drops.

The goal is to optimize performance on modern systems while gracefully
falling back for older POWER8 hypervisors.

Performance testing shows a 40% improvement with MTU 1500 under large workloads.

Signed-off-by: Mingming Cao <mmc@linux.ibm.com>
Reviewed-by: Brian King <bjking1@linux.ibm.com>
Reviewed-by: Haren Myneni <haren@linux.ibm.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250821130215.97960-1-mmc@linux.ibm.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Mingming Cao and committed by
Paolo Abeni
3c149179 07ca488d

+56 -9
+52 -7
drivers/net/ethernet/ibm/ibmvnic.c
··· 756 756 adapter->rx_pool[i].active = 0; 757 757 } 758 758 759 + static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) 760 + { 761 + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { 762 + netdev_info(adapter->netdev, 763 + "set max ind descs from %u to safe limit %u\n", 764 + adapter->cur_max_ind_descs, 765 + IBMVNIC_SAFE_IND_DESC); 766 + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; 767 + } 768 + } 769 + 759 770 static void replenish_rx_pool(struct ibmvnic_adapter *adapter, 760 771 struct ibmvnic_rx_pool *pool) 761 772 { ··· 854 843 sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); 855 844 856 845 /* if send_subcrq_indirect queue is full, flush to VIOS */ 857 - if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || 846 + if (ind_bufp->index == adapter->cur_max_ind_descs || 858 847 i == count - 1) { 859 848 lpar_rc = 860 849 send_subcrq_indirect(adapter, handle, ··· 873 862 failure: 874 863 if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) 875 864 dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); 865 + 866 + /* Detect platform limit H_PARAMETER */ 867 + if (lpar_rc == H_PARAMETER) 868 + ibmvnic_set_safe_max_ind_descs(adapter); 869 + 870 + /* For all error case, temporarily drop only this batch 871 + * Rely on TCP/IP retransmissions to retry and recover 872 + */ 876 873 for (i = ind_bufp->index - 1; i >= 0; --i) { 877 874 struct ibmvnic_rx_buff *rx_buff; 878 875 ··· 2400 2381 rc = send_subcrq_direct(adapter, handle, 2401 2382 (u64 *)ind_bufp->indir_arr); 2402 2383 2403 - if (rc) 2384 + if (rc) { 2385 + dev_err_ratelimited(&adapter->vdev->dev, 2386 + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", 2387 + rc, entries, &dma_addr, handle); 2388 + /* Detect platform limit H_PARAMETER */ 2389 + if (rc == H_PARAMETER) 2390 + ibmvnic_set_safe_max_ind_descs(adapter); 2391 + 2392 + /* For all error case, temporarily drop only this batch 2393 + * Rely on TCP/IP retransmissions to retry and recover 
2394 + */ 2404 2395 ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); 2405 - else 2396 + } else { 2406 2397 ind_bufp->index = 0; 2398 + } 2407 2399 return rc; 2408 2400 } 2409 2401 2410 2402 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) 2411 2403 { 2412 2404 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2405 + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; 2413 2406 int queue_num = skb_get_queue_mapping(skb); 2414 2407 u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; 2415 2408 struct device *dev = &adapter->vdev->dev; ··· 2621 2590 tx_crq.v1.n_crq_elem = num_entries; 2622 2591 tx_buff->num_entries = num_entries; 2623 2592 /* flush buffer if current entry can not fit */ 2624 - if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { 2593 + if (num_entries + ind_bufp->index > cur_max_ind_descs) { 2625 2594 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); 2626 2595 if (lpar_rc != H_SUCCESS) 2627 2596 goto tx_flush_err; ··· 2634 2603 ind_bufp->index += num_entries; 2635 2604 if (__netdev_tx_sent_queue(txq, skb->len, 2636 2605 netdev_xmit_more() && 2637 - ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { 2606 + ind_bufp->index < cur_max_ind_descs)) { 2638 2607 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); 2639 2608 if (lpar_rc != H_SUCCESS) 2640 2609 goto tx_err; ··· 4037 4006 } 4038 4007 4039 4008 dma_free_coherent(dev, 4040 - IBMVNIC_IND_ARR_SZ, 4009 + IBMVNIC_IND_MAX_ARR_SZ, 4041 4010 scrq->ind_buf.indir_arr, 4042 4011 scrq->ind_buf.indir_dma); 4043 4012 ··· 4094 4063 4095 4064 scrq->ind_buf.indir_arr = 4096 4065 dma_alloc_coherent(dev, 4097 - IBMVNIC_IND_ARR_SZ, 4066 + IBMVNIC_IND_MAX_ARR_SZ, 4098 4067 &scrq->ind_buf.indir_dma, 4099 4068 GFP_KERNEL); 4100 4069 ··· 6400 6369 rc = reset_sub_crq_queues(adapter); 6401 6370 } 6402 6371 } else { 6372 + if (adapter->reset_reason == VNIC_RESET_MOBILITY) { 6373 + /* After an LPM, reset the max number of indirect 6374 + * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT 
6375 + * hcall to the default max (e.g POWER8 -> POWER10) 6376 + * 6377 + * If the new destination platform does not support 6378 + * the higher limit max (e.g. POWER10-> POWER8 LPM) 6379 + * H_PARAMETER will trigger automatic fallback to the 6380 + * safe minimum limit. 6381 + */ 6382 + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; 6383 + } 6384 + 6403 6385 rc = init_sub_crqs(adapter); 6404 6386 } 6405 6387 ··· 6564 6520 6565 6521 adapter->wait_for_reset = false; 6566 6522 adapter->last_reset_time = jiffies; 6523 + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; 6567 6524 6568 6525 rc = register_netdev(netdev); 6569 6526 if (rc) {
+4 -2
drivers/net/ethernet/ibm/ibmvnic.h
··· 29 29 #define IBMVNIC_BUFFS_PER_POOL 100 30 30 #define IBMVNIC_MAX_QUEUES 16 31 31 #define IBMVNIC_MAX_QUEUE_SZ 4096 32 - #define IBMVNIC_MAX_IND_DESCS 16 33 - #define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) 32 + #define IBMVNIC_MAX_IND_DESCS 128 33 + #define IBMVNIC_SAFE_IND_DESC 16 34 + #define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) 34 35 35 36 #define IBMVNIC_TSO_BUF_SZ 65536 36 37 #define IBMVNIC_TSO_BUFS 64 ··· 931 930 struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; 932 931 dma_addr_t ip_offload_ctrl_tok; 933 932 u32 msg_enable; 933 + u32 cur_max_ind_descs; 934 934 935 935 /* Vital Product Data (VPD) */ 936 936 struct ibmvnic_vpd *vpd;