Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'gve-stats-reporting-fixes'

Max Yuan says:

====================
gve: Stats reporting fixes

This series addresses two issues related to statistics in the gve driver.

The first patch fixes a memory corruption issue that occurs when resizing
the stats region during queue count changes. By allocating the maximum
possible size upfront and aligning offset calculations with the NIC,
we ensure stability and accuracy across reconfigurations.

The second patch fixes the 'rx_dropped' counter by no longer counting
buffer allocation failures and by adding XDP transmit and redirect errors,
giving a more accurate representation of dropped packets.
====================

Link: https://patch.msgid.link/20260202193925.3106272-1-hramamurthy@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+53 -28
+51 -26
drivers/net/ethernet/google/gve/gve_ethtool.c
··· 152 152 u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, 153 153 tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, 154 154 tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, 155 - tmp_tx_pkts, tmp_tx_bytes; 155 + tmp_tx_pkts, tmp_tx_bytes, 156 + tmp_xdp_tx_errors, tmp_xdp_redirect_errors; 156 157 u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, 157 158 rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, 158 - tx_dropped; 159 - int stats_idx, base_stats_idx, max_stats_idx; 159 + tx_dropped, xdp_tx_errors, xdp_redirect_errors; 160 + int rx_base_stats_idx, max_rx_stats_idx, max_tx_stats_idx; 161 + int stats_idx, stats_region_len, nic_stats_len; 160 162 struct stats *report_stats; 161 163 int *rx_qid_to_stats_idx; 162 164 int *tx_qid_to_stats_idx; ··· 200 198 for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, 201 199 rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, 202 200 rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, 201 + xdp_tx_errors = 0, xdp_redirect_errors = 0, 203 202 ring = 0; 204 203 ring < priv->rx_cfg.num_queues; ring++) { 205 204 if (priv->rx) { ··· 218 215 rx->rx_desc_err_dropped_pkt; 219 216 tmp_rx_hsplit_unsplit_pkt = 220 217 rx->rx_hsplit_unsplit_pkt; 218 + tmp_xdp_tx_errors = rx->xdp_tx_errors; 219 + tmp_xdp_redirect_errors = 220 + rx->xdp_redirect_errors; 221 221 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 222 222 start)); 223 223 rx_pkts += tmp_rx_pkts; ··· 230 224 rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; 231 225 rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; 232 226 rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; 227 + xdp_tx_errors += tmp_xdp_tx_errors; 228 + xdp_redirect_errors += tmp_xdp_redirect_errors; 233 229 } 234 230 } 235 231 for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; ··· 257 249 data[i++] = rx_bytes; 258 250 data[i++] = tx_bytes; 259 251 /* total rx dropped packets */ 260 - data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail 
+ 261 - rx_desc_err_dropped_pkt; 252 + data[i++] = rx_skb_alloc_fail + rx_desc_err_dropped_pkt + 253 + xdp_tx_errors + xdp_redirect_errors; 262 254 data[i++] = tx_dropped; 263 255 data[i++] = priv->tx_timeo_cnt; 264 256 data[i++] = rx_skb_alloc_fail; ··· 273 265 data[i++] = priv->stats_report_trigger_cnt; 274 266 i = GVE_MAIN_STATS_LEN; 275 267 276 - /* For rx cross-reporting stats, start from nic rx stats in report */ 277 - base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + 278 - GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; 279 - /* The boundary between driver stats and NIC stats shifts if there are 280 - * stopped queues. 281 - */ 282 - base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + 283 - NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; 284 - max_stats_idx = NIC_RX_STATS_REPORT_NUM * 285 - (priv->rx_cfg.num_queues - num_stopped_rxqs) + 286 - base_stats_idx; 268 + rx_base_stats_idx = 0; 269 + max_rx_stats_idx = 0; 270 + max_tx_stats_idx = 0; 271 + stats_region_len = priv->stats_report_len - 272 + sizeof(struct gve_stats_report); 273 + nic_stats_len = (NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + 274 + NIC_TX_STATS_REPORT_NUM * num_tx_queues) * sizeof(struct stats); 275 + if (unlikely((stats_region_len - 276 + nic_stats_len) % sizeof(struct stats))) { 277 + net_err_ratelimited("Starting index of NIC stats should be multiple of stats size"); 278 + } else { 279 + /* For rx cross-reporting stats, 280 + * start from nic rx stats in report 281 + */ 282 + rx_base_stats_idx = (stats_region_len - nic_stats_len) / 283 + sizeof(struct stats); 284 + /* The boundary between driver stats and NIC stats 285 + * shifts if there are stopped queues 286 + */ 287 + rx_base_stats_idx += NIC_RX_STATS_REPORT_NUM * 288 + num_stopped_rxqs + NIC_TX_STATS_REPORT_NUM * 289 + num_stopped_txqs; 290 + max_rx_stats_idx = NIC_RX_STATS_REPORT_NUM * 291 + (priv->rx_cfg.num_queues - num_stopped_rxqs) + 292 + rx_base_stats_idx; 293 + max_tx_stats_idx = 
NIC_TX_STATS_REPORT_NUM * 294 + (num_tx_queues - num_stopped_txqs) + 295 + max_rx_stats_idx; 296 + } 287 297 /* Preprocess the stats report for rx, map queue id to start index */ 288 298 skip_nic_stats = false; 289 - for (stats_idx = base_stats_idx; stats_idx < max_stats_idx; 299 + for (stats_idx = rx_base_stats_idx; stats_idx < max_rx_stats_idx; 290 300 stats_idx += NIC_RX_STATS_REPORT_NUM) { 291 301 u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name); 292 302 u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id); ··· 337 311 tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; 338 312 tmp_rx_desc_err_dropped_pkt = 339 313 rx->rx_desc_err_dropped_pkt; 314 + tmp_xdp_tx_errors = rx->xdp_tx_errors; 315 + tmp_xdp_redirect_errors = 316 + rx->xdp_redirect_errors; 340 317 } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 341 318 start)); 342 319 data[i++] = tmp_rx_bytes; ··· 350 321 data[i++] = rx->rx_frag_alloc_cnt; 351 322 /* rx dropped packets */ 352 323 data[i++] = tmp_rx_skb_alloc_fail + 353 - tmp_rx_buf_alloc_fail + 354 - tmp_rx_desc_err_dropped_pkt; 324 + tmp_rx_desc_err_dropped_pkt + 325 + tmp_xdp_tx_errors + 326 + tmp_xdp_redirect_errors; 355 327 data[i++] = rx->rx_copybreak_pkt; 356 328 data[i++] = rx->rx_copied_pkt; 357 329 /* stats from NIC */ ··· 384 354 i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS; 385 355 } 386 356 387 - /* For tx cross-reporting stats, start from nic tx stats in report */ 388 - base_stats_idx = max_stats_idx; 389 - max_stats_idx = NIC_TX_STATS_REPORT_NUM * 390 - (num_tx_queues - num_stopped_txqs) + 391 - max_stats_idx; 392 - /* Preprocess the stats report for tx, map queue id to start index */ 393 357 skip_nic_stats = false; 394 - for (stats_idx = base_stats_idx; stats_idx < max_stats_idx; 358 + /* NIC TX stats start right after NIC RX stats */ 359 + for (stats_idx = max_rx_stats_idx; stats_idx < max_tx_stats_idx; 395 360 stats_idx += NIC_TX_STATS_REPORT_NUM) { 396 361 u32 stat_name = 
be32_to_cpu(report_stats[stats_idx].stat_name); 397 362 u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id);
+2 -2
drivers/net/ethernet/google/gve/gve_main.c
··· 283 283 int tx_stats_num, rx_stats_num; 284 284 285 285 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * 286 - gve_num_tx_queues(priv); 286 + priv->tx_cfg.max_queues; 287 287 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * 288 - priv->rx_cfg.num_queues; 288 + priv->rx_cfg.max_queues; 289 289 priv->stats_report_len = struct_size(priv->stats_report, stats, 290 290 size_add(tx_stats_num, rx_stats_num)); 291 291 priv->stats_report =