Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: mana: Handle hardware recovery events when probing the device

When MANA is being probed, it's possible that the hardware is in recovery
mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC in the
middle of the probe. Detect such a condition and go through the recovery
service procedure.

Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Link: https://patch.msgid.link/1764193552-9712-1-git-send-email-longli@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Long Li and committed by
Jakub Kicinski
9bf66036 6ab57873

+170 -18
+159 -17
drivers/net/ethernet/microsoft/mana/gdma_main.c
··· 15 15 16 16 struct dentry *mana_debugfs_root; 17 17 18 + struct mana_dev_recovery { 19 + struct list_head list; 20 + struct pci_dev *pdev; 21 + enum gdma_eqe_type type; 22 + }; 23 + 24 + static struct mana_dev_recovery_work { 25 + struct list_head dev_list; 26 + struct delayed_work work; 27 + 28 + /* Lock for dev_list above */ 29 + spinlock_t lock; 30 + } mana_dev_recovery_work; 31 + 18 32 static u32 mana_gd_r32(struct gdma_context *g, u64 offset) 19 33 { 20 34 return readl(g->bar0_va + offset); ··· 401 387 402 388 #define MANA_SERVICE_PERIOD 10 403 389 390 + static void mana_serv_rescan(struct pci_dev *pdev) 391 + { 392 + struct pci_bus *parent; 393 + 394 + pci_lock_rescan_remove(); 395 + 396 + parent = pdev->bus; 397 + if (!parent) { 398 + dev_err(&pdev->dev, "MANA service: no parent bus\n"); 399 + goto out; 400 + } 401 + 402 + pci_stop_and_remove_bus_device(pdev); 403 + pci_rescan_bus(parent); 404 + 405 + out: 406 + pci_unlock_rescan_remove(); 407 + } 408 + 404 409 static void mana_serv_fpga(struct pci_dev *pdev) 405 410 { 406 411 struct pci_bus *bus, *parent; ··· 452 419 { 453 420 struct gdma_context *gc = pci_get_drvdata(pdev); 454 421 struct hw_channel_context *hwc; 422 + int ret; 455 423 456 424 if (!gc) { 457 - dev_err(&pdev->dev, "MANA service: no GC\n"); 425 + /* Perform PCI rescan on device if GC is not set up */ 426 + dev_err(&pdev->dev, "MANA service: GC not setup, rescanning\n"); 427 + mana_serv_rescan(pdev); 458 428 return; 459 429 } 460 430 ··· 476 440 477 441 msleep(MANA_SERVICE_PERIOD * 1000); 478 442 479 - mana_gd_resume(pdev); 443 + ret = mana_gd_resume(pdev); 444 + if (ret == -ETIMEDOUT || ret == -EPROTO) { 445 + /* Perform PCI rescan on device if we failed on HWC */ 446 + dev_err(&pdev->dev, "MANA service: resume failed, rescanning\n"); 447 + mana_serv_rescan(pdev); 448 + goto out; 449 + } 480 450 481 - dev_info(&pdev->dev, "MANA reset cycle completed\n"); 451 + if (ret) 452 + dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret); 
453 + else 454 + dev_info(&pdev->dev, "MANA reset cycle completed\n"); 482 455 483 456 out: 484 457 gc->in_service = false; ··· 499 454 enum gdma_eqe_type type; 500 455 }; 501 456 502 - static void mana_serv_func(struct work_struct *w) 457 + static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev) 503 458 { 504 - struct mana_serv_work *mns_wk; 505 - struct pci_dev *pdev; 506 - 507 - mns_wk = container_of(w, struct mana_serv_work, serv_work); 508 - pdev = mns_wk->pdev; 509 - 510 - if (!pdev) 511 - goto out; 512 - 513 - switch (mns_wk->type) { 459 + switch (type) { 514 460 case GDMA_EQE_HWC_FPGA_RECONFIG: 515 461 mana_serv_fpga(pdev); 516 462 break; ··· 511 475 break; 512 476 513 477 default: 514 - dev_err(&pdev->dev, "MANA service: unknown type %d\n", 515 - mns_wk->type); 478 + dev_err(&pdev->dev, "MANA service: unknown type %d\n", type); 516 479 break; 517 480 } 481 + } 518 482 519 - out: 483 + static void mana_recovery_delayed_func(struct work_struct *w) 484 + { 485 + struct mana_dev_recovery_work *work; 486 + struct mana_dev_recovery *dev; 487 + unsigned long flags; 488 + 489 + work = container_of(w, struct mana_dev_recovery_work, work.work); 490 + 491 + spin_lock_irqsave(&work->lock, flags); 492 + 493 + while (!list_empty(&work->dev_list)) { 494 + dev = list_first_entry(&work->dev_list, 495 + struct mana_dev_recovery, list); 496 + list_del(&dev->list); 497 + spin_unlock_irqrestore(&work->lock, flags); 498 + 499 + mana_do_service(dev->type, dev->pdev); 500 + pci_dev_put(dev->pdev); 501 + kfree(dev); 502 + 503 + spin_lock_irqsave(&work->lock, flags); 504 + } 505 + 506 + spin_unlock_irqrestore(&work->lock, flags); 507 + } 508 + 509 + static void mana_serv_func(struct work_struct *w) 510 + { 511 + struct mana_serv_work *mns_wk; 512 + struct pci_dev *pdev; 513 + 514 + mns_wk = container_of(w, struct mana_serv_work, serv_work); 515 + pdev = mns_wk->pdev; 516 + 517 + if (pdev) 518 + mana_do_service(mns_wk->type, pdev); 519 + 520 520 pci_dev_put(pdev); 
521 521 kfree(mns_wk); 522 522 module_put(THIS_MODULE); ··· 612 540 case GDMA_EQE_HWC_FPGA_RECONFIG: 613 541 case GDMA_EQE_HWC_RESET_REQUEST: 614 542 dev_info(gc->dev, "Recv MANA service type:%d\n", type); 543 + 544 + if (!test_and_set_bit(GC_PROBE_SUCCEEDED, &gc->flags)) { 545 + /* 546 + * Device is in probe and we received a hardware reset 547 + * event, the probe function will detect that the flag 548 + * has changed and perform service procedure. 549 + */ 550 + dev_info(gc->dev, 551 + "Service is to be processed in probe\n"); 552 + break; 553 + } 615 554 616 555 if (gc->in_service) { 617 556 dev_info(gc->dev, "Already in service\n"); ··· 2021 1938 if (err) 2022 1939 goto cleanup_mana; 2023 1940 1941 + /* 1942 + * If a hardware reset event has occurred over HWC during probe, 1943 + * rollback and perform hardware reset procedure. 1944 + */ 1945 + if (test_and_set_bit(GC_PROBE_SUCCEEDED, &gc->flags)) { 1946 + err = -EPROTO; 1947 + goto cleanup_mana_rdma; 1948 + } 1949 + 2024 1950 return 0; 2025 1951 1952 + cleanup_mana_rdma: 1953 + mana_rdma_remove(&gc->mana_ib); 2026 1954 cleanup_mana: 2027 1955 mana_remove(&gc->mana, false); 2028 1956 cleanup_gd: ··· 2057 1963 disable_dev: 2058 1964 pci_disable_device(pdev); 2059 1965 dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); 1966 + 1967 + /* 1968 + * Hardware could be in recovery mode and the HWC returns TIMEDOUT or 1969 + * EPROTO from mana_gd_setup(), mana_probe() or mana_rdma_probe(), or 1970 + * we received a hardware reset event over HWC interrupt. In this case, 1971 + * perform the device recovery procedure after MANA_SERVICE_PERIOD 1972 + * seconds. 
1973 + */ 1974 + if (err == -ETIMEDOUT || err == -EPROTO) { 1975 + struct mana_dev_recovery *dev; 1976 + unsigned long flags; 1977 + 1978 + dev_info(&pdev->dev, "Start MANA recovery mode\n"); 1979 + 1980 + dev = kzalloc(sizeof(*dev), GFP_KERNEL); 1981 + if (!dev) 1982 + return err; 1983 + 1984 + dev->pdev = pci_dev_get(pdev); 1985 + dev->type = GDMA_EQE_HWC_RESET_REQUEST; 1986 + 1987 + spin_lock_irqsave(&mana_dev_recovery_work.lock, flags); 1988 + list_add_tail(&dev->list, &mana_dev_recovery_work.dev_list); 1989 + spin_unlock_irqrestore(&mana_dev_recovery_work.lock, flags); 1990 + 1991 + schedule_delayed_work(&mana_dev_recovery_work.work, 1992 + secs_to_jiffies(MANA_SERVICE_PERIOD)); 1993 + } 1994 + 2060 1995 return err; 2061 1996 } 2062 1997 ··· 2190 2067 { 2191 2068 int err; 2192 2069 2070 + INIT_LIST_HEAD(&mana_dev_recovery_work.dev_list); 2071 + spin_lock_init(&mana_dev_recovery_work.lock); 2072 + INIT_DELAYED_WORK(&mana_dev_recovery_work.work, mana_recovery_delayed_func); 2073 + 2193 2074 mana_debugfs_root = debugfs_create_dir("mana", NULL); 2194 2075 2195 2076 err = pci_register_driver(&mana_driver); ··· 2207 2080 2208 2081 static void __exit mana_driver_exit(void) 2209 2082 { 2083 + struct mana_dev_recovery *dev; 2084 + unsigned long flags; 2085 + 2086 + disable_delayed_work_sync(&mana_dev_recovery_work.work); 2087 + 2088 + spin_lock_irqsave(&mana_dev_recovery_work.lock, flags); 2089 + while (!list_empty(&mana_dev_recovery_work.dev_list)) { 2090 + dev = list_first_entry(&mana_dev_recovery_work.dev_list, 2091 + struct mana_dev_recovery, list); 2092 + list_del(&dev->list); 2093 + pci_dev_put(dev->pdev); 2094 + kfree(dev); 2095 + } 2096 + spin_unlock_irqrestore(&mana_dev_recovery_work.lock, flags); 2097 + 2210 2098 pci_unregister_driver(&mana_driver); 2211 2099 2212 2100 debugfs_remove(mana_debugfs_root);
+11 -1
include/net/mana/gdma.h
··· 382 382 char name[MANA_IRQ_NAME_SZ]; 383 383 }; 384 384 385 + enum gdma_context_flags { 386 + GC_PROBE_SUCCEEDED = 0, 387 + }; 388 + 385 389 struct gdma_context { 386 390 struct device *dev; 387 391 struct dentry *mana_pci_debugfs; ··· 434 430 u64 pf_cap_flags1; 435 431 436 432 struct workqueue_struct *service_wq; 433 + 434 + unsigned long flags; 437 435 }; 438 436 439 437 static inline bool mana_gd_is_mana(struct gdma_dev *gd) ··· 606 600 /* Driver can send HWC periodically to query stats */ 607 601 #define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21) 608 602 603 + /* Driver can handle hardware recovery events during probe */ 604 + #define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22) 605 + 609 606 #define GDMA_DRV_CAP_FLAGS1 \ 610 607 (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ 611 608 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ ··· 620 611 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \ 621 612 GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ 622 613 GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ 623 - GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE) 614 + GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ 615 + GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY) 624 616 625 617 #define GDMA_DRV_CAP_FLAGS2 0 626 618