Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxlflash: Resolve oops in wait_port_offline

If an async error interrupt is generated and the error requires the FC
link to be reset, the reset cannot be performed in interrupt context, so
a work element is scheduled to complete the link reset in process
context. If either an EEH event or an escalation occurs between the time
the interrupt is generated and the time the scheduled work starts, the
MMIO space may no longer be available. This will cause an oops in the
worker thread.

[ 606.806583] NIP kthread_data+0x28/0x40
[ 606.806633] LR wq_worker_sleeping+0x30/0x100
[ 606.806694] Call Trace:
[ 606.806721] 0x50 (unreliable)
[ 606.806796] wq_worker_sleeping+0x30/0x100
[ 606.806884] __schedule+0x69c/0x8a0
[ 606.806959] schedule+0x44/0xc0
[ 606.807034] do_exit+0x770/0xb90
[ 606.807109] die+0x300/0x460
[ 606.807185] bad_page_fault+0xd8/0x150
[ 606.807259] handle_page_fault+0x2c/0x30
[ 606.807338] wait_port_offline.constprop.12+0x60/0x130 [cxlflash]

To prevent the problem space area from being unmapped while there is
pending work, a mapcount (using the kref mechanism) is held. The
mapcount is released only when the work is completed. The unmapping
service is tied to the release of the last reference.

Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Manoj Kumar and committed by
Martin K. Petersen
b45cdbaf ee91e332

+26 -3
+2
drivers/scsi/cxlflash/common.h
··· 165 165 struct sisl_host_map __iomem *host_map; /* MC host map */ 166 166 struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */ 167 167 168 + struct kref mapcount; 169 + 168 170 ctx_hndl_t ctx_hndl; /* master's context handle */ 169 171 u64 *hrrq_start; 170 172 u64 *hrrq_end;
+24 -3
drivers/scsi/cxlflash/main.c
··· 368 368 369 369 no_room: 370 370 afu->read_room = true; 371 + kref_get(&cfg->afu->mapcount); 371 372 schedule_work(&cfg->work_q); 372 373 rc = SCSI_MLQUEUE_HOST_BUSY; 373 374 goto out; ··· 474 473 return rc; 475 474 } 476 475 476 + static void afu_unmap(struct kref *ref) 477 + { 478 + struct afu *afu = container_of(ref, struct afu, mapcount); 479 + 480 + if (likely(afu->afu_map)) { 481 + cxl_psa_unmap((void __iomem *)afu->afu_map); 482 + afu->afu_map = NULL; 483 + } 484 + } 485 + 477 486 /** 478 487 * cxlflash_driver_info() - information handler for this host driver 479 488 * @host: SCSI host associated with device. ··· 514 503 ulong lock_flags; 515 504 short lflag = 0; 516 505 int rc = 0; 506 + int kref_got = 0; 517 507 518 508 dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu " 519 509 "cdb=(%08X-%08X-%08X-%08X)\n", ··· 559 547 goto out; 560 548 } 561 549 550 + kref_get(&cfg->afu->mapcount); 551 + kref_got = 1; 552 + 562 553 cmd->rcb.ctx_id = afu->ctx_hndl; 563 554 cmd->rcb.port_sel = port_sel; 564 555 cmd->rcb.lun_id = lun_to_lunid(scp->device->lun); ··· 602 587 } 603 588 604 589 out: 590 + if (kref_got) 591 + kref_put(&afu->mapcount, afu_unmap); 605 592 pr_devel("%s: returning rc=%d\n", __func__, rc); 606 593 return rc; 607 594 } ··· 678 661 cxl_psa_unmap((void __iomem *)afu->afu_map); 679 662 afu->afu_map = NULL; 680 663 } 664 + kref_put(&afu->mapcount, afu_unmap); 681 665 } 682 666 } 683 667 ··· 764 746 scsi_remove_host(cfg->host); 765 747 /* fall through */ 766 748 case INIT_STATE_AFU: 767 - term_afu(cfg); 768 749 cancel_work_sync(&cfg->work_q); 750 + term_afu(cfg); 769 751 case INIT_STATE_PCI: 770 752 pci_release_regions(cfg->dev); 771 753 pci_disable_device(pdev); ··· 1349 1331 __func__, port); 1350 1332 cfg->lr_state = LINK_RESET_REQUIRED; 1351 1333 cfg->lr_port = port; 1334 + kref_get(&cfg->afu->mapcount); 1352 1335 schedule_work(&cfg->work_q); 1353 1336 } 1354 1337 ··· 1370 1351 1371 1352 if (info->action & SCAN_HOST) { 1372 1353 
atomic_inc(&cfg->scan_host_needed); 1354 + kref_get(&cfg->afu->mapcount); 1373 1355 schedule_work(&cfg->work_q); 1374 1356 } 1375 1357 } ··· 1766 1746 rc = -ENOMEM; 1767 1747 goto err1; 1768 1748 } 1749 + kref_init(&afu->mapcount); 1769 1750 1770 1751 /* No byte reverse on reading afu_version or string will be backwards */ 1771 1752 reg = readq(&afu->afu_map->global.regs.afu_version); ··· 1801 1780 return rc; 1802 1781 1803 1782 err2: 1804 - cxl_psa_unmap((void __iomem *)afu->afu_map); 1805 - afu->afu_map = NULL; 1783 + kref_put(&afu->mapcount, afu_unmap); 1806 1784 err1: 1807 1785 term_mc(cfg, UNDO_START); 1808 1786 goto out; ··· 2374 2354 2375 2355 if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0) 2376 2356 scsi_scan_host(cfg->host); 2357 + kref_put(&afu->mapcount, afu_unmap); 2377 2358 } 2378 2359 2379 2360 /**