Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: bnx2fc: Flush destroy_work queue before calling bnx2fc_interface_put()

The bnx2fc_destroy() functions are removing the interface before calling
destroy_work. This results multiple WARNings from sysfs_remove_group() as
the controller rport device attributes are removed too early.

Replace the fcoe_port's destroy_work queue. It's not needed.

The problem is easily reproducible with the following steps.

Example:

$ dmesg -w &
$ systemctl enable --now fcoe
$ fipvlan -s -c ens2f1
$ fcoeadm -d ens2f1.802
[ 583.464488] host2: libfc: Link down on port (7500a1)
[ 583.472651] bnx2fc: 7500a1 - rport not created Yet!!
[ 583.490468] ------------[ cut here ]------------
[ 583.538725] sysfs group 'power' not found for kobject 'rport-2:0-0'
[ 583.568814] WARNING: CPU: 3 PID: 192 at fs/sysfs/group.c:279 sysfs_remove_group+0x6f/0x80
[ 583.607130] Modules linked in: dm_service_time 8021q garp mrp stp llc bnx2fc cnic uio rpcsec_gss_krb5 auth_rpcgss nfsv4 ...
[ 583.942994] CPU: 3 PID: 192 Comm: kworker/3:2 Kdump: loaded Not tainted 5.14.0-39.el9.x86_64 #1
[ 583.984105] Hardware name: HP ProLiant DL120 G7, BIOS J01 07/01/2013
[ 584.016535] Workqueue: fc_wq_2 fc_rport_final_delete [scsi_transport_fc]
[ 584.050691] RIP: 0010:sysfs_remove_group+0x6f/0x80
[ 584.074725] Code: ff 5b 48 89 ef 5d 41 5c e9 ee c0 ff ff 48 89 ef e8 f6 b8 ff ff eb d1 49 8b 14 24 48 8b 33 48 c7 c7 ...
[ 584.162586] RSP: 0018:ffffb567c15afdc0 EFLAGS: 00010282
[ 584.188225] RAX: 0000000000000000 RBX: ffffffff8eec4220 RCX: 0000000000000000
[ 584.221053] RDX: ffff8c1586ce84c0 RSI: ffff8c1586cd7cc0 RDI: ffff8c1586cd7cc0
[ 584.255089] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffb567c15afc00
[ 584.287954] R10: ffffb567c15afbf8 R11: ffffffff8fbe7f28 R12: ffff8c1486326400
[ 584.322356] R13: ffff8c1486326480 R14: ffff8c1483a4a000 R15: 0000000000000004
[ 584.355379] FS: 0000000000000000(0000) GS:ffff8c1586cc0000(0000) knlGS:0000000000000000
[ 584.394419] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 584.421123] CR2: 00007fe95a6f7840 CR3: 0000000107674002 CR4: 00000000000606e0
[ 584.454888] Call Trace:
[ 584.466108] device_del+0xb2/0x3e0
[ 584.481701] device_unregister+0x13/0x60
[ 584.501306] bsg_unregister_queue+0x5b/0x80
[ 584.522029] bsg_remove_queue+0x1c/0x40
[ 584.541884] fc_rport_final_delete+0xf3/0x1d0 [scsi_transport_fc]
[ 584.573823] process_one_work+0x1e3/0x3b0
[ 584.592396] worker_thread+0x50/0x3b0
[ 584.609256] ? rescuer_thread+0x370/0x370
[ 584.628877] kthread+0x149/0x170
[ 584.643673] ? set_kthread_struct+0x40/0x40
[ 584.662909] ret_from_fork+0x22/0x30
[ 584.680002] ---[ end trace 53575ecefa942ece ]---

Link: https://lore.kernel.org/r/20220115040044.1013475-1-jmeneghi@redhat.com
Fixes: 0cbf32e1681d ("[SCSI] bnx2fc: Avoid calling bnx2fc_if_destroy with unnecessary locks")
Tested-by: Guangwu Zhang <guazhang@redhat.com>
Co-developed-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: John Meneghini <jmeneghi@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

John Meneghini and committed by
Martin K. Petersen
847f9ea4 8c9db667

+5 -15
+5 -15
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
··· 82 82 static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); 83 83 static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, 84 84 struct device *parent, int npiv); 85 - static void bnx2fc_destroy_work(struct work_struct *work); 85 + static void bnx2fc_port_destroy(struct fcoe_port *port); 86 86 87 87 static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); 88 88 static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device ··· 907 907 __bnx2fc_destroy(interface); 908 908 } 909 909 mutex_unlock(&bnx2fc_dev_lock); 910 - 911 - /* Ensure ALL destroy work has been completed before return */ 912 - flush_workqueue(bnx2fc_wq); 913 910 return; 914 911 915 912 default: ··· 1212 1215 mutex_unlock(&n_port->lp_mutex); 1213 1216 bnx2fc_free_vport(interface->hba, port->lport); 1214 1217 bnx2fc_port_shutdown(port->lport); 1218 + bnx2fc_port_destroy(port); 1215 1219 bnx2fc_interface_put(interface); 1216 - queue_work(bnx2fc_wq, &port->destroy_work); 1217 1220 return 0; 1218 1221 } 1219 1222 ··· 1522 1525 port->lport = lport; 1523 1526 port->priv = interface; 1524 1527 port->get_netdev = bnx2fc_netdev; 1525 - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); 1526 1528 1527 1529 /* Configure fcoe_port */ 1528 1530 rc = bnx2fc_lport_config(lport); ··· 1649 1653 bnx2fc_interface_cleanup(interface); 1650 1654 bnx2fc_stop(interface); 1651 1655 list_del(&interface->list); 1656 + bnx2fc_port_destroy(port); 1652 1657 bnx2fc_interface_put(interface); 1653 - queue_work(bnx2fc_wq, &port->destroy_work); 1654 1658 } 1655 1659 1656 1660 /** ··· 1690 1694 return rc; 1691 1695 } 1692 1696 1693 - static void bnx2fc_destroy_work(struct work_struct *work) 1697 + static void bnx2fc_port_destroy(struct fcoe_port *port) 1694 1698 { 1695 - struct fcoe_port *port; 1696 1699 struct fc_lport *lport; 1697 1700 1698 - port = container_of(work, struct fcoe_port, destroy_work); 1699 1701 lport = port->lport; 1700 - 1701 - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); 1702 + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); 1702 1703 1703 1704 bnx2fc_if_destroy(lport); 1704 1705 } ··· 2548 2555 if (interface->hba == hba) 2549 2556 __bnx2fc_destroy(interface); 2550 2557 mutex_unlock(&bnx2fc_dev_lock); 2551 - 2552 - /* Ensure ALL destroy work has been completed before return */ 2553 - flush_workqueue(bnx2fc_wq); 2554 2558 2555 2559 bnx2fc_ulp_stop(hba); 2556 2560 /* unregister cnic device */