Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: libsas: direct call probe and destruct

In commit 87c8331fcf72 ("[SCSI] libsas: prevent domain rediscovery
competing with ata error handling") introduced disco mutex to prevent
rediscovery competing with ata error handling and put the whole
revalidation in the mutex. But the rphy add/remove needs to wait for the
error handling which also grabs the disco mutex. This may leads to dead
lock.So the probe and destruct event were introduce to do the rphy
add/remove asynchronously and out of the lock.

The asynchronously processed workers makes the whole discovery process
not atomic, the other events may interrupt the process. For example,
if a loss of signal event inserted before the probe event, the
sas_deform_port() is called and the port will be deleted.

And sas_port_delete() may run before the destruct event, but the
port-x:x is the top parent of end device or expander. This leads to
a kernel WARNING such as:

[ 82.042979] sysfs group 'power' not found for kobject 'phy-1:0:22'
[ 82.042983] ------------[ cut here ]------------
[ 82.042986] WARNING: CPU: 54 PID: 1714 at fs/sysfs/group.c:237
sysfs_remove_group+0x94/0xa0
[ 82.043059] Call trace:
[ 82.043082] [<ffff0000082e7624>] sysfs_remove_group+0x94/0xa0
[ 82.043085] [<ffff00000864e320>] dpm_sysfs_remove+0x60/0x70
[ 82.043086] [<ffff00000863ee10>] device_del+0x138/0x308
[ 82.043089] [<ffff00000869a2d0>] sas_phy_delete+0x38/0x60
[ 82.043091] [<ffff00000869a86c>] do_sas_phy_delete+0x6c/0x80
[ 82.043093] [<ffff00000863dc20>] device_for_each_child+0x58/0xa0
[ 82.043095] [<ffff000008696f80>] sas_remove_children+0x40/0x50
[ 82.043100] [<ffff00000869d1bc>] sas_destruct_devices+0x64/0xa0
[ 82.043102] [<ffff0000080e93bc>] process_one_work+0x1fc/0x4b0
[ 82.043104] [<ffff0000080e96c0>] worker_thread+0x50/0x490
[ 82.043105] [<ffff0000080f0364>] kthread+0xfc/0x128
[ 82.043107] [<ffff0000080836c0>] ret_from_fork+0x10/0x50

Make probe and destruct a direct call in the disco and revalidate function,
but put them outside the lock. The whole discovery or revalidate won't
be interrupted by other events. And the DISCE_PROBE and DISCE_DESTRUCT
event are deleted as a result of the direct call.

Introduce a new list to destruct the sas_port and put the port delete after
the destruct. This makes sure the right order of destroying the sysfs
kobject and fix the warning above.

In sas_ex_revalidate_domain() have a loop to find all broadcasted
device, and sometimes we have a chance to find the same expander twice.
Because the sas_port will be deleted at the end of the whole revalidate
process, sas_port with the same name cannot be added before this.
Otherwise the sysfs will complain of creating duplicate filename. Since
the LLDD will send broadcast for every device change, we can only
process one expander's revalidation.

[mkp: kbuild test robot warning]

Signed-off-by: Jason Yan <yanaijie@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Johannes Thumshirn <jthumshirn@suse.de>
CC: Ewan Milne <emilne@redhat.com>
CC: Christoph Hellwig <hch@lst.de>
CC: Tomas Henzl <thenzl@redhat.com>
CC: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Jason Yan and committed by
Martin K. Petersen
0558f33c 517e5153

+27 -22
-1
drivers/scsi/libsas/sas_ata.c
··· 730 730 if (res) 731 731 return res; 732 732 733 - sas_discover_event(dev->port, DISCE_PROBE); 734 733 return 0; 735 734 } 736 735
+18 -14
drivers/scsi/libsas/sas_discover.c
··· 212 212 } 213 213 } 214 214 215 - static void sas_probe_devices(struct work_struct *work) 215 + static void sas_probe_devices(struct asd_sas_port *port) 216 216 { 217 217 struct domain_device *dev, *n; 218 - struct sas_discovery_event *ev = to_sas_discovery_event(work); 219 - struct asd_sas_port *port = ev->port; 220 - 221 - clear_bit(DISCE_PROBE, &port->disc.pending); 222 218 223 219 /* devices must be domain members before link recovery and probe */ 224 220 list_for_each_entry(dev, &port->disco_list, disco_list_node) { ··· 290 294 res = sas_notify_lldd_dev_found(dev); 291 295 if (res) 292 296 return res; 293 - sas_discover_event(dev->port, DISCE_PROBE); 294 297 295 298 return 0; 296 299 } ··· 348 353 sas_put_device(dev); 349 354 } 350 355 351 - static void sas_destruct_devices(struct work_struct *work) 356 + void sas_destruct_devices(struct asd_sas_port *port) 352 357 { 353 358 struct domain_device *dev, *n; 354 - struct sas_discovery_event *ev = to_sas_discovery_event(work); 355 - struct asd_sas_port *port = ev->port; 356 - 357 - clear_bit(DISCE_DESTRUCT, &port->disc.pending); 358 359 359 360 list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) { 360 361 list_del_init(&dev->disco_list_node); ··· 358 367 sas_remove_children(&dev->rphy->dev); 359 368 sas_rphy_delete(dev->rphy); 360 369 sas_unregister_common_dev(port, dev); 370 + } 371 + } 372 + 373 + static void sas_destruct_ports(struct asd_sas_port *port) 374 + { 375 + struct sas_port *sas_port, *p; 376 + 377 + list_for_each_entry_safe(sas_port, p, &port->sas_port_del_list, del_list) { 378 + list_del_init(&sas_port->del_list); 379 + sas_port_delete(sas_port); 361 380 } 362 381 } 363 382 ··· 385 384 if (!test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) { 386 385 sas_rphy_unlink(dev->rphy); 387 386 list_move_tail(&dev->disco_list_node, &port->destroy_list); 388 - sas_discover_event(dev->port, DISCE_DESTRUCT); 389 387 } 390 388 } 391 389 ··· 490 490 port->port_dev = NULL; 491 491 } 492 492 493 + sas_probe_devices(port); 494 + 493 495 SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id, 494 496 task_pid_nr(current), error); 495 497 } ··· 525 523 port->id, task_pid_nr(current), res); 526 524 out: 527 525 mutex_unlock(&ha->disco_mutex); 526 + 527 + sas_destruct_devices(port); 528 + sas_destruct_ports(port); 529 + sas_probe_devices(port); 528 530 } 529 531 530 532 /* ---------- Events ---------- */ ··· 584 578 static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = { 585 579 [DISCE_DISCOVER_DOMAIN] = sas_discover_domain, 586 580 [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain, 587 - [DISCE_PROBE] = sas_probe_devices, 588 581 [DISCE_SUSPEND] = sas_suspend_devices, 589 582 [DISCE_RESUME] = sas_resume_devices, 590 - [DISCE_DESTRUCT] = sas_destruct_devices, 591 583 }; 592 584 593 585 disc->pending = 0;
+3 -5
drivers/scsi/libsas/sas_expander.c
··· 1916 1916 sas_port_delete_phy(phy->port, phy->phy); 1917 1917 sas_device_set_phy(found, phy->port); 1918 1918 if (phy->port->num_phys == 0) 1919 - sas_port_delete(phy->port); 1919 + list_add_tail(&phy->port->del_list, 1920 + &parent->port->sas_port_del_list); 1920 1921 phy->port = NULL; 1921 1922 } 1922 1923 } ··· 2125 2124 struct domain_device *dev = NULL; 2126 2125 2127 2126 res = sas_find_bcast_dev(port_dev, &dev); 2128 - while (res == 0 && dev) { 2127 + if (res == 0 && dev) { 2129 2128 struct expander_device *ex = &dev->ex_dev; 2130 2129 int i = 0, phy_id; 2131 2130 ··· 2137 2136 res = sas_rediscover(dev, phy_id); 2138 2137 i = phy_id + 1; 2139 2138 } while (i < ex->num_phys); 2140 - 2141 - dev = NULL; 2142 - res = sas_find_bcast_dev(port_dev, &dev); 2143 2139 } 2144 2140 return res; 2145 2141 }
+1
drivers/scsi/libsas/sas_internal.h
··· 101 101 void sas_hae_reset(struct work_struct *work); 102 102 103 103 void sas_free_device(struct kref *kref); 104 + void sas_destruct_devices(struct asd_sas_port *port); 104 105 105 106 extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS]; 106 107 extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
+3
drivers/scsi/libsas/sas_port.c
··· 66 66 rc = sas_notify_lldd_dev_found(dev); 67 67 if (rc) { 68 68 sas_unregister_dev(port, dev); 69 + sas_destruct_devices(port); 69 70 continue; 70 71 } 71 72 ··· 221 220 222 221 if (port->num_phys == 1) { 223 222 sas_unregister_domain_devices(port, gone); 223 + sas_destruct_devices(port); 224 224 sas_port_delete(port->port); 225 225 port->port = NULL; 226 226 } else { ··· 319 317 INIT_LIST_HEAD(&port->dev_list); 320 318 INIT_LIST_HEAD(&port->disco_list); 321 319 INIT_LIST_HEAD(&port->destroy_list); 320 + INIT_LIST_HEAD(&port->sas_port_del_list); 322 321 spin_lock_init(&port->phy_list_lock); 323 322 INIT_LIST_HEAD(&port->phy_list); 324 323 port->ha = sas_ha;
+1 -2
include/scsi/libsas.h
··· 82 82 enum discover_event { 83 83 DISCE_DISCOVER_DOMAIN = 0U, 84 84 DISCE_REVALIDATE_DOMAIN, 85 - DISCE_PROBE, 86 85 DISCE_SUSPEND, 87 86 DISCE_RESUME, 88 - DISCE_DESTRUCT, 89 87 DISC_NUM_EVENTS, 90 88 }; 91 89 ··· 260 262 struct list_head dev_list; 261 263 struct list_head disco_list; 262 264 struct list_head destroy_list; 265 + struct list_head sas_port_del_list; 263 266 enum sas_linkrate linkrate; 264 267 265 268 struct sas_work work;
+1
include/scsi/scsi_transport_sas.h
··· 156 156 157 157 struct mutex phy_list_mutex; 158 158 struct list_head phy_list; 159 + struct list_head del_list; /* libsas only */ 159 160 }; 160 161 161 162 #define dev_to_sas_port(d) \