Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gpio: shared: fix a deadlock

It's possible that the auxiliary proxy device we add when setting up the
GPIO controller exposing shared pins, will get matched and probed
immediately. The gpio-proxy-driver will then retrieve the shared
descriptor structure. That will cause a recursive mutex locking and
a deadlock because we're already holding the gpio_shared_lock in
gpio_device_setup_shared() and try to take it again in
devm_gpiod_shared_get() like this:

[ 4.298346] gpiolib_shared: GPIO 130 owned by f100000.pinctrl is shared by multiple consumers
[ 4.307157] gpiolib_shared: Setting up a shared GPIO entry for speaker@0,3
[ 4.314604]
[ 4.316146] ============================================
[ 4.321600] WARNING: possible recursive locking detected
[ 4.327054] 6.18.0-rc7-next-20251125-g3f300d0674f6-dirty #3887 Not tainted
[ 4.334115] --------------------------------------------
[ 4.339566] kworker/u32:3/71 is trying to acquire lock:
[ 4.344931] ffffda019ba71850 (gpio_shared_lock){+.+.}-{4:4}, at: devm_gpiod_shared_get+0x34/0x2e0
[ 4.354057]
[ 4.354057] but task is already holding lock:
[ 4.360041] ffffda019ba71850 (gpio_shared_lock){+.+.}-{4:4}, at: gpio_device_setup_shared+0x30/0x268
[ 4.369421]
[ 4.369421] other info that might help us debug this:
[ 4.376126] Possible unsafe locking scenario:
[ 4.376126]
[ 4.382198] CPU0
[ 4.384711] ----
[ 4.387223] lock(gpio_shared_lock);
[ 4.390992] lock(gpio_shared_lock);
[ 4.394761]
[ 4.394761] *** DEADLOCK ***
[ 4.394761]
[ 4.400832] May be due to missing lock nesting notation
[ 4.400832]
[ 4.407802] 5 locks held by kworker/u32:3/71:
[ 4.412279] #0: ffff000080020948 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x194/0x64c
[ 4.422650] #1: ffff800080963d60 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x1bc/0x64c
[ 4.432117] #2: ffff00008165c8f8 (&dev->mutex){....}-{4:4}, at: __device_attach+0x3c/0x198
[ 4.440700] #3: ffffda019ba71850 (gpio_shared_lock){+.+.}-{4:4}, at: gpio_device_setup_shared+0x30/0x268
[ 4.450523] #4: ffff0000810fe918 (&dev->mutex){....}-{4:4}, at: __device_attach+0x3c/0x198
[ 4.459103]
[ 4.459103] stack backtrace:
[ 4.463581] CPU: 6 UID: 0 PID: 71 Comm: kworker/u32:3 Not tainted 6.18.0-rc7-next-20251125-g3f300d0674f6-dirty #3887 PREEMPT
[ 4.463589] Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
[ 4.463593] Workqueue: events_unbound deferred_probe_work_func
[ 4.463602] Call trace:
[ 4.463604] show_stack+0x18/0x24 (C)
[ 4.463617] dump_stack_lvl+0x70/0x98
[ 4.463627] dump_stack+0x18/0x24
[ 4.463636] print_deadlock_bug+0x224/0x238
[ 4.463643] __lock_acquire+0xe4c/0x15f0
[ 4.463648] lock_acquire+0x1cc/0x344
[ 4.463653] __mutex_lock+0xb8/0x840
[ 4.463661] mutex_lock_nested+0x24/0x30
[ 4.463667] devm_gpiod_shared_get+0x34/0x2e0
[ 4.463674] gpio_shared_proxy_probe+0x18/0x138
[ 4.463682] auxiliary_bus_probe+0x40/0x78
[ 4.463688] really_probe+0xbc/0x2c0
[ 4.463694] __driver_probe_device+0x78/0x120
[ 4.463701] driver_probe_device+0x3c/0x160
[ 4.463708] __device_attach_driver+0xb8/0x140
[ 4.463716] bus_for_each_drv+0x88/0xe8
[ 4.463723] __device_attach+0xa0/0x198
[ 4.463729] device_initial_probe+0x14/0x20
[ 4.463737] bus_probe_device+0xb4/0xc0
[ 4.463743] device_add+0x578/0x76c
[ 4.463747] __auxiliary_device_add+0x40/0xac
[ 4.463752] gpio_device_setup_shared+0x1f8/0x268
[ 4.463758] gpiochip_add_data_with_key+0xdac/0x10ac
[ 4.463763] devm_gpiochip_add_data_with_key+0x30/0x80
[ 4.463768] msm_pinctrl_probe+0x4b0/0x5e0
[ 4.463779] sm8250_pinctrl_probe+0x18/0x40
[ 4.463784] platform_probe+0x5c/0xa4
[ 4.463793] really_probe+0xbc/0x2c0
[ 4.463800] __driver_probe_device+0x78/0x120
[ 4.463807] driver_probe_device+0x3c/0x160
[ 4.463814] __device_attach_driver+0xb8/0x140
[ 4.463821] bus_for_each_drv+0x88/0xe8
[ 4.463827] __device_attach+0xa0/0x198
[ 4.463834] device_initial_probe+0x14/0x20
[ 4.463841] bus_probe_device+0xb4/0xc0
[ 4.463847] deferred_probe_work_func+0x90/0xcc
[ 4.463854] process_one_work+0x214/0x64c
[ 4.463860] worker_thread+0x1bc/0x360
[ 4.463866] kthread+0x14c/0x220
[ 4.463871] ret_from_fork+0x10/0x20
[ 77.265041] random: crng init done

Fortunately, at the time of creating of the auxiliary device, we already
know the correct entry so let's store it as the device's platform data.
We don't need to hold gpio_shared_lock in devm_gpiod_shared_get() as
we're not removing the entry or traversing the list anymore but we still
need to protect it from concurrent modification of its fields so add a
more fine-grained mutex.

Fixes: a060b8c511ab ("gpiolib: implement low-level, shared GPIO support")
Reported-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Closes: https://lore.kernel.org/all/fimuvblfy2cmn7o4wzcxjzrux5mwhvlvyxfsgeqs6ore2xg75i@ax46d3sfmdux/
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20251128-gpio-shared-deadlock-v2-1-9f3ae8ddcb09@linaro.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>

+28 -35
+28 -35
drivers/gpio/gpiolib-shared.c
··· 49 49 unsigned int offset; 50 50 /* Index in the property value array. */ 51 51 size_t index; 52 + struct mutex lock; 52 53 struct gpio_shared_desc *shared_desc; 53 54 struct kref ref; 54 55 struct list_head refs; ··· 171 170 entry->offset = offset; 172 171 entry->index = count; 173 172 INIT_LIST_HEAD(&entry->refs); 173 + mutex_init(&entry->lock); 174 174 175 175 list_add_tail(&entry->list, &gpio_shared_list); 176 176 } ··· 248 246 } 249 247 250 248 static int gpio_shared_make_adev(struct gpio_device *gdev, 249 + struct gpio_shared_entry *entry, 251 250 struct gpio_shared_ref *ref) 252 251 { 253 252 struct auxiliary_device *adev = &ref->adev; ··· 261 258 adev->id = ref->dev_id; 262 259 adev->name = "proxy"; 263 260 adev->dev.parent = gdev->dev.parent; 261 + adev->dev.platform_data = entry; 264 262 adev->dev.release = gpio_shared_adev_release; 265 263 266 264 ret = auxiliary_device_init(adev); ··· 465 461 pr_debug("Setting up a shared GPIO entry for %s\n", 466 462 fwnode_get_name(ref->fwnode)); 467 463 468 - ret = gpio_shared_make_adev(gdev, ref); 464 + ret = gpio_shared_make_adev(gdev, entry, ref); 469 465 if (ret) 470 466 return ret; 471 467 } ··· 499 495 { 500 496 struct gpio_shared_entry *entry = 501 497 container_of(kref, struct gpio_shared_entry, ref); 502 - struct gpio_shared_desc *shared_desc = entry->shared_desc; 498 + struct gpio_shared_desc *shared_desc; 503 499 504 - guard(mutex)(&gpio_shared_lock); 500 + guard(mutex)(&entry->lock); 505 501 502 + shared_desc = entry->shared_desc; 506 503 gpio_device_put(shared_desc->desc->gdev); 507 504 if (shared_desc->can_sleep) 508 505 mutex_destroy(&shared_desc->mutex); ··· 526 521 struct gpio_shared_desc *shared_desc; 527 522 struct gpio_device *gdev; 528 523 524 + lockdep_assert_held(&entry->lock); 525 + 529 526 shared_desc = kzalloc(sizeof(*shared_desc), GFP_KERNEL); 530 527 if (!shared_desc) 531 528 return ERR_PTR(-ENOMEM); ··· 548 541 return shared_desc; 549 542 } 550 543 551 - static struct gpio_shared_entry *gpiod_shared_find(struct auxiliary_device *adev) 544 + struct gpio_shared_desc *devm_gpiod_shared_get(struct device *dev) 552 545 { 553 546 struct gpio_shared_desc *shared_desc; 554 547 struct gpio_shared_entry *entry; 555 - struct gpio_shared_ref *ref; 548 + int ret; 556 549 557 - guard(mutex)(&gpio_shared_lock); 550 + lockdep_assert_not_held(&gpio_shared_lock); 558 551 559 - list_for_each_entry(entry, &gpio_shared_list, list) { 560 - list_for_each_entry(ref, &entry->refs, list) { 561 - if (adev != &ref->adev) 562 - continue; 552 + entry = dev_get_platdata(dev); 553 + if (WARN_ON(!entry)) 554 + /* Programmer bug */ 555 + return ERR_PTR(-ENOENT); 563 556 564 - if (entry->shared_desc) { 565 - kref_get(&entry->ref); 566 - return entry; 567 - } 568 - 557 + scoped_guard(mutex, &entry->lock) { 558 + if (entry->shared_desc) { 559 + kref_get(&entry->ref); 560 + shared_desc = entry->shared_desc; 561 + } else { 569 562 shared_desc = gpiod_shared_desc_create(entry); 570 563 if (IS_ERR(shared_desc)) 571 564 return ERR_CAST(shared_desc); 572 565 573 566 kref_init(&entry->ref); 574 567 entry->shared_desc = shared_desc; 575 - 576 - pr_debug("Device %s acquired a reference to the shared GPIO %u owned by %s\n", 577 - dev_name(&adev->dev), gpiod_hwgpio(shared_desc->desc), 578 - gpio_device_get_label(shared_desc->desc->gdev)); 579 - 580 - 581 - return entry; 582 568 } 569 + 570 + pr_debug("Device %s acquired a reference to the shared GPIO %u owned by %s\n", 571 + dev_name(dev), gpiod_hwgpio(shared_desc->desc), 572 + gpio_device_get_label(shared_desc->desc->gdev)); 583 573 } 584 - 585 - return ERR_PTR(-ENOENT); 586 - } 587 - 588 - struct gpio_shared_desc *devm_gpiod_shared_get(struct device *dev) 589 - { 590 - struct gpio_shared_entry *entry; 591 - int ret; 592 - 593 - entry = gpiod_shared_find(to_auxiliary_dev(dev)); 594 - if (IS_ERR(entry)) 595 - return ERR_CAST(entry); 596 574 597 575 ret = devm_add_action_or_reset(dev, gpiod_shared_put, entry); 598 576 if (ret) 599 577 return ERR_PTR(ret); 600 578 601 - return entry->shared_desc; 579 + return shared_desc; 602 580 } 603 581 EXPORT_SYMBOL_GPL(devm_gpiod_shared_get); 604 582 ··· 599 607 static void gpio_shared_drop_entry(struct gpio_shared_entry *entry) 600 608 { 601 609 list_del(&entry->list); 610 + mutex_destroy(&entry->lock); 602 611 fwnode_handle_put(entry->fwnode); 603 612 kfree(entry); 604 613 }