Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: ufs: core: Fix devfreq deadlocks

There is a lock inversion and rwsem read-lock recursion in the devfreq
target callback which can lead to deadlocks.

Specifically, ufshcd_devfreq_scale() already holds a clk_scaling_lock
read lock when toggling the write booster, which involves taking the
dev_cmd mutex before taking another clk_scaling_lock read lock.

This can lead to a deadlock if another thread:

1) tries to acquire the dev_cmd and clk_scaling locks in the correct
order, or

2) takes a clk_scaling write lock before the attempt to take the
clk_scaling read lock a second time.

Fix this by dropping the clk_scaling_lock before toggling the write booster
as was done before commit 0e9d4ca43ba8 ("scsi: ufs: Protect some contexts
from unexpected clock scaling").

While the devfreq callbacks are already serialised, add a second
serialising mutex to handle the unlikely case where a callback triggered
through the devfreq sysfs interface is racing with a request to disable
clock scaling through the UFS controller 'clkscale_enable' sysfs
attribute. This could otherwise lead to the write booster being left
disabled after having disabled clock scaling.

Also take the new mutex in ufshcd_clk_scaling_allow() to make sure that any
pending write booster update has completed on return.

Note that this currently only affects Qualcomm platforms since commit
87bd05016a64 ("scsi: ufs: core: Allow host driver to disable wb toggling
during clock scaling").

The lock inversion (i.e. 1 above) was reported by lockdep as:

======================================================
WARNING: possible circular locking dependency detected
6.1.0-next-20221216 #211 Not tainted
------------------------------------------------------
kworker/u16:2/71 is trying to acquire lock:
ffff076280ba98a0 (&hba->dev_cmd.lock){+.+.}-{3:3}, at: ufshcd_query_flag+0x50/0x1c0

but task is already holding lock:
ffff076280ba9cf0 (&hba->clk_scaling_lock){++++}-{3:3}, at: ufshcd_devfreq_scale+0x2b8/0x380

which lock already depends on the new lock.
[ +0.011606]
the existing dependency chain (in reverse order) is:

-> #1 (&hba->clk_scaling_lock){++++}-{3:3}:
lock_acquire+0x68/0x90
down_read+0x58/0x80
ufshcd_exec_dev_cmd+0x70/0x2c0
ufshcd_verify_dev_init+0x68/0x170
ufshcd_probe_hba+0x398/0x1180
ufshcd_async_scan+0x30/0x320
async_run_entry_fn+0x34/0x150
process_one_work+0x288/0x6c0
worker_thread+0x74/0x450
kthread+0x118/0x120
ret_from_fork+0x10/0x20

-> #0 (&hba->dev_cmd.lock){+.+.}-{3:3}:
__lock_acquire+0x12a0/0x2240
lock_acquire.part.0+0xcc/0x220
lock_acquire+0x68/0x90
__mutex_lock+0x98/0x430
mutex_lock_nested+0x2c/0x40
ufshcd_query_flag+0x50/0x1c0
ufshcd_query_flag_retry+0x64/0x100
ufshcd_wb_toggle+0x5c/0x120
ufshcd_devfreq_scale+0x2c4/0x380
ufshcd_devfreq_target+0xf4/0x230
devfreq_set_target+0x84/0x2f0
devfreq_update_target+0xc4/0xf0
devfreq_monitor+0x38/0x1f0
process_one_work+0x288/0x6c0
worker_thread+0x74/0x450
kthread+0x118/0x120
ret_from_fork+0x10/0x20

other info that might help us debug this:
Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&hba->clk_scaling_lock);
                               lock(&hba->dev_cmd.lock);
                               lock(&hba->clk_scaling_lock);
  lock(&hba->dev_cmd.lock);

*** DEADLOCK ***

Fixes: 0e9d4ca43ba8 ("scsi: ufs: Protect some contexts from unexpected clock scaling")
Cc: stable@vger.kernel.org # 5.12
Cc: Can Guo <quic_cang@quicinc.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20230116161201.16923-1-johan+linaro@kernel.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

Authored by Johan Hovold; committed by Martin K. Petersen.
ba810437 bbbd2549

+17 -14
+15 -14
drivers/ufs/core/ufshcd.c
@@ -1234,9 +1234,11 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 	 * clock scaling is in progress
 	 */
 	ufshcd_scsi_block_requests(hba);
+	mutex_lock(&hba->wb_mutex);
 	down_write(&hba->clk_scaling_lock);
 
 	if (!hba->clk_scaling.is_allowed ||
 	    ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
 		ret = -EBUSY;
 		up_write(&hba->clk_scaling_lock);
+		mutex_unlock(&hba->wb_mutex);
 		ufshcd_scsi_unblock_requests(hba);
 		goto out;
 	}
@@ -1253,11 +1251,16 @@
 	return ret;
 }
 
-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
+static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool scale_up)
 {
-	if (writelock)
-		up_write(&hba->clk_scaling_lock);
-	else
-		up_read(&hba->clk_scaling_lock);
+	up_write(&hba->clk_scaling_lock);
+
+	/* Enable Write Booster if we have scaled up else disable it */
+	if (ufshcd_enable_wb_if_scaling_up(hba) && !err)
+		ufshcd_wb_toggle(hba, scale_up);
+
+	mutex_unlock(&hba->wb_mutex);
+
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 }
@@ -1279,7 +1273,6 @@
 static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
 {
 	int ret = 0;
-	bool is_writelock = true;
 
 	ret = ufshcd_clock_scaling_prepare(hba);
 	if (ret)
@@ -1307,15 +1302,8 @@
 		}
 	}
 
-	/* Enable Write Booster if we have scaled up else disable it */
-	if (ufshcd_enable_wb_if_scaling_up(hba)) {
-		downgrade_write(&hba->clk_scaling_lock);
-		is_writelock = false;
-		ufshcd_wb_toggle(hba, scale_up);
-	}
-
 out_unprepare:
-	ufshcd_clock_scaling_unprepare(hba, is_writelock);
+	ufshcd_clock_scaling_unprepare(hba, ret, scale_up);
 	return ret;
 }
@@ -6064,8 +6066,10 @@
 
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
 {
+	mutex_lock(&hba->wb_mutex);
 	down_write(&hba->clk_scaling_lock);
 	hba->clk_scaling.is_allowed = allow;
 	up_write(&hba->clk_scaling_lock);
+	mutex_unlock(&hba->wb_mutex);
 }
 
 static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
@@ -9793,6 +9793,7 @@
 	/* Initialize mutex for exception event control */
 	mutex_init(&hba->ee_ctrl_mutex);
 
+	mutex_init(&hba->wb_mutex);
 	init_rwsem(&hba->clk_scaling_lock);
 
 	ufshcd_init_clk_gating(hba);
+2
include/ufs/ufshcd.h
@@ -808,6 +808,7 @@
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *	device is known or not.
+ * @wb_mutex: used to serialize devfreq and sysfs write booster toggling
  * @clk_scaling_lock: used to serialize device commands and clock scaling
  * @desc_size: descriptor sizes reported by device
  * @scsi_block_reqs_cnt: reference counting for scsi block requests
@@ -952,6 +951,7 @@
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
 
+	struct mutex wb_mutex;
 	struct rw_semaphore clk_scaling_lock;
 	unsigned char desc_size[QUERY_DESC_IDN_MAX];
 	atomic_t scsi_block_reqs_cnt;