ocfs2: Concurrent access of o2hb_region->hr_task was not locked

This means that a build-up and a teardown could race, which would result in
kthread_stop() being called twice on the same task.

Protect the setting and clearing of hr_task with o2hb_live_lock; these
operations are infrequent and not performance critical.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

authored by Joel Becker and committed by Mark Fasheh e6c352db c24f72cc

+32 -12
+32 -12
fs/ocfs2/cluster/heartbeat.c
··· 1234 1234 const char *page, 1235 1235 size_t count) 1236 1236 { 1237 + struct task_struct *hb_task; 1237 1238 long fd; 1238 1239 int sectsize; 1239 1240 char *p = (char *)page; ··· 1320 1319 */ 1321 1320 atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); 1322 1321 1323 - reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1324 - reg->hr_item.ci_name); 1325 - if (IS_ERR(reg->hr_task)) { 1326 - ret = PTR_ERR(reg->hr_task); 1322 + hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1323 + reg->hr_item.ci_name); 1324 + if (IS_ERR(hb_task)) { 1325 + ret = PTR_ERR(hb_task); 1327 1326 mlog_errno(ret); 1328 - reg->hr_task = NULL; 1329 1327 goto out; 1330 1328 } 1329 + 1330 + spin_lock(&o2hb_live_lock); 1331 + reg->hr_task = hb_task; 1332 + spin_unlock(&o2hb_live_lock); 1331 1333 1332 1334 ret = wait_event_interruptible(o2hb_steady_queue, 1333 1335 atomic_read(&reg->hr_steady_iterations) == 0); 1334 1336 if (ret) { 1335 - kthread_stop(reg->hr_task); 1337 + spin_lock(&o2hb_live_lock); 1338 + hb_task = reg->hr_task; 1336 1339 reg->hr_task = NULL; 1340 + spin_unlock(&o2hb_live_lock); 1341 + 1342 + if (hb_task) 1343 + kthread_stop(hb_task); 1337 1344 goto out; 1338 1345 } 1339 1346 ··· 1363 1354 static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, 1364 1355 char *page) 1365 1356 { 1366 - if (!reg->hr_task) 1357 + pid_t pid = 0; 1358 + 1359 + spin_lock(&o2hb_live_lock); 1360 + if (reg->hr_task) 1361 + pid = reg->hr_task->pid; 1362 + spin_unlock(&o2hb_live_lock); 1363 + 1364 + if (!pid) 1367 1365 return 0; 1368 1366 1369 - return sprintf(page, "%u\n", reg->hr_task->pid); 1367 + return sprintf(page, "%u\n", pid); 1370 1368 } 1371 1369 1372 1370 struct o2hb_region_attribute { ··· 1511 1495 static void o2hb_heartbeat_group_drop_item(struct config_group *group, 1512 1496 struct config_item *item) 1513 1497 { 1498 + struct task_struct *hb_task; 1514 1499 struct o2hb_region *reg = to_o2hb_region(item); 1515 1500 1516 1501 /* stop the thread when the user 
removes the region dir */ 1517 - if (reg->hr_task) { 1518 - kthread_stop(reg->hr_task); 1519 - reg->hr_task = NULL; 1520 - } 1502 + spin_lock(&o2hb_live_lock); 1503 + hb_task = reg->hr_task; 1504 + reg->hr_task = NULL; 1505 + spin_unlock(&o2hb_live_lock); 1506 + 1507 + if (hb_task) 1508 + kthread_stop(hb_task); 1521 1509 1522 1510 config_item_put(item); 1523 1511 }