Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: avoid block operation when !TASK_RUNNING (ceph_mdsc_sync)

check_cap_flush() calls mutex_lock(), which may block, so we can't
use it as the condition check function for wait_event().

Signed-off-by: Yan, Zheng <zyan@redhat.com>

authored by

Yan, Zheng and committed by
Ilya Dryomov
d3383a8e 73e39e4d

+35 -18
+1 -1
fs/ceph/caps.c
··· 1450 1450 spin_lock(&mdsc->cap_dirty_lock); 1451 1451 list_del_init(&ci->i_dirty_item); 1452 1452 1453 - ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; 1454 1453 if (list_empty(&ci->i_flushing_item)) { 1454 + ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; 1455 1455 list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); 1456 1456 mdsc->num_cap_flushing++; 1457 1457 dout(" inode %p now flushing seq %lld\n", inode,
+34 -17
fs/ceph/mds_client.c
··· 1464 1464 return err; 1465 1465 } 1466 1466 1467 + static int check_cap_flush(struct inode *inode, u64 want_flush_seq) 1468 + { 1469 + struct ceph_inode_info *ci = ceph_inode(inode); 1470 + int ret; 1471 + spin_lock(&ci->i_ceph_lock); 1472 + if (ci->i_flushing_caps) 1473 + ret = ci->i_cap_flush_seq >= want_flush_seq; 1474 + else 1475 + ret = 1; 1476 + spin_unlock(&ci->i_ceph_lock); 1477 + return ret; 1478 + } 1479 + 1467 1480 /* 1468 1481 * flush all dirty inode data to disk. 1469 1482 * 1470 1483 * returns true if we've flushed through want_flush_seq 1471 1484 */ 1472 - static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) 1485 + static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) 1473 1486 { 1474 - int mds, ret = 1; 1487 + int mds; 1475 1488 1476 1489 dout("check_cap_flush want %lld\n", want_flush_seq); 1477 1490 mutex_lock(&mdsc->mutex); 1478 - for (mds = 0; ret && mds < mdsc->max_sessions; mds++) { 1491 + for (mds = 0; mds < mdsc->max_sessions; mds++) { 1479 1492 struct ceph_mds_session *session = mdsc->sessions[mds]; 1493 + struct inode *inode = NULL; 1480 1494 1481 1495 if (!session) 1482 1496 continue; ··· 1503 1489 list_entry(session->s_cap_flushing.next, 1504 1490 struct ceph_inode_info, 1505 1491 i_flushing_item); 1506 - struct inode *inode = &ci->vfs_inode; 1507 1492 1508 - spin_lock(&ci->i_ceph_lock); 1509 - if (ci->i_cap_flush_seq <= want_flush_seq) { 1493 + if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { 1510 1494 dout("check_cap_flush still flushing %p " 1511 - "seq %lld <= %lld to mds%d\n", inode, 1512 - ci->i_cap_flush_seq, want_flush_seq, 1513 - session->s_mds); 1514 - ret = 0; 1495 + "seq %lld <= %lld to mds%d\n", 1496 + &ci->vfs_inode, ci->i_cap_flush_seq, 1497 + want_flush_seq, session->s_mds); 1498 + inode = igrab(&ci->vfs_inode); 1515 1499 } 1516 - spin_unlock(&ci->i_ceph_lock); 1517 1500 } 1518 1501 mutex_unlock(&session->s_mutex); 1519 1502 ceph_put_mds_session(session); 1520 1503 
1521 - if (!ret) 1522 - return ret; 1504 + if (inode) { 1505 + wait_event(mdsc->cap_flushing_wq, 1506 + check_cap_flush(inode, want_flush_seq)); 1507 + iput(inode); 1508 + } 1509 + 1523 1510 mutex_lock(&mdsc->mutex); 1524 1511 } 1525 1512 1526 1513 mutex_unlock(&mdsc->mutex); 1527 1514 dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); 1528 - return ret; 1529 1515 } 1530 1516 1531 1517 /* ··· 3461 3447 dout("sync\n"); 3462 3448 mutex_lock(&mdsc->mutex); 3463 3449 want_tid = mdsc->last_tid; 3464 - want_flush = mdsc->cap_flush_seq; 3465 3450 mutex_unlock(&mdsc->mutex); 3466 - dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); 3467 3451 3468 3452 ceph_flush_dirty_caps(mdsc); 3453 + spin_lock(&mdsc->cap_dirty_lock); 3454 + want_flush = mdsc->cap_flush_seq; 3455 + spin_unlock(&mdsc->cap_dirty_lock); 3456 + 3457 + dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); 3469 3458 3470 3459 wait_unsafe_requests(mdsc, want_tid); 3471 - wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); 3460 + wait_caps_flush(mdsc, want_flush); 3472 3461 } 3473 3462 3474 3463 /*