Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: fix potential use-after-free bug when trimming caps

When trimming the caps, just after the 'session->s_cap_lock' is
released in ceph_iterate_session_caps(), the cap may be removed by
another thread, and using the stale cap memory in the callbacks
will then trigger a use-after-free crash.

We need to check that the cap still exists just after 'ci->i_ceph_lock'
is acquired, and do nothing if it has already been removed.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/43272
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Luís Henriques <lhenriques@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Xiubo Li and committed by
Ilya Dryomov
aaf67de7 7d41870d

+61 -34
+1 -1
fs/ceph/caps.c
··· 431 431 * 432 432 * Called with i_ceph_lock held. 433 433 */ 434 - static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 434 + struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 435 435 { 436 436 struct ceph_cap *cap; 437 437 struct rb_node *n = ci->i_caps.rb_node;
+11 -5
fs/ceph/debugfs.c
··· 248 248 return 0; 249 249 } 250 250 251 - static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) 251 + static int caps_show_cb(struct inode *inode, int mds, void *p) 252 252 { 253 + struct ceph_inode_info *ci = ceph_inode(inode); 253 254 struct seq_file *s = p; 255 + struct ceph_cap *cap; 254 256 255 - seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), 256 - cap->session->s_mds, 257 - ceph_cap_string(cap->issued), 258 - ceph_cap_string(cap->implemented)); 257 + spin_lock(&ci->i_ceph_lock); 258 + cap = __get_cap_for_mds(ci, mds); 259 + if (cap) 260 + seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), 261 + cap->session->s_mds, 262 + ceph_cap_string(cap->issued), 263 + ceph_cap_string(cap->implemented)); 264 + spin_unlock(&ci->i_ceph_lock); 259 265 return 0; 260 266 } 261 267
+46 -26
fs/ceph/mds_client.c
··· 1632 1632 * Caller must hold session s_mutex. 1633 1633 */ 1634 1634 int ceph_iterate_session_caps(struct ceph_mds_session *session, 1635 - int (*cb)(struct inode *, struct ceph_cap *, 1636 - void *), void *arg) 1635 + int (*cb)(struct inode *, int mds, void *), 1636 + void *arg) 1637 1637 { 1638 1638 struct list_head *p; 1639 1639 struct ceph_cap *cap; ··· 1645 1645 spin_lock(&session->s_cap_lock); 1646 1646 p = session->s_caps.next; 1647 1647 while (p != &session->s_caps) { 1648 + int mds; 1649 + 1648 1650 cap = list_entry(p, struct ceph_cap, session_caps); 1649 1651 inode = igrab(&cap->ci->netfs.inode); 1650 1652 if (!inode) { ··· 1654 1652 continue; 1655 1653 } 1656 1654 session->s_cap_iterator = cap; 1655 + mds = cap->mds; 1657 1656 spin_unlock(&session->s_cap_lock); 1658 1657 1659 1658 if (last_inode) { ··· 1666 1663 old_cap = NULL; 1667 1664 } 1668 1665 1669 - ret = cb(inode, cap, arg); 1666 + ret = cb(inode, mds, arg); 1670 1667 last_inode = inode; 1671 1668 1672 1669 spin_lock(&session->s_cap_lock); ··· 1699 1696 return ret; 1700 1697 } 1701 1698 1702 - static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, 1703 - void *arg) 1699 + static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) 1704 1700 { 1705 1701 struct ceph_inode_info *ci = ceph_inode(inode); 1706 1702 bool invalidate = false; 1707 - int iputs; 1703 + struct ceph_cap *cap; 1704 + int iputs = 0; 1708 1705 1709 - dout("removing cap %p, ci is %p, inode is %p\n", 1710 - cap, ci, &ci->netfs.inode); 1711 1706 spin_lock(&ci->i_ceph_lock); 1712 - iputs = ceph_purge_inode_cap(inode, cap, &invalidate); 1707 + cap = __get_cap_for_mds(ci, mds); 1708 + if (cap) { 1709 + dout(" removing cap %p, ci is %p, inode is %p\n", 1710 + cap, ci, &ci->netfs.inode); 1711 + 1712 + iputs = ceph_purge_inode_cap(inode, cap, &invalidate); 1713 + } 1713 1714 spin_unlock(&ci->i_ceph_lock); 1714 1715 1715 - wake_up_all(&ci->i_cap_wq); 1716 + if (cap) 1717 + 
wake_up_all(&ci->i_cap_wq); 1716 1718 if (invalidate) 1717 1719 ceph_queue_invalidate(inode); 1718 1720 while (iputs--) ··· 1788 1780 * 1789 1781 * caller must hold s_mutex. 1790 1782 */ 1791 - static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, 1792 - void *arg) 1783 + static int wake_up_session_cb(struct inode *inode, int mds, void *arg) 1793 1784 { 1794 1785 struct ceph_inode_info *ci = ceph_inode(inode); 1795 1786 unsigned long ev = (unsigned long)arg; ··· 1799 1792 ci->i_requested_max_size = 0; 1800 1793 spin_unlock(&ci->i_ceph_lock); 1801 1794 } else if (ev == RENEWCAPS) { 1802 - if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) { 1803 - /* mds did not re-issue stale cap */ 1804 - spin_lock(&ci->i_ceph_lock); 1795 + struct ceph_cap *cap; 1796 + 1797 + spin_lock(&ci->i_ceph_lock); 1798 + cap = __get_cap_for_mds(ci, mds); 1799 + /* mds did not re-issue stale cap */ 1800 + if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) 1805 1801 cap->issued = cap->implemented = CEPH_CAP_PIN; 1806 - spin_unlock(&ci->i_ceph_lock); 1807 - } 1802 + spin_unlock(&ci->i_ceph_lock); 1808 1803 } else if (ev == FORCE_RO) { 1809 1804 } 1810 1805 wake_up_all(&ci->i_cap_wq); ··· 1968 1959 * Yes, this is a bit sloppy. Our only real goal here is to respond to 1969 1960 * memory pressure from the MDS, though, so it needn't be perfect. 
1970 1961 */ 1971 - static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) 1962 + static int trim_caps_cb(struct inode *inode, int mds, void *arg) 1972 1963 { 1973 1964 int *remaining = arg; 1974 1965 struct ceph_inode_info *ci = ceph_inode(inode); 1975 1966 int used, wanted, oissued, mine; 1967 + struct ceph_cap *cap; 1976 1968 1977 1969 if (*remaining <= 0) 1978 1970 return -1; 1979 1971 1980 1972 spin_lock(&ci->i_ceph_lock); 1973 + cap = __get_cap_for_mds(ci, mds); 1974 + if (!cap) { 1975 + spin_unlock(&ci->i_ceph_lock); 1976 + return 0; 1977 + } 1981 1978 mine = cap->issued | cap->implemented; 1982 1979 used = __ceph_caps_used(ci); 1983 1980 wanted = __ceph_caps_file_wanted(ci); ··· 3926 3911 /* 3927 3912 * Encode information about a cap for a reconnect with the MDS. 3928 3913 */ 3929 - static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, 3930 - void *arg) 3914 + static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) 3931 3915 { 3932 3916 union { 3933 3917 struct ceph_mds_cap_reconnect v2; 3934 3918 struct ceph_mds_cap_reconnect_v1 v1; 3935 3919 } rec; 3936 - struct ceph_inode_info *ci = cap->ci; 3920 + struct ceph_inode_info *ci = ceph_inode(inode); 3937 3921 struct ceph_reconnect_state *recon_state = arg; 3938 3922 struct ceph_pagelist *pagelist = recon_state->pagelist; 3939 3923 struct dentry *dentry; 3924 + struct ceph_cap *cap; 3940 3925 char *path; 3941 - int pathlen = 0, err; 3926 + int pathlen = 0, err = 0; 3942 3927 u64 pathbase; 3943 3928 u64 snap_follows; 3944 - 3945 - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", 3946 - inode, ceph_vinop(inode), cap, cap->cap_id, 3947 - ceph_cap_string(cap->issued)); 3948 3929 3949 3930 dentry = d_find_primary(inode); 3950 3931 if (dentry) { ··· 3958 3947 } 3959 3948 3960 3949 spin_lock(&ci->i_ceph_lock); 3950 + cap = __get_cap_for_mds(ci, mds); 3951 + if (!cap) { 3952 + spin_unlock(&ci->i_ceph_lock); 3953 + goto out_err; 3954 + } 3955 + dout(" adding %p 
ino %llx.%llx cap %p %lld %s\n", 3956 + inode, ceph_vinop(inode), cap, cap->cap_id, 3957 + ceph_cap_string(cap->issued)); 3958 + 3961 3959 cap->seq = 0; /* reset cap seq */ 3962 3960 cap->issue_seq = 0; /* and issue_seq */ 3963 3961 cap->mseq = 0; /* and migrate_seq */
+1 -2
fs/ceph/mds_client.h
··· 541 541 extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); 542 542 extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); 543 543 extern int ceph_iterate_session_caps(struct ceph_mds_session *session, 544 - int (*cb)(struct inode *, 545 - struct ceph_cap *, void *), 544 + int (*cb)(struct inode *, int mds, void *), 546 545 void *arg); 547 546 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 548 547
+2
fs/ceph/super.h
··· 1192 1192 struct ceph_mds_session *session); 1193 1193 void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, 1194 1194 struct ceph_inode_info *ci); 1195 + extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, 1196 + int mds); 1195 1197 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, 1196 1198 int mds); 1197 1199 extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,