Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libceph: fix potential use-after-free in have_mon_and_osd_map()

The wait loop in __ceph_open_session() can race with the client
receiving a new monmap or osdmap shortly after the initial map is
received. Both ceph_monc_handle_map() and handle_one_map() install
a new map immediately after freeing the old one:

kfree(monc->monmap);
monc->monmap = monmap;

ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = newmap;

under client->monc.mutex and client->osdc.lock respectively, but
because neither is taken in have_mon_and_osd_map() it's possible for
the client->monc.monmap->epoch and client->osdc.osdmap->epoch arms in the

client->monc.monmap && client->monc.monmap->epoch &&
client->osdc.osdmap && client->osdc.osdmap->epoch;

condition to dereference an already freed map. This happens to be
reproducible with generic/395 and generic/397 with KASAN enabled:

BUG: KASAN: slab-use-after-free in have_mon_and_osd_map+0x56/0x70
Read of size 4 at addr ffff88811012d810 by task mount.ceph/13305
CPU: 2 UID: 0 PID: 13305 Comm: mount.ceph Not tainted 6.14.0-rc2-build2+ #1266
...
Call Trace:
<TASK>
have_mon_and_osd_map+0x56/0x70
ceph_open_session+0x182/0x290
ceph_get_tree+0x333/0x680
vfs_get_tree+0x49/0x180
do_new_mount+0x1a3/0x2d0
path_mount+0x6dd/0x730
do_mount+0x99/0xe0
__do_sys_mount+0x141/0x180
do_syscall_64+0x9f/0x100
entry_SYSCALL_64_after_hwframe+0x76/0x7e
</TASK>

Allocated by task 13305:
ceph_osdmap_alloc+0x16/0x130
ceph_osdc_init+0x27a/0x4c0
ceph_create_client+0x153/0x190
create_fs_client+0x50/0x2a0
ceph_get_tree+0xff/0x680
vfs_get_tree+0x49/0x180
do_new_mount+0x1a3/0x2d0
path_mount+0x6dd/0x730
do_mount+0x99/0xe0
__do_sys_mount+0x141/0x180
do_syscall_64+0x9f/0x100
entry_SYSCALL_64_after_hwframe+0x76/0x7e

Freed by task 9475:
kfree+0x212/0x290
handle_one_map+0x23c/0x3b0
ceph_osdc_handle_map+0x3c9/0x590
mon_dispatch+0x655/0x6f0
ceph_con_process_message+0xc3/0xe0
ceph_con_v1_try_read+0x614/0x760
ceph_con_workfn+0x2de/0x650
process_one_work+0x486/0x7c0
process_scheduled_works+0x73/0x90
worker_thread+0x1c8/0x2a0
kthread+0x2ec/0x300
ret_from_fork+0x24/0x40
ret_from_fork_asm+0x1a/0x30

Rewrite the wait loop to check the above condition directly with
client->monc.mutex and client->osdc.lock taken as appropriate. While
at it, improve the timeout handling (previously mount_timeout could be
exceeded in case wait_event_interruptible_timeout() slept more than
once) and access client->auth_err under client->monc.mutex to match
how it's set in finish_auth().

monmap_show() and osdmap_show() now take the respective lock before
accessing the map as well.

Cc: stable@vger.kernel.org
Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>

+43 -26
+32 -21
net/ceph/ceph_common.c
··· 786 786 EXPORT_SYMBOL(ceph_reset_client_addr); 787 787 788 788 /* 789 - * true if we have the mon map (and have thus joined the cluster) 790 - */ 791 - static bool have_mon_and_osd_map(struct ceph_client *client) 792 - { 793 - return client->monc.monmap && client->monc.monmap->epoch && 794 - client->osdc.osdmap && client->osdc.osdmap->epoch; 795 - } 796 - 797 - /* 798 789 * mount: join the ceph cluster, and open root directory. 799 790 */ 800 791 int __ceph_open_session(struct ceph_client *client, unsigned long started) 801 792 { 802 - unsigned long timeout = client->options->mount_timeout; 803 - long err; 793 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 794 + long timeout = ceph_timeout_jiffies(client->options->mount_timeout); 795 + bool have_monmap, have_osdmap; 796 + int err; 804 797 805 798 /* open session, and wait for mon and osd maps */ 806 799 err = ceph_monc_open_session(&client->monc); 807 800 if (err < 0) 808 801 return err; 809 802 810 - while (!have_mon_and_osd_map(client)) { 811 - if (timeout && time_after_eq(jiffies, started + timeout)) 812 - return -ETIMEDOUT; 803 + add_wait_queue(&client->auth_wq, &wait); 804 + for (;;) { 805 + mutex_lock(&client->monc.mutex); 806 + err = client->auth_err; 807 + have_monmap = client->monc.monmap && client->monc.monmap->epoch; 808 + mutex_unlock(&client->monc.mutex); 809 + 810 + down_read(&client->osdc.lock); 811 + have_osdmap = client->osdc.osdmap && client->osdc.osdmap->epoch; 812 + up_read(&client->osdc.lock); 813 + 814 + if (err || (have_monmap && have_osdmap)) 815 + break; 816 + 817 + if (signal_pending(current)) { 818 + err = -ERESTARTSYS; 819 + break; 820 + } 821 + 822 + if (!timeout) { 823 + err = -ETIMEDOUT; 824 + break; 825 + } 813 826 814 827 /* wait */ 815 828 dout("mount waiting for mon_map\n"); 816 - err = wait_event_interruptible_timeout(client->auth_wq, 817 - have_mon_and_osd_map(client) || (client->auth_err < 0), 818 - ceph_timeout_jiffies(timeout)); 819 - if (err < 0) 820 - return err; 821 - 
if (client->auth_err < 0) 822 - return client->auth_err; 829 + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); 823 830 } 831 + remove_wait_queue(&client->auth_wq, &wait); 832 + 833 + if (err) 834 + return err; 824 835 825 836 pr_info("client%llu fsid %pU\n", ceph_client_gid(client), 826 837 &client->fsid);
+11 -5
net/ceph/debugfs.c
··· 36 36 int i; 37 37 struct ceph_client *client = s->private; 38 38 39 + mutex_lock(&client->monc.mutex); 39 40 if (client->monc.monmap == NULL) 40 - return 0; 41 + goto out_unlock; 41 42 42 43 seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); 43 44 for (i = 0; i < client->monc.monmap->num_mon; i++) { ··· 49 48 ENTITY_NAME(inst->name), 50 49 ceph_pr_addr(&inst->addr)); 51 50 } 51 + 52 + out_unlock: 53 + mutex_unlock(&client->monc.mutex); 52 54 return 0; 53 55 } 54 56 ··· 60 56 int i; 61 57 struct ceph_client *client = s->private; 62 58 struct ceph_osd_client *osdc = &client->osdc; 63 - struct ceph_osdmap *map = osdc->osdmap; 59 + struct ceph_osdmap *map; 64 60 struct rb_node *n; 65 61 66 - if (map == NULL) 67 - return 0; 68 - 69 62 down_read(&osdc->lock); 63 + map = osdc->osdmap; 64 + if (map == NULL) 65 + goto out_unlock; 66 + 70 67 seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch, 71 68 osdc->epoch_barrier, map->flags); 72 69 ··· 136 131 seq_printf(s, "]\n"); 137 132 } 138 133 134 + out_unlock: 139 135 up_read(&osdc->lock); 140 136 return 0; 141 137 }