Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tty/vt: fix write/write race in ioctl(KDSKBSENT) handler

The bug manifests as an attempt to access deallocated memory:

BUG: unable to handle kernel paging request at ffff9c8735448000
#PF error: [PROT] [WRITE]
PGD 288a05067 P4D 288a05067 PUD 288a07067 PMD 7f60c2063 PTE 80000007f5448161
Oops: 0003 [#1] PREEMPT SMP
CPU: 6 PID: 388 Comm: loadkeys Tainted: G C 5.0.0-rc6-00153-g5ded5871030e #91
Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77M-D3H, BIOS F12 11/14/2013
RIP: 0010:__memmove+0x81/0x1a0
Code: 4c 89 4f 10 4c 89 47 18 48 8d 7f 20 73 d4 48 83 c2 20 e9 a2 00 00 00 66 90 48 89 d1 4c 8b 5c 16 f8 4c 8d 54 17 f8 48 c1 e9 03 <f3> 48 a5 4d 89 1a e9 0c 01 00 00 0f 1f 40 00 48 89 d1 4c 8b 1e 49
RSP: 0018:ffffa1b9002d7d08 EFLAGS: 00010203
RAX: ffff9c873541af43 RBX: ffff9c873541af43 RCX: 00000c6f105cd6bf
RDX: 0000637882e986b6 RSI: ffff9c8735447ffb RDI: ffff9c8735447ffb
RBP: ffff9c8739cd3800 R08: ffff9c873b802f00 R09: 00000000fffff73b
R10: ffffffffb82b35f1 R11: 00505b1b004d5b1b R12: 0000000000000000
R13: ffff9c873541af3d R14: 000000000000000b R15: 000000000000000c
FS: 00007f450c390580(0000) GS:ffff9c873f180000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff9c8735448000 CR3: 00000007e213c002 CR4: 00000000000606e0
Call Trace:
vt_do_kdgkb_ioctl+0x34d/0x440
vt_ioctl+0xba3/0x1190
? __bpf_prog_run32+0x39/0x60
? mem_cgroup_commit_charge+0x7b/0x4e0
tty_ioctl+0x23f/0x920
? preempt_count_sub+0x98/0xe0
? __seccomp_filter+0x67/0x600
do_vfs_ioctl+0xa2/0x6a0
? syscall_trace_enter+0x192/0x2d0
ksys_ioctl+0x3a/0x70
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x54/0xe0
entry_SYSCALL_64_after_hwframe+0x49/0xbe

The bug manifests on systemd systems with multiple vtcon devices:
# cat /sys/devices/virtual/vtconsole/vtcon0/name
(S) dummy device
# cat /sys/devices/virtual/vtconsole/vtcon1/name
(M) frame buffer device

There systemd runs 'loadkeys' tool in tapallel for each vtcon
instance. This causes two parallel ioctl(KDSKBSENT) calls to
race into adding the same entry into 'func_table' array at:

drivers/tty/vt/keyboard.c:vt_do_kdgkb_ioctl()

The function has no locking around writes to 'func_table'.

The simplest reproducer is to have initrams with the following
init on a 8-CPU machine x86_64:

#!/bin/sh

loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &

loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &
loadkeys -q windowkeys ru4 &
wait

The change adds lock on write path only. Reads are still racy.

CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Jiri Slaby <jslaby@suse.com>
Link: https://lkml.org/lkml/2019/2/17/256
Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Sergei Trofimovich and committed by
Greg Kroah-Hartman
46ca3f73 1bbb1c31

+27 -6
+27 -6
drivers/tty/vt/keyboard.c
··· 123 123 static struct input_handler kbd_handler; 124 124 static DEFINE_SPINLOCK(kbd_event_lock); 125 125 static DEFINE_SPINLOCK(led_lock); 126 + static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ 126 127 static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */ 127 128 static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ 128 129 static bool dead_key_next; ··· 1991 1990 char *p; 1992 1991 u_char *q; 1993 1992 u_char __user *up; 1994 - int sz; 1993 + int sz, fnw_sz; 1995 1994 int delta; 1996 1995 char *first_free, *fj, *fnw; 1997 1996 int i, j, k; 1998 1997 int ret; 1998 + unsigned long flags; 1999 1999 2000 2000 if (!capable(CAP_SYS_TTY_CONFIG)) 2001 2001 perm = 0; ··· 2039 2037 goto reterr; 2040 2038 } 2041 2039 2040 + fnw = NULL; 2041 + fnw_sz = 0; 2042 + /* race aginst other writers */ 2043 + again: 2044 + spin_lock_irqsave(&func_buf_lock, flags); 2042 2045 q = func_table[i]; 2046 + 2047 + /* fj pointer to next entry after 'q' */ 2043 2048 first_free = funcbufptr + (funcbufsize - funcbufleft); 2044 2049 for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) 2045 2050 ; ··· 2054 2045 fj = func_table[j]; 2055 2046 else 2056 2047 fj = first_free; 2057 - 2048 + /* buffer usage increase by new entry */ 2058 2049 delta = (q ? -strlen(q) : 1) + strlen(kbs->kb_string); 2050 + 2059 2051 if (delta <= funcbufleft) { /* it fits in current buf */ 2060 2052 if (j < MAX_NR_FUNC) { 2053 + /* make enough space for new entry at 'fj' */ 2061 2054 memmove(fj + delta, fj, first_free - fj); 2062 2055 for (k = j; k < MAX_NR_FUNC; k++) 2063 2056 if (func_table[k]) ··· 2072 2061 sz = 256; 2073 2062 while (sz < funcbufsize - funcbufleft + delta) 2074 2063 sz <<= 1; 2075 - fnw = kmalloc(sz, GFP_KERNEL); 2076 - if(!fnw) { 2077 - ret = -ENOMEM; 2078 - goto reterr; 2064 + if (fnw_sz != sz) { 2065 + spin_unlock_irqrestore(&func_buf_lock, flags); 2066 + kfree(fnw); 2067 + fnw = kmalloc(sz, GFP_KERNEL); 2068 + fnw_sz = sz; 2069 + if (!fnw) { 2070 + ret = -ENOMEM; 2071 + goto reterr; 2072 + } 2073 + goto again; 2079 2074 } 2080 2075 2081 2076 if (!q) 2082 2077 func_table[i] = fj; 2078 + /* copy data before insertion point to new location */ 2083 2079 if (fj > funcbufptr) 2084 2080 memmove(fnw, funcbufptr, fj - funcbufptr); 2085 2081 for (k = 0; k < j; k++) 2086 2082 if (func_table[k]) 2087 2083 func_table[k] = fnw + (func_table[k] - funcbufptr); 2088 2084 2085 + /* copy data after insertion point to new location */ 2089 2086 if (first_free > fj) { 2090 2087 memmove(fnw + (fj - funcbufptr) + delta, fj, first_free - fj); 2091 2088 for (k = j; k < MAX_NR_FUNC; k++) ··· 2106 2087 funcbufleft = funcbufleft - delta + sz - funcbufsize; 2107 2088 funcbufsize = sz; 2108 2089 } 2090 + /* finally insert item itself */ 2109 2091 strcpy(func_table[i], kbs->kb_string); 2092 + spin_unlock_irqrestore(&func_buf_lock, flags); 2110 2093 break; 2111 2094 } 2112 2095 ret = 0;