Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull hardened usercopy whitelisting from Kees Cook:
"Currently, hardened usercopy performs dynamic bounds checking on slab
cache objects. This is good, but still leaves a lot of kernel memory
available to be copied to/from userspace in the face of bugs.

To further restrict what memory is available for copying, this creates
a way to whitelist specific areas of a given slab cache object for
copying to/from userspace, allowing much finer granularity of access
control.

Slab caches that are never exposed to userspace can declare no
whitelist for their objects, thereby keeping them unavailable to
userspace via dynamic copy operations. (Note, an implicit form of
whitelisting is the use of constant sizes in usercopy operations and
get_user()/put_user(); these bypass all hardened usercopy checks since
these sizes cannot change at runtime.)

This new check is WARN-by-default, so any mistakes can be found over
the next several releases without breaking anyone's system.

The series has roughly the following sections:
- remove %p and improve reporting with offset
- prepare infrastructure and whitelist kmalloc
- update VFS subsystem with whitelists
- update SCSI subsystem with whitelists
- update network subsystem with whitelists
- update process memory with whitelists
- update per-architecture thread_struct with whitelists
- update KVM with whitelists and fix ioctl bug
- mark all other allocations as not whitelisted
- update lkdtm for more sensible test coverage"

* tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (38 commits)
lkdtm: Update usercopy tests for whitelisting
usercopy: Restrict non-usercopy caches to size 0
kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl
kvm: whitelist struct kvm_vcpu_arch
arm: Implement thread_struct whitelist for hardened usercopy
arm64: Implement thread_struct whitelist for hardened usercopy
x86: Implement thread_struct whitelist for hardened usercopy
fork: Provide usercopy whitelisting for task_struct
fork: Define usercopy region in thread_stack slab caches
fork: Define usercopy region in mm_struct slab caches
net: Restrict unwhitelisted proto caches to size 0
sctp: Copy struct sctp_sock.autoclose to userspace using put_user()
sctp: Define usercopy region in SCTP proto slab cache
caif: Define usercopy region in caif proto slab cache
ip: Define usercopy region in IP proto slab cache
net: Define usercopy region in struct proto slab cache
scsi: Define usercopy region in scsi_sense_cache slab cache
cifs: Define usercopy region in cifs_request slab cache
vxfs: Define usercopy region in vxfs_inode slab cache
ufs: Define usercopy region in ufs_inode_cache slab cache
...

+515 -215
+11
arch/Kconfig
··· 245 245 config ARCH_TASK_STRUCT_ALLOCATOR 246 246 bool 247 247 248 + config HAVE_ARCH_THREAD_STRUCT_WHITELIST 249 + bool 250 + depends on !ARCH_TASK_STRUCT_ALLOCATOR 251 + help 252 + An architecture should select this to provide hardened usercopy 253 + knowledge about what region of the thread_struct should be 254 + whitelisted for copying to userspace. Normally this is only the 255 + FPU registers. Specifically, arch_thread_struct_whitelist() 256 + should be implemented. Without this, the entire thread_struct 257 + field in task_struct will be left whitelisted. 258 + 248 259 # Select if arch has its private alloc_thread_stack() function 249 260 config ARCH_THREAD_STACK_ALLOCATOR 250 261 bool
+1
arch/arm/Kconfig
··· 51 51 select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU 52 52 select HAVE_ARCH_MMAP_RND_BITS if MMU 53 53 select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) 54 + select HAVE_ARCH_THREAD_STRUCT_WHITELIST 54 55 select HAVE_ARCH_TRACEHOOK 55 56 select HAVE_ARM_SMCCC if CPU_V7 56 57 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
+10
arch/arm/include/asm/processor.h
··· 45 45 struct debug_info debug; 46 46 }; 47 47 48 + /* 49 + * Everything usercopied to/from thread_struct is statically-sized, so 50 + * no hardened usercopy whitelist is needed. 51 + */ 52 + static inline void arch_thread_struct_whitelist(unsigned long *offset, 53 + unsigned long *size) 54 + { 55 + *offset = *size = 0; 56 + } 57 + 48 58 #define INIT_THREAD { } 49 59 50 60 #define start_thread(regs,pc,sp) \
+1
arch/arm64/Kconfig
··· 91 91 select HAVE_ARCH_MMAP_RND_BITS 92 92 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT 93 93 select HAVE_ARCH_SECCOMP_FILTER 94 + select HAVE_ARCH_THREAD_STRUCT_WHITELIST 94 95 select HAVE_ARCH_TRACEHOOK 95 96 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 96 97 select HAVE_ARCH_VMAP_STACK
+10
arch/arm64/include/asm/processor.h
··· 113 113 struct debug_info debug; /* debugging */ 114 114 }; 115 115 116 + /* 117 + * Everything usercopied to/from thread_struct is statically-sized, so 118 + * no hardened usercopy whitelist is needed. 119 + */ 120 + static inline void arch_thread_struct_whitelist(unsigned long *offset, 121 + unsigned long *size) 122 + { 123 + *offset = *size = 0; 124 + } 125 + 116 126 #ifdef CONFIG_COMPAT 117 127 #define task_user_tls(t) \ 118 128 ({ \
+1
arch/x86/Kconfig
··· 116 116 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT 117 117 select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT 118 118 select HAVE_ARCH_SECCOMP_FILTER 119 + select HAVE_ARCH_THREAD_STRUCT_WHITELIST 119 120 select HAVE_ARCH_TRACEHOOK 120 121 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 121 122 select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
+8
arch/x86/include/asm/processor.h
··· 507 507 */ 508 508 }; 509 509 510 + /* Whitelist the FPU state from the task_struct for hardened usercopy. */ 511 + static inline void arch_thread_struct_whitelist(unsigned long *offset, 512 + unsigned long *size) 513 + { 514 + *offset = offsetof(struct thread_struct, fpu.state); 515 + *size = fpu_kernel_xstate_size; 516 + } 517 + 510 518 /* 511 519 * Thread-synchronous status. 512 520 *
+4 -3
arch/x86/kvm/x86.c
··· 4237 4237 mutex_unlock(&kvm->lock); 4238 4238 break; 4239 4239 case KVM_XEN_HVM_CONFIG: { 4240 + struct kvm_xen_hvm_config xhc; 4240 4241 r = -EFAULT; 4241 - if (copy_from_user(&kvm->arch.xen_hvm_config, argp, 4242 - sizeof(struct kvm_xen_hvm_config))) 4242 + if (copy_from_user(&xhc, argp, sizeof(xhc))) 4243 4243 goto out; 4244 4244 r = -EINVAL; 4245 - if (kvm->arch.xen_hvm_config.flags) 4245 + if (xhc.flags) 4246 4246 goto out; 4247 + memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); 4247 4248 r = 0; 4248 4249 break; 4249 4250 }
+2 -2
drivers/misc/lkdtm.h
··· 76 76 void __exit lkdtm_usercopy_exit(void); 77 77 void lkdtm_USERCOPY_HEAP_SIZE_TO(void); 78 78 void lkdtm_USERCOPY_HEAP_SIZE_FROM(void); 79 - void lkdtm_USERCOPY_HEAP_FLAG_TO(void); 80 - void lkdtm_USERCOPY_HEAP_FLAG_FROM(void); 79 + void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void); 80 + void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void); 81 81 void lkdtm_USERCOPY_STACK_FRAME_TO(void); 82 82 void lkdtm_USERCOPY_STACK_FRAME_FROM(void); 83 83 void lkdtm_USERCOPY_STACK_BEYOND(void);
+2 -2
drivers/misc/lkdtm_core.c
··· 177 177 CRASHTYPE(ATOMIC_TIMING), 178 178 CRASHTYPE(USERCOPY_HEAP_SIZE_TO), 179 179 CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), 180 - CRASHTYPE(USERCOPY_HEAP_FLAG_TO), 181 - CRASHTYPE(USERCOPY_HEAP_FLAG_FROM), 180 + CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO), 181 + CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM), 182 182 CRASHTYPE(USERCOPY_STACK_FRAME_TO), 183 183 CRASHTYPE(USERCOPY_STACK_FRAME_FROM), 184 184 CRASHTYPE(USERCOPY_STACK_BEYOND),
+58 -43
drivers/misc/lkdtm_usercopy.c
··· 20 20 */ 21 21 static volatile size_t unconst = 0; 22 22 static volatile size_t cache_size = 1024; 23 - static struct kmem_cache *bad_cache; 23 + static struct kmem_cache *whitelist_cache; 24 24 25 25 static const unsigned char test_text[] = "This is a test.\n"; 26 26 ··· 115 115 vm_munmap(user_addr, PAGE_SIZE); 116 116 } 117 117 118 + /* 119 + * This checks for whole-object size validation with hardened usercopy, 120 + * with or without usercopy whitelisting. 121 + */ 118 122 static void do_usercopy_heap_size(bool to_user) 119 123 { 120 124 unsigned long user_addr; 121 125 unsigned char *one, *two; 126 + void __user *test_user_addr; 127 + void *test_kern_addr; 122 128 size_t size = unconst + 1024; 123 129 124 130 one = kmalloc(size, GFP_KERNEL); ··· 145 139 memset(one, 'A', size); 146 140 memset(two, 'B', size); 147 141 142 + test_user_addr = (void __user *)(user_addr + 16); 143 + test_kern_addr = one + 16; 144 + 148 145 if (to_user) { 149 146 pr_info("attempting good copy_to_user of correct size\n"); 150 - if (copy_to_user((void __user *)user_addr, one, size)) { 147 + if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) { 151 148 pr_warn("copy_to_user failed unexpectedly?!\n"); 152 149 goto free_user; 153 150 } 154 151 155 152 pr_info("attempting bad copy_to_user of too large size\n"); 156 - if (copy_to_user((void __user *)user_addr, one, 2 * size)) { 153 + if (copy_to_user(test_user_addr, test_kern_addr, size)) { 157 154 pr_warn("copy_to_user failed, but lacked Oops\n"); 158 155 goto free_user; 159 156 } 160 157 } else { 161 158 pr_info("attempting good copy_from_user of correct size\n"); 162 - if (copy_from_user(one, (void __user *)user_addr, size)) { 159 + if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) { 163 160 pr_warn("copy_from_user failed unexpectedly?!\n"); 164 161 goto free_user; 165 162 } 166 163 167 164 pr_info("attempting bad copy_from_user of too large size\n"); 168 - if (copy_from_user(one, (void __user *)user_addr, 2 * 
size)) { 165 + if (copy_from_user(test_kern_addr, test_user_addr, size)) { 169 166 pr_warn("copy_from_user failed, but lacked Oops\n"); 170 167 goto free_user; 171 168 } ··· 181 172 kfree(two); 182 173 } 183 174 184 - static void do_usercopy_heap_flag(bool to_user) 175 + /* 176 + * This checks for the specific whitelist window within an object. If this 177 + * test passes, then do_usercopy_heap_size() tests will pass too. 178 + */ 179 + static void do_usercopy_heap_whitelist(bool to_user) 185 180 { 186 - unsigned long user_addr; 187 - unsigned char *good_buf = NULL; 188 - unsigned char *bad_buf = NULL; 181 + unsigned long user_alloc; 182 + unsigned char *buf = NULL; 183 + unsigned char __user *user_addr; 184 + size_t offset, size; 189 185 190 186 /* Make sure cache was prepared. */ 191 - if (!bad_cache) { 187 + if (!whitelist_cache) { 192 188 pr_warn("Failed to allocate kernel cache\n"); 193 189 return; 194 190 } 195 191 196 192 /* 197 - * Allocate one buffer from each cache (kmalloc will have the 198 - * SLAB_USERCOPY flag already, but "bad_cache" won't). 193 + * Allocate a buffer with a whitelisted window in the buffer. 199 194 */ 200 - good_buf = kmalloc(cache_size, GFP_KERNEL); 201 - bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL); 202 - if (!good_buf || !bad_buf) { 203 - pr_warn("Failed to allocate buffers from caches\n"); 195 + buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL); 196 + if (!buf) { 197 + pr_warn("Failed to allocate buffer from whitelist cache\n"); 204 198 goto free_alloc; 205 199 } 206 200 207 201 /* Allocate user memory we'll poke at. 
*/ 208 - user_addr = vm_mmap(NULL, 0, PAGE_SIZE, 202 + user_alloc = vm_mmap(NULL, 0, PAGE_SIZE, 209 203 PROT_READ | PROT_WRITE | PROT_EXEC, 210 204 MAP_ANONYMOUS | MAP_PRIVATE, 0); 211 - if (user_addr >= TASK_SIZE) { 205 + if (user_alloc >= TASK_SIZE) { 212 206 pr_warn("Failed to allocate user memory\n"); 213 207 goto free_alloc; 214 208 } 209 + user_addr = (void __user *)user_alloc; 215 210 216 - memset(good_buf, 'A', cache_size); 217 - memset(bad_buf, 'B', cache_size); 211 + memset(buf, 'B', cache_size); 212 + 213 + /* Whitelisted window in buffer, from kmem_cache_create_usercopy. */ 214 + offset = (cache_size / 4) + unconst; 215 + size = (cache_size / 16) + unconst; 218 216 219 217 if (to_user) { 220 - pr_info("attempting good copy_to_user with SLAB_USERCOPY\n"); 221 - if (copy_to_user((void __user *)user_addr, good_buf, 222 - cache_size)) { 218 + pr_info("attempting good copy_to_user inside whitelist\n"); 219 + if (copy_to_user(user_addr, buf + offset, size)) { 223 220 pr_warn("copy_to_user failed unexpectedly?!\n"); 224 221 goto free_user; 225 222 } 226 223 227 - pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n"); 228 - if (copy_to_user((void __user *)user_addr, bad_buf, 229 - cache_size)) { 224 + pr_info("attempting bad copy_to_user outside whitelist\n"); 225 + if (copy_to_user(user_addr, buf + offset - 1, size)) { 230 226 pr_warn("copy_to_user failed, but lacked Oops\n"); 231 227 goto free_user; 232 228 } 233 229 } else { 234 - pr_info("attempting good copy_from_user with SLAB_USERCOPY\n"); 235 - if (copy_from_user(good_buf, (void __user *)user_addr, 236 - cache_size)) { 230 + pr_info("attempting good copy_from_user inside whitelist\n"); 231 + if (copy_from_user(buf + offset, user_addr, size)) { 237 232 pr_warn("copy_from_user failed unexpectedly?!\n"); 238 233 goto free_user; 239 234 } 240 235 241 - pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n"); 242 - if (copy_from_user(bad_buf, (void __user *)user_addr, 243 - cache_size)) { 236 + 
pr_info("attempting bad copy_from_user outside whitelist\n"); 237 + if (copy_from_user(buf + offset - 1, user_addr, size)) { 244 238 pr_warn("copy_from_user failed, but lacked Oops\n"); 245 239 goto free_user; 246 240 } 247 241 } 248 242 249 243 free_user: 250 - vm_munmap(user_addr, PAGE_SIZE); 244 + vm_munmap(user_alloc, PAGE_SIZE); 251 245 free_alloc: 252 - if (bad_buf) 253 - kmem_cache_free(bad_cache, bad_buf); 254 - kfree(good_buf); 246 + if (buf) 247 + kmem_cache_free(whitelist_cache, buf); 255 248 } 256 249 257 250 /* Callable tests. */ ··· 267 256 do_usercopy_heap_size(false); 268 257 } 269 258 270 - void lkdtm_USERCOPY_HEAP_FLAG_TO(void) 259 + void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void) 271 260 { 272 - do_usercopy_heap_flag(true); 261 + do_usercopy_heap_whitelist(true); 273 262 } 274 263 275 - void lkdtm_USERCOPY_HEAP_FLAG_FROM(void) 264 + void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void) 276 265 { 277 - do_usercopy_heap_flag(false); 266 + do_usercopy_heap_whitelist(false); 278 267 } 279 268 280 269 void lkdtm_USERCOPY_STACK_FRAME_TO(void) ··· 325 314 void __init lkdtm_usercopy_init(void) 326 315 { 327 316 /* Prepare cache that lacks SLAB_USERCOPY flag. */ 328 - bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0, 329 - 0, NULL); 317 + whitelist_cache = 318 + kmem_cache_create_usercopy("lkdtm-usercopy", cache_size, 319 + 0, 0, 320 + cache_size / 4, 321 + cache_size / 16, 322 + NULL); 330 323 } 331 324 332 325 void __exit lkdtm_usercopy_exit(void) 333 326 { 334 - kmem_cache_destroy(bad_cache); 327 + kmem_cache_destroy(whitelist_cache); 335 328 }
+5 -4
drivers/scsi/scsi_lib.c
··· 79 79 if (shost->unchecked_isa_dma) { 80 80 scsi_sense_isadma_cache = 81 81 kmem_cache_create("scsi_sense_cache(DMA)", 82 - SCSI_SENSE_BUFFERSIZE, 0, 83 - SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL); 82 + SCSI_SENSE_BUFFERSIZE, 0, 83 + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL); 84 84 if (!scsi_sense_isadma_cache) 85 85 ret = -ENOMEM; 86 86 } else { 87 87 scsi_sense_cache = 88 - kmem_cache_create("scsi_sense_cache", 89 - SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL); 88 + kmem_cache_create_usercopy("scsi_sense_cache", 89 + SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 90 + 0, SCSI_SENSE_BUFFERSIZE, NULL); 90 91 if (!scsi_sense_cache) 91 92 ret = -ENOMEM; 92 93 }
+9 -5
fs/befs/linuxvfs.c
··· 444 444 static int __init 445 445 befs_init_inodecache(void) 446 446 { 447 - befs_inode_cachep = kmem_cache_create("befs_inode_cache", 448 - sizeof (struct befs_inode_info), 449 - 0, (SLAB_RECLAIM_ACCOUNT| 450 - SLAB_MEM_SPREAD|SLAB_ACCOUNT), 451 - init_once); 447 + befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache", 448 + sizeof(struct befs_inode_info), 0, 449 + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 450 + SLAB_ACCOUNT), 451 + offsetof(struct befs_inode_info, 452 + i_data.symlink), 453 + sizeof_field(struct befs_inode_info, 454 + i_data.symlink), 455 + init_once); 452 456 if (befs_inode_cachep == NULL) 453 457 return -ENOMEM; 454 458
+6 -4
fs/cifs/cifsfs.c
··· 1239 1239 cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", 1240 1240 CIFSMaxBufSize, CIFSMaxBufSize); 1241 1241 */ 1242 - cifs_req_cachep = kmem_cache_create("cifs_request", 1242 + cifs_req_cachep = kmem_cache_create_usercopy("cifs_request", 1243 1243 CIFSMaxBufSize + max_hdr_size, 0, 1244 - SLAB_HWCACHE_ALIGN, NULL); 1244 + SLAB_HWCACHE_ALIGN, 0, 1245 + CIFSMaxBufSize + max_hdr_size, 1246 + NULL); 1245 1247 if (cifs_req_cachep == NULL) 1246 1248 return -ENOMEM; 1247 1249 ··· 1269 1267 more SMBs to use small buffer alloc and is still much more 1270 1268 efficient to alloc 1 per page off the slab compared to 17K (5page) 1271 1269 alloc of large cifs buffers even when page debugging is on */ 1272 - cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", 1270 + cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq", 1273 1271 MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, 1274 - NULL); 1272 + 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL); 1275 1273 if (cifs_sm_req_cachep == NULL) { 1276 1274 mempool_destroy(cifs_req_poolp); 1277 1275 kmem_cache_destroy(cifs_req_cachep);
+5 -4
fs/dcache.c
··· 3602 3602 * but it is probably not worth it because of the cache nature 3603 3603 * of the dcache. 3604 3604 */ 3605 - dentry_cache = KMEM_CACHE(dentry, 3606 - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT); 3605 + dentry_cache = KMEM_CACHE_USERCOPY(dentry, 3606 + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, 3607 + d_iname); 3607 3608 3608 3609 /* Hash may have been set up in dcache_init_early */ 3609 3610 if (!hashdist) ··· 3642 3641 3643 3642 void __init vfs_caches_init(void) 3644 3643 { 3645 - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 3646 - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 3644 + names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0, 3645 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL); 3647 3646 3648 3647 dcache_init(); 3649 3648 inode_init();
+5 -2
fs/exofs/super.c
··· 193 193 */ 194 194 static int init_inodecache(void) 195 195 { 196 - exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", 196 + exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache", 197 197 sizeof(struct exofs_i_info), 0, 198 198 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | 199 - SLAB_ACCOUNT, exofs_init_once); 199 + SLAB_ACCOUNT, 200 + offsetof(struct exofs_i_info, i_data), 201 + sizeof_field(struct exofs_i_info, i_data), 202 + exofs_init_once); 200 203 if (exofs_inode_cachep == NULL) 201 204 return -ENOMEM; 202 205 return 0;
+7 -5
fs/ext2/super.c
··· 221 221 222 222 static int __init init_inodecache(void) 223 223 { 224 - ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", 225 - sizeof(struct ext2_inode_info), 226 - 0, (SLAB_RECLAIM_ACCOUNT| 227 - SLAB_MEM_SPREAD|SLAB_ACCOUNT), 228 - init_once); 224 + ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache", 225 + sizeof(struct ext2_inode_info), 0, 226 + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 227 + SLAB_ACCOUNT), 228 + offsetof(struct ext2_inode_info, i_data), 229 + sizeof_field(struct ext2_inode_info, i_data), 230 + init_once); 229 231 if (ext2_inode_cachep == NULL) 230 232 return -ENOMEM; 231 233 return 0;
+7 -5
fs/ext4/super.c
··· 1038 1038 1039 1039 static int __init init_inodecache(void) 1040 1040 { 1041 - ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 1042 - sizeof(struct ext4_inode_info), 1043 - 0, (SLAB_RECLAIM_ACCOUNT| 1044 - SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1045 - init_once); 1041 + ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache", 1042 + sizeof(struct ext4_inode_info), 0, 1043 + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 1044 + SLAB_ACCOUNT), 1045 + offsetof(struct ext4_inode_info, i_data), 1046 + sizeof_field(struct ext4_inode_info, i_data), 1047 + init_once); 1046 1048 if (ext4_inode_cachep == NULL) 1047 1049 return -ENOMEM; 1048 1050 return 0;
+1 -2
fs/fhandle.c
··· 69 69 } else 70 70 retval = 0; 71 71 /* copy the mount id */ 72 - if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id, 73 - sizeof(*mnt_id)) || 72 + if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || 74 73 copy_to_user(ufh, handle, 75 74 sizeof(struct file_handle) + handle_bytes)) 76 75 retval = -EFAULT;
+6 -2
fs/freevxfs/vxfs_super.c
··· 332 332 { 333 333 int rv; 334 334 335 - vxfs_inode_cachep = kmem_cache_create("vxfs_inode", 335 + vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode", 336 336 sizeof(struct vxfs_inode_info), 0, 337 - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 337 + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, 338 + offsetof(struct vxfs_inode_info, vii_immed.vi_immed), 339 + sizeof_field(struct vxfs_inode_info, 340 + vii_immed.vi_immed), 341 + NULL); 338 342 if (!vxfs_inode_cachep) 339 343 return -ENOMEM; 340 344 rv = register_filesystem(&vxfs_fs_type);
+5 -3
fs/jfs/super.c
··· 965 965 int rc; 966 966 967 967 jfs_inode_cachep = 968 - kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 969 - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, 970 - init_once); 968 + kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 969 + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, 970 + offsetof(struct jfs_inode_info, i_inline), 971 + sizeof_field(struct jfs_inode_info, i_inline), 972 + init_once); 971 973 if (jfs_inode_cachep == NULL) 972 974 return -ENOMEM; 973 975
+10 -5
fs/orangefs/super.c
··· 610 610 611 611 int orangefs_inode_cache_initialize(void) 612 612 { 613 - orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache", 614 - sizeof(struct orangefs_inode_s), 615 - 0, 616 - ORANGEFS_CACHE_CREATE_FLAGS, 617 - orangefs_inode_cache_ctor); 613 + orangefs_inode_cache = kmem_cache_create_usercopy( 614 + "orangefs_inode_cache", 615 + sizeof(struct orangefs_inode_s), 616 + 0, 617 + ORANGEFS_CACHE_CREATE_FLAGS, 618 + offsetof(struct orangefs_inode_s, 619 + link_target), 620 + sizeof_field(struct orangefs_inode_s, 621 + link_target), 622 + orangefs_inode_cache_ctor); 618 623 619 624 if (!orangefs_inode_cache) { 620 625 gossip_err("Cannot create orangefs_inode_cache\n");
+8 -5
fs/ufs/super.c
··· 1467 1467 1468 1468 static int __init init_inodecache(void) 1469 1469 { 1470 - ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", 1471 - sizeof(struct ufs_inode_info), 1472 - 0, (SLAB_RECLAIM_ACCOUNT| 1473 - SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1474 - init_once); 1470 + ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache", 1471 + sizeof(struct ufs_inode_info), 0, 1472 + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 1473 + SLAB_ACCOUNT), 1474 + offsetof(struct ufs_inode_info, i_u1.i_symlink), 1475 + sizeof_field(struct ufs_inode_info, 1476 + i_u1.i_symlink), 1477 + init_once); 1475 1478 if (ufs_inode_cachep == NULL) 1476 1479 return -ENOMEM; 1477 1480 return 0;
+14
include/linux/sched/task.h
··· 104 104 # define arch_task_struct_size (sizeof(struct task_struct)) 105 105 #endif 106 106 107 + #ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST 108 + /* 109 + * If an architecture has not declared a thread_struct whitelist we 110 + * must assume something there may need to be copied to userspace. 111 + */ 112 + static inline void arch_thread_struct_whitelist(unsigned long *offset, 113 + unsigned long *size) 114 + { 115 + *offset = 0; 116 + /* Handle dynamically sized thread_struct. */ 117 + *size = arch_task_struct_size - offsetof(struct task_struct, thread); 118 + } 119 + #endif 120 + 107 121 #ifdef CONFIG_VMAP_STACK 108 122 static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) 109 123 {
+27 -14
include/linux/slab.h
··· 135 135 void __init kmem_cache_init(void); 136 136 bool slab_is_available(void); 137 137 138 - struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, 139 - slab_flags_t, 140 - void (*)(void *)); 138 + extern bool usercopy_fallback; 139 + 140 + struct kmem_cache *kmem_cache_create(const char *name, size_t size, 141 + size_t align, slab_flags_t flags, 142 + void (*ctor)(void *)); 143 + struct kmem_cache *kmem_cache_create_usercopy(const char *name, 144 + size_t size, size_t align, slab_flags_t flags, 145 + size_t useroffset, size_t usersize, 146 + void (*ctor)(void *)); 141 147 void kmem_cache_destroy(struct kmem_cache *); 142 148 int kmem_cache_shrink(struct kmem_cache *); 143 149 ··· 159 153 * f.e. add ____cacheline_aligned_in_smp to the struct declaration 160 154 * then the objects will be properly aligned in SMP configurations. 161 155 */ 162 - #define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ 163 - sizeof(struct __struct), __alignof__(struct __struct),\ 164 - (__flags), NULL) 156 + #define KMEM_CACHE(__struct, __flags) \ 157 + kmem_cache_create(#__struct, sizeof(struct __struct), \ 158 + __alignof__(struct __struct), (__flags), NULL) 159 + 160 + /* 161 + * To whitelist a single field for copying to/from usercopy, use this 162 + * macro instead for KMEM_CACHE() above. 
163 + */ 164 + #define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ 165 + kmem_cache_create_usercopy(#__struct, \ 166 + sizeof(struct __struct), \ 167 + __alignof__(struct __struct), (__flags), \ 168 + offsetof(struct __struct, __field), \ 169 + sizeof_field(struct __struct, __field), NULL) 165 170 166 171 /* 167 172 * Common kmalloc functions provided by all allocators ··· 184 167 size_t ksize(const void *); 185 168 186 169 #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR 187 - const char *__check_heap_object(const void *ptr, unsigned long n, 188 - struct page *page); 170 + void __check_heap_object(const void *ptr, unsigned long n, struct page *page, 171 + bool to_user); 189 172 #else 190 - static inline const char *__check_heap_object(const void *ptr, 191 - unsigned long n, 192 - struct page *page) 193 - { 194 - return NULL; 195 - } 173 + static inline void __check_heap_object(const void *ptr, unsigned long n, 174 + struct page *page, bool to_user) { } 196 175 #endif 197 176 198 177 /*
+3
include/linux/slab_def.h
··· 85 85 unsigned int *random_seq; 86 86 #endif 87 87 88 + size_t useroffset; /* Usercopy region offset */ 89 + size_t usersize; /* Usercopy region size */ 90 + 88 91 struct kmem_cache_node *node[MAX_NUMNODES]; 89 92 }; 90 93
+3
include/linux/slub_def.h
··· 135 135 struct kasan_cache kasan_info; 136 136 #endif 137 137 138 + size_t useroffset; /* Usercopy region offset */ 139 + size_t usersize; /* Usercopy region size */ 140 + 138 141 struct kmem_cache_node *node[MAX_NUMNODES]; 139 142 }; 140 143
+9 -1
include/linux/stddef.h
··· 20 20 #endif 21 21 22 22 /** 23 + * sizeof_field(TYPE, MEMBER) 24 + * 25 + * @TYPE: The structure containing the field of interest 26 + * @MEMBER: The field to return the size of 27 + */ 28 + #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) 29 + 30 + /** 23 31 * offsetofend(TYPE, MEMBER) 24 32 * 25 33 * @TYPE: The type of the structure 26 34 * @MEMBER: The member within the structure to get the end offset of 27 35 */ 28 36 #define offsetofend(TYPE, MEMBER) \ 29 - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) 37 + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) 30 38 31 39 #endif
+8
include/linux/uaccess.h
··· 273 273 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) 274 274 #endif 275 275 276 + #ifdef CONFIG_HARDENED_USERCOPY 277 + void usercopy_warn(const char *name, const char *detail, bool to_user, 278 + unsigned long offset, unsigned long len); 279 + void __noreturn usercopy_abort(const char *name, const char *detail, 280 + bool to_user, unsigned long offset, 281 + unsigned long len); 282 + #endif 283 + 276 284 #endif /* __LINUX_UACCESS_H__ */
+7 -2
include/net/sctp/structs.h
··· 203 203 /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ 204 204 __u32 param_flags; 205 205 206 - struct sctp_initmsg initmsg; 207 206 struct sctp_rtoinfo rtoinfo; 208 207 struct sctp_paddrparams paddrparam; 209 - struct sctp_event_subscribe subscribe; 210 208 struct sctp_assocparams assocparams; 209 + 210 + /* 211 + * These two structures must be grouped together for the usercopy 212 + * whitelist region. 213 + */ 214 + struct sctp_event_subscribe subscribe; 215 + struct sctp_initmsg initmsg; 211 216 212 217 int user_frag; 213 218
+2
include/net/sock.h
··· 1109 1109 struct kmem_cache *slab; 1110 1110 unsigned int obj_size; 1111 1111 slab_flags_t slab_flags; 1112 + size_t useroffset; /* Usercopy region offset */ 1113 + size_t usersize; /* Usercopy region size */ 1112 1114 1113 1115 struct percpu_counter *orphan_count; 1114 1116
+26 -5
kernel/fork.c
··· 283 283 284 284 void thread_stack_cache_init(void) 285 285 { 286 - thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, 287 - THREAD_SIZE, 0, NULL); 286 + thread_stack_cache = kmem_cache_create_usercopy("thread_stack", 287 + THREAD_SIZE, THREAD_SIZE, 0, 0, 288 + THREAD_SIZE, NULL); 288 289 BUG_ON(thread_stack_cache == NULL); 289 290 } 290 291 # endif ··· 694 693 int arch_task_struct_size __read_mostly; 695 694 #endif 696 695 696 + static void task_struct_whitelist(unsigned long *offset, unsigned long *size) 697 + { 698 + /* Fetch thread_struct whitelist for the architecture. */ 699 + arch_thread_struct_whitelist(offset, size); 700 + 701 + /* 702 + * Handle zero-sized whitelist or empty thread_struct, otherwise 703 + * adjust offset to position of thread_struct in task_struct. 704 + */ 705 + if (unlikely(*size == 0)) 706 + *offset = 0; 707 + else 708 + *offset += offsetof(struct task_struct, thread); 709 + } 710 + 697 711 void __init fork_init(void) 698 712 { 699 713 int i; ··· 717 701 #define ARCH_MIN_TASKALIGN 0 718 702 #endif 719 703 int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); 704 + unsigned long useroffset, usersize; 720 705 721 706 /* create a slab on which task_structs can be allocated */ 722 - task_struct_cachep = kmem_cache_create("task_struct", 707 + task_struct_whitelist(&useroffset, &usersize); 708 + task_struct_cachep = kmem_cache_create_usercopy("task_struct", 723 709 arch_task_struct_size, align, 724 - SLAB_PANIC|SLAB_ACCOUNT, NULL); 710 + SLAB_PANIC|SLAB_ACCOUNT, 711 + useroffset, usersize, NULL); 725 712 #endif 726 713 727 714 /* do the arch specific task caches init */ ··· 2267 2248 * maximum number of CPU's we can ever have. The cpumask_allocation 2268 2249 * is at the end of the structure, exactly for that reason. 
2269 2250 */ 2270 - mm_cachep = kmem_cache_create("mm_struct", 2251 + mm_cachep = kmem_cache_create_usercopy("mm_struct", 2271 2252 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 2272 2253 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 2254 + offsetof(struct mm_struct, saved_auxv), 2255 + sizeof_field(struct mm_struct, saved_auxv), 2273 2256 NULL); 2274 2257 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); 2275 2258 mmap_init();
+27 -9
mm/slab.c
··· 1281 1281 create_boot_cache(kmem_cache, "kmem_cache", 1282 1282 offsetof(struct kmem_cache, node) + 1283 1283 nr_node_ids * sizeof(struct kmem_cache_node *), 1284 - SLAB_HWCACHE_ALIGN); 1284 + SLAB_HWCACHE_ALIGN, 0, 0); 1285 1285 list_add(&kmem_cache->list, &slab_caches); 1286 1286 slab_state = PARTIAL; 1287 1287 ··· 1291 1291 */ 1292 1292 kmalloc_caches[INDEX_NODE] = create_kmalloc_cache( 1293 1293 kmalloc_info[INDEX_NODE].name, 1294 - kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); 1294 + kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS, 1295 + 0, kmalloc_size(INDEX_NODE)); 1295 1296 slab_state = PARTIAL_NODE; 1296 1297 setup_kmalloc_cache_index_table(); 1297 1298 ··· 4386 4385 4387 4386 #ifdef CONFIG_HARDENED_USERCOPY 4388 4387 /* 4389 - * Rejects objects that are incorrectly sized. 4388 + * Rejects incorrectly sized objects and objects that are to be copied 4389 + * to/from userspace but do not fall entirely within the containing slab 4390 + * cache's usercopy region. 4390 4391 * 4391 4392 * Returns NULL if check passes, otherwise const char * to name of cache 4392 4393 * to indicate an error. 4393 4394 */ 4394 - const char *__check_heap_object(const void *ptr, unsigned long n, 4395 - struct page *page) 4395 + void __check_heap_object(const void *ptr, unsigned long n, struct page *page, 4396 + bool to_user) 4396 4397 { 4397 4398 struct kmem_cache *cachep; 4398 4399 unsigned int objnr; ··· 4408 4405 /* Find offset within object. */ 4409 4406 offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); 4410 4407 4411 - /* Allow address range falling entirely within object size. */ 4412 - if (offset <= cachep->object_size && n <= cachep->object_size - offset) 4413 - return NULL; 4408 + /* Allow address range falling entirely within usercopy region. 
*/ 4409 + if (offset >= cachep->useroffset && 4410 + offset - cachep->useroffset <= cachep->usersize && 4411 + n <= cachep->useroffset - offset + cachep->usersize) 4412 + return; 4414 4413 4415 - return cachep->name; 4414 + /* 4415 + * If the copy is still within the allocated object, produce 4416 + * a warning instead of rejecting the copy. This is intended 4417 + * to be a temporary method to find any missing usercopy 4418 + * whitelists. 4419 + */ 4420 + if (usercopy_fallback && 4421 + offset <= cachep->object_size && 4422 + n <= cachep->object_size - offset) { 4423 + usercopy_warn("SLAB object", cachep->name, to_user, offset, n); 4424 + return; 4425 + } 4426 + 4427 + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); 4416 4428 } 4417 4429 #endif /* CONFIG_HARDENED_USERCOPY */ 4418 4430
+6 -2
mm/slab.h
··· 22 22 unsigned int size; /* The aligned/padded/added on size */ 23 23 unsigned int align; /* Alignment as calculated */ 24 24 slab_flags_t flags; /* Active flags on the slab */ 25 + size_t useroffset; /* Usercopy region offset */ 26 + size_t usersize; /* Usercopy region size */ 25 27 const char *name; /* Slab name for sysfs */ 26 28 int refcount; /* Use counter */ 27 29 void (*ctor)(void *); /* Called on object slot creation */ ··· 94 92 int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); 95 93 96 94 extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, 97 - slab_flags_t flags); 95 + slab_flags_t flags, size_t useroffset, 96 + size_t usersize); 98 97 extern void create_boot_cache(struct kmem_cache *, const char *name, 99 - size_t size, slab_flags_t flags); 98 + size_t size, slab_flags_t flags, size_t useroffset, 99 + size_t usersize); 100 100 101 101 int slab_unmergeable(struct kmem_cache *s); 102 102 struct kmem_cache *find_mergeable(size_t size, size_t align,
+51 -11
mm/slab_common.c
··· 31 31 DEFINE_MUTEX(slab_mutex); 32 32 struct kmem_cache *kmem_cache; 33 33 34 + #ifdef CONFIG_HARDENED_USERCOPY 35 + bool usercopy_fallback __ro_after_init = 36 + IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); 37 + module_param(usercopy_fallback, bool, 0400); 38 + MODULE_PARM_DESC(usercopy_fallback, 39 + "WARN instead of reject usercopy whitelist violations"); 40 + #endif 41 + 34 42 static LIST_HEAD(slab_caches_to_rcu_destroy); 35 43 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); 36 44 static DECLARE_WORK(slab_caches_to_rcu_destroy_work, ··· 318 310 if (s->ctor) 319 311 return 1; 320 312 313 + if (s->usersize) 314 + return 1; 315 + 321 316 /* 322 317 * We may have set a slab to be unmergeable during bootstrap. 323 318 */ ··· 379 368 380 369 static struct kmem_cache *create_cache(const char *name, 381 370 size_t object_size, size_t size, size_t align, 382 - slab_flags_t flags, void (*ctor)(void *), 371 + slab_flags_t flags, size_t useroffset, 372 + size_t usersize, void (*ctor)(void *), 383 373 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 384 374 { 385 375 struct kmem_cache *s; 386 376 int err; 377 + 378 + if (WARN_ON(useroffset + usersize > object_size)) 379 + useroffset = usersize = 0; 387 380 388 381 err = -ENOMEM; 389 382 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); ··· 399 384 s->size = size; 400 385 s->align = align; 401 386 s->ctor = ctor; 387 + s->useroffset = useroffset; 388 + s->usersize = usersize; 402 389 403 390 err = init_memcg_params(s, memcg, root_cache); 404 391 if (err) ··· 425 408 } 426 409 427 410 /* 428 - * kmem_cache_create - Create a cache. 411 + * kmem_cache_create_usercopy - Create a cache. 429 412 * @name: A string which is used in /proc/slabinfo to identify this cache. 430 413 * @size: The size of objects to be created in this cache. 431 414 * @align: The required alignment for the objects. 
432 415 * @flags: SLAB flags 416 + * @useroffset: Usercopy region offset 417 + * @usersize: Usercopy region size 433 418 * @ctor: A constructor for the objects. 434 419 * 435 420 * Returns a ptr to the cache on success, NULL on failure. ··· 451 432 * as davem. 452 433 */ 453 434 struct kmem_cache * 454 - kmem_cache_create(const char *name, size_t size, size_t align, 455 - slab_flags_t flags, void (*ctor)(void *)) 435 + kmem_cache_create_usercopy(const char *name, size_t size, size_t align, 436 + slab_flags_t flags, size_t useroffset, size_t usersize, 437 + void (*ctor)(void *)) 456 438 { 457 439 struct kmem_cache *s = NULL; 458 440 const char *cache_name; ··· 484 464 */ 485 465 flags &= CACHE_CREATE_MASK; 486 466 487 - s = __kmem_cache_alias(name, size, align, flags, ctor); 467 + /* Fail closed on bad usersize of useroffset values. */ 468 + if (WARN_ON(!usersize && useroffset) || 469 + WARN_ON(size < usersize || size - usersize < useroffset)) 470 + usersize = useroffset = 0; 471 + 472 + if (!usersize) 473 + s = __kmem_cache_alias(name, size, align, flags, ctor); 488 474 if (s) 489 475 goto out_unlock; 490 476 ··· 502 476 503 477 s = create_cache(cache_name, size, size, 504 478 calculate_alignment(flags, align, size), 505 - flags, ctor, NULL, NULL); 479 + flags, useroffset, usersize, ctor, NULL, NULL); 506 480 if (IS_ERR(s)) { 507 481 err = PTR_ERR(s); 508 482 kfree_const(cache_name); ··· 527 501 return NULL; 528 502 } 529 503 return s; 504 + } 505 + EXPORT_SYMBOL(kmem_cache_create_usercopy); 506 + 507 + struct kmem_cache * 508 + kmem_cache_create(const char *name, size_t size, size_t align, 509 + slab_flags_t flags, void (*ctor)(void *)) 510 + { 511 + return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, 512 + ctor); 530 513 } 531 514 EXPORT_SYMBOL(kmem_cache_create); 532 515 ··· 649 614 s = create_cache(cache_name, root_cache->object_size, 650 615 root_cache->size, root_cache->align, 651 616 root_cache->flags & CACHE_CREATE_MASK, 617 + 
root_cache->useroffset, root_cache->usersize, 652 618 root_cache->ctor, memcg, root_cache); 653 619 /* 654 620 * If we could not create a memcg cache, do not complain, because ··· 917 881 #ifndef CONFIG_SLOB 918 882 /* Create a cache during boot when no slab services are available yet */ 919 883 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, 920 - slab_flags_t flags) 884 + slab_flags_t flags, size_t useroffset, size_t usersize) 921 885 { 922 886 int err; 923 887 924 888 s->name = name; 925 889 s->size = s->object_size = size; 926 890 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); 891 + s->useroffset = useroffset; 892 + s->usersize = usersize; 927 893 928 894 slab_init_memcg_params(s); 929 895 ··· 939 901 } 940 902 941 903 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, 942 - slab_flags_t flags) 904 + slab_flags_t flags, size_t useroffset, 905 + size_t usersize) 943 906 { 944 907 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 945 908 946 909 if (!s) 947 910 panic("Out of memory when creating slab %s\n", name); 948 911 949 - create_boot_cache(s, name, size, flags); 912 + create_boot_cache(s, name, size, flags, useroffset, usersize); 950 913 list_add(&s->list, &slab_caches); 951 914 memcg_link_cache(s); 952 915 s->refcount = 1; ··· 1101 1062 static void __init new_kmalloc_cache(int idx, slab_flags_t flags) 1102 1063 { 1103 1064 kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, 1104 - kmalloc_info[idx].size, flags); 1065 + kmalloc_info[idx].size, flags, 0, 1066 + kmalloc_info[idx].size); 1105 1067 } 1106 1068 1107 1069 /* ··· 1143 1103 1144 1104 BUG_ON(!n); 1145 1105 kmalloc_dma_caches[i] = create_kmalloc_cache(n, 1146 - size, SLAB_CACHE_DMA | flags); 1106 + size, SLAB_CACHE_DMA | flags, 0, 0); 1147 1107 } 1148 1108 } 1149 1109 #endif
+37 -12
mm/slub.c
··· 3813 3813 3814 3814 #ifdef CONFIG_HARDENED_USERCOPY 3815 3815 /* 3816 - * Rejects objects that are incorrectly sized. 3816 + * Rejects incorrectly sized objects and objects that are to be copied 3817 + * to/from userspace but do not fall entirely within the containing slab 3818 + * cache's usercopy region. 3817 3819 * 3818 3820 * Returns NULL if check passes, otherwise const char * to name of cache 3819 3821 * to indicate an error. 3820 3822 */ 3821 - const char *__check_heap_object(const void *ptr, unsigned long n, 3822 - struct page *page) 3823 + void __check_heap_object(const void *ptr, unsigned long n, struct page *page, 3824 + bool to_user) 3823 3825 { 3824 3826 struct kmem_cache *s; 3825 3827 unsigned long offset; ··· 3829 3827 3830 3828 /* Find object and usable object size. */ 3831 3829 s = page->slab_cache; 3832 - object_size = slab_ksize(s); 3833 3830 3834 3831 /* Reject impossible pointers. */ 3835 3832 if (ptr < page_address(page)) 3836 - return s->name; 3833 + usercopy_abort("SLUB object not in SLUB page?!", NULL, 3834 + to_user, 0, n); 3837 3835 3838 3836 /* Find offset within object. */ 3839 3837 offset = (ptr - page_address(page)) % s->size; ··· 3841 3839 /* Adjust for redzone and reject if within the redzone. */ 3842 3840 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { 3843 3841 if (offset < s->red_left_pad) 3844 - return s->name; 3842 + usercopy_abort("SLUB object in left red zone", 3843 + s->name, to_user, offset, n); 3845 3844 offset -= s->red_left_pad; 3846 3845 } 3847 3846 3848 - /* Allow address range falling entirely within object size. */ 3849 - if (offset <= object_size && n <= object_size - offset) 3850 - return NULL; 3847 + /* Allow address range falling entirely within usercopy region. 
*/ 3848 + if (offset >= s->useroffset && 3849 + offset - s->useroffset <= s->usersize && 3850 + n <= s->useroffset - offset + s->usersize) 3851 + return; 3851 3852 3852 - return s->name; 3853 + /* 3854 + * If the copy is still within the allocated object, produce 3855 + * a warning instead of rejecting the copy. This is intended 3856 + * to be a temporary method to find any missing usercopy 3857 + * whitelists. 3858 + */ 3859 + object_size = slab_ksize(s); 3860 + if (usercopy_fallback && 3861 + offset <= object_size && n <= object_size - offset) { 3862 + usercopy_warn("SLUB object", s->name, to_user, offset, n); 3863 + return; 3864 + } 3865 + 3866 + usercopy_abort("SLUB object", s->name, to_user, offset, n); 3853 3867 } 3854 3868 #endif /* CONFIG_HARDENED_USERCOPY */ 3855 3869 ··· 4199 4181 kmem_cache = &boot_kmem_cache; 4200 4182 4201 4183 create_boot_cache(kmem_cache_node, "kmem_cache_node", 4202 - sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); 4184 + sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); 4203 4185 4204 4186 register_hotmemory_notifier(&slab_memory_callback_nb); 4205 4187 ··· 4209 4191 create_boot_cache(kmem_cache, "kmem_cache", 4210 4192 offsetof(struct kmem_cache, node) + 4211 4193 nr_node_ids * sizeof(struct kmem_cache_node *), 4212 - SLAB_HWCACHE_ALIGN); 4194 + SLAB_HWCACHE_ALIGN, 0, 0); 4213 4195 4214 4196 kmem_cache = bootstrap(&boot_kmem_cache); 4215 4197 ··· 5079 5061 SLAB_ATTR_RO(cache_dma); 5080 5062 #endif 5081 5063 5064 + static ssize_t usersize_show(struct kmem_cache *s, char *buf) 5065 + { 5066 + return sprintf(buf, "%zu\n", s->usersize); 5067 + } 5068 + SLAB_ATTR_RO(usersize); 5069 + 5082 5070 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 5083 5071 { 5084 5072 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); ··· 5459 5435 #ifdef CONFIG_FAILSLAB 5460 5436 &failslab_attr.attr, 5461 5437 #endif 5438 + &usersize_attr.attr, 5462 5439 5463 5440 NULL 5464 5441 };
+74 -59
mm/usercopy.c
··· 58 58 return GOOD_STACK; 59 59 } 60 60 61 - static void report_usercopy(const void *ptr, unsigned long len, 62 - bool to_user, const char *type) 61 + /* 62 + * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found 63 + * an unexpected state during a copy_from_user() or copy_to_user() call. 64 + * There are several checks being performed on the buffer by the 65 + * __check_object_size() function. Normal stack buffer usage should never 66 + * trip the checks, and kernel text addressing will always trip the check. 67 + * For cache objects, it is checking that only the whitelisted range of 68 + * bytes for a given cache is being accessed (via the cache's usersize and 69 + * useroffset fields). To adjust a cache whitelist, use the usercopy-aware 70 + * kmem_cache_create_usercopy() function to create the cache (and 71 + * carefully audit the whitelist range). 72 + */ 73 + void usercopy_warn(const char *name, const char *detail, bool to_user, 74 + unsigned long offset, unsigned long len) 63 75 { 64 - pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n", 65 - to_user ? "exposure" : "overwrite", 66 - to_user ? "from" : "to", ptr, type ? : "unknown", len); 76 + WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", 77 + to_user ? "exposure" : "overwrite", 78 + to_user ? "from" : "to", 79 + name ? : "unknown?!", 80 + detail ? " '" : "", detail ? : "", detail ? "'" : "", 81 + offset, len); 82 + } 83 + 84 + void __noreturn usercopy_abort(const char *name, const char *detail, 85 + bool to_user, unsigned long offset, 86 + unsigned long len) 87 + { 88 + pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", 89 + to_user ? "exposure" : "overwrite", 90 + to_user ? "from" : "to", 91 + name ? : "unknown?!", 92 + detail ? " '" : "", detail ? : "", detail ? 
"'" : "", 93 + offset, len); 94 + 67 95 /* 68 96 * For greater effect, it would be nice to do do_group_exit(), 69 97 * but BUG() actually hooks all the lock-breaking and per-arch ··· 101 73 } 102 74 103 75 /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ 104 - static bool overlaps(const void *ptr, unsigned long n, unsigned long low, 105 - unsigned long high) 76 + static bool overlaps(const unsigned long ptr, unsigned long n, 77 + unsigned long low, unsigned long high) 106 78 { 107 - unsigned long check_low = (uintptr_t)ptr; 79 + const unsigned long check_low = ptr; 108 80 unsigned long check_high = check_low + n; 109 81 110 82 /* Does not overlap if entirely above or entirely below. */ ··· 115 87 } 116 88 117 89 /* Is this address range in the kernel text area? */ 118 - static inline const char *check_kernel_text_object(const void *ptr, 119 - unsigned long n) 90 + static inline void check_kernel_text_object(const unsigned long ptr, 91 + unsigned long n, bool to_user) 120 92 { 121 93 unsigned long textlow = (unsigned long)_stext; 122 94 unsigned long texthigh = (unsigned long)_etext; 123 95 unsigned long textlow_linear, texthigh_linear; 124 96 125 97 if (overlaps(ptr, n, textlow, texthigh)) 126 - return "<kernel text>"; 98 + usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); 127 99 128 100 /* 129 101 * Some architectures have virtual memory mappings with a secondary ··· 136 108 textlow_linear = (unsigned long)lm_alias(textlow); 137 109 /* No different mapping: we're done. */ 138 110 if (textlow_linear == textlow) 139 - return NULL; 111 + return; 140 112 141 113 /* Check the secondary mapping... 
*/ 142 114 texthigh_linear = (unsigned long)lm_alias(texthigh); 143 115 if (overlaps(ptr, n, textlow_linear, texthigh_linear)) 144 - return "<linear kernel text>"; 145 - 146 - return NULL; 116 + usercopy_abort("linear kernel text", NULL, to_user, 117 + ptr - textlow_linear, n); 147 118 } 148 119 149 - static inline const char *check_bogus_address(const void *ptr, unsigned long n) 120 + static inline void check_bogus_address(const unsigned long ptr, unsigned long n, 121 + bool to_user) 150 122 { 151 123 /* Reject if object wraps past end of memory. */ 152 - if ((unsigned long)ptr + n < (unsigned long)ptr) 153 - return "<wrapped address>"; 124 + if (ptr + n < ptr) 125 + usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); 154 126 155 127 /* Reject if NULL or ZERO-allocation. */ 156 128 if (ZERO_OR_NULL_PTR(ptr)) 157 - return "<null>"; 158 - 159 - return NULL; 129 + usercopy_abort("null address", NULL, to_user, ptr, n); 160 130 } 161 131 162 132 /* Checks for allocs that are marked in some way as spanning multiple pages. */ 163 - static inline const char *check_page_span(const void *ptr, unsigned long n, 164 - struct page *page, bool to_user) 133 + static inline void check_page_span(const void *ptr, unsigned long n, 134 + struct page *page, bool to_user) 165 135 { 166 136 #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN 167 137 const void *end = ptr + n - 1; ··· 176 150 if (ptr >= (const void *)__start_rodata && 177 151 end <= (const void *)__end_rodata) { 178 152 if (!to_user) 179 - return "<rodata>"; 180 - return NULL; 153 + usercopy_abort("rodata", NULL, to_user, 0, n); 154 + return; 181 155 } 182 156 183 157 /* Allow kernel data region (if not marked as Reserved). */ 184 158 if (ptr >= (const void *)_sdata && end <= (const void *)_edata) 185 - return NULL; 159 + return; 186 160 187 161 /* Allow kernel bss region (if not marked as Reserved). 
*/ 188 162 if (ptr >= (const void *)__bss_start && 189 163 end <= (const void *)__bss_stop) 190 - return NULL; 164 + return; 191 165 192 166 /* Is the object wholly within one base page? */ 193 167 if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == 194 168 ((unsigned long)end & (unsigned long)PAGE_MASK))) 195 - return NULL; 169 + return; 196 170 197 171 /* Allow if fully inside the same compound (__GFP_COMP) page. */ 198 172 endpage = virt_to_head_page(end); 199 173 if (likely(endpage == page)) 200 - return NULL; 174 + return; 201 175 202 176 /* 203 177 * Reject if range is entirely either Reserved (i.e. special or ··· 207 181 is_reserved = PageReserved(page); 208 182 is_cma = is_migrate_cma_page(page); 209 183 if (!is_reserved && !is_cma) 210 - return "<spans multiple pages>"; 184 + usercopy_abort("spans multiple pages", NULL, to_user, 0, n); 211 185 212 186 for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { 213 187 page = virt_to_head_page(ptr); 214 188 if (is_reserved && !PageReserved(page)) 215 - return "<spans Reserved and non-Reserved pages>"; 189 + usercopy_abort("spans Reserved and non-Reserved pages", 190 + NULL, to_user, 0, n); 216 191 if (is_cma && !is_migrate_cma_page(page)) 217 - return "<spans CMA and non-CMA pages>"; 192 + usercopy_abort("spans CMA and non-CMA pages", NULL, 193 + to_user, 0, n); 218 194 } 219 195 #endif 220 - 221 - return NULL; 222 196 } 223 197 224 - static inline const char *check_heap_object(const void *ptr, unsigned long n, 225 - bool to_user) 198 + static inline void check_heap_object(const void *ptr, unsigned long n, 199 + bool to_user) 226 200 { 227 201 struct page *page; 228 202 229 203 if (!virt_addr_valid(ptr)) 230 - return NULL; 204 + return; 231 205 232 206 page = virt_to_head_page(ptr); 233 207 234 - /* Check slab allocator for flags and size. */ 235 - if (PageSlab(page)) 236 - return __check_heap_object(ptr, n, page); 237 - 238 - /* Verify object does not incorrectly span multiple pages. 
*/ 239 - return check_page_span(ptr, n, page, to_user); 208 + if (PageSlab(page)) { 209 + /* Check slab allocator for flags and size. */ 210 + __check_heap_object(ptr, n, page, to_user); 211 + } else { 212 + /* Verify object does not incorrectly span multiple pages. */ 213 + check_page_span(ptr, n, page, to_user); 214 + } 240 215 } 241 216 242 217 /* ··· 248 221 */ 249 222 void __check_object_size(const void *ptr, unsigned long n, bool to_user) 250 223 { 251 - const char *err; 252 - 253 224 /* Skip all tests if size is zero. */ 254 225 if (!n) 255 226 return; 256 227 257 228 /* Check for invalid addresses. */ 258 - err = check_bogus_address(ptr, n); 259 - if (err) 260 - goto report; 229 + check_bogus_address((const unsigned long)ptr, n, to_user); 261 230 262 231 /* Check for bad heap object. */ 263 - err = check_heap_object(ptr, n, to_user); 264 - if (err) 265 - goto report; 232 + check_heap_object(ptr, n, to_user); 266 233 267 234 /* Check for bad stack object. */ 268 235 switch (check_stack_object(ptr, n)) { ··· 272 251 */ 273 252 return; 274 253 default: 275 - err = "<process stack>"; 276 - goto report; 254 + usercopy_abort("process stack", NULL, to_user, 0, n); 277 255 } 278 256 279 257 /* Check for object in kernel to avoid text exposure. */ 280 - err = check_kernel_text_object(ptr, n); 281 - if (!err) 282 - return; 283 - 284 - report: 285 - report_usercopy(ptr, n, to_user, err); 258 + check_kernel_text_object((const unsigned long)ptr, n, to_user); 286 259 } 287 260 EXPORT_SYMBOL(__check_object_size);
+2
net/caif/caif_socket.c
··· 1032 1032 static struct proto prot = {.name = "PF_CAIF", 1033 1033 .owner = THIS_MODULE, 1034 1034 .obj_size = sizeof(struct caifsock), 1035 + .useroffset = offsetof(struct caifsock, conn_req.param), 1036 + .usersize = sizeof_field(struct caifsock, conn_req.param) 1035 1037 }; 1036 1038 1037 1039 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
+3 -1
net/core/sock.c
··· 3191 3191 int proto_register(struct proto *prot, int alloc_slab) 3192 3192 { 3193 3193 if (alloc_slab) { 3194 - prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 3194 + prot->slab = kmem_cache_create_usercopy(prot->name, 3195 + prot->obj_size, 0, 3195 3196 SLAB_HWCACHE_ALIGN | prot->slab_flags, 3197 + prot->useroffset, prot->usersize, 3196 3198 NULL); 3197 3199 3198 3200 if (prot->slab == NULL) {
+2
net/ipv4/raw.c
··· 990 990 .hash = raw_hash_sk, 991 991 .unhash = raw_unhash_sk, 992 992 .obj_size = sizeof(struct raw_sock), 993 + .useroffset = offsetof(struct raw_sock, filter), 994 + .usersize = sizeof_field(struct raw_sock, filter), 993 995 .h.raw_hash = &raw_v4_hashinfo, 994 996 #ifdef CONFIG_COMPAT 995 997 .compat_setsockopt = compat_raw_setsockopt,
+2
net/ipv6/raw.c
··· 1272 1272 .hash = raw_hash_sk, 1273 1273 .unhash = raw_unhash_sk, 1274 1274 .obj_size = sizeof(struct raw6_sock), 1275 + .useroffset = offsetof(struct raw6_sock, filter), 1276 + .usersize = sizeof_field(struct raw6_sock, filter), 1275 1277 .h.raw_hash = &raw_v6_hashinfo, 1276 1278 #ifdef CONFIG_COMPAT 1277 1279 .compat_setsockopt = compat_rawv6_setsockopt,
+9 -1
net/sctp/socket.c
··· 5053 5053 len = sizeof(int); 5054 5054 if (put_user(len, optlen)) 5055 5055 return -EFAULT; 5056 - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) 5056 + if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval)) 5057 5057 return -EFAULT; 5058 5058 return 0; 5059 5059 } ··· 8552 8552 .unhash = sctp_unhash, 8553 8553 .get_port = sctp_get_port, 8554 8554 .obj_size = sizeof(struct sctp_sock), 8555 + .useroffset = offsetof(struct sctp_sock, subscribe), 8556 + .usersize = offsetof(struct sctp_sock, initmsg) - 8557 + offsetof(struct sctp_sock, subscribe) + 8558 + sizeof_field(struct sctp_sock, initmsg), 8555 8559 .sysctl_mem = sysctl_sctp_mem, 8556 8560 .sysctl_rmem = sysctl_sctp_rmem, 8557 8561 .sysctl_wmem = sysctl_sctp_wmem, ··· 8595 8591 .unhash = sctp_unhash, 8596 8592 .get_port = sctp_get_port, 8597 8593 .obj_size = sizeof(struct sctp6_sock), 8594 + .useroffset = offsetof(struct sctp6_sock, sctp.subscribe), 8595 + .usersize = offsetof(struct sctp6_sock, sctp.initmsg) - 8596 + offsetof(struct sctp6_sock, sctp.subscribe) + 8597 + sizeof_field(struct sctp6_sock, sctp.initmsg), 8598 8598 .sysctl_mem = sysctl_sctp_mem, 8599 8599 .sysctl_rmem = sysctl_sctp_rmem, 8600 8600 .sysctl_wmem = sysctl_sctp_wmem,
+14
security/Kconfig
··· 164 164 or are part of the kernel text. This kills entire classes 165 165 of heap overflow exploits and similar kernel memory exposures. 166 166 167 + config HARDENED_USERCOPY_FALLBACK 168 + bool "Allow usercopy whitelist violations to fallback to object size" 169 + depends on HARDENED_USERCOPY 170 + default y 171 + help 172 + This is a temporary option that allows missing usercopy whitelists 173 + to be discovered via a WARN() to the kernel log, instead of 174 + rejecting the copy, falling back to non-whitelisted hardened 175 + usercopy that checks the slab allocation size instead of the 176 + whitelist size. This option will be removed once it seems like 177 + all missing usercopy whitelists have been identified and fixed. 178 + Booting with "slab_common.usercopy_fallback=Y/N" can change 179 + this setting. 180 + 167 181 config HARDENED_USERCOPY_PAGESPAN 168 182 bool "Refuse to copy allocations that span multiple pages" 169 183 depends on HARDENED_USERCOPY
+1
tools/objtool/check.c
··· 138 138 "__reiserfs_panic", 139 139 "lbug_with_loc", 140 140 "fortify_panic", 141 + "usercopy_abort", 141 142 }; 142 143 143 144 if (func->bind == STB_WEAK)
+6 -2
virt/kvm/kvm_main.c
··· 4019 4019 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 4020 4020 if (!vcpu_align) 4021 4021 vcpu_align = __alignof__(struct kvm_vcpu); 4022 - kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, 4023 - SLAB_ACCOUNT, NULL); 4022 + kvm_vcpu_cache = 4023 + kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, 4024 + SLAB_ACCOUNT, 4025 + offsetof(struct kvm_vcpu, arch), 4026 + sizeof_field(struct kvm_vcpu, arch), 4027 + NULL); 4024 4028 if (!kvm_vcpu_cache) { 4025 4029 r = -ENOMEM; 4026 4030 goto out_free_3;