Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vm: add VM_FAULT_SIGSEGV handling support

The core VM already knows about VM_FAULT_SIGBUS, but cannot return a
"you should SIGSEGV" error, because the SIGSEGV case was generally
handled by the caller - usually the architecture fault handler.

That results in lots of duplication - all the architecture fault
handlers end up doing very similar "look up vma, check permissions, do
retries etc" - but it generally works. However, there are cases where
the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV.

In particular, when accessing the stack guard page, libsigsegv expects a
SIGSEGV. And it usually got one, because the stack growth is handled by
that duplicated architecture fault handler.

However, when the generic VM layer started propagating the error return
from the stack expansion in commit fee7e49d4514 ("mm: propagate error
from stack expansion even for guard page"), that now exposed the
existing VM_FAULT_SIGBUS result to user space. And user space really
expected SIGSEGV, not SIGBUS.

To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those
duplicate architecture fault handlers about it. They all already have
the code to handle SIGSEGV, so it's about just tying that new return
value to the existing code, but it's all a bit annoying.

This is the mindless minimal patch to do this. A more extensive patch
would be to try to gather up the mostly shared fault handling logic into
one generic helper routine, and long-term we really should do that
cleanup.

Just from this patch, you can generally see that most architectures just
copied (directly or indirectly) the old x86 way of doing things, but in
the meantime that original x86 model has been improved to hold the VM
semaphore for shorter times etc and to handle VM_FAULT_RETRY and other
"newer" things, so it would be a good idea to bring all those
improvements to the generic case and teach other architectures about
them too.

Reported-and-tested-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Jan Engelhardt <jengelh@inai.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> # "s390 still compiles and boots"
Cc: linux-arch@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+63 -7
+2
arch/alpha/mm/fault.c
··· 156 156 if (unlikely(fault & VM_FAULT_ERROR)) { 157 157 if (fault & VM_FAULT_OOM) 158 158 goto out_of_memory; 159 + else if (fault & VM_FAULT_SIGSEGV) 160 + goto bad_area; 159 161 else if (fault & VM_FAULT_SIGBUS) 160 162 goto do_sigbus; 161 163 BUG();
+2
arch/arc/mm/fault.c
··· 161 161 162 162 if (fault & VM_FAULT_OOM) 163 163 goto out_of_memory; 164 + else if (fault & VM_FAULT_SIGSEGV) 165 + goto bad_area; 164 166 else if (fault & VM_FAULT_SIGBUS) 165 167 goto do_sigbus; 166 168
+2
arch/avr32/mm/fault.c
··· 142 142 if (unlikely(fault & VM_FAULT_ERROR)) { 143 143 if (fault & VM_FAULT_OOM) 144 144 goto out_of_memory; 145 + else if (fault & VM_FAULT_SIGSEGV) 146 + goto bad_area; 145 147 else if (fault & VM_FAULT_SIGBUS) 146 148 goto do_sigbus; 147 149 BUG();
+2
arch/cris/mm/fault.c
··· 176 176 if (unlikely(fault & VM_FAULT_ERROR)) { 177 177 if (fault & VM_FAULT_OOM) 178 178 goto out_of_memory; 179 + else if (fault & VM_FAULT_SIGSEGV) 180 + goto bad_area; 179 181 else if (fault & VM_FAULT_SIGBUS) 180 182 goto do_sigbus; 181 183 BUG();
+2
arch/frv/mm/fault.c
··· 168 168 if (unlikely(fault & VM_FAULT_ERROR)) { 169 169 if (fault & VM_FAULT_OOM) 170 170 goto out_of_memory; 171 + else if (fault & VM_FAULT_SIGSEGV) 172 + goto bad_area; 171 173 else if (fault & VM_FAULT_SIGBUS) 172 174 goto do_sigbus; 173 175 BUG();
+2
arch/ia64/mm/fault.c
··· 172 172 */ 173 173 if (fault & VM_FAULT_OOM) { 174 174 goto out_of_memory; 175 + } else if (fault & VM_FAULT_SIGSEGV) { 176 + goto bad_area; 175 177 } else if (fault & VM_FAULT_SIGBUS) { 176 178 signal = SIGBUS; 177 179 goto bad_area;
+2
arch/m32r/mm/fault.c
··· 200 200 if (unlikely(fault & VM_FAULT_ERROR)) { 201 201 if (fault & VM_FAULT_OOM) 202 202 goto out_of_memory; 203 + else if (fault & VM_FAULT_SIGSEGV) 204 + goto bad_area; 203 205 else if (fault & VM_FAULT_SIGBUS) 204 206 goto do_sigbus; 205 207 BUG();
+2
arch/m68k/mm/fault.c
··· 145 145 if (unlikely(fault & VM_FAULT_ERROR)) { 146 146 if (fault & VM_FAULT_OOM) 147 147 goto out_of_memory; 148 + else if (fault & VM_FAULT_SIGSEGV) 149 + goto map_err; 148 150 else if (fault & VM_FAULT_SIGBUS) 149 151 goto bus_err; 150 152 BUG();
+2
arch/metag/mm/fault.c
··· 141 141 if (unlikely(fault & VM_FAULT_ERROR)) { 142 142 if (fault & VM_FAULT_OOM) 143 143 goto out_of_memory; 144 + else if (fault & VM_FAULT_SIGSEGV) 145 + goto bad_area; 144 146 else if (fault & VM_FAULT_SIGBUS) 145 147 goto do_sigbus; 146 148 BUG();
+2
arch/microblaze/mm/fault.c
··· 224 224 if (unlikely(fault & VM_FAULT_ERROR)) { 225 225 if (fault & VM_FAULT_OOM) 226 226 goto out_of_memory; 227 + else if (fault & VM_FAULT_SIGSEGV) 228 + goto bad_area; 227 229 else if (fault & VM_FAULT_SIGBUS) 228 230 goto do_sigbus; 229 231 BUG();
+2
arch/mips/mm/fault.c
··· 158 158 if (unlikely(fault & VM_FAULT_ERROR)) { 159 159 if (fault & VM_FAULT_OOM) 160 160 goto out_of_memory; 161 + else if (fault & VM_FAULT_SIGSEGV) 162 + goto bad_area; 161 163 else if (fault & VM_FAULT_SIGBUS) 162 164 goto do_sigbus; 163 165 BUG();
+2
arch/mn10300/mm/fault.c
··· 262 262 if (unlikely(fault & VM_FAULT_ERROR)) { 263 263 if (fault & VM_FAULT_OOM) 264 264 goto out_of_memory; 265 + else if (fault & VM_FAULT_SIGSEGV) 266 + goto bad_area; 265 267 else if (fault & VM_FAULT_SIGBUS) 266 268 goto do_sigbus; 267 269 BUG();
+2
arch/nios2/mm/fault.c
··· 135 135 if (unlikely(fault & VM_FAULT_ERROR)) { 136 136 if (fault & VM_FAULT_OOM) 137 137 goto out_of_memory; 138 + else if (fault & VM_FAULT_SIGSEGV) 139 + goto bad_area; 138 140 else if (fault & VM_FAULT_SIGBUS) 139 141 goto do_sigbus; 140 142 BUG();
+2
arch/openrisc/mm/fault.c
··· 171 171 if (unlikely(fault & VM_FAULT_ERROR)) { 172 172 if (fault & VM_FAULT_OOM) 173 173 goto out_of_memory; 174 + else if (fault & VM_FAULT_SIGSEGV) 175 + goto bad_area; 174 176 else if (fault & VM_FAULT_SIGBUS) 175 177 goto do_sigbus; 176 178 BUG();
+2
arch/parisc/mm/fault.c
··· 256 256 */ 257 257 if (fault & VM_FAULT_OOM) 258 258 goto out_of_memory; 259 + else if (fault & VM_FAULT_SIGSEGV) 260 + goto bad_area; 259 261 else if (fault & VM_FAULT_SIGBUS) 260 262 goto bad_area; 261 263 BUG();
+1 -1
arch/powerpc/mm/copro_fault.c
··· 76 76 if (*flt & VM_FAULT_OOM) { 77 77 ret = -ENOMEM; 78 78 goto out_unlock; 79 - } else if (*flt & VM_FAULT_SIGBUS) { 79 + } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { 80 80 ret = -EFAULT; 81 81 goto out_unlock; 82 82 }
+2
arch/powerpc/mm/fault.c
··· 437 437 */ 438 438 fault = handle_mm_fault(mm, vma, address, flags); 439 439 if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { 440 + if (fault & VM_FAULT_SIGSEGV) 441 + goto bad_area; 440 442 rc = mm_fault_error(regs, address, fault); 441 443 if (rc >= MM_FAULT_RETURN) 442 444 goto bail;
+6
arch/s390/mm/fault.c
··· 374 374 do_no_context(regs); 375 375 else 376 376 pagefault_out_of_memory(); 377 + } else if (fault & VM_FAULT_SIGSEGV) { 378 + /* Kernel mode? Handle exceptions or die */ 379 + if (!user_mode(regs)) 380 + do_no_context(regs); 381 + else 382 + do_sigsegv(regs, SEGV_MAPERR); 377 383 } else if (fault & VM_FAULT_SIGBUS) { 378 384 /* Kernel mode? Handle exceptions or die */ 379 385 if (!user_mode(regs))
+2
arch/score/mm/fault.c
··· 114 114 if (unlikely(fault & VM_FAULT_ERROR)) { 115 115 if (fault & VM_FAULT_OOM) 116 116 goto out_of_memory; 117 + else if (fault & VM_FAULT_SIGSEGV) 118 + goto bad_area; 117 119 else if (fault & VM_FAULT_SIGBUS) 118 120 goto do_sigbus; 119 121 BUG();
+2
arch/sh/mm/fault.c
··· 353 353 } else { 354 354 if (fault & VM_FAULT_SIGBUS) 355 355 do_sigbus(regs, error_code, address); 356 + else if (fault & VM_FAULT_SIGSEGV) 357 + bad_area(regs, error_code, address); 356 358 else 357 359 BUG(); 358 360 }
+2
arch/sparc/mm/fault_32.c
··· 249 249 if (unlikely(fault & VM_FAULT_ERROR)) { 250 250 if (fault & VM_FAULT_OOM) 251 251 goto out_of_memory; 252 + else if (fault & VM_FAULT_SIGSEGV) 253 + goto bad_area; 252 254 else if (fault & VM_FAULT_SIGBUS) 253 255 goto do_sigbus; 254 256 BUG();
+2
arch/sparc/mm/fault_64.c
··· 446 446 if (unlikely(fault & VM_FAULT_ERROR)) { 447 447 if (fault & VM_FAULT_OOM) 448 448 goto out_of_memory; 449 + else if (fault & VM_FAULT_SIGSEGV) 450 + goto bad_area; 449 451 else if (fault & VM_FAULT_SIGBUS) 450 452 goto do_sigbus; 451 453 BUG();
+2
arch/tile/mm/fault.c
··· 442 442 if (unlikely(fault & VM_FAULT_ERROR)) { 443 443 if (fault & VM_FAULT_OOM) 444 444 goto out_of_memory; 445 + else if (fault & VM_FAULT_SIGSEGV) 446 + goto bad_area; 445 447 else if (fault & VM_FAULT_SIGBUS) 446 448 goto do_sigbus; 447 449 BUG();
+2
arch/um/kernel/trap.c
··· 80 80 if (unlikely(fault & VM_FAULT_ERROR)) { 81 81 if (fault & VM_FAULT_OOM) { 82 82 goto out_of_memory; 83 + } else if (fault & VM_FAULT_SIGSEGV) { 84 + goto out; 83 85 } else if (fault & VM_FAULT_SIGBUS) { 84 86 err = -EACCES; 85 87 goto out;
+2
arch/x86/mm/fault.c
··· 898 898 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 899 899 VM_FAULT_HWPOISON_LARGE)) 900 900 do_sigbus(regs, error_code, address, fault); 901 + else if (fault & VM_FAULT_SIGSEGV) 902 + bad_area_nosemaphore(regs, error_code, address); 901 903 else 902 904 BUG(); 903 905 }
+2
arch/xtensa/mm/fault.c
··· 117 117 if (unlikely(fault & VM_FAULT_ERROR)) { 118 118 if (fault & VM_FAULT_OOM) 119 119 goto out_of_memory; 120 + else if (fault & VM_FAULT_SIGSEGV) 121 + goto bad_area; 120 122 else if (fault & VM_FAULT_SIGBUS) 121 123 goto do_sigbus; 122 124 BUG();
+1 -1
drivers/staging/lustre/lustre/llite/vvp_io.c
··· 632 632 return 0; 633 633 } 634 634 635 - if (cfio->fault.ft_flags & VM_FAULT_SIGBUS) { 635 + if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { 636 636 CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address); 637 637 return -EFAULT; 638 638 }
+4 -2
include/linux/mm.h
··· 1070 1070 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ 1071 1071 #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ 1072 1072 #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ 1073 + #define VM_FAULT_SIGSEGV 0x0040 1073 1074 1074 1075 #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ 1075 1076 #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ ··· 1079 1078 1080 1079 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ 1081 1080 1082 - #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ 1083 - VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) 1081 + #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ 1082 + VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ 1083 + VM_FAULT_FALLBACK) 1084 1084 1085 1085 /* Encode hstate index for a hwpoisoned large page */ 1086 1086 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
+2 -2
mm/gup.c
··· 296 296 return -ENOMEM; 297 297 if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) 298 298 return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT; 299 - if (ret & VM_FAULT_SIGBUS) 299 + if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) 300 300 return -EFAULT; 301 301 BUG(); 302 302 } ··· 571 571 return -ENOMEM; 572 572 if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) 573 573 return -EHWPOISON; 574 - if (ret & VM_FAULT_SIGBUS) 574 + if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) 575 575 return -EFAULT; 576 576 BUG(); 577 577 }
+1 -1
mm/ksm.c
··· 376 376 else 377 377 ret = VM_FAULT_WRITE; 378 378 put_page(page); 379 - } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM))); 379 + } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); 380 380 /* 381 381 * We must loop because handle_mm_fault() may back out if there's 382 382 * any difficulty e.g. if pte accessed bit gets updated concurrently.