Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

microblaze_mmu_v2: Page fault handling high level - fault.c

Signed-off-by: Michal Simek <monstr@monstr.eu>

+304
+304
arch/microblaze/mm/fault.c
··· 1 + /* 2 + * arch/microblaze/mm/fault.c 3 + * 4 + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. 5 + * 6 + * Derived from "arch/ppc/mm/fault.c" 7 + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 8 + * 9 + * Derived from "arch/i386/mm/fault.c" 10 + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 11 + * 12 + * Modified by Cort Dougan and Paul Mackerras. 13 + * 14 + * This file is subject to the terms and conditions of the GNU General 15 + * Public License. See the file COPYING in the main directory of this 16 + * archive for more details. 17 + * 18 + */ 19 + 20 + #include <linux/module.h> 21 + #include <linux/signal.h> 22 + #include <linux/sched.h> 23 + #include <linux/kernel.h> 24 + #include <linux/errno.h> 25 + #include <linux/string.h> 26 + #include <linux/types.h> 27 + #include <linux/ptrace.h> 28 + #include <linux/mman.h> 29 + #include <linux/mm.h> 30 + #include <linux/interrupt.h> 31 + 32 + #include <asm/page.h> 33 + #include <asm/pgtable.h> 34 + #include <asm/mmu.h> 35 + #include <asm/mmu_context.h> 36 + #include <asm/system.h> 37 + #include <linux/uaccess.h> 38 + #include <asm/exceptions.h> 39 + 40 + #if defined(CONFIG_KGDB) 41 + int debugger_kernel_faults = 1; 42 + #endif 43 + 44 + static unsigned long pte_misses; /* updated by do_page_fault() */ 45 + static unsigned long pte_errors; /* updated by do_page_fault() */ 46 + 47 + /* 48 + * Check whether the instruction at regs->pc is a store using 49 + * an update addressing form which will update r1. 50 + */ 51 + static int store_updates_sp(struct pt_regs *regs) 52 + { 53 + unsigned int inst; 54 + 55 + if (get_user(inst, (unsigned int *)regs->pc)) 56 + return 0; 57 + /* check for 1 in the rD field */ 58 + if (((inst >> 21) & 0x1f) != 1) 59 + return 0; 60 + /* check for store opcodes */ 61 + if ((inst & 0xd0000000) == 0xd0000000) 62 + return 1; 63 + return 0; 64 + } 65 + 66 + 67 + /* 68 + * bad_page_fault is called when we have a bad access from the kernel. 69 + * It is called from do_page_fault above and from some of the procedures 70 + * in traps.c. 71 + */ 72 + static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) 73 + { 74 + const struct exception_table_entry *fixup; 75 + /* MS: no context */ 76 + /* Are we prepared to handle this fault? */ 77 + fixup = search_exception_tables(regs->pc); 78 + if (fixup) { 79 + regs->pc = fixup->fixup; 80 + return; 81 + } 82 + 83 + /* kernel has accessed a bad area */ 84 + #if defined(CONFIG_KGDB) 85 + if (debugger_kernel_faults) 86 + debugger(regs); 87 + #endif 88 + die("kernel access of bad area", regs, sig); 89 + } 90 + 91 + /* 92 + * The error_code parameter is ESR for a data fault, 93 + * 0 for an instruction fault. 94 + */ 95 + void do_page_fault(struct pt_regs *regs, unsigned long address, 96 + unsigned long error_code) 97 + { 98 + struct vm_area_struct *vma; 99 + struct mm_struct *mm = current->mm; 100 + siginfo_t info; 101 + int code = SEGV_MAPERR; 102 + int is_write = error_code & ESR_S; 103 + int fault; 104 + 105 + regs->ear = address; 106 + regs->esr = error_code; 107 + 108 + /* On a kernel SLB miss we can only check for a valid exception entry */ 109 + if (kernel_mode(regs) && (address >= TASK_SIZE)) { 110 + printk(KERN_WARNING "kernel task_size exceed"); 111 + _exception(SIGSEGV, regs, code, address); 112 + } 113 + 114 + /* for instr TLB miss and instr storage exception ESR_S is undefined */ 115 + if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) 116 + is_write = 0; 117 + 118 + #if defined(CONFIG_KGDB) 119 + if (debugger_fault_handler && regs->trap == 0x300) { 120 + debugger_fault_handler(regs); 121 + return; 122 + } 123 + #endif /* CONFIG_KGDB */ 124 + 125 + if (in_atomic() || mm == NULL) { 126 + /* FIXME */ 127 + if (kernel_mode(regs)) { 128 + printk(KERN_EMERG 129 + "Page fault in kernel mode - Oooou!!! pid %d\n", 130 + current->pid); 131 + _exception(SIGSEGV, regs, code, address); 132 + return; 133 + } 134 + /* in_atomic() in user mode is really bad, 135 + as is current->mm == NULL. */ 136 + printk(KERN_EMERG "Page fault in user mode with " 137 + "in_atomic(), mm = %p\n", mm); 138 + printk(KERN_EMERG "r15 = %lx MSR = %lx\n", 139 + regs->r15, regs->msr); 140 + die("Weird page fault", regs, SIGSEGV); 141 + } 142 + 143 + /* When running in the kernel we expect faults to occur only to 144 + * addresses in user space. All other faults represent errors in the 145 + * kernel and should generate an OOPS. Unfortunately, in the case of an 146 + * erroneous fault occurring in a code path which already holds mmap_sem 147 + * we will deadlock attempting to validate the fault against the 148 + * address space. Luckily the kernel only validly references user 149 + * space from well defined areas of code, which are listed in the 150 + * exceptions table. 151 + * 152 + * As the vast majority of faults will be valid we will only perform 153 + * the source reference check when there is a possibility of a deadlock. 154 + * Attempt to lock the address space, if we cannot we then validate the 155 + * source. If this is invalid we can skip the address space check, 156 + * thus avoiding the deadlock. 157 + */ 158 + if (!down_read_trylock(&mm->mmap_sem)) { 159 + if (kernel_mode(regs) && !search_exception_tables(regs->pc)) 160 + goto bad_area_nosemaphore; 161 + 162 + down_read(&mm->mmap_sem); 163 + } 164 + 165 + vma = find_vma(mm, address); 166 + if (!vma) 167 + goto bad_area; 168 + 169 + if (vma->vm_start <= address) 170 + goto good_area; 171 + 172 + if (!(vma->vm_flags & VM_GROWSDOWN)) 173 + goto bad_area; 174 + 175 + if (!is_write) 176 + goto bad_area; 177 + 178 + /* 179 + * N.B. The ABI allows programs to access up to 180 + * a few hundred bytes below the stack pointer (TBD). 181 + * The kernel signal delivery code writes up to about 1.5kB 182 + * below the stack pointer (r1) before decrementing it. 183 + * The exec code can write slightly over 640kB to the stack 184 + * before setting the user r1. Thus we allow the stack to 185 + * expand to 1MB without further checks. 186 + */ 187 + if (address + 0x100000 < vma->vm_end) { 188 + 189 + /* get user regs even if this fault is in kernel mode */ 190 + struct pt_regs *uregs = current->thread.regs; 191 + if (uregs == NULL) 192 + goto bad_area; 193 + 194 + /* 195 + * A user-mode access to an address a long way below 196 + * the stack pointer is only valid if the instruction 197 + * is one which would update the stack pointer to the 198 + * address accessed if the instruction completed, 199 + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb 200 + * (or the byte, halfword, float or double forms). 201 + * 202 + * If we don't check this then any write to the area 203 + * between the last mapped region and the stack will 204 + * expand the stack rather than segfaulting. 205 + */ 206 + if (address + 2048 < uregs->r1 207 + && (kernel_mode(regs) || !store_updates_sp(regs))) 208 + goto bad_area; 209 + } 210 + if (expand_stack(vma, address)) 211 + goto bad_area; 212 + 213 + good_area: 214 + code = SEGV_ACCERR; 215 + 216 + /* a write */ 217 + if (is_write) { 218 + if (!(vma->vm_flags & VM_WRITE)) 219 + goto bad_area; 220 + /* a read */ 221 + } else { 222 + /* protection fault */ 223 + if (error_code & 0x08000000) 224 + goto bad_area; 225 + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) 226 + goto bad_area; 227 + } 228 + 229 + /* 230 + * If for any reason at all we couldn't handle the fault, 231 + * make sure we exit gracefully rather than endlessly redo 232 + * the fault. 233 + */ 234 + survive: 235 + fault = handle_mm_fault(mm, vma, address, is_write); 236 + if (unlikely(fault & VM_FAULT_ERROR)) { 237 + if (fault & VM_FAULT_OOM) 238 + goto out_of_memory; 239 + else if (fault & VM_FAULT_SIGBUS) 240 + goto do_sigbus; 241 + BUG(); 242 + } 243 + if (fault & VM_FAULT_MAJOR) 244 + current->maj_flt++; 245 + else 246 + current->min_flt++; 247 + up_read(&mm->mmap_sem); 248 + /* 249 + * keep track of tlb+htab misses that are good addrs but 250 + * just need pte's created via handle_mm_fault() 251 + * -- Cort 252 + */ 253 + pte_misses++; 254 + return; 255 + 256 + bad_area: 257 + up_read(&mm->mmap_sem); 258 + 259 + bad_area_nosemaphore: 260 + pte_errors++; 261 + 262 + /* User mode accesses cause a SIGSEGV */ 263 + if (user_mode(regs)) { 264 + _exception(SIGSEGV, regs, code, address); 265 + /* info.si_signo = SIGSEGV; 266 + info.si_errno = 0; 267 + info.si_code = code; 268 + info.si_addr = (void *) address; 269 + force_sig_info(SIGSEGV, &info, current);*/ 270 + return; 271 + } 272 + 273 + bad_page_fault(regs, address, SIGSEGV); 274 + return; 275 + 276 + /* 277 + * We ran out of memory, or some other thing happened to us that made 278 + * us unable to handle the page fault gracefully. 279 + */ 280 + out_of_memory: 281 + if (current->pid == 1) { 282 + yield(); 283 + down_read(&mm->mmap_sem); 284 + goto survive; 285 + } 286 + up_read(&mm->mmap_sem); 287 + printk(KERN_WARNING "VM: killing process %s\n", current->comm); 288 + if (user_mode(regs)) 289 + do_exit(SIGKILL); 290 + bad_page_fault(regs, address, SIGKILL); 291 + return; 292 + 293 + do_sigbus: 294 + up_read(&mm->mmap_sem); 295 + if (user_mode(regs)) { 296 + info.si_signo = SIGBUS; 297 + info.si_errno = 0; 298 + info.si_code = BUS_ADRERR; 299 + info.si_addr = (void __user *)address; 300 + force_sig_info(SIGBUS, &info, current); 301 + return; 302 + } 303 + bad_page_fault(regs, address, SIGBUS); 304 + }