arch/ppc64/mm/fault.c at v2.6.12

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / arch / ppc64 / mm / fault.c
at v2.6.12 312 lines 8.4 kB view raw
wrap content
  1/*
   2 *  arch/ppc64/mm/fault.c
  3 *
  4 *  PowerPC version 
  5 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
  6 *
  7 *  Derived from "arch/i386/mm/fault.c"
  8 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  9 *
 10 *  Modified by Cort Dougan and Paul Mackerras.
 11 *
 12 *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 13 *
 14 *  This program is free software; you can redistribute it and/or
 15 *  modify it under the terms of the GNU General Public License
 16 *  as published by the Free Software Foundation; either version
 17 *  2 of the License, or (at your option) any later version.
 18 */
 19
 20#include <linux/config.h>
 21#include <linux/signal.h>
 22#include <linux/sched.h>
 23#include <linux/kernel.h>
 24#include <linux/errno.h>
 25#include <linux/string.h>
 26#include <linux/types.h>
 27#include <linux/mman.h>
 28#include <linux/mm.h>
 29#include <linux/interrupt.h>
 30#include <linux/smp_lock.h>
 31#include <linux/module.h>
 32
 33#include <asm/page.h>
 34#include <asm/pgtable.h>
 35#include <asm/mmu.h>
 36#include <asm/mmu_context.h>
 37#include <asm/system.h>
 38#include <asm/uaccess.h>
 39#include <asm/kdebug.h>
 40
 41/*
 42 * Check whether the instruction at regs->nip is a store using
 43 * an update addressing form which will update r1.
 44 */
 45static int store_updates_sp(struct pt_regs *regs)
 46{
 47	unsigned int inst;
 48
 49	if (get_user(inst, (unsigned int __user *)regs->nip))
 50		return 0;
 51	/* check for 1 in the rA field */
 52	if (((inst >> 16) & 0x1f) != 1)
 53		return 0;
 54	/* check major opcode */
 55	switch (inst >> 26) {
 56	case 37:	/* stwu */
 57	case 39:	/* stbu */
 58	case 45:	/* sthu */
 59	case 53:	/* stfsu */
 60	case 55:	/* stfdu */
 61		return 1;
 62	case 62:	/* std or stdu */
 63		return (inst & 3) == 1;
 64	case 31:
 65		/* check minor opcode */
 66		switch ((inst >> 1) & 0x3ff) {
 67		case 181:	/* stdux */
 68		case 183:	/* stwux */
 69		case 247:	/* stbux */
 70		case 439:	/* sthux */
 71		case 695:	/* stfsux */
 72		case 759:	/* stfdux */
 73			return 1;
 74		}
 75	}
 76	return 0;
 77}
 78
 79/*
 80 * The error_code parameter is
 81 *  - DSISR for a non-SLB data access fault,
 82 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 83 *  - 0 any SLB fault.
 84 * The return value is 0 if the fault was handled, or the signal
 85 * number if this is a kernel fault that can't be handled here.
 86 */
 87int do_page_fault(struct pt_regs *regs, unsigned long address,
 88		  unsigned long error_code)
 89{
 90	struct vm_area_struct * vma;
 91	struct mm_struct *mm = current->mm;
 92	siginfo_t info;
 93	unsigned long code = SEGV_MAPERR;
 94	unsigned long is_write = error_code & DSISR_ISSTORE;
 95	unsigned long trap = TRAP(regs);
 96 	unsigned long is_exec = trap == 0x400;
 97
 98	BUG_ON((trap == 0x380) || (trap == 0x480));
 99
100	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
101				11, SIGSEGV) == NOTIFY_STOP)
102		return 0;
103
104	if (trap == 0x300) {
105		if (debugger_fault_handler(regs))
106			return 0;
107	}
108
109	/* On a kernel SLB miss we can only check for a valid exception entry */
110	if (!user_mode(regs) && (address >= TASK_SIZE))
111		return SIGSEGV;
112
113	if (error_code & DSISR_DABRMATCH) {
114		if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
115					11, SIGSEGV) == NOTIFY_STOP)
116			return 0;
117		if (debugger_dabr_match(regs))
118			return 0;
119	}
120
121	if (in_atomic() || mm == NULL) {
122		if (!user_mode(regs))
123			return SIGSEGV;
124		/* in_atomic() in user mode is really bad,
125		   as is current->mm == NULL. */
126		printk(KERN_EMERG "Page fault in user mode with"
127		       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
128		printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
129		       regs->nip, regs->msr);
130		die("Weird page fault", regs, SIGSEGV);
131	}
132
133	/* When running in the kernel we expect faults to occur only to
134	 * addresses in user space.  All other faults represent errors in the
135	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
136	 * erroneous fault occuring in a code path which already holds mmap_sem
137	 * we will deadlock attempting to validate the fault against the
138	 * address space.  Luckily the kernel only validly references user
139	 * space from well defined areas of code, which are listed in the
140	 * exceptions table.
141	 *
142	 * As the vast majority of faults will be valid we will only perform
143	 * the source reference check when there is a possibilty of a deadlock.
144	 * Attempt to lock the address space, if we cannot we then validate the
145	 * source.  If this is invalid we can skip the address space check,
146	 * thus avoiding the deadlock.
147	 */
148	if (!down_read_trylock(&mm->mmap_sem)) {
149		if (!user_mode(regs) && !search_exception_tables(regs->nip))
150			goto bad_area_nosemaphore;
151
152		down_read(&mm->mmap_sem);
153	}
154
155	vma = find_vma(mm, address);
156	if (!vma)
157		goto bad_area;
158
159	if (vma->vm_start <= address) {
160		goto good_area;
161	}
162	if (!(vma->vm_flags & VM_GROWSDOWN))
163		goto bad_area;
164
165	/*
166	 * N.B. The POWER/Open ABI allows programs to access up to
167	 * 288 bytes below the stack pointer.
168	 * The kernel signal delivery code writes up to about 1.5kB
169	 * below the stack pointer (r1) before decrementing it.
170	 * The exec code can write slightly over 640kB to the stack
171	 * before setting the user r1.  Thus we allow the stack to
172	 * expand to 1MB without further checks.
173	 */
174	if (address + 0x100000 < vma->vm_end) {
175		/* get user regs even if this fault is in kernel mode */
176		struct pt_regs *uregs = current->thread.regs;
177		if (uregs == NULL)
178			goto bad_area;
179
180		/*
181		 * A user-mode access to an address a long way below
182		 * the stack pointer is only valid if the instruction
183		 * is one which would update the stack pointer to the
184		 * address accessed if the instruction completed,
185		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
186		 * (or the byte, halfword, float or double forms).
187		 *
188		 * If we don't check this then any write to the area
189		 * between the last mapped region and the stack will
190		 * expand the stack rather than segfaulting.
191		 */
192		if (address + 2048 < uregs->gpr[1]
193		    && (!user_mode(regs) || !store_updates_sp(regs)))
194			goto bad_area;
195	}
196
197	if (expand_stack(vma, address))
198		goto bad_area;
199
200good_area:
201	code = SEGV_ACCERR;
202
203	if (is_exec) {
204		/* protection fault */
205		if (error_code & DSISR_PROTFAULT)
206			goto bad_area;
207		if (!(vma->vm_flags & VM_EXEC))
208			goto bad_area;
209	/* a write */
210	} else if (is_write) {
211		if (!(vma->vm_flags & VM_WRITE))
212			goto bad_area;
213	/* a read */
214	} else {
215		if (!(vma->vm_flags & VM_READ))
216			goto bad_area;
217	}
218
219 survive:
220	/*
221	 * If for any reason at all we couldn't handle the fault,
222	 * make sure we exit gracefully rather than endlessly redo
223	 * the fault.
224	 */
225	switch (handle_mm_fault(mm, vma, address, is_write)) {
226
227	case VM_FAULT_MINOR:
228		current->min_flt++;
229		break;
230	case VM_FAULT_MAJOR:
231		current->maj_flt++;
232		break;
233	case VM_FAULT_SIGBUS:
234		goto do_sigbus;
235	case VM_FAULT_OOM:
236		goto out_of_memory;
237	default:
238		BUG();
239	}
240
241	up_read(&mm->mmap_sem);
242	return 0;
243
244bad_area:
245	up_read(&mm->mmap_sem);
246
247bad_area_nosemaphore:
248	/* User mode accesses cause a SIGSEGV */
249	if (user_mode(regs)) {
250		info.si_signo = SIGSEGV;
251		info.si_errno = 0;
252		info.si_code = code;
253		info.si_addr = (void __user *) address;
254		force_sig_info(SIGSEGV, &info, current);
255		return 0;
256	}
257
258	if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
259	    && printk_ratelimit())
260		printk(KERN_CRIT "kernel tried to execute NX-protected"
261		       " page (%lx) - exploit attempt? (uid: %d)\n",
262		       address, current->uid);
263
264	return SIGSEGV;
265
266/*
267 * We ran out of memory, or some other thing happened to us that made
268 * us unable to handle the page fault gracefully.
269 */
270out_of_memory:
271	up_read(&mm->mmap_sem);
272	if (current->pid == 1) {
273		yield();
274		down_read(&mm->mmap_sem);
275		goto survive;
276	}
277	printk("VM: killing process %s\n", current->comm);
278	if (user_mode(regs))
279		do_exit(SIGKILL);
280	return SIGKILL;
281
282do_sigbus:
283	up_read(&mm->mmap_sem);
284	if (user_mode(regs)) {
285		info.si_signo = SIGBUS;
286		info.si_errno = 0;
287		info.si_code = BUS_ADRERR;
288		info.si_addr = (void __user *)address;
289		force_sig_info(SIGBUS, &info, current);
290		return 0;
291	}
292	return SIGBUS;
293}
294
295/*
296 * bad_page_fault is called when we have a bad access from the kernel.
297 * It is called from do_page_fault above and from some of the procedures
298 * in traps.c.
299 */
300void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
301{
302	const struct exception_table_entry *entry;
303
304	/* Are we prepared to handle this fault?  */
305	if ((entry = search_exception_tables(regs->nip)) != NULL) {
306		regs->nip = entry->fixup;
307		return;
308	}
309
310	/* kernel has accessed a bad area */
311	die("Kernel access of bad area", regs, sig);
312}
Configure Feed

Configure Feed