arch/cris/mm/fault.c at v2.6.21

tjh.dev / kernel
fork atom
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork atom
kernel / arch / cris / mm / fault.c
at v2.6.21 458 lines 13 kB view raw
wrap content
  1/*
  2 *  linux/arch/cris/mm/fault.c
  3 *
  4 *  Copyright (C) 2000, 2001  Axis Communications AB
  5 *
  6 *  Authors:  Bjorn Wesen 
  7 * 
  8 *  $Log: fault.c,v $
  9 *  Revision 1.20  2005/03/04 08:16:18  starvik
 10 *  Merge of Linux 2.6.11.
 11 *
 12 *  Revision 1.19  2005/01/14 10:07:59  starvik
 13 *  Fixed warning.
 14 *
 15 *  Revision 1.18  2005/01/12 08:10:14  starvik
 16 *  Readded the change of frametype when handling kernel page fault fixup
 17 *  for v10. This is necessary to avoid that the CPU remakes the faulting
 18 *  access.
 19 *
 20 *  Revision 1.17  2005/01/11 13:53:05  starvik
 21 *  Use raw_printk.
 22 *
 23 *  Revision 1.16  2004/12/17 11:39:41  starvik
 24 *  SMP support.
 25 *
 26 *  Revision 1.15  2004/11/23 18:36:18  starvik
 27 *  Stack is now non-executable.
 28 *  Signal handler trampolines are placed in a reserved page mapped into all
 29 *  processes.
 30 *
 31 *  Revision 1.14  2004/11/23 07:10:21  starvik
 32 *  Moved find_fixup_code to generic code.
 33 *
 34 *  Revision 1.13  2004/11/23 07:00:54  starvik
 35 *  Actually use the execute permission bit in the MMU. This makes it possible
 36 *  to prevent e.g. attacks where executable code is put on the stack.
 37 *
 38 *  Revision 1.12  2004/09/29 06:16:04  starvik
 39 *  Use instruction_pointer
 40 *
 41 *  Revision 1.11  2004/05/14 07:58:05  starvik
 42 *  Merge of changes from 2.4
 43 *
 44 *  Revision 1.10  2003/10/27 14:51:24  starvik
 45 *  Removed debugcode
 46 *
 47 *  Revision 1.9  2003/10/27 14:50:42  starvik
 48 *  Changed do_page_fault signature
 49 *
 50 *  Revision 1.8  2003/07/04 13:02:48  tobiasa
 51 *  Moved code snippet from arch/cris/mm/fault.c that searches for fixup code
 52 *  to seperate function in arch-specific files.
 53 *
 54 *  Revision 1.7  2003/01/22 06:48:38  starvik
 55 *  Fixed warnings issued by GCC 3.2.1
 56 *
 57 *  Revision 1.6  2003/01/09 14:42:52  starvik
 58 *  Merge of Linux 2.5.55
 59 *
 60 *  Revision 1.5  2002/12/11 14:44:48  starvik
 61 *  Extracted v10 (ETRAX 100LX) specific stuff to arch/cris/arch-v10/mm
 62 *
 63 *  Revision 1.4  2002/11/13 15:10:28  starvik
 64 *  pte_offset has been renamed to pte_offset_kernel
 65 *
 66 *  Revision 1.3  2002/11/05 06:45:13  starvik
 67 *  Merge of Linux 2.5.45
 68 *
 69 *  Revision 1.2  2001/12/18 13:35:22  bjornw
 70 *  Applied the 2.4.13->2.4.16 CRIS patch to 2.5.1 (is a copy of 2.4.15).
 71 *
 72 *  Revision 1.20  2001/11/22 13:34:06  bjornw
 73 *  * Bug workaround (LX TR89): force a rerun of the whole of an interrupted
 74 *    unaligned write, because the second half of the write will be corrupted
 75 *    otherwise. Affected unaligned writes spanning not-yet mapped pages.
 76 *  * Optimization: use the wr_rd bit in R_MMU_CAUSE to know whether a miss
 77 *    was due to a read or a write (before we didn't know this until the next
 78 *    restart of the interrupted instruction, thus wasting one fault-irq)
 79 *
 80 *  Revision 1.19  2001/11/12 19:02:10  pkj
 81 *  Fixed compiler warnings.
 82 *
 83 *  Revision 1.18  2001/07/18 22:14:32  bjornw
 84 *  Enable interrupts in the bulk of do_page_fault
 85 *
 86 *  Revision 1.17  2001/07/18 13:07:23  bjornw
 87 *  * Detect non-existant PTE's in vmalloc pmd synchronization
 88 *  * Remove comment about fast-paths for VMALLOC_START etc, because all that
 89 *    was totally bogus anyway it turned out :)
 90 *  * Fix detection of vmalloc-area synchronization
 91 *  * Add some comments
 92 *
 93 *  Revision 1.16  2001/06/13 00:06:08  bjornw
 94 *  current_pgd should be volatile
 95 *
 96 *  Revision 1.15  2001/06/13 00:02:23  bjornw
 97 *  Use a separate variable to store the current pgd to avoid races in schedule
 98 *
 99 *  Revision 1.14  2001/05/16 17:41:07  hp
100 *  Last comment tweak further tweaked.
101 *
102 *  Revision 1.13  2001/05/15 00:58:44  hp
103 *  Expand a bit on the comment why we compare address >= TASK_SIZE rather
104 *  than >= VMALLOC_START.
105 *
106 *  Revision 1.12  2001/04/04 10:51:14  bjornw
107 *  mmap_sem is grabbed for reading
108 *
109 *  Revision 1.11  2001/03/23 07:36:07  starvik
110 *  Corrected according to review remarks
111 *
112 *  Revision 1.10  2001/03/21 16:10:11  bjornw
113 *  CRIS_FRAME_FIXUP not needed anymore, use FRAME_NORMAL
114 *
115 *  Revision 1.9  2001/03/05 13:22:20  bjornw
116 *  Spell-fix and fix in vmalloc_fault handling
117 *
118 *  Revision 1.8  2000/11/22 14:45:31  bjornw
119 *  * 2.4.0-test10 removed the set_pgdir instantaneous kernel global mapping
120 *    into all processes. Instead we fill in the missing PTE entries on demand.
121 *
122 *  Revision 1.7  2000/11/21 16:39:09  bjornw
123 *  fixup switches frametype
124 *
125 *  Revision 1.6  2000/11/17 16:54:08  bjornw
126 *  More detailed siginfo reporting
127 *
128 *
129 */
130
131#include <linux/mm.h>
132#include <linux/interrupt.h>
133#include <linux/module.h>
134#include <asm/uaccess.h>
135
/*
 * Prototypes for helpers used by the fault handler below.
 * find_fixup_code() is defined at the end of this file; the other two
 * are not defined in this file.
 */
extern int raw_printk(const char *fmt, ...);
extern void die_if_kernel(const char *str, struct pt_regs *regs, long err);
extern int find_fixup_code(struct pt_regs *regs);
139
/*
 * Debug switches.
 *
 * D(x)   - low-level TLB reload tracing.  DEBUG is forcibly undefined
 *          here, so D(x) expands to nothing unless the #undef below is
 *          replaced with a #define.
 * DPG(x) - higher-level fault tracing; always expands to nothing.
 */
#undef DEBUG

#ifndef DEBUG
#define D(x)
#else
#define D(x) x
#endif

#define DPG(x)
151
152/* current active page directory */
153
154volatile DEFINE_PER_CPU(pgd_t *,current_pgd);
155unsigned long cris_signal_return_page;
156
157/*
158 * This routine handles page faults.  It determines the address,
159 * and the problem, and then passes it off to one of the appropriate
160 * routines.
161 *
162 * Notice that the address we're given is aligned to the page the fault
163 * occurred in, since we only get the PFN in R_MMU_CAUSE not the complete
164 * address.
165 *
166 * error_code:
167 *	bit 0 == 0 means no page found, 1 means protection fault
168 *	bit 1 == 0 means read, 1 means write
169 *
170 * If this routine detects a bad access, it returns 1, otherwise it
171 * returns 0.
172 */
173
174asmlinkage void
175do_page_fault(unsigned long address, struct pt_regs *regs,
176	      int protection, int writeaccess)
177{
178	struct task_struct *tsk;
179	struct mm_struct *mm;
180	struct vm_area_struct * vma;
181	siginfo_t info;
182
183        D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n",
184                 address, smp_processor_id(), instruction_pointer(regs),
185                 protection, writeaccess));
186
187	tsk = current;
188
189	/*
190	 * We fault-in kernel-space virtual memory on-demand. The
191	 * 'reference' page table is init_mm.pgd.
192	 *
193	 * NOTE! We MUST NOT take any locks for this case. We may
194	 * be in an interrupt or a critical region, and should
195	 * only copy the information from the master page table,
196	 * nothing more.
197	 *
198	 * NOTE2: This is done so that, when updating the vmalloc
199	 * mappings we don't have to walk all processes pgdirs and
200	 * add the high mappings all at once. Instead we do it as they
201	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
202	 * bit set so sometimes the TLB can use a lingering entry.
203	 *
204	 * This verifies that the fault happens in kernel space
205	 * and that the fault was not a protection error (error_code & 1).
206	 */
207
208	if (address >= VMALLOC_START &&
209	    !protection &&
210	    !user_mode(regs))
211		goto vmalloc_fault;
212
213	/* When stack execution is not allowed we store the signal
214	 * trampolines in the reserved cris_signal_return_page.
215	 * Handle this in the exact same way as vmalloc (we know
216	 * that the mapping is there and is valid so no need to
217	 * call handle_mm_fault).
218	 */
219	if (cris_signal_return_page &&
220	    address == cris_signal_return_page &&
221	    !protection && user_mode(regs))
222		goto vmalloc_fault;
223
224	/* we can and should enable interrupts at this point */
225	local_irq_enable();
226
227	mm = tsk->mm;
228	info.si_code = SEGV_MAPERR;
229
230	/*
231	 * If we're in an interrupt or have no user
232	 * context, we must not take the fault..
233	 */
234
235	if (in_atomic() || !mm)
236		goto no_context;
237
238	down_read(&mm->mmap_sem);
239	vma = find_vma(mm, address);
240	if (!vma)
241		goto bad_area;
242	if (vma->vm_start <= address)
243		goto good_area;
244	if (!(vma->vm_flags & VM_GROWSDOWN))
245		goto bad_area;
246	if (user_mode(regs)) {
247		/*
248		 * accessing the stack below usp is always a bug.
249		 * we get page-aligned addresses so we can only check
250		 * if we're within a page from usp, but that might be
251		 * enough to catch brutal errors at least.
252		 */
253		if (address + PAGE_SIZE < rdusp())
254			goto bad_area;
255	}
256	if (expand_stack(vma, address))
257		goto bad_area;
258
259	/*
260	 * Ok, we have a good vm_area for this memory access, so
261	 * we can handle it..
262	 */
263
264 good_area:
265	info.si_code = SEGV_ACCERR;
266
267	/* first do some preliminary protection checks */
268
269	if (writeaccess == 2){
270		if (!(vma->vm_flags & VM_EXEC))
271			goto bad_area;
272	} else if (writeaccess == 1) {
273		if (!(vma->vm_flags & VM_WRITE))
274			goto bad_area;
275	} else {
276		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
277			goto bad_area;
278	}
279
280	/*
281	 * If for any reason at all we couldn't handle the fault,
282	 * make sure we exit gracefully rather than endlessly redo
283	 * the fault.
284	 */
285
286	switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) {
287	case VM_FAULT_MINOR:
288		tsk->min_flt++;
289		break;
290	case VM_FAULT_MAJOR:
291		tsk->maj_flt++;
292		break;
293	case VM_FAULT_SIGBUS:
294		goto do_sigbus;
295	default:
296		goto out_of_memory;
297	}
298
299	up_read(&mm->mmap_sem);
300	return;
301
302	/*
303	 * Something tried to access memory that isn't in our memory map..
304	 * Fix it, but check if it's kernel or user first..
305	 */
306
307 bad_area:
308	up_read(&mm->mmap_sem);
309
310 bad_area_nosemaphore:
311	DPG(show_registers(regs));
312
313	/* User mode accesses just cause a SIGSEGV */
314
315	if (user_mode(regs)) {
316		info.si_signo = SIGSEGV;
317		info.si_errno = 0;
318		/* info.si_code has been set above */
319		info.si_addr = (void *)address;
320		force_sig_info(SIGSEGV, &info, tsk);
321		return;
322	}
323
324 no_context:
325
326	/* Are we prepared to handle this kernel fault?
327	 *
328	 * (The kernel has valid exception-points in the source 
329	 *  when it acesses user-memory. When it fails in one
330	 *  of those points, we find it in a table and do a jump
331	 *  to some fixup code that loads an appropriate error
332	 *  code)
333	 */
334
335	if (find_fixup_code(regs))
336		return;
337
338	/*
339	 * Oops. The kernel tried to access some bad page. We'll have to
340	 * terminate things with extreme prejudice.
341	 */
342
343	if ((unsigned long) (address) < PAGE_SIZE)
344		raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
345	else
346		raw_printk(KERN_ALERT "Unable to handle kernel access");
347	raw_printk(" at virtual address %08lx\n",address);
348
349	die_if_kernel("Oops", regs, (writeaccess << 1) | protection);
350
351	do_exit(SIGKILL);
352
353	/*
354	 * We ran out of memory, or some other thing happened to us that made
355	 * us unable to handle the page fault gracefully.
356	 */
357
358 out_of_memory:
359	up_read(&mm->mmap_sem);
360	printk("VM: killing process %s\n", tsk->comm);
361	if (user_mode(regs))
362		do_exit(SIGKILL);
363	goto no_context;
364
365 do_sigbus:
366	up_read(&mm->mmap_sem);
367
368	/*
369	 * Send a sigbus, regardless of whether we were in kernel
370	 * or user mode.
371	 */
372	info.si_signo = SIGBUS;
373	info.si_errno = 0;
374	info.si_code = BUS_ADRERR;
375	info.si_addr = (void *)address;
376	force_sig_info(SIGBUS, &info, tsk);
377
378	/* Kernel mode? Handle exceptions or die */
379	if (!user_mode(regs))
380		goto no_context;
381	return;
382
383vmalloc_fault:
384	{
385		/*
386		 * Synchronize this task's top level page-table
387		 * with the 'reference' page table.
388		 *
389		 * Use current_pgd instead of tsk->active_mm->pgd
390		 * since the latter might be unavailable if this
391		 * code is executed in a misfortunately run irq
392		 * (like inside schedule() between switch_mm and
393		 *  switch_to...).
394		 */
395
396		int offset = pgd_index(address);
397		pgd_t *pgd, *pgd_k;
398		pud_t *pud, *pud_k;
399		pmd_t *pmd, *pmd_k;
400		pte_t *pte_k;
401
402		pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
403		pgd_k = init_mm.pgd + offset;
404
405		/* Since we're two-level, we don't need to do both
406		 * set_pgd and set_pmd (they do the same thing). If
407		 * we go three-level at some point, do the right thing
408		 * with pgd_present and set_pgd here. 
409		 * 
410		 * Also, since the vmalloc area is global, we don't
411		 * need to copy individual PTE's, it is enough to
412		 * copy the pgd pointer into the pte page of the
413		 * root task. If that is there, we'll find our pte if
414		 * it exists.
415		 */
416
417		pud = pud_offset(pgd, address);
418		pud_k = pud_offset(pgd_k, address);
419		if (!pud_present(*pud_k))
420			goto no_context;
421
422		pmd = pmd_offset(pud, address);
423		pmd_k = pmd_offset(pud_k, address);
424
425		if (!pmd_present(*pmd_k))
426			goto bad_area_nosemaphore;
427
428		set_pmd(pmd, *pmd_k);
429
430		/* Make sure the actual PTE exists as well to
431		 * catch kernel vmalloc-area accesses to non-mapped
432		 * addresses. If we don't do this, this will just
433		 * silently loop forever.
434		 */
435
436		pte_k = pte_offset_kernel(pmd_k, address);
437		if (!pte_present(*pte_k))
438			goto no_context;
439
440		return;
441	}
442}
443
444/* Find fixup code. */
445int
446find_fixup_code(struct pt_regs *regs)
447{
448	const struct exception_table_entry *fixup;
449
450	if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) {
451		/* Adjust the instruction pointer in the stackframe. */
452		instruction_pointer(regs) = fixup->fixup;
453		arch_fixup(regs);
454		return 1;
455	}
456
457	return 0;
458}