Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] mm: kill check_user_page_readable

check_user_page_readable is a problematic variant of follow_page. It's used
only by oprofile's i386 and arm backtrace code, at interrupt time, to
establish whether a userspace stackframe is currently readable.

This is problematic, because we want to push the page_table_lock down inside
follow_page, and later split it; whereas oprofile is doing a spin_trylock on
it (in the i386 case, forgotten in the arm case), and needs that to pin
perhaps two pages spanned by the stackframe (which might be covered by
different locks when we split).

I think oprofile is going about this in the wrong way: it doesn't need to know
the area is readable (neither i386 nor arm uses read protection of user
pages), it doesn't need to pin the memory, it should simply
__copy_from_user_inatomic, and see if that succeeds or not. Sorry, but I've
not got around to devising the sparse __user annotations for this.

Then we can eliminate check_user_page_readable, and return to a single
follow_page without the __follow_page variants.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Hugh Dickins; committed by Linus Torvalds.
c34d1b4d c0718806

+26 -88
+9 -37
arch/arm/oprofile/backtrace.c
··· 49 49 50 50 static struct frame_tail* user_backtrace(struct frame_tail *tail) 51 51 { 52 - struct frame_tail buftail; 52 + struct frame_tail buftail[2]; 53 53 54 - /* hardware pte might not be valid due to dirty/accessed bit emulation 55 - * so we use copy_from_user and benefit from exception fixups */ 56 - if (copy_from_user(&buftail, tail, sizeof(struct frame_tail))) 54 + /* Also check accessibility of one struct frame_tail beyond */ 55 + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) 56 + return NULL; 57 + if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) 57 58 return NULL; 58 59 59 - oprofile_add_trace(buftail.lr); 60 + oprofile_add_trace(buftail[0].lr); 60 61 61 62 /* frame pointers should strictly progress back up the stack 62 63 * (towards higher addresses) */ 63 - if (tail >= buftail.fp) 64 + if (tail >= buftail[0].fp) 64 65 return NULL; 65 66 66 - return buftail.fp-1; 67 - } 68 - 69 - /* Compare two addresses and see if they're on the same page */ 70 - #define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \ 71 - == ((((unsigned long) y) + offset) >> PAGE_SHIFT)) 72 - 73 - /* check that the page(s) containing the frame tail are present */ 74 - static int pages_present(struct frame_tail *tail) 75 - { 76 - struct mm_struct * mm = current->mm; 77 - 78 - if (!check_user_page_readable(mm, (unsigned long)tail)) 79 - return 0; 80 - 81 - if (CMP_ADDR_EQUAL(tail, tail, 8)) 82 - return 1; 83 - 84 - if (!check_user_page_readable(mm, ((unsigned long)tail) + 8)) 85 - return 0; 86 - 87 - return 1; 67 + return buftail[0].fp-1; 88 68 } 89 69 90 70 /* ··· 98 118 void arm_backtrace(struct pt_regs * const regs, unsigned int depth) 99 119 { 100 120 struct frame_tail *tail; 101 - unsigned long last_address = 0; 102 121 103 122 tail = ((struct frame_tail *) regs->ARM_fp) - 1; 104 123 ··· 111 132 return; 112 133 } 113 134 114 - while (depth-- && tail && !((unsigned long) tail & 3)) { 115 - if ((!CMP_ADDR_EQUAL(last_address, tail, 0) 116 - 
|| !CMP_ADDR_EQUAL(last_address, tail, 8)) 117 - && !pages_present(tail)) 118 - return; 119 - last_address = (unsigned long) tail; 135 + while (depth-- && tail && !((unsigned long) tail & 3)) 120 136 tail = user_backtrace(tail); 121 - } 122 137 } 123 -
+13 -25
arch/i386/oprofile/backtrace.c
··· 12 12 #include <linux/sched.h> 13 13 #include <linux/mm.h> 14 14 #include <asm/ptrace.h> 15 + #include <asm/uaccess.h> 15 16 16 17 struct frame_head { 17 18 struct frame_head * ebp; ··· 22 21 static struct frame_head * 23 22 dump_backtrace(struct frame_head * head) 24 23 { 25 - oprofile_add_trace(head->ret); 24 + struct frame_head bufhead[2]; 25 + 26 + /* Also check accessibility of one struct frame_head beyond */ 27 + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 28 + return NULL; 29 + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) 30 + return NULL; 31 + 32 + oprofile_add_trace(bufhead[0].ret); 26 33 27 34 /* frame pointers should strictly progress back up the stack 28 35 * (towards higher addresses) */ 29 - if (head >= head->ebp) 36 + if (head >= bufhead[0].ebp) 30 37 return NULL; 31 38 32 - return head->ebp; 33 - } 34 - 35 - /* check that the page(s) containing the frame head are present */ 36 - static int pages_present(struct frame_head * head) 37 - { 38 - struct mm_struct * mm = current->mm; 39 - 40 - /* FIXME: only necessary once per page */ 41 - if (!check_user_page_readable(mm, (unsigned long)head)) 42 - return 0; 43 - 44 - return check_user_page_readable(mm, (unsigned long)(head + 1)); 39 + return bufhead[0].ebp; 45 40 } 46 41 47 42 /* ··· 94 97 return; 95 98 } 96 99 97 - #ifdef CONFIG_SMP 98 - if (!spin_trylock(&current->mm->page_table_lock)) 99 - return; 100 - #endif 101 - 102 - while (depth-- && head && pages_present(head)) 100 + while (depth-- && head) 103 101 head = dump_backtrace(head); 104 - 105 - #ifdef CONFIG_SMP 106 - spin_unlock(&current->mm->page_table_lock); 107 - #endif 108 102 }
-1
include/linux/mm.h
··· 944 944 extern unsigned long vmalloc_to_pfn(void *addr); 945 945 extern struct page * follow_page(struct mm_struct *mm, unsigned long address, 946 946 int write); 947 - extern int check_user_page_readable(struct mm_struct *mm, unsigned long address); 948 947 int remap_pfn_range(struct vm_area_struct *, unsigned long, 949 948 unsigned long, unsigned long, pgprot_t); 950 949
+4 -25
mm/memory.c
··· 809 809 * Do a quick page-table lookup for a single page. 810 810 * mm->page_table_lock must be held. 811 811 */ 812 - static struct page *__follow_page(struct mm_struct *mm, unsigned long address, 813 - int read, int write, int accessed) 812 + struct page *follow_page(struct mm_struct *mm, unsigned long address, int write) 814 813 { 815 814 pgd_t *pgd; 816 815 pud_t *pud; ··· 845 846 if (pte_present(pte)) { 846 847 if (write && !pte_write(pte)) 847 848 goto out; 848 - if (read && !pte_read(pte)) 849 - goto out; 850 849 pfn = pte_pfn(pte); 851 850 if (pfn_valid(pfn)) { 852 851 page = pfn_to_page(pfn); 853 - if (accessed) { 854 - if (write && !pte_dirty(pte) &&!PageDirty(page)) 855 - set_page_dirty(page); 856 - mark_page_accessed(page); 857 - } 852 + if (write && !pte_dirty(pte) &&!PageDirty(page)) 853 + set_page_dirty(page); 854 + mark_page_accessed(page); 858 855 return page; 859 856 } 860 857 } ··· 858 863 out: 859 864 return NULL; 860 865 } 861 - 862 - inline struct page * 863 - follow_page(struct mm_struct *mm, unsigned long address, int write) 864 - { 865 - return __follow_page(mm, address, 0, write, 1); 866 - } 867 - 868 - /* 869 - * check_user_page_readable() can be called frm niterrupt context by oprofile, 870 - * so we need to avoid taking any non-irq-safe locks 871 - */ 872 - int check_user_page_readable(struct mm_struct *mm, unsigned long address) 873 - { 874 - return __follow_page(mm, address, 1, 0, 0) != NULL; 875 - } 876 - EXPORT_SYMBOL(check_user_page_readable); 877 866 878 867 static inline int 879 868 untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,