Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] ARM: Fix XScale copy_page implementation

The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking
from the copypage functions, which broke the XScale implementation.
This patch fixes the locking on XScale and removes the now-unneeded
minicache code.

Signed-off-by: Russell King <rmk@arm.linux.org.uk>
Checked-by: Richard Purdie

+131 -195
-7
arch/arm/mm/Kconfig
··· 228 228 select CPU_CACHE_V4WB 229 229 select CPU_CACHE_VIVT 230 230 select CPU_TLB_V4WB 231 - select CPU_MINICACHE 232 231 233 232 # XScale 234 233 config CPU_XSCALE ··· 238 239 select CPU_ABRT_EV5T 239 240 select CPU_CACHE_VIVT 240 241 select CPU_TLB_V4WBI 241 - select CPU_MINICACHE 242 242 243 243 # ARMv6 244 244 config CPU_V6 ··· 342 344 343 345 config CPU_TLB_V6 344 346 bool 345 - 346 - config CPU_MINICACHE 347 - bool 348 - help 349 - Processor has a minicache. 350 347 351 348 comment "Processor Features" 352 349
-2
arch/arm/mm/Makefile
··· 31 31 obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o 32 32 obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o 33 33 34 - obj-$(CONFIG_CPU_MINICACHE) += minicache.o 35 - 36 34 obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o 37 35 obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o 38 36 obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
-113
arch/arm/mm/copypage-xscale.S
··· 1 - /* 2 - * linux/arch/arm/lib/copypage-xscale.S 3 - * 4 - * Copyright (C) 2001 Russell King 5 - * 6 - * This program is free software; you can redistribute it and/or modify 7 - * it under the terms of the GNU General Public License version 2 as 8 - * published by the Free Software Foundation. 9 - */ 10 - #include <linux/linkage.h> 11 - #include <linux/init.h> 12 - #include <asm/constants.h> 13 - 14 - /* 15 - * General note: 16 - * We don't really want write-allocate cache behaviour for these functions 17 - * since that will just eat through 8K of the cache. 18 - */ 19 - 20 - .text 21 - .align 5 22 - /* 23 - * XScale optimised copy_user_page 24 - * r0 = destination 25 - * r1 = source 26 - * r2 = virtual user address of ultimate destination page 27 - * 28 - * The source page may have some clean entries in the cache already, but we 29 - * can safely ignore them - break_cow() will flush them out of the cache 30 - * if we eventually end up using our copied page. 31 - * 32 - * What we could do is use the mini-cache to buffer reads from the source 33 - * page. We rely on the mini-cache being smaller than one page, so we'll 34 - * cycle through the complete cache anyway. 35 - */ 36 - ENTRY(xscale_mc_copy_user_page) 37 - stmfd sp!, {r4, r5, lr} 38 - mov r5, r0 39 - mov r0, r1 40 - bl map_page_minicache 41 - mov r1, r5 42 - mov lr, #PAGE_SZ/64-1 43 - 44 - /* 45 - * Strangely enough, best performance is achieved 46 - * when prefetching destination as well. 
(NP) 47 - */ 48 - pld [r0, #0] 49 - pld [r0, #32] 50 - pld [r1, #0] 51 - pld [r1, #32] 52 - 53 - 1: pld [r0, #64] 54 - pld [r0, #96] 55 - pld [r1, #64] 56 - pld [r1, #96] 57 - 58 - 2: ldrd r2, [r0], #8 59 - ldrd r4, [r0], #8 60 - mov ip, r1 61 - strd r2, [r1], #8 62 - ldrd r2, [r0], #8 63 - strd r4, [r1], #8 64 - ldrd r4, [r0], #8 65 - strd r2, [r1], #8 66 - strd r4, [r1], #8 67 - mcr p15, 0, ip, c7, c10, 1 @ clean D line 68 - ldrd r2, [r0], #8 69 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line 70 - ldrd r4, [r0], #8 71 - mov ip, r1 72 - strd r2, [r1], #8 73 - ldrd r2, [r0], #8 74 - strd r4, [r1], #8 75 - ldrd r4, [r0], #8 76 - strd r2, [r1], #8 77 - strd r4, [r1], #8 78 - mcr p15, 0, ip, c7, c10, 1 @ clean D line 79 - subs lr, lr, #1 80 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line 81 - bgt 1b 82 - beq 2b 83 - 84 - ldmfd sp!, {r4, r5, pc} 85 - 86 - .align 5 87 - /* 88 - * XScale optimised clear_user_page 89 - * r0 = destination 90 - * r1 = virtual user address of ultimate destination page 91 - */ 92 - ENTRY(xscale_mc_clear_user_page) 93 - mov r1, #PAGE_SZ/32 94 - mov r2, #0 95 - mov r3, #0 96 - 1: mov ip, r0 97 - strd r2, [r0], #8 98 - strd r2, [r0], #8 99 - strd r2, [r0], #8 100 - strd r2, [r0], #8 101 - mcr p15, 0, ip, c7, c10, 1 @ clean D line 102 - subs r1, r1, #1 103 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line 104 - bne 1b 105 - mov pc, lr 106 - 107 - __INITDATA 108 - 109 - .type xscale_mc_user_fns, #object 110 - ENTRY(xscale_mc_user_fns) 111 - .long xscale_mc_clear_user_page 112 - .long xscale_mc_copy_user_page 113 - .size xscale_mc_user_fns, . - xscale_mc_user_fns
+131
arch/arm/mm/copypage-xscale.c
··· 1 + /* 2 + * linux/arch/arm/mm/copypage-xscale.c 3 + * 4 + * Copyright (C) 1995-2005 Russell King 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + * 10 + * This handles the mini data cache, as found on SA11x0 and XScale 11 + * processors. When we copy a user page, we map it in such a way 12 + * that accesses to this page will not touch the main data cache, but 13 + * will be cached in the mini data cache. This prevents us thrashing 14 + * the main data cache on page faults. 15 + */ 16 + #include <linux/init.h> 17 + #include <linux/mm.h> 18 + 19 + #include <asm/page.h> 20 + #include <asm/pgtable.h> 21 + #include <asm/tlbflush.h> 22 + 23 + /* 24 + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture 25 + * specific hacks for copying pages efficiently. COPYPAGE_MINICACHE is the fixed kernel virtual address that xscale_mc_copy_user_page remaps onto the source page, while holding minicache_lock, before each copy. 26 + */ 27 + #define COPYPAGE_MINICACHE 0xffff8000 28 + 29 + #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ 30 + L_PTE_CACHEABLE) 31 + 32 + #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) 33 + 34 + static DEFINE_SPINLOCK(minicache_lock); 35 + 36 + /* 37 + * XScale mini-dcache optimised copy_user_page 38 + * 39 + * We flush the destination cache lines just before we write the data into the 40 + * corresponding address. Since the Dcache is read-allocate, this removes the 41 + * Dcache aliasing issue. The writes will be forwarded to the write buffer, 42 + * and merged as appropriate. NOTE(review): the asm below cleans and invalidates each destination line after the stores to it, not before - confirm the wording above. NOTE(review): the asm addresses r0 and r1 directly and only references operand %2, so it presumably relies on from and to arriving in r0 and r1 - confirm. 43 + */ 44 + static void __attribute__((naked)) 45 + mc_copy_user_page(void *from, void *to) 46 + { 47 + /* 48 + * Strangely enough, best performance is achieved 49 + * when prefetching destination as well. 
(NP) 50 + */ 51 + asm volatile( 52 + "stmfd sp!, {r4, r5, lr} \n\ 53 + mov lr, %2 \n\ 54 + pld [r0, #0] \n\ 55 + pld [r0, #32] \n\ 56 + pld [r1, #0] \n\ 57 + pld [r1, #32] \n\ 58 + 1: pld [r0, #64] \n\ 59 + pld [r0, #96] \n\ 60 + pld [r1, #64] \n\ 61 + pld [r1, #96] \n\ 62 + 2: ldrd r2, [r0], #8 \n\ 63 + ldrd r4, [r0], #8 \n\ 64 + mov ip, r1 \n\ 65 + strd r2, [r1], #8 \n\ 66 + ldrd r2, [r0], #8 \n\ 67 + strd r4, [r1], #8 \n\ 68 + ldrd r4, [r0], #8 \n\ 69 + strd r2, [r1], #8 \n\ 70 + strd r4, [r1], #8 \n\ 71 + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 72 + ldrd r2, [r0], #8 \n\ 73 + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ 74 + ldrd r4, [r0], #8 \n\ 75 + mov ip, r1 \n\ 76 + strd r2, [r1], #8 \n\ 77 + ldrd r2, [r0], #8 \n\ 78 + strd r4, [r1], #8 \n\ 79 + ldrd r4, [r0], #8 \n\ 80 + strd r2, [r1], #8 \n\ 81 + strd r4, [r1], #8 \n\ 82 + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 83 + subs lr, lr, #1 \n\ 84 + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ 85 + bgt 1b \n\ 86 + beq 2b \n\ 87 + ldmfd sp!, {r4, r5, pc} " 88 + : 89 + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); 90 + } 91 + 92 + void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) 93 + { 94 + spin_lock(&minicache_lock); 95 + 96 + set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); 97 + flush_tlb_kernel_page(COPYPAGE_MINICACHE); 98 + 99 + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); 100 + 101 + spin_unlock(&minicache_lock); 102 + } 103 + 104 + /* 105 + * XScale optimised clear_user_page: stores zeroes to the page at kaddr, 32 bytes per loop pass, then cleans and invalidates the D-cache line just written. The vaddr argument is not used; r1 is overwritten with the loop count. 106 + */ 107 + void __attribute__((naked)) 108 + xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) 109 + { 110 + asm volatile( 111 + "mov r1, %0 \n\ 112 + mov r2, #0 \n\ 113 + mov r3, #0 \n\ 114 + 1: mov ip, r0 \n\ 115 + strd r2, [r0], #8 \n\ 116 + strd r2, [r0], #8 \n\ 117 + strd r2, [r0], #8 \n\ 118 + strd r2, [r0], #8 \n\ 119 + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 120 + subs r1, r1, #1 \n\ 121 + mcr p15, 0, ip, 
c7, c6, 1 @ invalidate D line\n\ 122 + bne 1b \n\ 123 + mov pc, lr" 124 + : 125 + : "I" (PAGE_SIZE / 32)); 126 + } 127 + 128 + struct cpu_user_fns xscale_mc_user_fns __initdata = { 129 + .cpu_clear_user_page = xscale_mc_clear_user_page, 130 + .cpu_copy_user_page = xscale_mc_copy_user_page, 131 + };
-73
arch/arm/mm/minicache.c
··· 1 - /* 2 - * linux/arch/arm/mm/minicache.c 3 - * 4 - * Copyright (C) 2001 Russell King 5 - * 6 - * This program is free software; you can redistribute it and/or modify 7 - * it under the terms of the GNU General Public License version 2 as 8 - * published by the Free Software Foundation. 9 - * 10 - * This handles the mini data cache, as found on SA11x0 and XScale 11 - * processors. When we copy a user page page, we map it in such a way 12 - * that accesses to this page will not touch the main data cache, but 13 - * will be cached in the mini data cache. This prevents us thrashing 14 - * the main data cache on page faults. 15 - */ 16 - #include <linux/init.h> 17 - #include <linux/mm.h> 18 - 19 - #include <asm/page.h> 20 - #include <asm/pgtable.h> 21 - #include <asm/tlbflush.h> 22 - 23 - /* 24 - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture 25 - * specific hacks for copying pages efficiently. 26 - */ 27 - #define minicache_address (0xffff8000) 28 - #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ 29 - L_PTE_CACHEABLE) 30 - 31 - static pte_t *minicache_pte; 32 - 33 - /* 34 - * Note that this is intended to be called only from the copy_user_page 35 - * asm code; anything else will require special locking to prevent the 36 - * mini-cache space being re-used. (Note: probably preempt unsafe). 37 - * 38 - * We rely on the fact that the minicache is 2K, and we'll be pushing 39 - * 4K of data through it, so we don't actually have to specifically 40 - * flush the minicache when we change the mapping. 41 - * 42 - * Note also: assert(PAGE_OFFSET <= virt < high_memory). 43 - * Unsafe: preempt, kmap. 
44 - */ 45 - unsigned long map_page_minicache(unsigned long virt) 46 - { 47 - set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot)); 48 - flush_tlb_kernel_page(minicache_address); 49 - 50 - return minicache_address; 51 - } 52 - 53 - static int __init minicache_init(void) 54 - { 55 - pgd_t *pgd; 56 - pmd_t *pmd; 57 - 58 - spin_lock(&init_mm.page_table_lock); 59 - 60 - pgd = pgd_offset_k(minicache_address); 61 - pmd = pmd_alloc(&init_mm, pgd, minicache_address); 62 - if (!pmd) 63 - BUG(); 64 - minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address); 65 - if (!minicache_pte) 66 - BUG(); 67 - 68 - spin_unlock(&init_mm.page_table_lock); 69 - 70 - return 0; 71 - } 72 - 73 - core_initcall(minicache_init);