Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: 6396/1: Add SWP/SWPB emulation for ARMv7 processors

The SWP instruction was deprecated in the ARMv6 architecture,
superseded by the LDREX/STREX family of instructions for
load-linked/store-conditional operations. The ARMv7 multiprocessing
extensions mandate that SWP/SWPB instructions are treated as undefined
from reset, with the ability to enable them through the System Control
Register SW bit.

This patch adds the alternative solution to emulate the SWP and SWPB
instructions using LDREX/STREX sequences, and log statistics to
/proc/cpu/swp_emulation. To correctly deal with copy-on-write, it also
modifies cpu_v7_set_pte_ext to change the mappings to privileged RO when
user RO.

Signed-off-by: Leif Lindholm <leif.lindholm@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

authored by

Leif Lindholm and committed by
Russell King
64d2dc38 247055aa

+328
+2
Documentation/arm/00-INDEX
··· 34 34 - description of the virtual memory layout 35 35 nwfpe/ 36 36 - NWFPE floating point emulator documentation 37 + swp_emulation 38 + - SWP/SWPB emulation handler/logging description
+27
Documentation/arm/swp_emulation
··· 1 + Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE) 2 + --------------------------------------------------------------------- 3 + 4 + ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommends 5 + moving to the load-locked/store-conditional instructions LDREX and STREX. 6 + 7 + ARMv7 multiprocessing extensions introduce the ability to disable these 8 + instructions, triggering an undefined instruction exception when executed. 9 + Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB 10 + sequence. If a memory access fault (an abort) occurs, a segmentation fault is 11 + signalled to the triggering process. 12 + 13 + /proc/cpu/swp_emulation holds some statistics/information, including the PID of 14 + the last process to trigger the emulation to be invoked. For example: 15 + --- 16 + Emulated SWP: 12 17 + Emulated SWPB: 0 18 + Aborted SWP{B}: 1 19 + Last process: 314 20 + --- 21 + 22 + NOTE: when accessing uncached shared regions, LDREX/STREX rely on an external 23 + transaction monitoring block called a global monitor to maintain update 24 + atomicity. If your system does not implement a global monitor, this option can 25 + cause programs that perform SWP operations to uncached memory to deadlock, as 26 + the STREX operation will always fail. 27 +
+1
arch/arm/kernel/Makefile
··· 42 42 obj-$(CONFIG_ARM_UNWIND) += unwind.o 43 43 obj-$(CONFIG_HAVE_TCM) += tcm.o 44 44 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 45 + obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o 45 46 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 46 47 47 48 obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o
+267
arch/arm/kernel/swp_emulate.c
··· 1 + /* 2 + * linux/arch/arm/kernel/swp_emulate.c 3 + * 4 + * Copyright (C) 2009 ARM Limited 5 + * __user_* functions adapted from include/asm/uaccess.h 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + * 11 + * Implements emulation of the SWP/SWPB instructions using load-exclusive and 12 + * store-exclusive for processors that have them disabled (or future ones that 13 + * might not implement them). 14 + * 15 + * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>] 16 + * Where: Rt = destination 17 + * Rt2 = source 18 + * Rn = address 19 + */ 20 + 21 + #include <linux/init.h> 22 + #include <linux/kernel.h> 23 + #include <linux/proc_fs.h> 24 + #include <linux/sched.h> 25 + #include <linux/syscalls.h> 26 + #include <linux/perf_event.h> 27 + 28 + #include <asm/traps.h> 29 + #include <asm/uaccess.h> 30 + 31 + /* 32 + * Error-checking SWP macros implemented using ldrex{b}/strex{b} 33 + */ 34 + #define __user_swpX_asm(data, addr, res, temp, B) \ 35 + __asm__ __volatile__( \ 36 + " mov %2, %1\n" \ 37 + "0: ldrex"B" %1, [%3]\n" \ 38 + "1: strex"B" %0, %2, [%3]\n" \ 39 + " cmp %0, #0\n" \ 40 + " movne %0, %4\n" \ 41 + "2:\n" \ 42 + " .section .fixup,\"ax\"\n" \ 43 + " .align 2\n" \ 44 + "3: mov %0, %5\n" \ 45 + " b 2b\n" \ 46 + " .previous\n" \ 47 + " .section __ex_table,\"a\"\n" \ 48 + " .align 3\n" \ 49 + " .long 0b, 3b\n" \ 50 + " .long 1b, 3b\n" \ 51 + " .previous" \ 52 + : "=&r" (res), "+r" (data), "=&r" (temp) \ 53 + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ 54 + : "cc", "memory") 55 + 56 + #define __user_swp_asm(data, addr, res, temp) \ 57 + __user_swpX_asm(data, addr, res, temp, "") 58 + #define __user_swpb_asm(data, addr, res, temp) \ 59 + __user_swpX_asm(data, addr, res, temp, "b") 60 + 61 + /* 62 + * Macros/defines for extracting register numbers from instruction. 
63 + */ 64 + #define EXTRACT_REG_NUM(instruction, offset) \ 65 + (((instruction) & (0xf << (offset))) >> (offset)) 66 + #define RN_OFFSET 16 67 + #define RT_OFFSET 12 68 + #define RT2_OFFSET 0 69 + /* 70 + * Bit 22 of the instruction encoding distinguishes between 71 + * the SWP and SWPB variants (bit set means SWPB). 72 + */ 73 + #define TYPE_SWPB (1 << 22) 74 + 75 + static unsigned long swpcounter; 76 + static unsigned long swpbcounter; 77 + static unsigned long abtcounter; 78 + static pid_t previous_pid; 79 + 80 + #ifdef CONFIG_PROC_FS 81 + static int proc_read_status(char *page, char **start, off_t off, int count, 82 + int *eof, void *data) 83 + { 84 + char *p = page; 85 + int len; 86 + 87 + p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter); 88 + p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter); 89 + p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter); 90 + if (previous_pid != 0) 91 + p += sprintf(p, "Last process:\t\t%d\n", previous_pid); 92 + 93 + len = (p - page) - off; 94 + if (len < 0) 95 + len = 0; 96 + 97 + *eof = (len <= count) ? 1 : 0; 98 + *start = page + off; 99 + 100 + return len; 101 + } 102 + #endif 103 + 104 + /* 105 + * Set up process info to signal segmentation fault - called on access error. 
106 + */ 107 + static void set_segfault(struct pt_regs *regs, unsigned long addr) 108 + { 109 + siginfo_t info; 110 + 111 + if (find_vma(current->mm, addr) == NULL) 112 + info.si_code = SEGV_MAPERR; 113 + else 114 + info.si_code = SEGV_ACCERR; 115 + 116 + info.si_signo = SIGSEGV; 117 + info.si_errno = 0; 118 + info.si_addr = (void *) instruction_pointer(regs); 119 + 120 + pr_debug("SWP{B} emulation: access caused memory abort!\n"); 121 + arm_notify_die("Illegal memory access", regs, &info, 0, 0); 122 + 123 + abtcounter++; 124 + } 125 + 126 + static int emulate_swpX(unsigned int address, unsigned int *data, 127 + unsigned int type) 128 + { 129 + unsigned int res = 0; 130 + 131 + if ((type != TYPE_SWPB) && (address & 0x3)) { 132 + /* SWP to unaligned address not permitted */ 133 + pr_debug("SWP instruction on unaligned pointer!\n"); 134 + return -EFAULT; 135 + } 136 + 137 + while (1) { 138 + unsigned long temp; 139 + 140 + /* 141 + * Barrier required between accessing protected resource and 142 + * releasing a lock for it. Legacy code might not have done 143 + * this, and we cannot determine that this is not the case 144 + * being emulated, so insert always. 145 + */ 146 + smp_mb(); 147 + 148 + if (type == TYPE_SWPB) 149 + __user_swpb_asm(*data, address, res, temp); 150 + else 151 + __user_swp_asm(*data, address, res, temp); 152 + 153 + if (likely(res != -EAGAIN) || signal_pending(current)) 154 + break; 155 + 156 + cond_resched(); 157 + } 158 + 159 + if (res == 0) { 160 + /* 161 + * Barrier also required between aquiring a lock for a 162 + * protected resource and accessing the resource. Inserted for 163 + * same reason as above. 
164 + */ 165 + smp_mb(); 166 + 167 + if (type == TYPE_SWPB) 168 + swpbcounter++; 169 + else 170 + swpcounter++; 171 + } 172 + 173 + return res; 174 + } 175 + 176 + /* 177 + * swp_handler logs the id of calling process, dissects the instruction, sanity 178 + * checks the memory location, calls emulate_swpX for the actual operation and 179 + * deals with fixup/error handling before returning 180 + */ 181 + static int swp_handler(struct pt_regs *regs, unsigned int instr) 182 + { 183 + unsigned int address, destreg, data, type; 184 + unsigned int res = 0; 185 + 186 + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); 187 + 188 + if (current->pid != previous_pid) { 189 + pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", 190 + current->comm, (unsigned long)current->pid); 191 + previous_pid = current->pid; 192 + } 193 + 194 + address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)]; 195 + data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; 196 + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); 197 + 198 + type = instr & TYPE_SWPB; 199 + 200 + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", 201 + EXTRACT_REG_NUM(instr, RN_OFFSET), address, 202 + destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); 203 + 204 + /* Check access in reasonable access range for both SWP and SWPB */ 205 + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { 206 + pr_debug("SWP{B} emulation: access to %p not allowed!\n", 207 + (void *)address); 208 + res = -EFAULT; 209 + } else { 210 + res = emulate_swpX(address, &data, type); 211 + } 212 + 213 + if (res == 0) { 214 + /* 215 + * On successful emulation, revert the adjustment to the PC 216 + * made in kernel/traps.c in order to resume execution at the 217 + * instruction following the SWP{B}. 218 + */ 219 + regs->ARM_pc += 4; 220 + regs->uregs[destreg] = data; 221 + } else if (res == -EFAULT) { 222 + /* 223 + * Memory errors do not mean emulation failed. 
224 + * Set up signal info to return SEGV, then return OK 225 + */ 226 + set_segfault(regs, address); 227 + } 228 + 229 + return 0; 230 + } 231 + 232 + /* 233 + * Only emulate SWP/SWPB executed in ARM state/User mode. 234 + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. 235 + */ 236 + static struct undef_hook swp_hook = { 237 + .instr_mask = 0x0fb00ff0, 238 + .instr_val = 0x01000090, 239 + .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT, 240 + .cpsr_val = USR_MODE, 241 + .fn = swp_handler 242 + }; 243 + 244 + /* 245 + * Register handler and create status file in /proc/cpu 246 + * Invoked as late_initcall, since not needed before init spawned. 247 + */ 248 + static int __init swp_emulation_init(void) 249 + { 250 + #ifdef CONFIG_PROC_FS 251 + struct proc_dir_entry *res; 252 + 253 + res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL); 254 + 255 + if (!res) 256 + return -ENOMEM; 257 + 258 + res->read_proc = proc_read_status; 259 + #endif /* CONFIG_PROC_FS */ 260 + 261 + printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n"); 262 + register_undef_hook(&swp_hook); 263 + 264 + return 0; 265 + } 266 + 267 + late_initcall(swp_emulation_init);
+27
arch/arm/mm/Kconfig
··· 636 636 Say Y here if you have a CPU with the ThumbEE extension and code to 637 637 make use of it. Say N for code that can run on CPUs without ThumbEE. 638 638 639 + config SWP_EMULATE 640 + bool "Emulate SWP/SWPB instructions" 641 + depends on CPU_V7 642 + select HAVE_PROC_CPU if PROC_FS 643 + default y if SMP 644 + help 645 + ARMv6 architecture deprecates use of the SWP/SWPB instructions. 646 + ARMv7 multiprocessing extensions introduce the ability to disable 647 + these instructions, triggering an undefined instruction exception 648 + when executed. Say Y here to enable software emulation of these 649 + instructions for userspace (not kernel) using LDREX/STREX. 650 + Also creates /proc/cpu/swp_emulation for statistics. 651 + 652 + In some older versions of glibc [<=2.8] SWP is used during futex 653 + trylock() operations with the assumption that the code will not 654 + be preempted. This invalid assumption may be more likely to fail 655 + with SWP emulation enabled, leading to deadlock of the user 656 + application. 657 + 658 + NOTE: when accessing uncached shared regions, LDREX/STREX rely 659 + on an external transaction monitoring block called a global 660 + monitor to maintain update atomicity. If your system does not 661 + implement a global monitor, this option can cause programs that 662 + perform SWP operations to uncached memory to deadlock. 663 + 664 + If unsure, say Y. 665 + 639 666 config CPU_BIG_ENDIAN 640 667 bool "Build big-endian kernel" 641 668 depends on ARCH_SUPPORTS_BIG_ENDIAN
+4
arch/arm/mm/proc-v7.S
··· 314 314 #ifdef CONFIG_CPU_ENDIAN_BE8 315 315 orr r6, r6, #1 << 25 @ big-endian page tables 316 316 #endif 317 + #ifdef CONFIG_SWP_EMULATE 318 + orr r5, r5, #(1 << 10) @ set SW bit in "clear" 319 + bic r6, r6, #(1 << 10) @ clear it in "mmuset" 320 + #endif 317 321 mrc p15, 0, r0, c1, c0, 0 @ read control register 318 322 bic r0, r0, r5 @ clear bits them 319 323 orr r0, r0, r6 @ set them