Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, amd: Avoid cache aliasing penalties on AMD family 15h

This patch provides performance tuning for the "Bulldozer" CPU. With its
shared instruction cache there is a chance of generating an excessive
number of cache cross-invalidates when running specific workloads on the
cores of a compute module.

This excessive amount of cross-invalidations can be observed if cache
lines backed by shared physical memory alias in bits [14:12] of their
virtual addresses, as those bits are used for the index generation.

This patch addresses the issue by clearing all the bits in the [14:12]
slice of the file mapping's virtual address at generation time, thus
forcing those bits the same for all mappings of a single shared library
across processes and, in doing so, avoids instruction cache aliases.

It also adds the command line option "align_va_addr=(32|64|on|off)" with
which virtual address alignment can be enabled for 32-bit or 64-bit x86
individually, or both, or be completely disabled.

This change leaves virtual region address allocation on other families
and/or vendors unaffected.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1312550110-24160-2-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

authored by

Borislav Petkov and committed by
H. Peter Anvin
dfb09f9b 13f9a373

+144 -18
+13
Documentation/kernel-parameters.txt
··· 299 299 behaviour to be specified. Bit 0 enables warnings, 300 300 bit 1 enables fixups, and bit 2 sends a segfault. 301 301 302 + align_va_addr= [X86-64] 303 + Align virtual addresses by clearing slice [14:12] when 304 + allocating a VMA at process creation time. This option 305 + gives you up to 3% performance improvement on AMD F15h 306 + machines (where it is enabled by default) for a 307 + CPU-intensive style benchmark, and it can vary highly in 308 + a microbenchmark depending on workload and compiler. 309 + 310 + 32: only for 32-bit processes 311 + 64: only for 64-bit processes 312 + on: enable for both 32- and 64-bit processes 313 + off: disable for both 32- and 64-bit processes 314 + 302 315 amd_iommu= [HW,X86-84] 303 316 Pass parameters to the AMD IOMMU driver in the system. 304 317 Possible values are:
+31
arch/x86/include/asm/elf.h
··· 4 4 /* 5 5 * ELF register definitions.. 6 6 */ 7 + #include <linux/thread_info.h> 7 8 8 9 #include <asm/ptrace.h> 9 10 #include <asm/user.h> ··· 321 320 extern unsigned long arch_randomize_brk(struct mm_struct *mm); 322 321 #define arch_randomize_brk arch_randomize_brk 323 322 323 + /* 324 + * True on X86_32 or when emulating IA32 on X86_64 325 + */ 326 + static inline int mmap_is_ia32(void) 327 + { 328 + #ifdef CONFIG_X86_32 329 + return 1; 330 + #endif 331 + #ifdef CONFIG_IA32_EMULATION 332 + if (test_thread_flag(TIF_IA32)) 333 + return 1; 334 + #endif 335 + return 0; 336 + } 337 + 338 + /* The first two values are special, do not change. See align_addr() */ 339 + enum align_flags { 340 + ALIGN_VA_32 = BIT(0), 341 + ALIGN_VA_64 = BIT(1), 342 + ALIGN_VDSO = BIT(2), 343 + ALIGN_TOPDOWN = BIT(3), 344 + }; 345 + 346 + struct va_alignment { 347 + int flags; 348 + unsigned long mask; 349 + } ____cacheline_aligned; 350 + 351 + extern struct va_alignment va_align; 352 + extern unsigned long align_addr(unsigned long, struct file *, enum align_flags); 324 353 #endif /* _ASM_X86_ELF_H */
+13
arch/x86/kernel/cpu/amd.c
··· 458 458 "with P0 frequency!\n"); 459 459 } 460 460 } 461 + 462 + if (c->x86 == 0x15) { 463 + unsigned long upperbit; 464 + u32 cpuid, assoc; 465 + 466 + cpuid = cpuid_edx(0x80000005); 467 + assoc = cpuid >> 16 & 0xff; 468 + upperbit = ((cpuid >> 24) << 10) / assoc; 469 + 470 + va_align.mask = (upperbit - 1) & PAGE_MASK; 471 + va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; 472 + 473 + } 461 474 } 462 475 463 476 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+78 -3
arch/x86/kernel/sys_x86_64.c
··· 18 18 #include <asm/ia32.h> 19 19 #include <asm/syscalls.h> 20 20 21 + struct va_alignment __read_mostly va_align = { 22 + .flags = -1, 23 + }; 24 + 25 + /* 26 + * Align a virtual address to avoid aliasing in the I$ on AMD F15h. 27 + * 28 + * @flags denotes the allocation direction - bottomup or topdown - 29 + * or vDSO; see call sites below. 30 + */ 31 + unsigned long align_addr(unsigned long addr, struct file *filp, 32 + enum align_flags flags) 33 + { 34 + unsigned long tmp_addr; 35 + 36 + /* handle 32- and 64-bit case with a single conditional */ 37 + if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) 38 + return addr; 39 + 40 + if (!(current->flags & PF_RANDOMIZE)) 41 + return addr; 42 + 43 + if (!((flags & ALIGN_VDSO) || filp)) 44 + return addr; 45 + 46 + tmp_addr = addr; 47 + 48 + /* 49 + * We need an address which is <= than the original 50 + * one only when in topdown direction. 51 + */ 52 + if (!(flags & ALIGN_TOPDOWN)) 53 + tmp_addr += va_align.mask; 54 + 55 + tmp_addr &= ~va_align.mask; 56 + 57 + return tmp_addr; 58 + } 59 + 60 + static int __init control_va_addr_alignment(char *str) 61 + { 62 + /* guard against enabling this on other CPU families */ 63 + if (va_align.flags < 0) 64 + return 1; 65 + 66 + if (*str == 0) 67 + return 1; 68 + 69 + if (*str == '=') 70 + str++; 71 + 72 + if (!strcmp(str, "32")) 73 + va_align.flags = ALIGN_VA_32; 74 + else if (!strcmp(str, "64")) 75 + va_align.flags = ALIGN_VA_64; 76 + else if (!strcmp(str, "off")) 77 + va_align.flags = 0; 78 + else if (!strcmp(str, "on")) 79 + va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; 80 + else 81 + return 0; 82 + 83 + return 1; 84 + } 85 + __setup("align_va_addr", control_va_addr_alignment); 86 + 21 87 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, 22 88 unsigned long, prot, unsigned long, flags, 23 89 unsigned long, fd, unsigned long, off) ··· 158 92 start_addr = addr; 159 93 160 94 full_search: 95 + 96 + addr = align_addr(addr, filp, 0); 97 + 161 98 for 
(vma = find_vma(mm, addr); ; vma = vma->vm_next) { 162 99 /* At this point: (!vma || addr < vma->vm_end). */ 163 100 if (end - len < addr) { ··· 186 117 mm->cached_hole_size = vma->vm_start - addr; 187 118 188 119 addr = vma->vm_end; 120 + addr = align_addr(addr, filp, 0); 189 121 } 190 122 } 191 123 ··· 231 161 232 162 /* make sure it can fit in the remaining address space */ 233 163 if (addr > len) { 234 - vma = find_vma(mm, addr-len); 235 - if (!vma || addr <= vma->vm_start) 164 + unsigned long tmp_addr = align_addr(addr - len, filp, 165 + ALIGN_TOPDOWN); 166 + 167 + vma = find_vma(mm, tmp_addr); 168 + if (!vma || tmp_addr + len <= vma->vm_start) 236 169 /* remember the address as a hint for next time */ 237 - return mm->free_area_cache = addr-len; 170 + return mm->free_area_cache = tmp_addr; 238 171 } 239 172 240 173 if (mm->mmap_base < len) ··· 246 173 addr = mm->mmap_base-len; 247 174 248 175 do { 176 + addr = align_addr(addr, filp, ALIGN_TOPDOWN); 177 + 249 178 /* 250 179 * Lookup failure means no vma is above this address, 251 180 * else if new region fits below vma->vm_start,
-15
arch/x86/mm/mmap.c
··· 51 51 #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) 52 52 #define MAX_GAP (TASK_SIZE/6*5) 53 53 54 - /* 55 - * True on X86_32 or when emulating IA32 on X86_64 56 - */ 57 - static int mmap_is_ia32(void) 58 - { 59 - #ifdef CONFIG_X86_32 60 - return 1; 61 - #endif 62 - #ifdef CONFIG_IA32_EMULATION 63 - if (test_thread_flag(TIF_IA32)) 64 - return 1; 65 - #endif 66 - return 0; 67 - } 68 - 69 54 static int mmap_is_legacy(void) 70 55 { 71 56 if (current->personality & ADDR_COMPAT_LAYOUT)
+9
arch/x86/vdso/vma.c
··· 69 69 addr = start + (offset << PAGE_SHIFT); 70 70 if (addr >= end) 71 71 addr = end; 72 + 73 + /* 74 + * page-align it here so that get_unmapped_area doesn't 75 + * align it wrongfully again to the next page. addr can come in 4K 76 + * unaligned here as a result of stack start randomization. 77 + */ 78 + addr = PAGE_ALIGN(addr); 79 + addr = align_addr(addr, NULL, ALIGN_VDSO); 80 + 72 81 return addr; 73 82 } 74 83