Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

tile: make __write_once a synonym for __read_mostly

This was really only useful for TILE64 when we mapped the
kernel data with small pages. Now we use a huge page and we
really don't want to map different parts of the kernel
data in different ways.

We retain the __write_once name in case we want to bring
it back to life at some point in the future.

Note that this change uncovered a latent bug: the "smp_topology"
variable happened to always be aligned mod 8, which let us store its
two "int" fields with a single store, but once __write_once was
eliminated it ended up aligned only mod 4. Fix with an explicit
alignment annotation.
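
To make the hazard concrete, here is a minimal sketch, assuming a
hypothetical two-int layout (the real HV_Topology comes from the
Tilera hypervisor headers, and the real store is done in assembly in
head_NN.S):

#include <stdint.h>

/*
 * Hypothetical stand-in for HV_Topology: two adjacent "int" fields
 * written together by one 8-byte store.  With only the default 4-byte
 * alignment of an int pair, that store could be misaligned; forcing
 * alignment to sizeof(long) restores the mod-8 guarantee.
 */
struct topology {
	int width;
	int height;
} __attribute__((aligned(sizeof(long))));

/* C rendering of the single store (assumes 64-bit little-endian). */
static void store_width_height(struct topology *t, int w, int h)
{
	*(uint64_t *)t = ((uint64_t)(uint32_t)h << 32) | (uint32_t)w;
}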

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

+17 -27
+10 -3
arch/tile/include/asm/cache.h
@@ -49,9 +49,16 @@
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 /*
- * Attribute for data that is kept read/write coherent until the end of
- * initialization, then bumped to read/only incoherent for performance.
+ * Originally we used small TLB pages for kernel data and grouped some
+ * things together as "write once", enforcing the property at the end
+ * of initialization by making those pages read-only and non-coherent.
+ * This allowed better cache utilization since cache inclusion did not
+ * need to be maintained. However, to do this requires an extra TLB
+ * entry, which on balance is more of a performance hit than the
+ * non-coherence is a performance gain, so we now just make "read
+ * mostly" and "write once" be synonyms. We keep the attribute
+ * separate in case we change our minds at a future date.
  */
-#define __write_once __attribute__((__section__(".w1data")))
+#define __write_once __read_mostly
 
 #endif /* _ASM_TILE_CACHE_H */
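
For illustration, after this change a variable marked __write_once
simply lands in .data..read_mostly; the declaration below is a
hypothetical example, not from the patch:

/* Hypothetical: with __write_once now defined as __read_mostly, both
 * annotations place the variable in the .data..read_mostly section. */
static int boot_args_valid __write_once;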
+5 -1
arch/tile/kernel/smp.c
@@ -22,7 +22,11 @@
 #include <asm/cacheflush.h>
 #include <asm/homecache.h>
 
-HV_Topology smp_topology __write_once;
+/*
+ * We write to width and height with a single store in head_NN.S,
+ * so make the variable aligned to "long".
+ */
+HV_Topology smp_topology __write_once __aligned(sizeof(long));
 EXPORT_SYMBOL(smp_topology);
 
 #if CHIP_HAS_IPI()
-12
arch/tile/kernel/vmlinux.lds.S
@@ -74,20 +74,8 @@
   __init_end = .;
 
   _sdata = .;                   /* Start of data section */
-
   RO_DATA_SECTION(PAGE_SIZE)
-
-  /* initially writeable, then read-only */
-  . = ALIGN(PAGE_SIZE);
-  __w1data_begin = .;
-  .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
-    VMLINUX_SYMBOL(__w1data_begin) = .;
-    *(.w1data)
-    VMLINUX_SYMBOL(__w1data_end) = .;
-  }
-
   RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-
   _edata = .;
 
   EXCEPTION_TABLE(L2_CACHE_BYTES)
+2 -11
arch/tile/mm/init.c
@@ -271,21 +271,13 @@
 		return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
 
 	/*
-	 * Make the w1data homed like heap to start with, to avoid
-	 * making it part of the page-striped data area when we're just
-	 * going to convert it to read-only soon anyway.
-	 */
-	if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end)
-		return construct_pgprot(PAGE_KERNEL, initial_heap_home());
-
-	/*
 	 * Otherwise we just hand out consecutive cpus. To avoid
 	 * requiring this function to hold state, we just walk forward from
 	 * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
 	 * the requested address, while walking cpu home around kdata_mask.
 	 * This is typically no more than a dozen or so iterations.
 	 */
-	page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK;
+	page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK;
 	BUG_ON(address < page || address >= (ulong)_end);
 	cpu = cpumask_first(&kdata_mask);
 	for (; page < address; page += PAGE_SIZE) {
@@ -972,8 +980,7 @@
 	const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET;
 
 	/*
-	 * Evict the dirty initdata on the boot cpu, evict the w1data
-	 * wherever it's homed, and evict all the init code everywhere.
+	 * Evict the cache on all cores to avoid incoherence.
 	 * We are guaranteed that no one will touch the init pages any more.
 	 */
 	homecache_evict(&cpu_cacheable_map);