arch/tile: Various cleanups.

This change rolls up assorted minor cleanups, none of which represent actual bugs.

- Remove a stale CONFIG_ value from the default tile_defconfig
- Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h>
- Optimize get_order() using Tile's "clz" instruction
- Fix a bad hypervisor upcall name (not currently used in Linux anyway)
- Use __copy_in_user_inatomic() name for consistency, and export it
- Export some additional hypervisor driver I/O upcalls and some homecache calls
- Remove the obfuscating MEMCPY_TEST_WH64 support code
- Other stray comment cleanups, #if 0 removal, etc.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

11 files changed, 40 insertions(+), 92 deletions(-)
arch/tile/configs/tile_defconfig (-1)

@@ -231,7 +231,6 @@
 CONFIG_MEMPROF=y
 CONFIG_XGBE=y
 CONFIG_NET_TILE=y
-CONFIG_PSEUDO_NAPI=y
 CONFIG_TILEPCI_ENDP=y
 CONFIG_TILEPCI_HOST_SUBSET=m
 CONFIG_TILE_IDE_GPIO=y

arch/tile/include/asm/atomic_32.h (-37)

@@ -255,43 +255,6 @@
 #define smp_mb__after_atomic_dec() do { } while (0)
 #define smp_mb__after_atomic_inc() do { } while (0)
 
-
-/*
- * Support "tns" atomic integers. These are atomic integers that can
- * hold any value but "1". They are more efficient than regular atomic
- * operations because the "lock" (aka acquire) step is a single "tns"
- * in the uncontended case, and the "unlock" (aka release) step is a
- * single "store" without an mf. (However, note that on tilepro the
- * "tns" will evict the local cache line, so it's not all upside.)
- *
- * Note that you can ONLY observe the value stored in the pointer
- * using these operations; a direct read of the value may confusingly
- * return the special value "1".
- */
-
-int __tns_atomic_acquire(atomic_t *);
-void __tns_atomic_release(atomic_t *p, int v);
-
-static inline void tns_atomic_set(atomic_t *v, int i)
-{
-	__tns_atomic_acquire(v);
-	__tns_atomic_release(v, i);
-}
-
-static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, (ret == o) ? n : ret);
-	return ret;
-}
-
-static inline int tns_atomic_xchg(atomic_t *v, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, n);
-	return ret;
-}
-
 #endif /* !__ASSEMBLY__ */
 
 /*

arch/tile/include/asm/page.h (+5 -1)

@@ -129,6 +129,11 @@
 
 #endif
 
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -337,7 +342,6 @@
 	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/getorder.h>
 
 #endif /* __KERNEL__ */
 

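As an aside, the clz form computes the same result as the shift loop in <asm-generic/getorder.h>: for any size larger than one page, the order is one more than the position of the highest set bit of (size - 1) >> PAGE_SHIFT, which a single count-leading-zeros instruction yields directly. The stand-alone sketch below (not part of the patch; the PAGE_SHIFT value of 16 and the helper names are assumptions for illustration) compares the two forms on a host compiler. Note that for sizes of one page or less the clz argument is zero, which the tile hardware defines but __builtin_clzl() does not guarantee portably, so the check skips that case.

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define PAGE_SHIFT	16			/* assumed: 64KB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)

/* The clz-based formula added to <asm/page.h> above. */
static int get_order_clz(unsigned long size)
{
	return (int)BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
}

/* Reference loop, equivalent to the <asm-generic/getorder.h> version. */
static int get_order_loop(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		size >>= 1;
		order++;
	}
	return order;
}

int main(void)
{
	unsigned long size;

	/* Sizes of at most one page would pass 0 to clz; skip them here. */
	for (size = PAGE_SIZE + 1; size < 64 * PAGE_SIZE; size += 4093)
		assert(get_order_clz(size) == get_order_loop(size));

	printf("get_order(3 pages) = %d\n", get_order_clz(3 * PAGE_SIZE));
	return 0;
}
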
arch/tile/include/asm/uaccess.h (+2 -2)

@@ -389,14 +389,14 @@
  * Returns number of bytes that could not be copied.
  * On success, this will be zero.
  */
-extern unsigned long __copy_in_user_asm(
+extern unsigned long __copy_in_user_inatomic(
 	void __user *to, const void __user *from, unsigned long n);
 
 static inline unsigned long __must_check
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
-	return __copy_in_user_asm(to, from, n);
+	return __copy_in_user_inatomic(to, from, n);
 }
 
 static inline unsigned long __must_check

arch/tile/include/hv/hypervisor.h (+4 -4)

@@ -532,11 +532,11 @@
  */
 void hv_clear_intr(HV_IntrMask clear_mask);
 
-/** Assert a set of device interrupts.
+/** Raise a set of device interrupts.
  *
- * @param assert_mask Bitmap of interrupts to clear.
+ * @param raise_mask Bitmap of interrupts to raise.
  */
-void hv_assert_intr(HV_IntrMask assert_mask);
+void hv_raise_intr(HV_IntrMask raise_mask);
 
 /** Trigger a one-shot interrupt on some tile
  *
@@ -1712,7 +1712,7 @@
  * @param cache_control This argument allows you to specify a length of
  * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
  * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
- * You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache.
+ * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
  * HV_FLUSH_ALL flushes all caches.
  * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
  * tile indices to perform cache flush on. The low bit of the first

arch/tile/lib/Makefile (+3 -1)

@@ -7,7 +7,9 @@
 	memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
-ifneq ($(CONFIG_TILEGX),y)
+ifeq ($(CONFIG_TILEGX),y)
+lib-y += memcpy_user_64.o
+else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
 endif
 

arch/tile/lib/exports.c (+11 -5)

@@ -36,21 +36,29 @@
 EXPORT_SYMBOL(current_text_addr);
 EXPORT_SYMBOL(dump_stack);
 
-/* arch/tile/lib/__memcpy.S */
-/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
+/* arch/tile/lib/, various memcpy files */
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__copy_to_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_inatomic);
 EXPORT_SYMBOL(__copy_from_user_zeroing);
+#ifdef __tilegx__
+EXPORT_SYMBOL(__copy_in_user_inatomic);
+#endif
 
 /* hypervisor glue */
 #include <hv/hypervisor.h>
 EXPORT_SYMBOL(hv_dev_open);
 EXPORT_SYMBOL(hv_dev_pread);
 EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_preada);
+EXPORT_SYMBOL(hv_dev_pwritea);
+EXPORT_SYMBOL(hv_dev_poll);
+EXPORT_SYMBOL(hv_dev_poll_cancel);
 EXPORT_SYMBOL(hv_dev_close);
+EXPORT_SYMBOL(hv_sysconf);
+EXPORT_SYMBOL(hv_confstr);
 
-/* -ltile-cc */
+/* libgcc.a */
 uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
 EXPORT_SYMBOL(__udivsi3);
 int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -78,8 +86,6 @@
 #ifndef __tilegx__
 uint64_t __ll_mul(uint64_t n0, uint64_t n1);
 EXPORT_SYMBOL(__ll_mul);
-#endif
-#ifndef __tilegx__
 int64_t __muldi3(int64_t, int64_t);
 EXPORT_SYMBOL(__muldi3);
 uint64_t __lshrdi3(uint64_t, unsigned int);

arch/tile/lib/memcpy_32.S (+4 -16)

@@ -17,10 +17,6 @@
 
 #include <arch/chip.h>
 
-#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
-#define MEMCPY_USE_WH64
-#endif
-
 
 #include <linux/linkage.h>
 
@@ -156,7 +152,7 @@
 
 	{ addi r3, r1, 60; andi r9, r9, -64 }
 
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
 	/* No need to prefetch dst, we'll just do the wh64
 	 * right before we copy a line.
 	 */
@@ -169,7 +165,7 @@
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
 EX:	{ lw r7, r3; addi r3, r3, 64 }
-#ifndef MEMCPY_USE_WH64
+#if !CHIP_HAS_WH64()
 	/* Prefetch the dest */
 	/* Intentionally stall for a few cycles to leave L2 cache alone. */
 	{ bnzt zero, . }
@@ -284,15 +280,7 @@
 	/* Fill second L1D line. */
 EX:	{ lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
-#ifdef MEMCPY_TEST_WH64
-	/* Issue a fake wh64 that clobbers the destination words
-	 * with random garbage, for testing.
-	 */
-	{ movei r19, 64; crc32_32 r10, r2, r9 }
-.Lwh64_test_loop:
-EX:	{ sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
-	{ bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
-#elif CHIP_HAS_WH64()
+#if CHIP_HAS_WH64()
 	/* Prepare destination line for writing. */
 EX:	{ wh64 r9; addi r9, r9, 64 }
 #else
@@ -328,7 +316,7 @@
 EX:	{ sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
 EX:	{ sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
 EX:	{ sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
 EX:	{ sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
 #else
 	/* Back up the r9 to a cache line we are already storing to

arch/tile/lib/memset_32.c (-25)

@@ -141,7 +141,6 @@
 		 */
 		__insn_prefetch(&out32[ahead32]);
 
-#if 1
 #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
 #error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
 #endif
@@ -156,31 +155,7 @@
 			*out32++ = v32;
 			*out32++ = v32;
 			*out32++ = v32;
 		}
-#else
-		/* Unfortunately, due to a code generator flaw this
-		 * allocates a separate register for each of these
-		 * stores, which requires a large number of spills,
-		 * which makes this procedure enormously bigger
-		 * (something like 70%)
-		 */
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		*out32++ = v32;
-		n32 -= 16;
-#endif
 
 		/* To save compiled code size, reuse this loop even
 		 * when we run out of prefetching to do by dropping

arch/tile/mm/fault.c (+8)

@@ -567,6 +567,14 @@
  * since that might indicate we have not yet squirreled the SPR
  * contents away and can thus safely take a recursive interrupt.
  * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
+ *
+ * Note that this routine is called before homecache_tlb_defer_enter(),
+ * which means that we can properly unlock any atomics that might
+ * be used there (good), but also means we must be very sensitive
+ * to not touch any data structures that might be located in memory
+ * that could migrate, as we could be entering the kernel on a dataplane
+ * cpu that has been deferring kernel TLB updates. This means, for
+ * example, that we can't migrate init_mm or its pgd.
  */
 struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 				       unsigned long address,

arch/tile/mm/homecache.c (+3)

@@ -29,6 +29,7 @@
 #include <linux/timex.h>
 #include <linux/cache.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -349,6 +350,7 @@
 
 	return pte;
 }
+EXPORT_SYMBOL(pte_set_home);
 
 /*
  * The routines in this section are the "static" versions of the normal
@@ -405,6 +407,7 @@
 	homecache_change_page_home(page, order, home);
 	return page;
 }
+EXPORT_SYMBOL(homecache_alloc_pages);
 
 struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
 					unsigned int order, int home)