Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull second set of arm64 updates from Catalin Marinas:
"A second pull request for this merging window, mainly with fixes and
docs clarification:

- Documentation clarification on CPU topology and booting
requirements
- Additional cache flushing during boot (needed in the presence of
external caches or under virtualisation)
- DMA range invalidation fix for non-cache-line-aligned buffers
- Build failure fix with !COMPAT
- Kconfig update for STRICT_DEVMEM"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
arm64: Fix DMA range invalidation for cache line unaligned buffers
arm64: Add missing Kconfig for CONFIG_STRICT_DEVMEM
arm64: fix !CONFIG_COMPAT build failures
Revert "arm64: virt: ensure visibility of __boot_cpu_mode"
arm64: Relax the kernel cache requirements for boot
arm64: Update the TCR_EL1 translation granule definitions for 16K pages
ARM: topology: Make it clear that all CPUs need to be described

+99 -36
+8 -2
Documentation/arm64/booting.txt
··· 111 111 - Caches, MMUs 112 112 The MMU must be off. 113 113 Instruction cache may be on or off. 114 - Data cache must be off and invalidated. 115 - External caches (if present) must be configured and disabled. 114 + The address range corresponding to the loaded kernel image must be 115 + cleaned to the PoC. In the presence of a system cache or other 116 + coherent masters with caches enabled, this will typically require 117 + cache maintenance by VA rather than set/way operations. 118 + System caches which respect the architected cache maintenance by VA 119 + operations must be configured and may be enabled. 120 + System caches which do not respect architected cache maintenance by VA 121 + operations (not recommended) must be configured and disabled. 116 122 117 123 - Architected timers 118 124 CNTFRQ must be programmed with the timer frequency and CNTVOFF must
+4 -3
Documentation/devicetree/bindings/arm/topology.txt
··· 75 75 76 76 whose bindings are described in paragraph 3. 77 77 78 - The nodes describing the CPU topology (cluster/core/thread) can only be 79 - defined within the cpu-map node. 80 - Any other configuration is consider invalid and therefore must be ignored. 78 + The nodes describing the CPU topology (cluster/core/thread) can only 79 + be defined within the cpu-map node and every core/thread in the system 80 + must be defined within the topology. Any other configuration is 81 + invalid and therefore must be ignored. 81 82 82 83 =========================================== 83 84 2.1 - cpu-map child nodes naming convention
+14
arch/arm64/Kconfig.debug
··· 6 6 bool 7 7 default y 8 8 9 + config STRICT_DEVMEM 10 + bool "Filter access to /dev/mem" 11 + depends on MMU 12 + help 13 + If this option is disabled, you allow userspace (root) access to all 14 + of memory, including kernel and userspace memory. Accidental 15 + access to this is obviously disastrous, but specific access can 16 + be used by people debugging the kernel. 17 + 18 + If this option is switched on, the /dev/mem file only allows 19 + userspace access to memory mapped peripherals. 20 + 21 + If in doubt, say Y. 22 + 9 23 config EARLY_PRINTK 10 24 bool "Early printk support" 11 25 default y
+5 -1
arch/arm64/include/asm/pgtable-hwdef.h
··· 120 120 #define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) 121 121 #define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) 122 122 #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) 123 + #define TCR_TG0_4K (UL(0) << 14) 123 124 #define TCR_TG0_64K (UL(1) << 14) 124 - #define TCR_TG1_64K (UL(1) << 30) 125 + #define TCR_TG0_16K (UL(2) << 14) 126 + #define TCR_TG1_16K (UL(1) << 30) 127 + #define TCR_TG1_4K (UL(2) << 30) 128 + #define TCR_TG1_64K (UL(3) << 30) 125 129 #define TCR_ASID16 (UL(1) << 36) 126 130 #define TCR_TBI0 (UL(1) << 37) 127 131
-13
arch/arm64/include/asm/virt.h
··· 22 22 #define BOOT_CPU_MODE_EL2 (0xe12) 23 23 24 24 #ifndef __ASSEMBLY__ 25 - #include <asm/cacheflush.h> 26 25 27 26 /* 28 27 * __boot_cpu_mode records what mode CPUs were booted in. ··· 37 38 void __hyp_set_vectors(phys_addr_t phys_vector_base); 38 39 phys_addr_t __hyp_get_vectors(void); 39 40 40 - static inline void sync_boot_mode(void) 41 - { 42 - /* 43 - * As secondaries write to __boot_cpu_mode with caches disabled, we 44 - * must flush the corresponding cache entries to ensure the visibility 45 - * of their writes. 46 - */ 47 - __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); 48 - } 49 - 50 41 /* Reports the availability of HYP mode */ 51 42 static inline bool is_hyp_mode_available(void) 52 43 { 53 - sync_boot_mode(); 54 44 return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && 55 45 __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); 56 46 } ··· 47 59 /* Check if the bootloader has booted CPUs in different modes */ 48 60 static inline bool is_hyp_mode_mismatched(void) 49 61 { 50 - sync_boot_mode(); 51 62 return __boot_cpu_mode[0] != __boot_cpu_mode[1]; 52 63 } 53 64
+28 -2
arch/arm64/kernel/head.S
··· 26 26 #include <asm/assembler.h> 27 27 #include <asm/ptrace.h> 28 28 #include <asm/asm-offsets.h> 29 + #include <asm/cache.h> 29 30 #include <asm/cputype.h> 30 31 #include <asm/memory.h> 31 32 #include <asm/thread_info.h> ··· 230 229 cmp w20, #BOOT_CPU_MODE_EL2 231 230 b.ne 1f 232 231 add x1, x1, #4 233 - 1: str w20, [x1] // This CPU has booted in EL1 232 + 1: dc cvac, x1 // Clean potentially dirty cache line 233 + dsb sy 234 + str w20, [x1] // This CPU has booted in EL1 235 + dc civac, x1 // Clean&invalidate potentially stale cache line 236 + dsb sy 234 237 ret 235 238 ENDPROC(set_cpu_boot_mode_flag) 236 239 ··· 245 240 * This is not in .bss, because we set it sufficiently early that the boot-time 246 241 * zeroing of .bss would clobber it. 247 242 */ 248 - .pushsection .data 243 + .pushsection .data..cacheline_aligned 249 244 ENTRY(__boot_cpu_mode) 245 + .align L1_CACHE_SHIFT 250 246 .long BOOT_CPU_MODE_EL2 251 247 .long 0 252 248 .popsection ··· 414 408 */ 415 409 __create_page_tables: 416 410 pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses 411 + mov x27, lr 412 + 413 + /* 414 + * Invalidate the idmap and swapper page tables to avoid potential 415 + * dirty cache lines being evicted. 416 + */ 417 + mov x0, x25 418 + add x1, x26, #SWAPPER_DIR_SIZE 419 + bl __inval_cache_range 417 420 418 421 /* 419 422 * Clear the idmap and swapper page tables. ··· 482 467 ldr x5, =FIXADDR_TOP // Fixed mapping virtual address 483 468 add x0, x26, #2 * PAGE_SIZE // section table address 484 469 create_pgd_entry x26, x0, x5, x6, x7 470 + 471 + /* 472 + * Since the page tables have been populated with non-cacheable 473 + * accesses (MMU disabled), invalidate the idmap and swapper page 474 + * tables again to remove any speculatively loaded cache lines. 475 + */ 476 + mov x0, x25 477 + add x1, x26, #SWAPPER_DIR_SIZE 478 + bl __inval_cache_range 479 + 480 + mov lr, x27 485 481 ret 486 482 ENDPROC(__create_page_tables) 487 483 .ltorg
+4
arch/arm64/kernel/perf_event.c
··· 1386 1386 return buftail.fp; 1387 1387 } 1388 1388 1389 + #ifdef CONFIG_COMPAT 1389 1390 /* 1390 1391 * The registers we're interested in are at the end of the variable 1391 1392 * length saved register structure. The fp points at the end of this ··· 1431 1430 1432 1431 return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; 1433 1432 } 1433 + #endif /* CONFIG_COMPAT */ 1434 1434 1435 1435 void perf_callchain_user(struct perf_callchain_entry *entry, 1436 1436 struct pt_regs *regs) ··· 1453 1451 tail && !((unsigned long)tail & 0xf)) 1454 1452 tail = user_backtrace(tail, entry); 1455 1453 } else { 1454 + #ifdef CONFIG_COMPAT 1456 1455 /* AARCH32 compat mode */ 1457 1456 struct compat_frame_tail __user *tail; 1458 1457 ··· 1462 1459 while ((entry->nr < PERF_MAX_STACK_DEPTH) && 1463 1460 tail && !((unsigned long)tail & 0x3)) 1464 1461 tail = compat_user_backtrace(tail, entry); 1462 + #endif 1465 1463 } 1466 1464 } 1467 1465
+2
arch/arm64/kernel/perf_regs.c
··· 2 2 #include <linux/kernel.h> 3 3 #include <linux/perf_event.h> 4 4 #include <linux/bug.h> 5 + 6 + #include <asm/compat.h> 5 7 #include <asm/perf_regs.h> 6 8 #include <asm/ptrace.h> 7 9
+20 -4
arch/arm64/mm/cache.S
··· 168 168 ENDPROC(__flush_dcache_area) 169 169 170 170 /* 171 + * __inval_cache_range(start, end) 172 + * - start - start address of region 173 + * - end - end address of region 174 + */ 175 + ENTRY(__inval_cache_range) 176 + /* FALLTHROUGH */ 177 + 178 + /* 171 179 * __dma_inv_range(start, end) 172 180 * - start - virtual start address of region 173 181 * - end - virtual end address of region ··· 183 175 __dma_inv_range: 184 176 dcache_line_size x2, x3 185 177 sub x3, x2, #1 186 - bic x0, x0, x3 178 + tst x1, x3 // end cache line aligned? 187 179 bic x1, x1, x3 188 - 1: dc ivac, x0 // invalidate D / U line 189 - add x0, x0, x2 180 + b.eq 1f 181 + dc civac, x1 // clean & invalidate D / U line 182 + 1: tst x0, x3 // start cache line aligned? 183 + bic x0, x0, x3 184 + b.eq 2f 185 + dc civac, x0 // clean & invalidate D / U line 186 + b 3f 187 + 2: dc ivac, x0 // invalidate D / U line 188 + 3: add x0, x0, x2 190 189 cmp x0, x1 191 - b.lo 1b 190 + b.lo 2b 192 191 dsb sy 193 192 ret 193 + ENDPROC(__inval_cache_range) 194 194 ENDPROC(__dma_inv_range) 195 195 196 196 /*
+14 -11
arch/arm64/mm/proc.S
··· 28 28 29 29 #include "proc-macros.S" 30 30 31 - #ifndef CONFIG_SMP 32 - /* PTWs cacheable, inner/outer WBWA not shareable */ 33 - #define TCR_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA 31 + #ifdef CONFIG_ARM64_64K_PAGES 32 + #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K 34 33 #else 35 - /* PTWs cacheable, inner/outer WBWA shareable */ 36 - #define TCR_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED 34 + #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K 37 35 #endif 36 + 37 + #ifdef CONFIG_SMP 38 + #define TCR_SMP_FLAGS TCR_SHARED 39 + #else 40 + #define TCR_SMP_FLAGS 0 41 + #endif 42 + 43 + /* PTWs cacheable, inner/outer WBWA */ 44 + #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA 38 45 39 46 #define MAIR(attr, mt) ((attr) << ((mt) * 8)) 40 47 ··· 216 209 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for 217 210 * both user and kernel. 218 211 */ 219 - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \ 220 - TCR_ASID16 | TCR_TBI0 | (1 << 31) 212 + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ 213 + TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 221 214 /* 222 215 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in 223 216 * TCR_EL1. 224 217 */ 225 218 mrs x9, ID_AA64MMFR0_EL1 226 219 bfi x10, x9, #32, #3 227 - #ifdef CONFIG_ARM64_64K_PAGES 228 - orr x10, x10, TCR_TG0_64K 229 - orr x10, x10, TCR_TG1_64K 230 - #endif 231 220 msr tcr_el1, x10 232 221 ret // return to head.S 233 222 ENDPROC(__cpu_setup)