Merge tag 'loongarch-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:
- Select HAVE_CMPXCHG_{LOCAL,DOUBLE}
- Add 128-bit atomic cmpxchg support
- Add HOTPLUG_SMT implementation
- Wire up memfd_secret system call
- Fix boot errors and unwind errors for KASAN
- Use BPF prog pack allocator and add BPF arena support
- Update dts files to add nand controllers
- Some bug fixes and other small changes

* tag 'loongarch-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
LoongArch: dts: loongson-2k1000: Add nand controller support
LoongArch: dts: loongson-2k0500: Add nand controller support
LoongArch: BPF: Implement bpf_addr_space_cast instruction
LoongArch: BPF: Implement PROBE_MEM32 pseudo instructions
LoongArch: BPF: Use BPF prog pack allocator
LoongArch: Use IS_ERR_PCPU() macro for KGDB
LoongArch: Rework KASAN initialization for PTW-enabled systems
LoongArch: Disable instrumentation for setup_ptwalker()
LoongArch: Remove some extern variables in source files
LoongArch: Guard percpu handler under !CONFIG_PREEMPT_RT
LoongArch: Handle percpu handler address for ORC unwinder
LoongArch: Use %px to print unmodified unwinding address
LoongArch: Prefer top-down allocation after arch_mem_init()
LoongArch: Add HOTPLUG_SMT implementation
LoongArch: Make cpumask_of_node() robust against NUMA_NO_NODE
LoongArch: Wire up memfd_secret system call
LoongArch: Replace seq_printf() with seq_puts() for simple strings
LoongArch: Add 128-bit atomic cmpxchg support
LoongArch: Add detection for SC.Q support
LoongArch: Select HAVE_CMPXCHG_LOCAL in Kconfig

+450 -153
+1 -1
Documentation/admin-guide/kernel-parameters.txt
··· 4661 4661 nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT). 4662 4662 Equivalent to smt=1. 4663 4663 4664 - [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT). 4664 + [KNL,LOONGARCH,X86,PPC,S390] Disable symmetric multithreading (SMT). 4665 4665 nosmt=force: Force disable SMT, cannot be undone 4666 4666 via the sysfs control file. 4667 4667
+4
arch/loongarch/Kconfig
··· 114 114 select GENERIC_TIME_VSYSCALL 115 115 select GPIOLIB 116 116 select HAS_IOPORT 117 + select HAVE_ALIGNED_STRUCT_PAGE 117 118 select HAVE_ARCH_AUDITSYSCALL 118 119 select HAVE_ARCH_BITREVERSE 119 120 select HAVE_ARCH_JUMP_LABEL ··· 131 130 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 132 131 select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD 133 132 select HAVE_ASM_MODVERSIONS 133 + select HAVE_CMPXCHG_DOUBLE 134 + select HAVE_CMPXCHG_LOCAL 134 135 select HAVE_CONTEXT_TRACKING_USER 135 136 select HAVE_C_RECORDMCOUNT 136 137 select HAVE_DEBUG_KMEMLEAK ··· 186 183 select HAVE_SYSCALL_TRACEPOINTS 187 184 select HAVE_TIF_NOHZ 188 185 select HAVE_VIRT_CPU_ACCOUNTING_GEN 186 + select HOTPLUG_SMT if HOTPLUG_CPU 189 187 select IRQ_FORCED_THREADING 190 188 select IRQ_LOONGARCH_CPU 191 189 select LOCK_MM_AND_FIND_VMA
+19
arch/loongarch/boot/dts/loongson-2k0500-ref.dts
··· 41 41 }; 42 42 }; 43 43 44 + &apbdma0 { 45 + status = "okay"; 46 + }; 47 + 48 + &nand { 49 + status = "okay"; 50 + 51 + #address-cells = <1>; 52 + #size-cells = <0>; 53 + nand@0 { 54 + reg = <0>; 55 + label = "ls2k0500-nand"; 56 + nand-use-soft-ecc-engine; 57 + nand-ecc-algo = "bch"; 58 + nand-ecc-strength = <8>; 59 + nand-ecc-step-size = <512>; 60 + }; 61 + }; 62 + 44 63 &apbdma3 { 45 64 status = "okay"; 46 65 };
+11 -1
arch/loongarch/boot/dts/loongson-2k0500.dtsi
··· 84 84 clock-names = "ref_100m"; 85 85 }; 86 86 87 - dma-controller@1fe10c00 { 87 + apbdma0: dma-controller@1fe10c00 { 88 88 compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; 89 89 reg = <0 0x1fe10c00 0 0x8>; 90 90 interrupt-parent = <&eiointc>; ··· 170 170 #interrupt-cells = <1>; 171 171 interrupt-parent = <&cpuintc>; 172 172 interrupts = <3>; 173 + }; 174 + 175 + nand: nand-controller@1ff58000 { 176 + compatible = "loongson,ls2k0500-nand-controller"; 177 + reg = <0 0x1ff58000 0 0x24>, 178 + <0 0x1ff58040 0 0x4>; 179 + reg-names = "nand", "nand-dma"; 180 + dmas = <&apbdma0 0>; 181 + dma-names = "rxtx"; 182 + status = "disabled"; 173 183 }; 174 184 175 185 pwm@1ff5c000 {
+22
arch/loongarch/boot/dts/loongson-2k1000-ref.dts
··· 48 48 }; 49 49 }; 50 50 51 + &apbdma0 { 52 + status = "okay"; 53 + }; 54 + 55 + &nand { 56 + status = "okay"; 57 + 58 + pinctrl-0 = <&nand_pins_default>; 59 + pinctrl-names = "default"; 60 + 61 + #address-cells = <1>; 62 + #size-cells = <0>; 63 + nand@0 { 64 + reg = <0>; 65 + label = "ls2k1000-nand"; 66 + nand-use-soft-ecc-engine; 67 + nand-ecc-algo = "bch"; 68 + nand-ecc-strength = <8>; 69 + nand-ecc-step-size = <512>; 70 + }; 71 + }; 72 + 51 73 &apbdma1 { 52 74 status = "okay"; 53 75 };
+12 -1
arch/loongarch/boot/dts/loongson-2k1000.dtsi
··· 248 248 #thermal-sensor-cells = <1>; 249 249 }; 250 250 251 - dma-controller@1fe00c00 { 251 + apbdma0: dma-controller@1fe00c00 { 252 252 compatible = "loongson,ls2k1000-apbdma"; 253 253 reg = <0x0 0x1fe00c00 0x0 0x8>; 254 254 interrupt-parent = <&liointc1>; ··· 361 361 interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; 362 362 clocks = <&clk LOONGSON2_APB_CLK>; 363 363 #pwm-cells = <3>; 364 + status = "disabled"; 365 + }; 366 + 367 + nand: nand-controller@1fe26000 { 368 + compatible = "loongson,ls2k1000-nand-controller"; 369 + reg = <0 0x1fe26000 0 0x24>, 370 + <0 0x1fe26040 0 0x4>, 371 + <0 0x1fe00438 0 0x8>; 372 + reg-names = "nand", "nand-dma", "dma-config"; 373 + dmas = <&apbdma0 0>; 374 + dma-names = "rxtx"; 364 375 status = "disabled"; 365 376 }; 366 377
+54
arch/loongarch/include/asm/cmpxchg.h
··· 8 8 #include <linux/bits.h> 9 9 #include <linux/build_bug.h> 10 10 #include <asm/barrier.h> 11 + #include <asm/cpu-features.h> 11 12 12 13 #define __xchg_amo_asm(amswap_db, m, val) \ 13 14 ({ \ ··· 237 236 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 238 237 arch_cmpxchg((ptr), (o), (n)); \ 239 238 }) 239 + 240 + union __u128_halves { 241 + u128 full; 242 + struct { 243 + u64 low; 244 + u64 high; 245 + }; 246 + }; 247 + 248 + #define system_has_cmpxchg128() cpu_opt(LOONGARCH_CPU_SCQ) 249 + 250 + #define __arch_cmpxchg128(ptr, old, new, llsc_mb) \ 251 + ({ \ 252 + union __u128_halves __old, __new, __ret; \ 253 + volatile u64 *__ptr = (volatile u64 *)(ptr); \ 254 + \ 255 + __old.full = (old); \ 256 + __new.full = (new); \ 257 + \ 258 + __asm__ __volatile__( \ 259 + "1: ll.d %0, %3 # 128-bit cmpxchg low \n" \ 260 + llsc_mb \ 261 + " ld.d %1, %4 # 128-bit cmpxchg high \n" \ 262 + " move $t0, %0 \n" \ 263 + " move $t1, %1 \n" \ 264 + " bne %0, %z5, 2f \n" \ 265 + " bne %1, %z6, 2f \n" \ 266 + " move $t0, %z7 \n" \ 267 + " move $t1, %z8 \n" \ 268 + "2: sc.q $t0, $t1, %2 \n" \ 269 + " beqz $t0, 1b \n" \ 270 + llsc_mb \ 271 + : "=&r" (__ret.low), "=&r" (__ret.high) \ 272 + : "r" (__ptr), \ 273 + "ZC" (__ptr[0]), "m" (__ptr[1]), \ 274 + "Jr" (__old.low), "Jr" (__old.high), \ 275 + "Jr" (__new.low), "Jr" (__new.high) \ 276 + : "t0", "t1", "memory"); \ 277 + \ 278 + __ret.full; \ 279 + }) 280 + 281 + #define arch_cmpxchg128(ptr, o, n) \ 282 + ({ \ 283 + BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ 284 + __arch_cmpxchg128(ptr, o, n, __WEAK_LLSC_MB); \ 285 + }) 286 + 287 + #define arch_cmpxchg128_local(ptr, o, n) \ 288 + ({ \ 289 + BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ 290 + __arch_cmpxchg128(ptr, o, n, ""); \ 291 + }) 240 292 #else 241 293 #include <asm-generic/cmpxchg-local.h> 242 294 #define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+1
arch/loongarch/include/asm/cpu-features.h
··· 35 35 */ 36 36 #define cpu_has_cpucfg cpu_opt(LOONGARCH_CPU_CPUCFG) 37 37 #define cpu_has_lam cpu_opt(LOONGARCH_CPU_LAM) 38 + #define cpu_has_scq cpu_opt(LOONGARCH_CPU_SCQ) 38 39 #define cpu_has_ual cpu_opt(LOONGARCH_CPU_UAL) 39 40 #define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) 40 41 #define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX)
+32 -30
arch/loongarch/include/asm/cpu.h
··· 95 95 */ 96 96 #define CPU_FEATURE_CPUCFG 0 /* CPU has CPUCFG */ 97 97 #define CPU_FEATURE_LAM 1 /* CPU has Atomic instructions */ 98 - #define CPU_FEATURE_UAL 2 /* CPU supports unaligned access */ 99 - #define CPU_FEATURE_FPU 3 /* CPU has FPU */ 100 - #define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */ 101 - #define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */ 102 - #define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */ 103 - #define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */ 104 - #define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */ 105 - #define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */ 106 - #define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */ 107 - #define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary Translation */ 108 - #define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ 109 - #define CPU_FEATURE_TLB 13 /* CPU has TLB */ 110 - #define CPU_FEATURE_CSR 14 /* CPU has CSR */ 111 - #define CPU_FEATURE_IOCSR 15 /* CPU has IOCSR */ 112 - #define CPU_FEATURE_WATCH 16 /* CPU has watchpoint registers */ 113 - #define CPU_FEATURE_VINT 17 /* CPU has vectored interrupts */ 114 - #define CPU_FEATURE_CSRIPI 18 /* CPU has CSR-IPI */ 115 - #define CPU_FEATURE_EXTIOI 19 /* CPU has EXT-IOI */ 116 - #define CPU_FEATURE_PREFETCH 20 /* CPU has prefetch instructions */ 117 - #define CPU_FEATURE_PMP 21 /* CPU has performance counter */ 118 - #define CPU_FEATURE_SCALEFREQ 22 /* CPU supports cpufreq scaling */ 119 - #define CPU_FEATURE_FLATMODE 23 /* CPU has flat mode */ 120 - #define CPU_FEATURE_EIODECODE 24 /* CPU has EXTIOI interrupt pin decode mode */ 121 - #define CPU_FEATURE_GUESTID 25 /* CPU has GuestID feature */ 122 - #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ 123 - #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ 124 - #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ 125 - #define CPU_FEATURE_MSGINT 29 /* 
CPU has MSG interrupt */ 126 - #define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ 127 - #define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ 98 + #define CPU_FEATURE_SCQ 2 /* CPU has SC.Q instruction */ 99 + #define CPU_FEATURE_UAL 3 /* CPU supports unaligned access */ 100 + #define CPU_FEATURE_FPU 4 /* CPU has FPU */ 101 + #define CPU_FEATURE_LSX 5 /* CPU has LSX (128-bit SIMD) */ 102 + #define CPU_FEATURE_LASX 6 /* CPU has LASX (256-bit SIMD) */ 103 + #define CPU_FEATURE_CRC32 7 /* CPU has CRC32 instructions */ 104 + #define CPU_FEATURE_COMPLEX 8 /* CPU has Complex instructions */ 105 + #define CPU_FEATURE_CRYPTO 9 /* CPU has Crypto instructions */ 106 + #define CPU_FEATURE_LVZ 10 /* CPU has Virtualization extension */ 107 + #define CPU_FEATURE_LBT_X86 11 /* CPU has X86 Binary Translation */ 108 + #define CPU_FEATURE_LBT_ARM 12 /* CPU has ARM Binary Translation */ 109 + #define CPU_FEATURE_LBT_MIPS 13 /* CPU has MIPS Binary Translation */ 110 + #define CPU_FEATURE_TLB 14 /* CPU has TLB */ 111 + #define CPU_FEATURE_CSR 15 /* CPU has CSR */ 112 + #define CPU_FEATURE_IOCSR 16 /* CPU has IOCSR */ 113 + #define CPU_FEATURE_WATCH 17 /* CPU has watchpoint registers */ 114 + #define CPU_FEATURE_VINT 18 /* CPU has vectored interrupts */ 115 + #define CPU_FEATURE_CSRIPI 19 /* CPU has CSR-IPI */ 116 + #define CPU_FEATURE_EXTIOI 20 /* CPU has EXT-IOI */ 117 + #define CPU_FEATURE_PREFETCH 21 /* CPU has prefetch instructions */ 118 + #define CPU_FEATURE_PMP 22 /* CPU has performance counter */ 119 + #define CPU_FEATURE_SCALEFREQ 23 /* CPU supports cpufreq scaling */ 120 + #define CPU_FEATURE_FLATMODE 24 /* CPU has flat mode */ 121 + #define CPU_FEATURE_EIODECODE 25 /* CPU has EXTIOI interrupt pin decode mode */ 122 + #define CPU_FEATURE_GUESTID 26 /* CPU has GuestID feature */ 123 + #define CPU_FEATURE_HYPERVISOR 27 /* CPU has hypervisor (running in VM) */ 124 + #define CPU_FEATURE_PTW 28 /* CPU has hardware page table walker */ 125 + #define 
CPU_FEATURE_LSPW 29 /* CPU has LSPW (lddir/ldpte instructions) */ 126 + #define CPU_FEATURE_MSGINT 30 /* CPU has MSG interrupt */ 127 + #define CPU_FEATURE_AVECINT 31 /* CPU has AVEC interrupt */ 128 + #define CPU_FEATURE_REDIRECTINT 32 /* CPU has interrupt remapping */ 128 129 129 130 #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) 130 131 #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) 132 + #define LOONGARCH_CPU_SCQ BIT_ULL(CPU_FEATURE_SCQ) 131 133 #define LOONGARCH_CPU_UAL BIT_ULL(CPU_FEATURE_UAL) 132 134 #define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) 133 135 #define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX)
+3
arch/loongarch/include/asm/setup.h
··· 7 7 #define _LOONGARCH_SETUP_H 8 8 9 9 #include <linux/types.h> 10 + #include <linux/threads.h> 10 11 #include <asm/sections.h> 11 12 #include <uapi/asm/setup.h> 12 13 ··· 15 14 16 15 extern unsigned long eentry; 17 16 extern unsigned long tlbrentry; 17 + extern unsigned long pcpu_handlers[NR_CPUS]; 18 + extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; 18 19 extern char init_command_line[COMMAND_LINE_SIZE]; 19 20 extern void tlb_init(int cpu); 20 21 extern void cpu_cache_init(void);
+1 -1
arch/loongarch/include/asm/topology.h
··· 12 12 13 13 extern cpumask_t cpus_on_node[]; 14 14 15 - #define cpumask_of_node(node) (&cpus_on_node[node]) 15 + #define cpumask_of_node(node) ((node) == NUMA_NO_NODE ? cpu_all_mask : &cpus_on_node[node]) 16 16 17 17 struct pci_bus; 18 18 extern int pcibus_to_node(struct pci_bus *);
+1
arch/loongarch/include/asm/unistd.h
··· 10 10 11 11 #define __ARCH_WANT_NEW_STAT 12 12 #define __ARCH_WANT_SYS_CLONE 13 + #define __ARCH_WANT_MEMFD_SECRET 13 14 14 15 #define NR_syscalls (__NR_syscalls)
+1
arch/loongarch/include/uapi/asm/hwcap.h
··· 18 18 #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) 19 19 #define HWCAP_LOONGARCH_PTW (1 << 13) 20 20 #define HWCAP_LOONGARCH_LSPW (1 << 14) 21 + #define HWCAP_LOONGARCH_SCQ (1 << 15) 21 22 22 23 #endif /* _UAPI_ASM_HWCAP_H */
+2 -3
arch/loongarch/kernel/Makefile.syscalls
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 3 - # No special ABIs on loongarch so far 4 - syscall_abis_32 += 5 - syscall_abis_64 += 3 + syscall_abis_32 += memfd_secret 4 + syscall_abis_64 += memfd_secret
+4
arch/loongarch/kernel/cpu-probe.c
··· 177 177 c->options |= LOONGARCH_CPU_LAM; 178 178 elf_hwcap |= HWCAP_LOONGARCH_LAM; 179 179 } 180 + if (config & CPUCFG2_SCQ) { 181 + c->options |= LOONGARCH_CPU_SCQ; 182 + elf_hwcap |= HWCAP_LOONGARCH_SCQ; 183 + } 180 184 if (config & CPUCFG2_FP) { 181 185 c->options |= LOONGARCH_CPU_FPU; 182 186 elf_hwcap |= HWCAP_LOONGARCH_FPU;
+1 -1
arch/loongarch/kernel/kgdb.c
··· 697 697 continue; 698 698 699 699 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); 700 - if (IS_ERR((void * __force)breakinfo[i].pev)) { 700 + if (IS_ERR_PCPU(breakinfo[i].pev)) { 701 701 pr_err("kgdb: Could not allocate hw breakpoints.\n"); 702 702 breakinfo[i].pev = NULL; 703 703 return;
+40 -23
arch/loongarch/kernel/proc.c
··· 50 50 seq_printf(m, "Address Sizes\t\t: %d bits physical, %d bits virtual\n", 51 51 cpu_pabits + 1, cpu_vabits + 1); 52 52 53 - seq_printf(m, "ISA\t\t\t:"); 53 + seq_puts(m, "ISA\t\t\t:"); 54 54 if (isa & LOONGARCH_CPU_ISA_LA32R) 55 - seq_printf(m, " loongarch32r"); 55 + seq_puts(m, " loongarch32r"); 56 56 if (isa & LOONGARCH_CPU_ISA_LA32S) 57 - seq_printf(m, " loongarch32s"); 57 + seq_puts(m, " loongarch32s"); 58 58 if (isa & LOONGARCH_CPU_ISA_LA64) 59 - seq_printf(m, " loongarch64"); 60 - seq_printf(m, "\n"); 59 + seq_puts(m, " loongarch64"); 60 + seq_puts(m, "\n"); 61 61 62 - seq_printf(m, "Features\t\t:"); 63 - if (cpu_has_cpucfg) seq_printf(m, " cpucfg"); 64 - if (cpu_has_lam) seq_printf(m, " lam"); 65 - if (cpu_has_ual) seq_printf(m, " ual"); 66 - if (cpu_has_fpu) seq_printf(m, " fpu"); 67 - if (cpu_has_lsx) seq_printf(m, " lsx"); 68 - if (cpu_has_lasx) seq_printf(m, " lasx"); 69 - if (cpu_has_crc32) seq_printf(m, " crc32"); 70 - if (cpu_has_complex) seq_printf(m, " complex"); 71 - if (cpu_has_crypto) seq_printf(m, " crypto"); 72 - if (cpu_has_ptw) seq_printf(m, " ptw"); 73 - if (cpu_has_lspw) seq_printf(m, " lspw"); 74 - if (cpu_has_lvz) seq_printf(m, " lvz"); 75 - if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); 76 - if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); 77 - if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips"); 78 - seq_printf(m, "\n"); 62 + seq_puts(m, "Features\t\t:"); 63 + if (cpu_has_cpucfg) 64 + seq_puts(m, " cpucfg"); 65 + if (cpu_has_lam) 66 + seq_puts(m, " lam"); 67 + if (cpu_has_scq) 68 + seq_puts(m, " scq"); 69 + if (cpu_has_ual) 70 + seq_puts(m, " ual"); 71 + if (cpu_has_fpu) 72 + seq_puts(m, " fpu"); 73 + if (cpu_has_lsx) 74 + seq_puts(m, " lsx"); 75 + if (cpu_has_lasx) 76 + seq_puts(m, " lasx"); 77 + if (cpu_has_crc32) 78 + seq_puts(m, " crc32"); 79 + if (cpu_has_complex) 80 + seq_puts(m, " complex"); 81 + if (cpu_has_crypto) 82 + seq_puts(m, " crypto"); 83 + if (cpu_has_ptw) 84 + seq_puts(m, " ptw"); 85 + if (cpu_has_lspw) 86 + 
seq_puts(m, " lspw"); 87 + if (cpu_has_lvz) 88 + seq_puts(m, " lvz"); 89 + if (cpu_has_lbt_x86) 90 + seq_puts(m, " lbt_x86"); 91 + if (cpu_has_lbt_arm) 92 + seq_puts(m, " lbt_arm"); 93 + if (cpu_has_lbt_mips) 94 + seq_puts(m, " lbt_mips"); 95 + seq_puts(m, "\n"); 79 96 80 97 seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch)); 81 98 if (cpu_has_watch) { ··· 100 83 cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); 101 84 } 102 85 103 - seq_printf(m, "\n\n"); 86 + seq_puts(m, "\n\n"); 104 87 105 88 return 0; 106 89 }
+1
arch/loongarch/kernel/setup.c
··· 413 413 PFN_UP(__pa_symbol(&__nosave_end))); 414 414 415 415 memblock_dump_all(); 416 + memblock_set_bottom_up(false); 416 417 417 418 early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); 418 419 }
+13
arch/loongarch/kernel/smp.c
··· 365 365 void __init loongson_prepare_cpus(unsigned int max_cpus) 366 366 { 367 367 int i = 0; 368 + int threads_per_core = 0; 368 369 369 370 parse_acpi_topology(); 370 371 cpu_data[0].global_id = cpu_logical_map(0); 372 + 373 + if (!pptt_enabled) 374 + threads_per_core = 1; 375 + else { 376 + for_each_possible_cpu(i) { 377 + if (cpu_to_node(i) != 0) 378 + continue; 379 + if (cpus_are_siblings(0, i)) 380 + threads_per_core++; 381 + } 382 + } 371 383 372 384 for (i = 0; i < loongson_sysconf.nr_cpus; i++) { 373 385 set_cpu_present(i, true); ··· 387 375 } 388 376 389 377 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 378 + cpu_smt_set_num_threads(threads_per_core, threads_per_core); 390 379 } 391 380 392 381 /*
+16 -2
arch/loongarch/kernel/unwind_orc.c
··· 350 350 351 351 static inline unsigned long bt_address(unsigned long ra) 352 352 { 353 - extern unsigned long eentry; 353 + #if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) 354 + int cpu; 355 + int vec_sz = sizeof(exception_handlers); 356 + 357 + for_each_possible_cpu(cpu) { 358 + if (!pcpu_handlers[cpu]) 359 + continue; 360 + 361 + if (ra >= pcpu_handlers[cpu] && 362 + ra < pcpu_handlers[cpu] + vec_sz) { 363 + ra = ra + eentry - pcpu_handlers[cpu]; 364 + break; 365 + } 366 + } 367 + #endif 354 368 355 369 if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) { 356 370 unsigned long func; ··· 508 494 509 495 state->pc = bt_address(pc); 510 496 if (!state->pc) { 511 - pr_err("cannot find unwind pc at %p\n", (void *)pc); 497 + pr_err("cannot find unwind pc at %px\n", (void *)pc); 512 498 goto err; 513 499 } 514 500
+1 -5
arch/loongarch/kernel/unwind_prologue.c
··· 23 23 extern const int unwind_hint_lbt; 24 24 extern const int unwind_hint_ri; 25 25 extern const int unwind_hint_watch; 26 - extern unsigned long eentry; 27 - #ifdef CONFIG_NUMA 28 - extern unsigned long pcpu_handlers[NR_CPUS]; 29 - #endif 30 26 31 27 static inline bool scan_handlers(unsigned long entry_offset) 32 28 { ··· 61 65 62 66 static inline bool fix_exception(unsigned long pc) 63 67 { 64 - #ifdef CONFIG_NUMA 68 + #if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) 65 69 int cpu; 66 70 67 71 for_each_possible_cpu(cpu) {
+41 -39
arch/loongarch/mm/kasan_init.c
··· 40 40 #define __pte_none(early, pte) (early ? pte_none(pte) : \ 41 41 ((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) 42 42 43 + static void *mem_to_shadow(const void *addr) 44 + { 45 + unsigned long offset = 0; 46 + unsigned long maddr = (unsigned long)addr; 47 + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; 48 + 49 + if (maddr >= FIXADDR_START) 50 + return (void *)(kasan_early_shadow_page); 51 + 52 + maddr &= XRANGE_SHADOW_MASK; 53 + switch (xrange) { 54 + case XKPRANGE_CC_SEG: 55 + offset = XKPRANGE_CC_SHADOW_OFFSET; 56 + break; 57 + case XKPRANGE_UC_SEG: 58 + offset = XKPRANGE_UC_SHADOW_OFFSET; 59 + break; 60 + case XKPRANGE_WC_SEG: 61 + offset = XKPRANGE_WC_SHADOW_OFFSET; 62 + break; 63 + case XKVRANGE_VC_SEG: 64 + offset = XKVRANGE_VC_SHADOW_OFFSET; 65 + break; 66 + default: 67 + WARN_ON(1); 68 + return NULL; 69 + } 70 + 71 + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); 72 + } 73 + 43 74 void *kasan_mem_to_shadow(const void *addr) 44 75 { 45 - if (!kasan_enabled()) { 76 + if (kasan_enabled()) 77 + return mem_to_shadow(addr); 78 + else 46 79 return (void *)(kasan_early_shadow_page); 47 - } else { 48 - unsigned long maddr = (unsigned long)addr; 49 - unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; 50 - unsigned long offset = 0; 51 - 52 - if (maddr >= FIXADDR_START) 53 - return (void *)(kasan_early_shadow_page); 54 - 55 - maddr &= XRANGE_SHADOW_MASK; 56 - switch (xrange) { 57 - case XKPRANGE_CC_SEG: 58 - offset = XKPRANGE_CC_SHADOW_OFFSET; 59 - break; 60 - case XKPRANGE_UC_SEG: 61 - offset = XKPRANGE_UC_SHADOW_OFFSET; 62 - break; 63 - case XKPRANGE_WC_SEG: 64 - offset = XKPRANGE_WC_SHADOW_OFFSET; 65 - break; 66 - case XKVRANGE_VC_SEG: 67 - offset = XKVRANGE_VC_SHADOW_OFFSET; 68 - break; 69 - default: 70 - WARN_ON(1); 71 - return NULL; 72 - } 73 - 74 - return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); 75 - } 76 80 } 77 81 78 82 const void *kasan_shadow_to_mem(const void 
*shadow_addr) ··· 297 293 /* Maps everything to a single page of zeroes */ 298 294 kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); 299 295 300 - kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), 301 - kasan_mem_to_shadow((void *)KFENCE_AREA_END)); 302 - 303 - /* Enable KASAN here before kasan_mem_to_shadow(). */ 304 - kasan_init_generic(); 296 + kasan_populate_early_shadow(mem_to_shadow((void *)VMALLOC_START), 297 + mem_to_shadow((void *)KFENCE_AREA_END)); 305 298 306 299 /* Populate the linear mapping */ 307 300 for_each_mem_range(i, &pa_start, &pa_end) { ··· 308 307 if (start >= end) 309 308 break; 310 309 311 - kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), 312 - (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE); 310 + kasan_map_populate((unsigned long)mem_to_shadow(start), 311 + (unsigned long)mem_to_shadow(end), NUMA_NO_NODE); 313 312 } 314 313 315 314 /* Populate modules mapping */ 316 - kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR), 317 - (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); 315 + kasan_map_populate((unsigned long)mem_to_shadow((void *)MODULES_VADDR), 316 + (unsigned long)mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); 318 317 /* 319 318 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we 320 319 * should make sure that it maps the zero page read-only. ··· 329 328 330 329 /* At this point kasan is fully initialized. Enable error messages */ 331 330 init_task.kasan_depth = 0; 331 + kasan_init_generic(); 332 332 }
+1 -2
arch/loongarch/mm/tlb.c
··· 202 202 local_irq_restore(flags); 203 203 } 204 204 205 - static void setup_ptwalker(void) 205 + static void __no_sanitize_address setup_ptwalker(void) 206 206 { 207 207 unsigned long pwctl0, pwctl1; 208 208 unsigned long pgd_i = 0, pgd_w = 0; ··· 262 262 #ifdef CONFIG_NUMA 263 263 unsigned long pcpu_handlers[NR_CPUS]; 264 264 #endif 265 - extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; 266 265 267 266 static void setup_tlb_handler(int cpu) 268 267 {
+164 -42
arch/loongarch/net/bpf_jit.c
··· 17 17 #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) 18 18 19 19 #define REG_TCC LOONGARCH_GPR_A6 20 + #define REG_ARENA LOONGARCH_GPR_S6 /* For storing arena_vm_start */ 20 21 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80) 21 22 22 23 static const int regmap[] = { ··· 137 136 /* To store tcc and tcc_ptr */ 138 137 stack_adjust += sizeof(long) * 2; 139 138 139 + if (ctx->arena_vm_start) 140 + stack_adjust += 8; 141 + 140 142 stack_adjust = round_up(stack_adjust, 16); 141 143 stack_adjust += bpf_stack_adjust; 142 144 ··· 182 178 store_offset -= sizeof(long); 183 179 emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); 184 180 181 + if (ctx->arena_vm_start) { 182 + store_offset -= sizeof(long); 183 + emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset); 184 + } 185 + 185 186 prepare_bpf_tail_call_cnt(ctx, &store_offset); 186 187 187 188 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); ··· 195 186 emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); 196 187 197 188 ctx->stack_size = stack_adjust; 189 + 190 + if (ctx->arena_vm_start) 191 + move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false); 198 192 } 199 193 200 194 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) ··· 228 216 229 217 load_offset -= sizeof(long); 230 218 emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); 219 + 220 + if (ctx->arena_vm_start) { 221 + load_offset -= sizeof(long); 222 + emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset); 223 + } 231 224 232 225 /* 233 226 * When push into the stack, follow the order of tcc then tcc_ptr. 
··· 459 442 460 443 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 461 444 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 445 + #define REG_DONT_CLEAR_MARKER 0 462 446 463 447 bool ex_handler_bpf(const struct exception_table_entry *ex, 464 448 struct pt_regs *regs) ··· 467 449 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); 468 450 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); 469 451 470 - regs->regs[dst_reg] = 0; 452 + if (dst_reg != REG_DONT_CLEAR_MARKER) 453 + regs->regs[dst_reg] = 0; 471 454 regs->csr_era = (unsigned long)&ex->fixup - offset; 472 455 473 456 return true; ··· 480 461 int dst_reg) 481 462 { 482 463 unsigned long pc; 483 - off_t offset; 464 + off_t ins_offset, fixup_offset; 484 465 struct exception_table_entry *ex; 485 466 486 - if (!ctx->image || !ctx->prog->aux->extable) 467 + if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable) 487 468 return 0; 488 469 489 470 if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 490 - BPF_MODE(insn->code) != BPF_PROBE_MEMSX) 471 + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 472 + BPF_MODE(insn->code) != BPF_PROBE_MEM32) 491 473 return 0; 492 474 493 475 if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) 494 476 return -EINVAL; 495 477 496 478 ex = &ctx->prog->aux->extable[ctx->num_exentries]; 497 - pc = (unsigned long)&ctx->image[ctx->idx - 1]; 479 + pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; 498 480 499 - offset = pc - (long)&ex->insn; 500 - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) 481 + /* 482 + * This is the relative offset of the instruction that may fault from 483 + * the exception table itself. This will be written to the exception 484 + * table and if this instruction faults, the destination register will 485 + * be set to '0' and the execution will jump to the next instruction. 
486 + */ 487 + ins_offset = pc - (long)&ex->insn; 488 + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) 501 489 return -ERANGE; 502 - 503 - ex->insn = offset; 504 490 505 491 /* 506 492 * Since the extable follows the program, the fixup offset is always ··· 514 490 * bits. We don't need to worry about buildtime or runtime sort 515 491 * modifying the upper bits because the table is already sorted, and 516 492 * isn't part of the main exception table. 493 + * 494 + * The fixup_offset is set to the next instruction from the instruction 495 + * that may fault. The execution will jump to this after handling the fault. 517 496 */ 518 - offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); 519 - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) 497 + fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); 498 + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) 520 499 return -ERANGE; 521 500 501 + /* 502 + * The offsets above have been calculated using the RO buffer but we 503 + * need to use the R/W buffer for writes. Switch ex to rw buffer for writing. 
504 + */ 505 + ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); 506 + ex->insn = ins_offset; 507 + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 508 + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 522 509 ex->type = EX_TYPE_BPF; 523 - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 524 510 525 511 ctx->num_exentries++; 526 512 ··· 548 514 const u8 cond = BPF_OP(code); 549 515 const u8 t1 = LOONGARCH_GPR_T1; 550 516 const u8 t2 = LOONGARCH_GPR_T2; 551 - const u8 src = regmap[insn->src_reg]; 552 - const u8 dst = regmap[insn->dst_reg]; 517 + const u8 t3 = LOONGARCH_GPR_T3; 518 + u8 src = regmap[insn->src_reg]; 519 + u8 dst = regmap[insn->dst_reg]; 553 520 const s16 off = insn->off; 554 521 const s32 imm = insn->imm; 555 522 const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32; ··· 559 524 /* dst = src */ 560 525 case BPF_ALU | BPF_MOV | BPF_X: 561 526 case BPF_ALU64 | BPF_MOV | BPF_X: 527 + if (insn_is_cast_user(insn)) { 528 + move_reg(ctx, t1, src); 529 + emit_zext_32(ctx, t1, true); 530 + move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false); 531 + emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 1); 532 + emit_insn(ctx, or, t1, dst, t1); 533 + move_reg(ctx, dst, t1); 534 + break; 535 + } 562 536 switch (off) { 563 537 case 0: 564 538 move_reg(ctx, dst, src); ··· 1065 1021 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1066 1022 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1067 1023 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1068 - sign_extend = BPF_MODE(insn->code) == BPF_MEMSX || 1069 - BPF_MODE(insn->code) == BPF_PROBE_MEMSX; 1024 + /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */ 1025 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1026 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1027 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1028 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1029 + sign_extend = BPF_MODE(code) == BPF_MEMSX || 1030 + BPF_MODE(code) == 
BPF_PROBE_MEMSX; 1031 + 1032 + if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1033 + emit_insn(ctx, addd, t2, src, REG_ARENA); 1034 + src = t2; 1035 + } 1036 + 1070 1037 switch (BPF_SIZE(code)) { 1071 1038 case BPF_B: 1072 1039 if (is_signed_imm12(off)) { ··· 1137 1082 case BPF_ST | BPF_MEM | BPF_H: 1138 1083 case BPF_ST | BPF_MEM | BPF_W: 1139 1084 case BPF_ST | BPF_MEM | BPF_DW: 1085 + /* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */ 1086 + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 1087 + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 1088 + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 1089 + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 1090 + if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1091 + emit_insn(ctx, addd, t3, dst, REG_ARENA); 1092 + dst = t3; 1093 + } 1094 + 1140 1095 switch (BPF_SIZE(code)) { 1141 1096 case BPF_B: 1142 1097 move_imm(ctx, t1, imm, is32); ··· 1189 1124 } 1190 1125 break; 1191 1126 } 1127 + 1128 + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); 1129 + if (ret) 1130 + return ret; 1192 1131 break; 1193 1132 1194 1133 /* *(size *)(dst + off) = src */ ··· 1200 1131 case BPF_STX | BPF_MEM | BPF_H: 1201 1132 case BPF_STX | BPF_MEM | BPF_W: 1202 1133 case BPF_STX | BPF_MEM | BPF_DW: 1134 + /* STX | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */ 1135 + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 1136 + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 1137 + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 1138 + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 1139 + if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1140 + emit_insn(ctx, addd, t2, dst, REG_ARENA); 1141 + dst = t2; 1142 + } 1143 + 1203 1144 switch (BPF_SIZE(code)) { 1204 1145 case BPF_B: 1205 1146 if (is_signed_imm12(off)) { ··· 1248 1169 } 1249 1170 break; 1250 1171 } 1172 + 1173 + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); 1174 + if (ret) 1175 + return ret; 1251 1176 break; 1252 1177 1253 1178 case BPF_STX | BPF_ATOMIC | BPF_W: ··· 1912 1829 struct bpf_prog *bpf_int_jit_compile(struct 
bpf_prog *prog) 1913 1830 { 1914 1831 bool tmp_blinded = false, extra_pass = false; 1915 - u8 *image_ptr; 1832 + u8 *image_ptr, *ro_image_ptr; 1916 1833 int image_size, prog_size, extable_size; 1917 1834 struct jit_ctx ctx; 1918 1835 struct jit_data *jit_data; 1919 1836 struct bpf_binary_header *header; 1837 + struct bpf_binary_header *ro_header; 1920 1838 struct bpf_prog *tmp, *orig_prog = prog; 1921 1839 1922 1840 /* ··· 1952 1868 } 1953 1869 if (jit_data->ctx.offset) { 1954 1870 ctx = jit_data->ctx; 1955 - image_ptr = jit_data->image; 1871 + ro_header = jit_data->ro_header; 1872 + ro_image_ptr = (void *)ctx.ro_image; 1956 1873 header = jit_data->header; 1874 + image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header); 1957 1875 extra_pass = true; 1958 1876 prog_size = sizeof(u32) * ctx.idx; 1959 1877 goto skip_init_ctx; ··· 1963 1877 1964 1878 memset(&ctx, 0, sizeof(ctx)); 1965 1879 ctx.prog = prog; 1880 + ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); 1881 + ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); 1966 1882 1967 1883 ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); 1968 1884 if (ctx.offset == NULL) { ··· 1991 1903 prog_size = sizeof(u32) * ctx.idx; 1992 1904 image_size = prog_size + extable_size; 1993 1905 /* Now we know the size of the structure to make */ 1994 - header = bpf_jit_binary_alloc(image_size, &image_ptr, 1995 - sizeof(u32), jit_fill_hole); 1996 - if (header == NULL) { 1906 + ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), 1907 + &header, &image_ptr, jit_fill_hole); 1908 + if (!ro_header) { 1997 1909 prog = orig_prog; 1998 1910 goto out_offset; 1999 1911 } 2000 1912 2001 1913 /* 2. Now, the actual pass to generate final JIT code */ 1914 + /* 1915 + * Use the image (RW) for writing the JITed instructions. But also save 1916 + * the ro_image (RX) for calculating the offsets in the image. 
The RW 1917 + * image will be later copied to the RX image from where the program will 1918 + * run. The bpf_jit_binary_pack_finalize() will do this copy in the final 1919 + * step. 1920 + */ 2002 1921 ctx.image = (union loongarch_instruction *)image_ptr; 1922 + ctx.ro_image = (union loongarch_instruction *)ro_image_ptr; 2003 1923 if (extable_size) 2004 - prog->aux->extable = (void *)image_ptr + prog_size; 1924 + prog->aux->extable = (void *)ro_image_ptr + prog_size; 2005 1925 2006 1926 skip_init_ctx: 2007 1927 ctx.idx = 0; ··· 2017 1921 2018 1922 build_prologue(&ctx); 2019 1923 if (build_body(&ctx, extra_pass)) { 2020 - bpf_jit_binary_free(header); 2021 1924 prog = orig_prog; 2022 - goto out_offset; 1925 + goto out_free; 2023 1926 } 2024 1927 build_epilogue(&ctx); 2025 1928 2026 1929 /* 3. Extra pass to validate JITed code */ 2027 1930 if (validate_ctx(&ctx)) { 2028 - bpf_jit_binary_free(header); 2029 1931 prog = orig_prog; 2030 - goto out_offset; 1932 + goto out_free; 2031 1933 } 2032 1934 2033 1935 /* And we're done */ 2034 1936 if (bpf_jit_enable > 1) 2035 1937 bpf_jit_dump(prog->len, prog_size, 2, ctx.image); 2036 1938 2037 - /* Update the icache */ 2038 - flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); 2039 - 2040 1939 if (!prog->is_func || extra_pass) { 2041 - int err; 2042 - 2043 1940 if (extra_pass && ctx.idx != jit_data->ctx.idx) { 2044 1941 pr_err_once("multi-func JIT bug %d != %d\n", 2045 1942 ctx.idx, jit_data->ctx.idx); 2046 1943 goto out_free; 2047 1944 } 2048 - err = bpf_jit_binary_lock_ro(header); 2049 - if (err) { 2050 - pr_err_once("bpf_jit_binary_lock_ro() returned %d\n", 2051 - err); 1945 + if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { 1946 + /* ro_header has been freed */ 1947 + ro_header = NULL; 1948 + prog = orig_prog; 2052 1949 goto out_free; 2053 1950 } 1951 + /* 1952 + * The instructions have now been copied to the ROX region from 1953 + * where they will execute. 
Now the data cache has to be cleaned 1954 + * to the PoU and the I-cache has to be invalidated for the VAs. 1955 + */ 1956 + bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); 2054 1957 } else { 2055 1958 jit_data->ctx = ctx; 2056 - jit_data->image = image_ptr; 2057 1959 jit_data->header = header; 1960 + jit_data->ro_header = ro_header; 2058 1961 } 2059 1962 prog->jited = 1; 2060 1963 prog->jited_len = prog_size; 2061 - prog->bpf_func = (void *)ctx.image; 1964 + prog->bpf_func = (void *)ctx.ro_image; 2062 1965 2063 1966 if (!prog->is_func || extra_pass) { 2064 1967 int i; ··· 2077 1982 if (tmp_blinded) 2078 1983 bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); 2079 1984 2080 - 2081 1985 return prog; 2082 1986 2083 1987 out_free: 2084 - bpf_jit_binary_free(header); 2085 - prog->bpf_func = NULL; 2086 - prog->jited = 0; 2087 - prog->jited_len = 0; 1988 + if (header) { 1989 + bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size)); 1990 + bpf_jit_binary_pack_free(ro_header, header); 1991 + } 2088 1992 goto out_offset; 1993 + } 1994 + 1995 + void bpf_jit_free(struct bpf_prog *prog) 1996 + { 1997 + if (prog->jited) { 1998 + struct jit_data *jit_data = prog->aux->jit_data; 1999 + struct bpf_binary_header *hdr; 2000 + 2001 + /* 2002 + * If we fail the final pass of JIT (from jit_subprogs), the 2003 + * program may not be finalized yet. Call finalize here before 2004 + * freeing it. 
2005 + */ 2006 + if (jit_data) { 2007 + bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header); 2008 + kfree(jit_data); 2009 + } 2010 + hdr = bpf_jit_binary_pack_hdr(prog); 2011 + bpf_jit_binary_pack_free(hdr, NULL); 2012 + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); 2013 + } 2014 + 2015 + bpf_prog_unlock_free(prog); 2089 2016 } 2090 2017 2091 2018 bool bpf_jit_bypass_spec_v1(void) ··· 2116 1999 } 2117 2000 2118 2001 bool bpf_jit_bypass_spec_v4(void) 2002 + { 2003 + return true; 2004 + } 2005 + 2006 + bool bpf_jit_supports_arena(void) 2119 2007 { 2120 2008 return true; 2121 2009 }
+3 -1
arch/loongarch/net/bpf_jit.h
··· 20 20 union loongarch_instruction *image; 21 21 union loongarch_instruction *ro_image; 22 22 u32 stack_size; 23 + u64 arena_vm_start; 24 + u64 user_vm_start; 23 25 }; 24 26 25 27 struct jit_data { 26 28 struct bpf_binary_header *header; 27 - u8 *image; 29 + struct bpf_binary_header *ro_header; 28 30 struct jit_ctx ctx; 29 31 }; 30 32
+1 -1
tools/testing/selftests/mm/Makefile
··· 72 72 TEST_GEN_FILES += map_fixed_noreplace 73 73 TEST_GEN_FILES += map_hugetlb 74 74 TEST_GEN_FILES += map_populate 75 - ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) 75 + ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64)) 76 76 TEST_GEN_FILES += memfd_secret 77 77 endif 78 78 TEST_GEN_FILES += migration