Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:

- Allow usage of LSX/LASX in the kernel, and use them for
SIMD-optimized RAID5/RAID6 routines

- Add Loongson Binary Translation (LBT) extension support

- Add basic KGDB & KDB support

- Add building with kcov coverage

- Add KFENCE (Kernel Electric-Fence) support

- Add KASAN (Kernel Address Sanitizer) support

- Some bug fixes and other small changes

- Update the default config file

* tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (25 commits)
LoongArch: Update Loongson-3 default config file
LoongArch: Add KASAN (Kernel Address Sanitizer) support
LoongArch: Simplify the processing of jumping new kernel for KASLR
kasan: Add (pmd|pud)_init for LoongArch zero_(pud|p4d)_populate process
kasan: Add __HAVE_ARCH_SHADOW_MAP to support arch specific mapping
LoongArch: Add KFENCE (Kernel Electric-Fence) support
LoongArch: Get partial stack information when providing regs parameter
LoongArch: mm: Add page table mapped mode support for virt_to_page()
kfence: Defer the assignment of the local variable addr
LoongArch: Allow building with kcov coverage
LoongArch: Provide kaslr_offset() to get kernel offset
LoongArch: Add basic KGDB & KDB support
LoongArch: Add Loongson Binary Translation (LBT) extension support
raid6: Add LoongArch SIMD recovery implementation
raid6: Add LoongArch SIMD syndrome calculation
LoongArch: Add SIMD-optimized XOR routines
LoongArch: Allow usage of LSX/LASX in the kernel
LoongArch: Define symbol 'fault' as a local label in fpu.S
LoongArch: Adjust {copy, clear}_user exception handler behavior
LoongArch: Use static defined zero page rather than allocated
...

+3862 -461
+2 -2
Documentation/dev-tools/kasan.rst
··· 41 41 Architectures 42 42 ~~~~~~~~~~~~~ 43 43 44 - Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and 45 - xtensa, and the tag-based KASAN modes are supported only on arm64. 44 + Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa, 45 + and loongarch, and the tag-based KASAN modes are supported only on arm64. 46 46 47 47 Compilers 48 48 ~~~~~~~~~
+1 -1
Documentation/features/debug/KASAN/arch-support.txt
··· 13 13 | csky: | TODO | 14 14 | hexagon: | TODO | 15 15 | ia64: | TODO | 16 - | loongarch: | TODO | 16 + | loongarch: | ok | 17 17 | m68k: | TODO | 18 18 | microblaze: | TODO | 19 19 | mips: | TODO |
+1 -1
Documentation/features/debug/kcov/arch-support.txt
··· 13 13 | csky: | TODO | 14 14 | hexagon: | TODO | 15 15 | ia64: | TODO | 16 - | loongarch: | TODO | 16 + | loongarch: | ok | 17 17 | m68k: | TODO | 18 18 | microblaze: | TODO | 19 19 | mips: | ok |
+1 -1
Documentation/features/debug/kgdb/arch-support.txt
··· 13 13 | csky: | TODO | 14 14 | hexagon: | ok | 15 15 | ia64: | TODO | 16 - | loongarch: | TODO | 16 + | loongarch: | ok | 17 17 | m68k: | TODO | 18 18 | microblaze: | ok | 19 19 | mips: | ok |
+1 -1
Documentation/translations/zh_CN/dev-tools/kasan.rst
··· 42 42 体系架构 43 43 ~~~~~~~~ 44 44 45 - 在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN, 45 + 在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN, 46 46 而基于标签的KASAN模式只在arm64上支持。 47 47 48 48 编译器
+26
arch/loongarch/Kconfig
··· 8 8 select ACPI_PPTT if ACPI 9 9 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI 10 10 select ARCH_BINFMT_ELF_STATE 11 + select ARCH_DISABLE_KASAN_INLINE 11 12 select ARCH_ENABLE_MEMORY_HOTPLUG 12 13 select ARCH_ENABLE_MEMORY_HOTREMOVE 13 14 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI 14 15 select ARCH_HAS_CPU_FINALIZE_INIT 15 16 select ARCH_HAS_FORTIFY_SOURCE 17 + select ARCH_HAS_KCOV 16 18 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS 17 19 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 18 20 select ARCH_HAS_PTE_SPECIAL ··· 93 91 select HAVE_ARCH_AUDITSYSCALL 94 92 select HAVE_ARCH_JUMP_LABEL 95 93 select HAVE_ARCH_JUMP_LABEL_RELATIVE 94 + select HAVE_ARCH_KASAN 95 + select HAVE_ARCH_KFENCE 96 + select HAVE_ARCH_KGDB if PERF_EVENTS 96 97 select HAVE_ARCH_MMAP_RND_BITS if MMU 97 98 select HAVE_ARCH_SECCOMP_FILTER 98 99 select HAVE_ARCH_TRACEHOOK ··· 120 115 select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER 121 116 select HAVE_FUNCTION_GRAPH_TRACER 122 117 select HAVE_FUNCTION_TRACER 118 + select HAVE_GCC_PLUGINS 123 119 select HAVE_GENERIC_VDSO 124 120 select HAVE_HW_BREAKPOINT if PERF_EVENTS 125 121 select HAVE_IOREMAP_PROT ··· 259 253 260 254 config AS_HAS_LASX_EXTENSION 261 255 def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0) 256 + 257 + config AS_HAS_LBT_EXTENSION 258 + def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0) 262 259 263 260 menu "Kernel type and options" 264 261 ··· 543 534 544 535 If unsure, say Y. 545 536 537 + config CPU_HAS_LBT 538 + bool "Support for the Loongson Binary Translation Extension" 539 + depends on AS_HAS_LBT_EXTENSION 540 + help 541 + Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0 542 + to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop). 543 + Enabling this option allows the kernel to allocate and switch registers 544 + specific to LBT. 545 + 546 + If you want to use this feature, such as the Loongson Architecture 547 + Translator (LAT), say Y. 
548 + 546 549 config CPU_HAS_PREFETCH 547 550 bool 548 551 default y ··· 658 637 659 638 config ARCH_SUPPORTS_UPROBES 660 639 def_bool y 640 + 641 + config KASAN_SHADOW_OFFSET 642 + hex 643 + default 0x0 644 + depends on KASAN 661 645 662 646 menu "Power management options" 663 647
+3
arch/loongarch/Makefile
··· 84 84 endif 85 85 86 86 cflags-y += $(call cc-option, -mno-check-zero-division) 87 + 88 + ifndef CONFIG_KASAN 87 89 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset 90 + endif 88 91 89 92 load-y = 0x9000000000200000 90 93 bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
+70 -4
arch/loongarch/configs/loongson3_defconfig
··· 30 30 CONFIG_USER_NS=y 31 31 CONFIG_CHECKPOINT_RESTORE=y 32 32 CONFIG_SCHED_AUTOGROUP=y 33 - CONFIG_SYSFS_DEPRECATED=y 34 33 CONFIG_RELAY=y 35 34 CONFIG_BLK_DEV_INITRD=y 36 35 CONFIG_EXPERT=y ··· 46 47 CONFIG_HOTPLUG_CPU=y 47 48 CONFIG_NR_CPUS=64 48 49 CONFIG_NUMA=y 50 + CONFIG_CPU_HAS_FPU=y 51 + CONFIG_CPU_HAS_LSX=y 52 + CONFIG_CPU_HAS_LASX=y 49 53 CONFIG_KEXEC=y 50 54 CONFIG_CRASH_DUMP=y 55 + CONFIG_RANDOMIZE_BASE=y 51 56 CONFIG_SUSPEND=y 52 57 CONFIG_HIBERNATION=y 53 58 CONFIG_ACPI=y ··· 66 63 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y 67 64 CONFIG_EFI_CAPSULE_LOADER=m 68 65 CONFIG_EFI_TEST=m 66 + CONFIG_JUMP_LABEL=y 69 67 CONFIG_MODULES=y 70 68 CONFIG_MODULE_FORCE_LOAD=y 71 69 CONFIG_MODULE_UNLOAD=y ··· 112 108 CONFIG_IP_PNP_RARP=y 113 109 CONFIG_NET_IPIP=m 114 110 CONFIG_NET_IPGRE_DEMUX=m 111 + CONFIG_NET_IPGRE=m 112 + CONFIG_NET_IPGRE_BROADCAST=y 115 113 CONFIG_IP_MROUTE=y 114 + CONFIG_IP_MROUTE_MULTIPLE_TABLES=y 115 + CONFIG_IP_PIMSM_V1=y 116 + CONFIG_IP_PIMSM_V2=y 116 117 CONFIG_INET_ESP=m 117 118 CONFIG_INET_UDP_DIAG=y 118 119 CONFIG_TCP_CONG_ADVANCED=y ··· 146 137 CONFIG_NFT_REDIR=m 147 138 CONFIG_NFT_NAT=m 148 139 CONFIG_NFT_TUNNEL=m 149 - CONFIG_NFT_OBJREF=m 150 140 CONFIG_NFT_QUEUE=m 151 141 CONFIG_NFT_QUOTA=m 152 142 CONFIG_NFT_REJECT=m ··· 216 208 CONFIG_IP_VS_IPV6=y 217 209 CONFIG_IP_VS_PROTO_TCP=y 218 210 CONFIG_IP_VS_PROTO_UDP=y 211 + CONFIG_IP_VS_PROTO_ESP=y 212 + CONFIG_IP_VS_PROTO_AH=y 213 + CONFIG_IP_VS_PROTO_SCTP=y 219 214 CONFIG_IP_VS_RR=m 215 + CONFIG_IP_VS_WRR=m 220 216 CONFIG_IP_VS_NFCT=y 221 217 CONFIG_NF_TABLES_IPV4=y 222 218 CONFIG_NFT_DUP_IPV4=m ··· 239 227 CONFIG_IP_NF_TARGET_NETMAP=m 240 228 CONFIG_IP_NF_TARGET_REDIRECT=m 241 229 CONFIG_IP_NF_MANGLE=m 242 - CONFIG_IP_NF_TARGET_CLUSTERIP=m 243 230 CONFIG_IP_NF_TARGET_ECN=m 244 231 CONFIG_IP_NF_TARGET_TTL=m 245 232 CONFIG_IP_NF_RAW=m ··· 374 363 CONFIG_MTD_CFI_STAA=m 375 364 CONFIG_MTD_RAM=m 376 365 CONFIG_MTD_ROM=m 366 + CONFIG_MTD_UBI=m 367 + CONFIG_MTD_UBI_BLOCK=y 377 368 
CONFIG_PARPORT=y 378 369 CONFIG_PARPORT_PC=y 379 370 CONFIG_PARPORT_SERIAL=y ··· 383 370 CONFIG_ZRAM=m 384 371 CONFIG_ZRAM_DEF_COMP_ZSTD=y 385 372 CONFIG_BLK_DEV_LOOP=y 373 + CONFIG_BLK_DEV_DRBD=m 386 374 CONFIG_BLK_DEV_NBD=m 387 375 CONFIG_BLK_DEV_RAM=y 388 376 CONFIG_BLK_DEV_RAM_SIZE=8192 ··· 530 516 # CONFIG_NET_VENDOR_TEHUTI is not set 531 517 # CONFIG_NET_VENDOR_TI is not set 532 518 # CONFIG_NET_VENDOR_VIA is not set 519 + CONFIG_NGBE=y 520 + CONFIG_TXGBE=y 533 521 # CONFIG_NET_VENDOR_WIZNET is not set 534 522 # CONFIG_NET_VENDOR_XILINX is not set 535 523 CONFIG_PPP=m ··· 618 602 CONFIG_I2C_CHARDEV=y 619 603 CONFIG_I2C_PIIX4=y 620 604 CONFIG_I2C_GPIO=y 605 + CONFIG_I2C_LS2X=y 621 606 CONFIG_SPI=y 607 + CONFIG_SPI_LOONGSON_PCI=m 608 + CONFIG_SPI_LOONGSON_PLATFORM=m 609 + CONFIG_PINCTRL=y 610 + CONFIG_PINCTRL_LOONGSON2=y 622 611 CONFIG_GPIO_SYSFS=y 623 612 CONFIG_GPIO_LOONGSON=y 613 + CONFIG_GPIO_LOONGSON_64BIT=y 624 614 CONFIG_POWER_RESET=y 625 615 CONFIG_POWER_RESET_RESTART=y 626 616 CONFIG_POWER_RESET_SYSCON=y ··· 636 614 CONFIG_SENSORS_LM93=m 637 615 CONFIG_SENSORS_W83795=m 638 616 CONFIG_SENSORS_W83627HF=m 617 + CONFIG_LOONGSON2_THERMAL=m 639 618 CONFIG_RC_CORE=m 640 619 CONFIG_LIRC=y 641 620 CONFIG_RC_DECODERS=y ··· 666 643 CONFIG_DRM_AST=y 667 644 CONFIG_DRM_QXL=m 668 645 CONFIG_DRM_VIRTIO_GPU=m 646 + CONFIG_DRM_LOONGSON=y 669 647 CONFIG_FB=y 670 648 CONFIG_FB_EFI=y 671 649 CONFIG_FB_RADEON=y ··· 736 712 CONFIG_INFINIBAND=m 737 713 CONFIG_RTC_CLASS=y 738 714 CONFIG_RTC_DRV_EFI=y 715 + CONFIG_RTC_DRV_LOONGSON=y 739 716 CONFIG_DMADEVICES=y 740 717 CONFIG_UIO=m 741 718 CONFIG_UIO_PDRV_GENIRQ=m ··· 770 745 CONFIG_COMEDI_NI_PCIDIO=m 771 746 CONFIG_COMEDI_NI_PCIMIO=m 772 747 CONFIG_STAGING=y 773 - CONFIG_R8188EU=m 748 + CONFIG_COMMON_CLK_LOONGSON2=y 749 + CONFIG_LOONGSON2_GUTS=y 750 + CONFIG_LOONGSON2_PM=y 774 751 CONFIG_PM_DEVFREQ=y 775 752 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y 776 753 CONFIG_DEVFREQ_GOV_PERFORMANCE=y ··· 786 759 CONFIG_EXT3_FS=y 787 760 
CONFIG_EXT3_FS_POSIX_ACL=y 788 761 CONFIG_EXT3_FS_SECURITY=y 762 + CONFIG_JFS_FS=m 763 + CONFIG_JFS_POSIX_ACL=y 764 + CONFIG_JFS_SECURITY=y 789 765 CONFIG_XFS_FS=y 790 766 CONFIG_XFS_QUOTA=y 791 767 CONFIG_XFS_POSIX_ACL=y 768 + CONFIG_GFS2_FS=m 769 + CONFIG_GFS2_FS_LOCKING_DLM=y 770 + CONFIG_OCFS2_FS=m 792 771 CONFIG_BTRFS_FS=y 772 + CONFIG_BTRFS_FS_POSIX_ACL=y 793 773 CONFIG_FANOTIFY=y 794 774 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y 795 775 CONFIG_QUOTA=y ··· 805 771 CONFIG_QFMT_V2=m 806 772 CONFIG_AUTOFS_FS=y 807 773 CONFIG_FUSE_FS=m 774 + CONFIG_CUSE=m 775 + CONFIG_VIRTIO_FS=m 808 776 CONFIG_OVERLAY_FS=y 809 777 CONFIG_OVERLAY_FS_INDEX=y 810 778 CONFIG_OVERLAY_FS_XINO_AUTO=y 811 779 CONFIG_OVERLAY_FS_METACOPY=y 812 780 CONFIG_FSCACHE=y 781 + CONFIG_CACHEFILES=m 813 782 CONFIG_ISO9660_FS=y 814 783 CONFIG_JOLIET=y 815 784 CONFIG_ZISOFS=y ··· 821 784 CONFIG_VFAT_FS=m 822 785 CONFIG_FAT_DEFAULT_CODEPAGE=936 823 786 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" 787 + CONFIG_EXFAT_FS=m 788 + CONFIG_NTFS3_FS=m 789 + CONFIG_NTFS3_64BIT_CLUSTER=y 790 + CONFIG_NTFS3_LZX_XPRESS=y 824 791 CONFIG_PROC_KCORE=y 825 792 CONFIG_TMPFS=y 826 793 CONFIG_TMPFS_POSIX_ACL=y 827 794 CONFIG_HUGETLBFS=y 828 795 CONFIG_CONFIGFS_FS=y 796 + CONFIG_ORANGEFS_FS=m 797 + CONFIG_ECRYPT_FS=m 798 + CONFIG_ECRYPT_FS_MESSAGING=y 829 799 CONFIG_HFS_FS=m 830 800 CONFIG_HFSPLUS_FS=m 801 + CONFIG_UBIFS_FS=m 802 + CONFIG_UBIFS_FS_ADVANCED_COMPR=y 831 803 CONFIG_CRAMFS=m 832 804 CONFIG_SQUASHFS=y 833 805 CONFIG_SQUASHFS_XATTR=y 834 806 CONFIG_SQUASHFS_LZ4=y 835 807 CONFIG_SQUASHFS_LZO=y 836 808 CONFIG_SQUASHFS_XZ=y 809 + CONFIG_MINIX_FS=m 810 + CONFIG_ROMFS_FS=m 811 + CONFIG_PSTORE=m 812 + CONFIG_PSTORE_LZO_COMPRESS=m 813 + CONFIG_PSTORE_LZ4_COMPRESS=m 814 + CONFIG_PSTORE_LZ4HC_COMPRESS=m 815 + CONFIG_PSTORE_842_COMPRESS=y 816 + CONFIG_PSTORE_ZSTD_COMPRESS=y 817 + CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y 818 + CONFIG_SYSV_FS=m 819 + CONFIG_UFS_FS=m 820 + CONFIG_EROFS_FS=m 821 + CONFIG_EROFS_FS_ZIP_LZMA=y 822 + 
CONFIG_EROFS_FS_PCPU_KTHREAD=y 837 823 CONFIG_NFS_FS=y 838 824 CONFIG_NFS_V3_ACL=y 839 825 CONFIG_NFS_V4=y ··· 867 807 CONFIG_NFSD_V3_ACL=y 868 808 CONFIG_NFSD_V4=y 869 809 CONFIG_NFSD_BLOCKLAYOUT=y 810 + CONFIG_CEPH_FS=m 811 + CONFIG_CEPH_FSCACHE=y 812 + CONFIG_CEPH_FS_POSIX_ACL=y 813 + CONFIG_CEPH_FS_SECURITY_LABEL=y 870 814 CONFIG_CIFS=m 871 815 # CONFIG_CIFS_DEBUG is not set 872 816 CONFIG_9P_FS=y ··· 878 814 CONFIG_NLS_CODEPAGE_936=y 879 815 CONFIG_NLS_ASCII=y 880 816 CONFIG_NLS_UTF8=y 817 + CONFIG_DLM=m 881 818 CONFIG_KEY_DH_OPERATIONS=y 882 819 CONFIG_SECURITY=y 883 820 CONFIG_SECURITY_SELINUX=y ··· 912 847 CONFIG_CRYPTO_USER_API_SKCIPHER=m 913 848 CONFIG_CRYPTO_USER_API_RNG=m 914 849 CONFIG_CRYPTO_USER_API_AEAD=m 850 + CONFIG_CRYPTO_CRC32_LOONGARCH=m 915 851 CONFIG_CRYPTO_DEV_VIRTIO=m 916 852 CONFIG_PRINTK_TIME=y 917 853 CONFIG_STRIP_ASM_SYMS=y
+1
arch/loongarch/include/asm/asm-prototypes.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #include <linux/uaccess.h> 3 3 #include <asm/fpu.h> 4 + #include <asm/lbt.h> 4 5 #include <asm/mmu_context.h> 5 6 #include <asm/page.h> 6 7 #include <asm/ftrace.h>
+45 -113
arch/loongarch/include/asm/asmmacro.h
··· 10 10 #include <asm/fpregdef.h> 11 11 #include <asm/loongarch.h> 12 12 13 - .macro parse_v var val 14 - \var = \val 15 - .endm 16 - 17 - .macro parse_r var r 18 - \var = -1 19 - .ifc \r, $r0 20 - \var = 0 21 - .endif 22 - .ifc \r, $r1 23 - \var = 1 24 - .endif 25 - .ifc \r, $r2 26 - \var = 2 27 - .endif 28 - .ifc \r, $r3 29 - \var = 3 30 - .endif 31 - .ifc \r, $r4 32 - \var = 4 33 - .endif 34 - .ifc \r, $r5 35 - \var = 5 36 - .endif 37 - .ifc \r, $r6 38 - \var = 6 39 - .endif 40 - .ifc \r, $r7 41 - \var = 7 42 - .endif 43 - .ifc \r, $r8 44 - \var = 8 45 - .endif 46 - .ifc \r, $r9 47 - \var = 9 48 - .endif 49 - .ifc \r, $r10 50 - \var = 10 51 - .endif 52 - .ifc \r, $r11 53 - \var = 11 54 - .endif 55 - .ifc \r, $r12 56 - \var = 12 57 - .endif 58 - .ifc \r, $r13 59 - \var = 13 60 - .endif 61 - .ifc \r, $r14 62 - \var = 14 63 - .endif 64 - .ifc \r, $r15 65 - \var = 15 66 - .endif 67 - .ifc \r, $r16 68 - \var = 16 69 - .endif 70 - .ifc \r, $r17 71 - \var = 17 72 - .endif 73 - .ifc \r, $r18 74 - \var = 18 75 - .endif 76 - .ifc \r, $r19 77 - \var = 19 78 - .endif 79 - .ifc \r, $r20 80 - \var = 20 81 - .endif 82 - .ifc \r, $r21 83 - \var = 21 84 - .endif 85 - .ifc \r, $r22 86 - \var = 22 87 - .endif 88 - .ifc \r, $r23 89 - \var = 23 90 - .endif 91 - .ifc \r, $r24 92 - \var = 24 93 - .endif 94 - .ifc \r, $r25 95 - \var = 25 96 - .endif 97 - .ifc \r, $r26 98 - \var = 26 99 - .endif 100 - .ifc \r, $r27 101 - \var = 27 102 - .endif 103 - .ifc \r, $r28 104 - \var = 28 105 - .endif 106 - .ifc \r, $r29 107 - \var = 29 108 - .endif 109 - .ifc \r, $r30 110 - \var = 30 111 - .endif 112 - .ifc \r, $r31 113 - \var = 31 114 - .endif 115 - .iflt \var 116 - .error "Unable to parse register name \r" 117 - .endif 118 - .endm 119 - 120 13 .macro cpu_save_nonscratch thread 121 14 stptr.d s0, \thread, THREAD_REG23 122 15 stptr.d s1, \thread, THREAD_REG24 ··· 41 148 42 149 .macro fpu_save_csr thread tmp 43 150 movfcsr2gr \tmp, fcsr0 44 - stptr.w \tmp, \thread, THREAD_FCSR 151 + stptr.w 
\tmp, \thread, THREAD_FCSR 152 + #ifdef CONFIG_CPU_HAS_LBT 153 + /* TM bit is always 0 if LBT not supported */ 154 + andi \tmp, \tmp, FPU_CSR_TM 155 + beqz \tmp, 1f 156 + /* Save FTOP */ 157 + x86mftop \tmp 158 + stptr.w \tmp, \thread, THREAD_FTOP 159 + /* Turn off TM to ensure the order of FPR in memory independent of TM */ 160 + x86clrtm 161 + 1: 162 + #endif 45 163 .endm 46 164 47 - .macro fpu_restore_csr thread tmp 48 - ldptr.w \tmp, \thread, THREAD_FCSR 49 - movgr2fcsr fcsr0, \tmp 165 + .macro fpu_restore_csr thread tmp0 tmp1 166 + ldptr.w \tmp0, \thread, THREAD_FCSR 167 + movgr2fcsr fcsr0, \tmp0 168 + #ifdef CONFIG_CPU_HAS_LBT 169 + /* TM bit is always 0 if LBT not supported */ 170 + andi \tmp0, \tmp0, FPU_CSR_TM 171 + beqz \tmp0, 2f 172 + /* Restore FTOP */ 173 + ldptr.w \tmp0, \thread, THREAD_FTOP 174 + andi \tmp0, \tmp0, 0x7 175 + la.pcrel \tmp1, 1f 176 + alsl.d \tmp1, \tmp0, \tmp1, 3 177 + jr \tmp1 178 + 1: 179 + x86mttop 0 180 + b 2f 181 + x86mttop 1 182 + b 2f 183 + x86mttop 2 184 + b 2f 185 + x86mttop 3 186 + b 2f 187 + x86mttop 4 188 + b 2f 189 + x86mttop 5 190 + b 2f 191 + x86mttop 6 192 + b 2f 193 + x86mttop 7 194 + 2: 195 + #endif 50 196 .endm 51 197 52 198 .macro fpu_save_cc thread tmp0 tmp1 ··· 285 353 .macro lsx_restore_all thread tmp0 tmp1 286 354 lsx_restore_data \thread, \tmp0 287 355 fpu_restore_cc \thread, \tmp0, \tmp1 288 - fpu_restore_csr \thread, \tmp0 356 + fpu_restore_csr \thread, \tmp0, \tmp1 289 357 .endm 290 358 291 359 .macro lsx_save_upper vd base tmp off ··· 495 563 .macro lasx_restore_all thread tmp0 tmp1 496 564 lasx_restore_data \thread, \tmp0 497 565 fpu_restore_cc \thread, \tmp0, \tmp1 498 - fpu_restore_csr \thread, \tmp0 566 + fpu_restore_csr \thread, \tmp0, \tmp1 499 567 .endm 500 568 501 569 .macro lasx_save_upper xd base tmp off
+126
arch/loongarch/include/asm/kasan.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ASM_KASAN_H 3 + #define __ASM_KASAN_H 4 + 5 + #ifndef __ASSEMBLY__ 6 + 7 + #include <linux/linkage.h> 8 + #include <linux/mmzone.h> 9 + #include <asm/addrspace.h> 10 + #include <asm/io.h> 11 + #include <asm/pgtable.h> 12 + 13 + #define __HAVE_ARCH_SHADOW_MAP 14 + 15 + #define KASAN_SHADOW_SCALE_SHIFT 3 16 + #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) 17 + 18 + #define XRANGE_SHIFT (48) 19 + 20 + /* Valid address length */ 21 + #define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) 22 + /* Used for taking out the valid address */ 23 + #define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) 24 + /* One segment whole address space size */ 25 + #define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1) 26 + 27 + /* 64-bit segment value. */ 28 + #define XKPRANGE_UC_SEG (0x8000) 29 + #define XKPRANGE_CC_SEG (0x9000) 30 + #define XKVRANGE_VC_SEG (0xffff) 31 + 32 + /* Cached */ 33 + #define XKPRANGE_CC_START CACHE_BASE 34 + #define XKPRANGE_CC_SIZE XRANGE_SIZE 35 + #define XKPRANGE_CC_KASAN_OFFSET (0) 36 + #define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) 37 + #define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE) 38 + 39 + /* UnCached */ 40 + #define XKPRANGE_UC_START UNCACHE_BASE 41 + #define XKPRANGE_UC_SIZE XRANGE_SIZE 42 + #define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END 43 + #define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) 44 + #define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) 45 + 46 + /* VMALLOC (Cached or UnCached) */ 47 + #define XKVRANGE_VC_START MODULES_VADDR 48 + #define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE) 49 + #define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END 50 + #define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) 51 + #define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + 
XKVRANGE_VC_SHADOW_SIZE) 52 + 53 + /* KAsan shadow memory start right after vmalloc. */ 54 + #define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE) 55 + #define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) 56 + #define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) 57 + 58 + #define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) 59 + #define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) 60 + #define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) 61 + 62 + extern bool kasan_early_stage; 63 + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; 64 + 65 + #define kasan_arch_is_ready kasan_arch_is_ready 66 + static __always_inline bool kasan_arch_is_ready(void) 67 + { 68 + return !kasan_early_stage; 69 + } 70 + 71 + static inline void *kasan_mem_to_shadow(const void *addr) 72 + { 73 + if (!kasan_arch_is_ready()) { 74 + return (void *)(kasan_early_shadow_page); 75 + } else { 76 + unsigned long maddr = (unsigned long)addr; 77 + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; 78 + unsigned long offset = 0; 79 + 80 + maddr &= XRANGE_SHADOW_MASK; 81 + switch (xrange) { 82 + case XKPRANGE_CC_SEG: 83 + offset = XKPRANGE_CC_SHADOW_OFFSET; 84 + break; 85 + case XKPRANGE_UC_SEG: 86 + offset = XKPRANGE_UC_SHADOW_OFFSET; 87 + break; 88 + case XKVRANGE_VC_SEG: 89 + offset = XKVRANGE_VC_SHADOW_OFFSET; 90 + break; 91 + default: 92 + WARN_ON(1); 93 + return NULL; 94 + } 95 + 96 + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); 97 + } 98 + } 99 + 100 + static inline const void *kasan_shadow_to_mem(const void *shadow_addr) 101 + { 102 + unsigned long addr = (unsigned long)shadow_addr; 103 + 104 + if (unlikely(addr > KASAN_SHADOW_END) || 105 + unlikely(addr < KASAN_SHADOW_START)) { 106 + WARN_ON(1); 107 + return NULL; 108 + } 109 + 110 + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) 111 + return (void *)(((addr - 
XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); 112 + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) 113 + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); 114 + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) 115 + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); 116 + else { 117 + WARN_ON(1); 118 + return NULL; 119 + } 120 + } 121 + 122 + void kasan_init(void); 123 + asmlinkage void kasan_early_init(void); 124 + 125 + #endif 126 + #endif
+61
arch/loongarch/include/asm/kfence.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * KFENCE support for LoongArch. 4 + * 5 + * Author: Enze Li <lienze@kylinos.cn> 6 + * Copyright (C) 2022-2023 KylinSoft Corporation. 7 + */ 8 + 9 + #ifndef _ASM_LOONGARCH_KFENCE_H 10 + #define _ASM_LOONGARCH_KFENCE_H 11 + 12 + #include <linux/kfence.h> 13 + #include <asm/pgtable.h> 14 + #include <asm/tlb.h> 15 + 16 + static inline bool arch_kfence_init_pool(void) 17 + { 18 + int err; 19 + char *kfence_pool = __kfence_pool; 20 + struct vm_struct *area; 21 + 22 + area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP, 23 + KFENCE_AREA_START, KFENCE_AREA_END, 24 + __builtin_return_address(0)); 25 + if (!area) 26 + return false; 27 + 28 + __kfence_pool = (char *)area->addr; 29 + err = ioremap_page_range((unsigned long)__kfence_pool, 30 + (unsigned long)__kfence_pool + KFENCE_POOL_SIZE, 31 + virt_to_phys((void *)kfence_pool), PAGE_KERNEL); 32 + if (err) { 33 + free_vm_area(area); 34 + __kfence_pool = kfence_pool; 35 + return false; 36 + } 37 + 38 + return true; 39 + } 40 + 41 + /* Protect the given page and flush TLB. */ 42 + static inline bool kfence_protect_page(unsigned long addr, bool protect) 43 + { 44 + pte_t *pte = virt_to_kpte(addr); 45 + 46 + if (WARN_ON(!pte) || pte_none(*pte)) 47 + return false; 48 + 49 + if (protect) 50 + set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); 51 + else 52 + set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); 53 + 54 + preempt_disable(); 55 + local_flush_tlb_one(addr); 56 + preempt_enable(); 57 + 58 + return true; 59 + } 60 + 61 + #endif /* _ASM_LOONGARCH_KFENCE_H */
+97
arch/loongarch/include/asm/kgdb.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2023 Loongson Technology Corporation Limited 4 + */ 5 + 6 + #ifndef _ASM_LOONGARCH_KGDB_H 7 + #define _ASM_LOONGARCH_KGDB_H 8 + 9 + #define GDB_SIZEOF_REG sizeof(u64) 10 + 11 + /* gdb remote protocol expects the following register layout. */ 12 + 13 + /* 14 + * General purpose registers: 15 + * r0-r31: 64 bit 16 + * orig_a0: 64 bit 17 + * pc : 64 bit 18 + * csr_badvaddr: 64 bit 19 + */ 20 + #define DBG_PT_REGS_BASE 0 21 + #define DBG_PT_REGS_NUM 35 22 + #define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1) 23 + 24 + /* 25 + * Floating point registers: 26 + * f0-f31: 64 bit 27 + */ 28 + #define DBG_FPR_BASE (DBG_PT_REGS_END + 1) 29 + #define DBG_FPR_NUM 32 30 + #define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1) 31 + 32 + /* 33 + * Condition Flag registers: 34 + * fcc0-fcc7: 8 bit 35 + */ 36 + #define DBG_FCC_BASE (DBG_FPR_END + 1) 37 + #define DBG_FCC_NUM 8 38 + #define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1) 39 + 40 + /* 41 + * Floating-point Control and Status registers: 42 + * fcsr: 32 bit 43 + */ 44 + #define DBG_FCSR_NUM 1 45 + #define DBG_FCSR (DBG_FCC_END + 1) 46 + 47 + #define DBG_MAX_REG_NUM (DBG_FCSR + 1) 48 + 49 + /* 50 + * Size of I/O buffer for gdb packet. 51 + * It is sized to hold the contents of all registers. 52 + */ 53 + #define BUFMAX 2048 54 + 55 + /* 56 + * Number of bytes required for gdb_regs buffer. 57 + * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 byte. 58 + * GDB fails to connect for size beyond this with error 59 + * "'g' packet reply is too long" 60 + */ 61 + #define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4) 62 + 63 + #define BREAK_INSTR_SIZE 4 64 + #define CACHE_FLUSH_IS_SAFE 0 65 + 66 + /* Register numbers of various important registers.
*/ 67 + enum dbg_loongarch_regnum { 68 + DBG_LOONGARCH_ZERO = 0, 69 + DBG_LOONGARCH_RA, 70 + DBG_LOONGARCH_TP, 71 + DBG_LOONGARCH_SP, 72 + DBG_LOONGARCH_A0, 73 + DBG_LOONGARCH_FP = 22, 74 + DBG_LOONGARCH_S0, 75 + DBG_LOONGARCH_S1, 76 + DBG_LOONGARCH_S2, 77 + DBG_LOONGARCH_S3, 78 + DBG_LOONGARCH_S4, 79 + DBG_LOONGARCH_S5, 80 + DBG_LOONGARCH_S6, 81 + DBG_LOONGARCH_S7, 82 + DBG_LOONGARCH_S8, 83 + DBG_LOONGARCH_ORIG_A0, 84 + DBG_LOONGARCH_PC, 85 + DBG_LOONGARCH_BADV 86 + }; 87 + 88 + void kgdb_breakinst(void); 89 + void arch_kgdb_breakpoint(void); 90 + 91 + #ifdef CONFIG_KGDB 92 + bool kgdb_breakpoint_handler(struct pt_regs *regs); 93 + #else /* !CONFIG_KGDB */ 94 + static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; } 95 + #endif /* CONFIG_KGDB */ 96 + 97 + #endif /* _ASM_LOONGARCH_KGDB_H */
+109
arch/loongarch/include/asm/lbt.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Author: Qi Hu <huqi@loongson.cn> 4 + * Huacai Chen <chenhuacai@loongson.cn> 5 + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited 6 + */ 7 + #ifndef _ASM_LBT_H 8 + #define _ASM_LBT_H 9 + 10 + #include <asm/cpu.h> 11 + #include <asm/current.h> 12 + #include <asm/loongarch.h> 13 + #include <asm/processor.h> 14 + 15 + extern void _init_lbt(void); 16 + extern void _save_lbt(struct loongarch_lbt *); 17 + extern void _restore_lbt(struct loongarch_lbt *); 18 + 19 + static inline int is_lbt_enabled(void) 20 + { 21 + if (!cpu_has_lbt) 22 + return 0; 23 + 24 + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ? 25 + 1 : 0; 26 + } 27 + 28 + static inline int is_lbt_owner(void) 29 + { 30 + return test_thread_flag(TIF_USEDLBT); 31 + } 32 + 33 + #ifdef CONFIG_CPU_HAS_LBT 34 + 35 + static inline void enable_lbt(void) 36 + { 37 + if (cpu_has_lbt) 38 + csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); 39 + } 40 + 41 + static inline void disable_lbt(void) 42 + { 43 + if (cpu_has_lbt) 44 + csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); 45 + } 46 + 47 + static inline void __own_lbt(void) 48 + { 49 + enable_lbt(); 50 + set_thread_flag(TIF_USEDLBT); 51 + KSTK_EUEN(current) |= CSR_EUEN_LBTEN; 52 + } 53 + 54 + static inline void own_lbt_inatomic(int restore) 55 + { 56 + if (cpu_has_lbt && !is_lbt_owner()) { 57 + __own_lbt(); 58 + if (restore) 59 + _restore_lbt(&current->thread.lbt); 60 + } 61 + } 62 + 63 + static inline void own_lbt(int restore) 64 + { 65 + preempt_disable(); 66 + own_lbt_inatomic(restore); 67 + preempt_enable(); 68 + } 69 + 70 + static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) 71 + { 72 + if (cpu_has_lbt && is_lbt_owner()) { 73 + if (save) 74 + _save_lbt(&tsk->thread.lbt); 75 + 76 + disable_lbt(); 77 + clear_tsk_thread_flag(tsk, TIF_USEDLBT); 78 + } 79 + KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN); 80 + } 81 + 82 + static inline void lose_lbt(int save) 83 + 
{ 84 + preempt_disable(); 85 + lose_lbt_inatomic(save, current); 86 + preempt_enable(); 87 + } 88 + 89 + static inline void init_lbt(void) 90 + { 91 + __own_lbt(); 92 + _init_lbt(); 93 + } 94 + #else 95 + static inline void own_lbt_inatomic(int restore) {} 96 + static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {} 97 + static inline void init_lbt(void) {} 98 + static inline void lose_lbt(int save) {} 99 + #endif 100 + 101 + static inline int thread_lbt_context_live(void) 102 + { 103 + if (!cpu_has_lbt) 104 + return 0; 105 + 106 + return test_thread_flag(TIF_LBT_CTX_LIVE); 107 + } 108 + 109 + #endif /* _ASM_LBT_H */
+4 -43
arch/loongarch/include/asm/loongarch.h
··· 12 12 #ifndef __ASSEMBLY__ 13 13 #include <larchintrin.h> 14 14 15 - /* 16 - * parse_r var, r - Helper assembler macro for parsing register names. 17 - * 18 - * This converts the register name in $n form provided in \r to the 19 - * corresponding register number, which is assigned to the variable \var. It is 20 - * needed to allow explicit encoding of instructions in inline assembly where 21 - * registers are chosen by the compiler in $n form, allowing us to avoid using 22 - * fixed register numbers. 23 - * 24 - * It also allows newer instructions (not implemented by the assembler) to be 25 - * transparently implemented using assembler macros, instead of needing separate 26 - * cases depending on toolchain support. 27 - * 28 - * Simple usage example: 29 - * __asm__ __volatile__("parse_r addr, %0\n\t" 30 - * "#invtlb op, 0, %0\n\t" 31 - * ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)" 32 - * : "=r" (status); 33 - */ 34 - 35 - /* Match an individual register number and assign to \var */ 36 - #define _IFC_REG(n) \ 37 - ".ifc \\r, $r" #n "\n\t" \ 38 - "\\var = " #n "\n\t" \ 39 - ".endif\n\t" 40 - 41 - __asm__(".macro parse_r var r\n\t" 42 - "\\var = -1\n\t" 43 - _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) 44 - _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) 45 - _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) 46 - _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) 47 - _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) 48 - _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) 49 - _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) 50 - _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) 51 - ".iflt \\var\n\t" 52 - ".error \"Unable to parse register name \\r\"\n\t" 53 - ".endif\n\t" 54 - ".endm"); 55 - 56 - #undef _IFC_REG 57 - 58 15 /* CPUCFG */ 59 16 #define read_cpucfg(reg) __cpucfg(reg) 60 17 ··· 1409 1452 #define FPU_CSR_RZ 0x100 /* towards zero */ 1410 1453 #define FPU_CSR_RU 0x200 /* towards +Infinity */ 1411 1454 #define FPU_CSR_RD 0x300 /* 
towards -Infinity */ 1455 + 1456 + /* Bit 6 of FPU Status Register specifies the LBT TOP simulation mode */ 1457 + #define FPU_CSR_TM_SHIFT 0x6 1458 + #define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT) 1412 1459 1413 1460 #define read_fcsr(source) \ 1414 1461 ({ \
-2
arch/loongarch/include/asm/mmzone.h
··· 13 13 14 14 #define NODE_DATA(nid) (node_data[(nid)]) 15 15 16 - extern void setup_zero_pages(void); 17 - 18 16 #endif /* _ASM_MMZONE_H_ */
+6 -1
arch/loongarch/include/asm/page.h
··· 84 84 #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) 85 85 86 86 #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) 87 - #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) 87 + 88 + #define virt_to_page(kaddr) \ 89 + ({ \ 90 + (likely((unsigned long)kaddr < vm_map_base)) ? \ 91 + dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ 92 + }) 88 93 89 94 extern int __virt_addr_valid(volatile void *kaddr); 90 95 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
+1
arch/loongarch/include/asm/pgalloc.h
··· 94 94 95 95 #endif /* __PAGETABLE_PUD_FOLDED */ 96 96 97 + extern pte_t * __init populate_kernel_pte(unsigned long addr); 97 98 #endif /* _ASM_PGALLOC_H */
+25 -6
arch/loongarch/include/asm/pgtable.h
··· 70 70 * for zero-mapped memory areas etc.. 71 71 */ 72 72 73 - extern unsigned long empty_zero_page; 74 - extern unsigned long zero_page_mask; 73 + extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; 75 74 76 - #define ZERO_PAGE(vaddr) \ 77 - (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) 78 - #define __HAVE_COLOR_ZERO_PAGE 75 + #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) 79 76 80 77 /* 81 78 * TLB refill handlers may also map the vmalloc area into xkvrange. ··· 82 85 #define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) 83 86 #define MODULES_END (MODULES_VADDR + SZ_256M) 84 87 88 + #ifdef CONFIG_KFENCE 89 + #define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE) 90 + #else 91 + #define KFENCE_AREA_SIZE 0 92 + #endif 93 + 85 94 #define VMALLOC_START MODULES_END 95 + 96 + #ifndef CONFIG_KASAN 86 97 #define VMALLOC_END \ 87 98 (vm_map_base + \ 88 - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) 99 + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) 100 + #else 101 + #define VMALLOC_END \ 102 + (vm_map_base + \ 103 + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) 104 + #endif 89 105 90 106 #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) 91 107 #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) 108 + 109 + #define KFENCE_AREA_START (VMEMMAP_END + 1) 110 + #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) 92 111 93 112 #define pte_ERROR(e) \ 94 113 pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) ··· 363 350 extern pgd_t swapper_pg_dir[]; 364 351 extern pgd_t invalid_pg_dir[]; 365 352 353 + struct page 
*dmw_virt_to_page(unsigned long kaddr); 354 + struct page *tlb_virt_to_page(unsigned long kaddr); 355 + 366 356 /* 367 357 * The following only work if pte_present() is true. 368 358 * Undefined behaviour if not.. ··· 611 595 return (pmd_val(pmd) & _PAGE_PROTNONE); 612 596 } 613 597 #endif /* CONFIG_NUMA_BALANCING */ 598 + 599 + #define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0) 600 + #define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0) 614 601 615 602 /* 616 603 * We provide our own get_unmapped area to cope with the virtual aliasing
+15 -11
arch/loongarch/include/asm/processor.h
··· 80 80 BUILD_FPR_ACCESS(64) 81 81 82 82 struct loongarch_fpu { 83 - unsigned int fcsr; 84 83 uint64_t fcc; /* 8x8 */ 84 + uint32_t fcsr; 85 + uint32_t ftop; 85 86 union fpureg fpr[NUM_FPU_REGS]; 87 + }; 88 + 89 + struct loongarch_lbt { 90 + /* Scratch registers */ 91 + unsigned long scr0; 92 + unsigned long scr1; 93 + unsigned long scr2; 94 + unsigned long scr3; 95 + /* Eflags register */ 96 + unsigned long eflags; 86 97 }; 87 98 88 99 #define INIT_CPUMASK { \ ··· 124 113 unsigned long csr_ecfg; 125 114 unsigned long csr_badvaddr; /* Last user fault */ 126 115 127 - /* Scratch registers */ 128 - unsigned long scr0; 129 - unsigned long scr1; 130 - unsigned long scr2; 131 - unsigned long scr3; 132 - 133 - /* Eflags register */ 134 - unsigned long eflags; 135 - 136 116 /* Other stuff associated with the thread. */ 137 117 unsigned long trap_nr; 138 118 unsigned long error_code; ··· 135 133 * context because they are conditionally copied at fork(). 136 134 */ 137 135 struct loongarch_fpu fpu FPU_ALIGN; 136 + struct loongarch_lbt lbt; /* Also conditionally copied */ 138 137 139 138 /* Hardware breakpoints pinned to this task. */ 140 139 struct perf_event *hbp_break[LOONGARCH_MAX_BRP]; ··· 177 174 * FPU & vector registers \ 178 175 */ \ 179 176 .fpu = { \ 180 - .fcsr = 0, \ 181 177 .fcc = 0, \ 178 + .fcsr = 0, \ 179 + .ftop = 0, \ 182 180 .fpr = {{{0,},},}, \ 183 181 }, \ 184 182 .hbp_break = {0}, \
+7 -1
arch/loongarch/include/asm/setup.h
··· 7 7 #define _LOONGARCH_SETUP_H 8 8 9 9 #include <linux/types.h> 10 + #include <asm/sections.h> 10 11 #include <uapi/asm/setup.h> 11 12 12 13 #define VECSIZE 0x200 ··· 34 33 extern long __rela_dyn_begin; 35 34 extern long __rela_dyn_end; 36 35 37 - extern void * __init relocate_kernel(void); 36 + extern unsigned long __init relocate_kernel(void); 38 37 39 38 #endif 39 + 40 + static inline unsigned long kaslr_offset(void) 41 + { 42 + return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS; 43 + } 40 44 41 45 #endif /* __SETUP_H */
+4
arch/loongarch/include/asm/stackframe.h
··· 158 158 cfi_st u0, PT_R21, \docfi 159 159 csrrd u0, PERCPU_BASE_KS 160 160 9: 161 + #ifdef CONFIG_KGDB 162 + li.w t0, CSR_CRMD_WE 163 + csrxchg t0, t0, LOONGARCH_CSR_CRMD 164 + #endif 161 165 .endm 162 166 163 167 .macro SAVE_ALL docfi=0
+20
arch/loongarch/include/asm/string.h
··· 7 7 8 8 #define __HAVE_ARCH_MEMSET 9 9 extern void *memset(void *__s, int __c, size_t __count); 10 + extern void *__memset(void *__s, int __c, size_t __count); 10 11 11 12 #define __HAVE_ARCH_MEMCPY 12 13 extern void *memcpy(void *__to, __const__ void *__from, size_t __n); 14 + extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); 13 15 14 16 #define __HAVE_ARCH_MEMMOVE 15 17 extern void *memmove(void *__dest, __const__ void *__src, size_t __n); 18 + extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); 19 + 20 + #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) 21 + 22 + /* 23 + * For files that are not instrumented (e.g. mm/slub.c) we 24 + * should use the non-instrumented versions of the mem* functions. 25 + */ 26 + 27 + #define memset(s, c, n) __memset(s, c, n) 28 + #define memcpy(dst, src, len) __memcpy(dst, src, len) 29 + #define memmove(dst, src, len) __memmove(dst, src, len) 30 + 31 + #ifndef __NO_FORTIFY 32 + #define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ 33 + #endif 34 + 35 + #endif 16 36 17 37 #endif /* _ASM_STRING_H */
+2
arch/loongarch/include/asm/switch_to.h
··· 7 7 8 8 #include <asm/cpu-features.h> 9 9 #include <asm/fpu.h> 10 + #include <asm/lbt.h> 10 11 11 12 struct task_struct; 12 13 ··· 35 34 #define switch_to(prev, next, last) \ 36 35 do { \ 37 36 lose_fpu_inatomic(1, prev); \ 37 + lose_lbt_inatomic(1, prev); \ 38 38 hw_breakpoint_thread_switch(next); \ 39 39 (last) = __switch_to(prev, next, task_thread_info(next), \ 40 40 __builtin_return_address(0), __builtin_frame_address(0)); \
+4
arch/loongarch/include/asm/thread_info.h
··· 84 84 #define TIF_SINGLESTEP 16 /* Single Step */ 85 85 #define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */ 86 86 #define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ 87 + #define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ 88 + #define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ 87 89 88 90 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) 89 91 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) ··· 103 101 #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) 104 102 #define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE) 105 103 #define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE) 104 + #define _TIF_USEDLBT (1<<TIF_USEDLBT) 105 + #define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE) 106 106 107 107 #endif /* __KERNEL__ */ 108 108 #endif /* _ASM_THREAD_INFO_H */
+68
arch/loongarch/include/asm/xor.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 4 + */ 5 + #ifndef _ASM_LOONGARCH_XOR_H 6 + #define _ASM_LOONGARCH_XOR_H 7 + 8 + #include <asm/cpu-features.h> 9 + #include <asm/xor_simd.h> 10 + 11 + #ifdef CONFIG_CPU_HAS_LSX 12 + static struct xor_block_template xor_block_lsx = { 13 + .name = "lsx", 14 + .do_2 = xor_lsx_2, 15 + .do_3 = xor_lsx_3, 16 + .do_4 = xor_lsx_4, 17 + .do_5 = xor_lsx_5, 18 + }; 19 + 20 + #define XOR_SPEED_LSX() \ 21 + do { \ 22 + if (cpu_has_lsx) \ 23 + xor_speed(&xor_block_lsx); \ 24 + } while (0) 25 + #else /* CONFIG_CPU_HAS_LSX */ 26 + #define XOR_SPEED_LSX() 27 + #endif /* CONFIG_CPU_HAS_LSX */ 28 + 29 + #ifdef CONFIG_CPU_HAS_LASX 30 + static struct xor_block_template xor_block_lasx = { 31 + .name = "lasx", 32 + .do_2 = xor_lasx_2, 33 + .do_3 = xor_lasx_3, 34 + .do_4 = xor_lasx_4, 35 + .do_5 = xor_lasx_5, 36 + }; 37 + 38 + #define XOR_SPEED_LASX() \ 39 + do { \ 40 + if (cpu_has_lasx) \ 41 + xor_speed(&xor_block_lasx); \ 42 + } while (0) 43 + #else /* CONFIG_CPU_HAS_LASX */ 44 + #define XOR_SPEED_LASX() 45 + #endif /* CONFIG_CPU_HAS_LASX */ 46 + 47 + /* 48 + * For grins, also test the generic routines. 49 + * 50 + * More importantly: it cannot be ruled out at this point of time, that some 51 + * future (maybe reduced) models could run the vector algorithms slower than 52 + * the scalar ones, maybe for errata or micro-op reasons. It may be 53 + * appropriate to revisit this after one or two more uarch generations. 54 + */ 55 + #include <asm-generic/xor.h> 56 + 57 + #undef XOR_TRY_TEMPLATES 58 + #define XOR_TRY_TEMPLATES \ 59 + do { \ 60 + xor_speed(&xor_block_8regs); \ 61 + xor_speed(&xor_block_8regs_p); \ 62 + xor_speed(&xor_block_32regs); \ 63 + xor_speed(&xor_block_32regs_p); \ 64 + XOR_SPEED_LSX(); \ 65 + XOR_SPEED_LASX(); \ 66 + } while (0) 67 + 68 + #endif /* _ASM_LOONGARCH_XOR_H */
+34
arch/loongarch/include/asm/xor_simd.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 4 + */ 5 + #ifndef _ASM_LOONGARCH_XOR_SIMD_H 6 + #define _ASM_LOONGARCH_XOR_SIMD_H 7 + 8 + #ifdef CONFIG_CPU_HAS_LSX 9 + void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, 10 + const unsigned long * __restrict p2); 11 + void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, 12 + const unsigned long * __restrict p2, const unsigned long * __restrict p3); 13 + void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, 14 + const unsigned long * __restrict p2, const unsigned long * __restrict p3, 15 + const unsigned long * __restrict p4); 16 + void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, 17 + const unsigned long * __restrict p2, const unsigned long * __restrict p3, 18 + const unsigned long * __restrict p4, const unsigned long * __restrict p5); 19 + #endif /* CONFIG_CPU_HAS_LSX */ 20 + 21 + #ifdef CONFIG_CPU_HAS_LASX 22 + void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, 23 + const unsigned long * __restrict p2); 24 + void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, 25 + const unsigned long * __restrict p2, const unsigned long * __restrict p3); 26 + void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, 27 + const unsigned long * __restrict p2, const unsigned long * __restrict p3, 28 + const unsigned long * __restrict p4); 29 + void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, 30 + const unsigned long * __restrict p2, const unsigned long * __restrict p3, 31 + const unsigned long * __restrict p4, const unsigned long * __restrict p5); 32 + #endif /* CONFIG_CPU_HAS_LASX */ 33 + 34 + #endif /* _ASM_LOONGARCH_XOR_SIMD_H */
+6
arch/loongarch/include/uapi/asm/ptrace.h
··· 56 56 uint64_t vregs[32*4]; 57 57 }; 58 58 59 + struct user_lbt_state { 60 + uint64_t scr[4]; 61 + uint32_t eflags; 62 + uint32_t ftop; 63 + }; 64 + 59 65 struct user_watch_state { 60 66 uint64_t dbg_info; 61 67 struct {
+10
arch/loongarch/include/uapi/asm/sigcontext.h
··· 59 59 __u32 fcsr; 60 60 }; 61 61 62 + /* LBT context */ 63 + #define LBT_CTX_MAGIC 0x42540001 64 + #define LBT_CTX_ALIGN 8 65 + struct lbt_context { 66 + __u64 regs[4]; 67 + __u32 eflags; 68 + __u32 ftop; 69 + }; 70 + 71 + 62 72 #endif /* _UAPI_ASM_SIGCONTEXT_H */
+9
arch/loongarch/kernel/Makefile
··· 15 15 16 16 obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o 17 17 18 + obj-$(CONFIG_CPU_HAS_LBT) += lbt.o 19 + 18 20 obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o 19 21 20 22 ifdef CONFIG_FUNCTION_TRACER ··· 33 31 CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) 34 32 CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) 35 33 endif 34 + 35 + KASAN_SANITIZE_efi.o := n 36 + KASAN_SANITIZE_cpu-probe.o := n 37 + KASAN_SANITIZE_traps.o := n 38 + KASAN_SANITIZE_smp.o := n 39 + KASAN_SANITIZE_vdso.o := n 36 40 37 41 obj-$(CONFIG_MODULES) += module.o module-sections.o 38 42 obj-$(CONFIG_STACKTRACE) += stacktrace.o ··· 62 54 obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o 63 55 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 64 56 57 + obj-$(CONFIG_KGDB) += kgdb.o 65 58 obj-$(CONFIG_KPROBES) += kprobes.o 66 59 obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o 67 60 obj-$(CONFIG_UPROBES) += uprobes.o
+11 -7
arch/loongarch/kernel/asm-offsets.c
··· 118 118 OFFSET(THREAD_CSRECFG, task_struct, 119 119 thread.csr_ecfg); 120 120 121 - OFFSET(THREAD_SCR0, task_struct, thread.scr0); 122 - OFFSET(THREAD_SCR1, task_struct, thread.scr1); 123 - OFFSET(THREAD_SCR2, task_struct, thread.scr2); 124 - OFFSET(THREAD_SCR3, task_struct, thread.scr3); 125 - 126 - OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); 127 - 128 121 OFFSET(THREAD_FPU, task_struct, thread.fpu); 129 122 130 123 OFFSET(THREAD_BVADDR, task_struct, \ ··· 165 172 166 173 OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); 167 174 OFFSET(THREAD_FCC, loongarch_fpu, fcc); 175 + OFFSET(THREAD_FTOP, loongarch_fpu, ftop); 176 + BLANK(); 177 + } 178 + 179 + void output_thread_lbt_defines(void) 180 + { 181 + OFFSET(THREAD_SCR0, loongarch_lbt, scr0); 182 + OFFSET(THREAD_SCR1, loongarch_lbt, scr1); 183 + OFFSET(THREAD_SCR2, loongarch_lbt, scr2); 184 + OFFSET(THREAD_SCR3, loongarch_lbt, scr3); 185 + OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags); 168 186 BLANK(); 169 187 } 170 188
+14
arch/loongarch/kernel/cpu-probe.c
··· 144 144 c->options |= LOONGARCH_CPU_LVZ; 145 145 elf_hwcap |= HWCAP_LOONGARCH_LVZ; 146 146 } 147 + #ifdef CONFIG_CPU_HAS_LBT 148 + if (config & CPUCFG2_X86BT) { 149 + c->options |= LOONGARCH_CPU_LBT_X86; 150 + elf_hwcap |= HWCAP_LOONGARCH_LBT_X86; 151 + } 152 + if (config & CPUCFG2_ARMBT) { 153 + c->options |= LOONGARCH_CPU_LBT_ARM; 154 + elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM; 155 + } 156 + if (config & CPUCFG2_MIPSBT) { 157 + c->options |= LOONGARCH_CPU_LBT_MIPS; 158 + elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS; 159 + } 160 + #endif 147 161 148 162 config = read_cpucfg(LOONGARCH_CPUCFG6); 149 163 if (config & CPUCFG6_PMP)
+5
arch/loongarch/kernel/entry.S
··· 58 58 59 59 SAVE_STATIC 60 60 61 + #ifdef CONFIG_KGDB 62 + li.w t1, CSR_CRMD_WE 63 + csrxchg t1, t1, LOONGARCH_CSR_CRMD 64 + #endif 65 + 61 66 move u0, t0 62 67 li.d tp, ~_THREAD_MASK 63 68 and tp, tp, sp
+10 -4
arch/loongarch/kernel/fpu.S
··· 22 22 23 23 .macro EX insn, reg, src, offs 24 24 .ex\@: \insn \reg, \src, \offs 25 - _asm_extable .ex\@, fault 25 + _asm_extable .ex\@, .L_fpu_fault 26 26 .endm 27 27 28 28 .macro sc_save_fp base ··· 138 138 .macro sc_save_fcsr base, tmp0 139 139 movfcsr2gr \tmp0, fcsr0 140 140 EX st.w \tmp0, \base, 0 141 + #if defined(CONFIG_CPU_HAS_LBT) 142 + /* TM bit is always 0 if LBT not supported */ 143 + andi \tmp0, \tmp0, FPU_CSR_TM 144 + beqz \tmp0, 1f 145 + x86clrtm 146 + 1: 147 + #endif 141 148 .endm 142 149 143 150 .macro sc_restore_fcsr base, tmp0 ··· 316 309 */ 317 310 SYM_FUNC_START(_restore_fp) 318 311 fpu_restore_double a0 t1 # clobbers t1 319 - fpu_restore_csr a0 t1 312 + fpu_restore_csr a0 t1 t2 320 313 fpu_restore_cc a0 t1 t2 # clobbers t1, t2 321 314 jr ra 322 315 SYM_FUNC_END(_restore_fp) ··· 521 514 jr ra 522 515 SYM_FUNC_END(_restore_lasx_context) 523 516 524 - SYM_FUNC_START(fault) 517 + .L_fpu_fault: 525 518 li.w a0, -EFAULT # failure 526 519 jr ra 527 - SYM_FUNC_END(fault)
+9 -4
arch/loongarch/kernel/head.S
··· 95 95 PTR_LI sp, (_THREAD_SIZE - PT_SIZE) 96 96 PTR_ADD sp, sp, tp 97 97 set_saved_sp sp, t0, t1 98 - #endif 99 98 100 - /* relocate_kernel() returns the new kernel entry point */ 101 - jr a0 102 - ASM_BUG() 99 + /* Jump to the new kernel: new_pc = current_pc + random_offset */ 100 + pcaddi t0, 0 101 + add.d t0, t0, a0 102 + jirl zero, t0, 0xc 103 + #endif /* CONFIG_RANDOMIZE_BASE */ 103 104 105 + #endif /* CONFIG_RELOCATABLE */ 106 + 107 + #ifdef CONFIG_KASAN 108 + bl kasan_early_init 104 109 #endif 105 110 106 111 bl start_kernel
+51 -4
arch/loongarch/kernel/kfpu.c
··· 8 8 #include <asm/fpu.h> 9 9 #include <asm/smp.h> 10 10 11 + static unsigned int euen_mask = CSR_EUEN_FPEN; 12 + 13 + /* 14 + * The critical section between kernel_fpu_begin() and kernel_fpu_end() 15 + * is non-reentrant. It is the caller's responsibility to avoid reentrance. 16 + * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example. 17 + */ 11 18 static DEFINE_PER_CPU(bool, in_kernel_fpu); 19 + static DEFINE_PER_CPU(unsigned int, euen_current); 12 20 13 21 void kernel_fpu_begin(void) 14 22 { 23 + unsigned int *euen_curr; 24 + 15 25 preempt_disable(); 16 26 17 27 WARN_ON(this_cpu_read(in_kernel_fpu)); 18 28 19 29 this_cpu_write(in_kernel_fpu, true); 30 + euen_curr = this_cpu_ptr(&euen_current); 20 31 21 - if (!is_fpu_owner()) 22 - enable_fpu(); 32 + *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN); 33 + 34 + #ifdef CONFIG_CPU_HAS_LASX 35 + if (*euen_curr & CSR_EUEN_LASXEN) 36 + _save_lasx(&current->thread.fpu); 23 37 else 38 + #endif 39 + #ifdef CONFIG_CPU_HAS_LSX 40 + if (*euen_curr & CSR_EUEN_LSXEN) 41 + _save_lsx(&current->thread.fpu); 42 + else 43 + #endif 44 + if (*euen_curr & CSR_EUEN_FPEN) 24 45 _save_fp(&current->thread.fpu); 25 46 26 47 write_fcsr(LOONGARCH_FCSR0, 0); ··· 50 29 51 30 void kernel_fpu_end(void) 52 31 { 32 + unsigned int *euen_curr; 33 + 53 34 WARN_ON(!this_cpu_read(in_kernel_fpu)); 54 35 55 - if (!is_fpu_owner()) 56 - disable_fpu(); 36 + euen_curr = this_cpu_ptr(&euen_current); 37 + 38 + #ifdef CONFIG_CPU_HAS_LASX 39 + if (*euen_curr & CSR_EUEN_LASXEN) 40 + _restore_lasx(&current->thread.fpu); 57 41 else 42 + #endif 43 + #ifdef CONFIG_CPU_HAS_LSX 44 + if (*euen_curr & CSR_EUEN_LSXEN) 45 + _restore_lsx(&current->thread.fpu); 46 + else 47 + #endif 48 + if (*euen_curr & CSR_EUEN_FPEN) 58 49 _restore_fp(&current->thread.fpu); 50 + 51 + *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN); 59 52 60 53 this_cpu_write(in_kernel_fpu, false); 61 54 62 55 preempt_enable(); 63 56 } 64 57 
EXPORT_SYMBOL_GPL(kernel_fpu_end); 58 + 59 + static int __init init_euen_mask(void) 60 + { 61 + if (cpu_has_lsx) 62 + euen_mask |= CSR_EUEN_LSXEN; 63 + 64 + if (cpu_has_lasx) 65 + euen_mask |= CSR_EUEN_LASXEN; 66 + 67 + return 0; 68 + } 69 + arch_initcall(init_euen_mask);
+727
arch/loongarch/kernel/kgdb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * LoongArch KGDB support 4 + * 5 + * Copyright (C) 2023 Loongson Technology Corporation Limited 6 + */ 7 + 8 + #include <linux/hw_breakpoint.h> 9 + #include <linux/kdebug.h> 10 + #include <linux/kgdb.h> 11 + #include <linux/processor.h> 12 + #include <linux/ptrace.h> 13 + #include <linux/sched.h> 14 + #include <linux/smp.h> 15 + 16 + #include <asm/cacheflush.h> 17 + #include <asm/fpu.h> 18 + #include <asm/hw_breakpoint.h> 19 + #include <asm/inst.h> 20 + #include <asm/irq_regs.h> 21 + #include <asm/ptrace.h> 22 + #include <asm/sigcontext.h> 23 + 24 + int kgdb_watch_activated; 25 + static unsigned int stepped_opcode; 26 + static unsigned long stepped_address; 27 + 28 + struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { 29 + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) }, 30 + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) }, 31 + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) }, 32 + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) }, 33 + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) }, 34 + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) }, 35 + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) }, 36 + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) }, 37 + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) }, 38 + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) }, 39 + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) }, 40 + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) }, 41 + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) }, 42 + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) }, 43 + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) }, 44 + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) }, 45 + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) }, 46 + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) }, 47 + 
{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) }, 48 + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) }, 49 + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) }, 50 + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) }, 51 + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) }, 52 + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) }, 53 + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) }, 54 + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) }, 55 + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) }, 56 + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) }, 57 + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) }, 58 + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) }, 59 + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) }, 60 + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) }, 61 + { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) }, 62 + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) }, 63 + { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) }, 64 + { "f0", GDB_SIZEOF_REG, 0 }, 65 + { "f1", GDB_SIZEOF_REG, 1 }, 66 + { "f2", GDB_SIZEOF_REG, 2 }, 67 + { "f3", GDB_SIZEOF_REG, 3 }, 68 + { "f4", GDB_SIZEOF_REG, 4 }, 69 + { "f5", GDB_SIZEOF_REG, 5 }, 70 + { "f6", GDB_SIZEOF_REG, 6 }, 71 + { "f7", GDB_SIZEOF_REG, 7 }, 72 + { "f8", GDB_SIZEOF_REG, 8 }, 73 + { "f9", GDB_SIZEOF_REG, 9 }, 74 + { "f10", GDB_SIZEOF_REG, 10 }, 75 + { "f11", GDB_SIZEOF_REG, 11 }, 76 + { "f12", GDB_SIZEOF_REG, 12 }, 77 + { "f13", GDB_SIZEOF_REG, 13 }, 78 + { "f14", GDB_SIZEOF_REG, 14 }, 79 + { "f15", GDB_SIZEOF_REG, 15 }, 80 + { "f16", GDB_SIZEOF_REG, 16 }, 81 + { "f17", GDB_SIZEOF_REG, 17 }, 82 + { "f18", GDB_SIZEOF_REG, 18 }, 83 + { "f19", GDB_SIZEOF_REG, 19 }, 84 + { "f20", GDB_SIZEOF_REG, 20 }, 85 + { "f21", GDB_SIZEOF_REG, 21 }, 86 + { "f22", GDB_SIZEOF_REG, 22 }, 87 + { "f23", GDB_SIZEOF_REG, 23 
}, 88 + { "f24", GDB_SIZEOF_REG, 24 }, 89 + { "f25", GDB_SIZEOF_REG, 25 }, 90 + { "f26", GDB_SIZEOF_REG, 26 }, 91 + { "f27", GDB_SIZEOF_REG, 27 }, 92 + { "f28", GDB_SIZEOF_REG, 28 }, 93 + { "f29", GDB_SIZEOF_REG, 29 }, 94 + { "f30", GDB_SIZEOF_REG, 30 }, 95 + { "f31", GDB_SIZEOF_REG, 31 }, 96 + { "fcc0", 1, 0 }, 97 + { "fcc1", 1, 1 }, 98 + { "fcc2", 1, 2 }, 99 + { "fcc3", 1, 3 }, 100 + { "fcc4", 1, 4 }, 101 + { "fcc5", 1, 5 }, 102 + { "fcc6", 1, 6 }, 103 + { "fcc7", 1, 7 }, 104 + { "fcsr", 4, 0 }, 105 + }; 106 + 107 + char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) 108 + { 109 + int reg_offset, reg_size; 110 + 111 + if (regno < 0 || regno >= DBG_MAX_REG_NUM) 112 + return NULL; 113 + 114 + reg_offset = dbg_reg_def[regno].offset; 115 + reg_size = dbg_reg_def[regno].size; 116 + 117 + if (reg_offset == -1) 118 + goto out; 119 + 120 + /* Handle general-purpose/orig_a0/pc/badv registers */ 121 + if (regno <= DBG_PT_REGS_END) { 122 + memcpy(mem, (void *)regs + reg_offset, reg_size); 123 + goto out; 124 + } 125 + 126 + if (!(regs->csr_euen & CSR_EUEN_FPEN)) 127 + goto out; 128 + 129 + save_fp(current); 130 + 131 + /* Handle FP registers */ 132 + switch (regno) { 133 + case DBG_FCSR: /* Process the fcsr */ 134 + memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size); 135 + break; 136 + case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */ 137 + memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size); 138 + break; 139 + case DBG_FPR_BASE ... 
DBG_FPR_END: /* Process the fpr */ 140 + memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size); 141 + break; 142 + default: 143 + break; 144 + } 145 + 146 + out: 147 + return dbg_reg_def[regno].name; 148 + } 149 + 150 + int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) 151 + { 152 + int reg_offset, reg_size; 153 + 154 + if (regno < 0 || regno >= DBG_MAX_REG_NUM) 155 + return -EINVAL; 156 + 157 + reg_offset = dbg_reg_def[regno].offset; 158 + reg_size = dbg_reg_def[regno].size; 159 + 160 + if (reg_offset == -1) 161 + return 0; 162 + 163 + /* Handle general-purpose/orig_a0/pc/badv registers */ 164 + if (regno <= DBG_PT_REGS_END) { 165 + memcpy((void *)regs + reg_offset, mem, reg_size); 166 + return 0; 167 + } 168 + 169 + if (!(regs->csr_euen & CSR_EUEN_FPEN)) 170 + return 0; 171 + 172 + /* Handle FP registers */ 173 + switch (regno) { 174 + case DBG_FCSR: /* Process the fcsr */ 175 + memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size); 176 + break; 177 + case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */ 178 + memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size); 179 + break; 180 + case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */ 181 + memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size); 182 + break; 183 + default: 184 + break; 185 + } 186 + 187 + restore_fp(current); 188 + 189 + return 0; 190 + } 191 + 192 + /* 193 + * Similar to regs_to_gdb_regs() except that process is sleeping and so 194 + * we may not be able to get all the info. 
195 + */ 196 + void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) 197 + { 198 + /* Initialize to zero */ 199 + memset((char *)gdb_regs, 0, NUMREGBYTES); 200 + 201 + gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01; 202 + gdb_regs[DBG_LOONGARCH_TP] = (long)p; 203 + gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03; 204 + 205 + /* S0 - S8 */ 206 + gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23; 207 + gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24; 208 + gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25; 209 + gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26; 210 + gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27; 211 + gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28; 212 + gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29; 213 + gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30; 214 + gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31; 215 + 216 + /* 217 + * PC uses the return address (RA), i.e. the moment after return from __switch_to() 218 + */ 219 + gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01; 220 + } 221 + 222 + void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) 223 + { 224 + regs->csr_era = pc; 225 + } 226 + 227 + void arch_kgdb_breakpoint(void) 228 + { 229 + __asm__ __volatile__ ( \ 230 + ".globl kgdb_breakinst\n\t" \ 231 + "nop\n" \ 232 + "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */ 233 + } 234 + 235 + /* 236 + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, 237 + * then try to fall into the debugger 238 + */ 239 + static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr) 240 + { 241 + struct die_args *args = (struct die_args *)ptr; 242 + struct pt_regs *regs = args->regs; 243 + 244 + /* Userspace events, ignore. 
*/ 245 + if (user_mode(regs)) 246 + return NOTIFY_DONE; 247 + 248 + if (!kgdb_io_module_registered) 249 + return NOTIFY_DONE; 250 + 251 + if (atomic_read(&kgdb_active) != -1) 252 + kgdb_nmicallback(smp_processor_id(), regs); 253 + 254 + if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) 255 + return NOTIFY_DONE; 256 + 257 + if (atomic_read(&kgdb_setting_breakpoint)) 258 + if (regs->csr_era == (unsigned long)&kgdb_breakinst) 259 + regs->csr_era += LOONGARCH_INSN_SIZE; 260 + 261 + return NOTIFY_STOP; 262 + } 263 + 264 + bool kgdb_breakpoint_handler(struct pt_regs *regs) 265 + { 266 + struct die_args args = { 267 + .regs = regs, 268 + .str = "Break", 269 + .err = BRK_KDB, 270 + .trapnr = read_csr_excode(), 271 + .signr = SIGTRAP, 272 + 273 + }; 274 + 275 + return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false; 276 + } 277 + 278 + static struct notifier_block kgdb_notifier = { 279 + .notifier_call = kgdb_loongarch_notify, 280 + }; 281 + 282 + static inline void kgdb_arch_update_addr(struct pt_regs *regs, 283 + char *remcom_in_buffer) 284 + { 285 + unsigned long addr; 286 + char *ptr; 287 + 288 + ptr = &remcom_in_buffer[1]; 289 + if (kgdb_hex2long(&ptr, &addr)) 290 + regs->csr_era = addr; 291 + } 292 + 293 + /* Calculate the new address for after a step */ 294 + static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) 295 + { 296 + char cj_val; 297 + unsigned int si, si_l, si_h, rd, rj, cj; 298 + unsigned long pc = instruction_pointer(regs); 299 + union loongarch_instruction *ip = (union loongarch_instruction *)pc; 300 + 301 + if (pc & 3) { 302 + pr_warn("%s: invalid pc 0x%lx\n", __func__, pc); 303 + return -EINVAL; 304 + } 305 + 306 + *next_addr = pc + LOONGARCH_INSN_SIZE; 307 + 308 + si_h = ip->reg0i26_format.immediate_h; 309 + si_l = ip->reg0i26_format.immediate_l; 310 + switch (ip->reg0i26_format.opcode) { 311 + case b_op: 312 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); 313 + return 
0; 314 + case bl_op: 315 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); 316 + regs->regs[1] = pc + LOONGARCH_INSN_SIZE; 317 + return 0; 318 + } 319 + 320 + rj = ip->reg1i21_format.rj; 321 + cj = (rj & 0x07) + DBG_FCC_BASE; 322 + si_l = ip->reg1i21_format.immediate_l; 323 + si_h = ip->reg1i21_format.immediate_h; 324 + dbg_get_reg(cj, &cj_val, regs); 325 + switch (ip->reg1i21_format.opcode) { 326 + case beqz_op: 327 + if (regs->regs[rj] == 0) 328 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); 329 + return 0; 330 + case bnez_op: 331 + if (regs->regs[rj] != 0) 332 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); 333 + return 0; 334 + case bceqz_op: /* bceqz_op = bcnez_op */ 335 + if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */ 336 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); 337 + if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */ 338 + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); 339 + return 0; 340 + } 341 + 342 + rj = ip->reg2i16_format.rj; 343 + rd = ip->reg2i16_format.rd; 344 + si = ip->reg2i16_format.immediate; 345 + switch (ip->reg2i16_format.opcode) { 346 + case beq_op: 347 + if (regs->regs[rj] == regs->regs[rd]) 348 + *next_addr = pc + sign_extend64(si << 2, 17); 349 + return 0; 350 + case bne_op: 351 + if (regs->regs[rj] != regs->regs[rd]) 352 + *next_addr = pc + sign_extend64(si << 2, 17); 353 + return 0; 354 + case blt_op: 355 + if ((long)regs->regs[rj] < (long)regs->regs[rd]) 356 + *next_addr = pc + sign_extend64(si << 2, 17); 357 + return 0; 358 + case bge_op: 359 + if ((long)regs->regs[rj] >= (long)regs->regs[rd]) 360 + *next_addr = pc + sign_extend64(si << 2, 17); 361 + return 0; 362 + case bltu_op: 363 + if (regs->regs[rj] < regs->regs[rd]) 364 + *next_addr = pc + sign_extend64(si << 2, 17); 365 + return 0; 366 + case bgeu_op: 367 + if (regs->regs[rj] >= regs->regs[rd]) 368 + *next_addr = pc + sign_extend64(si << 2, 17); 369 + return 0; 370 + case jirl_op: 371 + 
regs->regs[rd] = pc + LOONGARCH_INSN_SIZE; 372 + *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17); 373 + return 0; 374 + } 375 + 376 + return 0; 377 + } 378 + 379 + static int do_single_step(struct pt_regs *regs) 380 + { 381 + int error = 0; 382 + unsigned long addr = 0; /* Determine where the target instruction will send us to */ 383 + 384 + error = get_step_address(regs, &addr); 385 + if (error) 386 + return error; 387 + 388 + /* Store the opcode in the stepped address */ 389 + error = get_kernel_nofault(stepped_opcode, (void *)addr); 390 + if (error) 391 + return error; 392 + 393 + stepped_address = addr; 394 + 395 + /* Replace the opcode with the break instruction */ 396 + error = copy_to_kernel_nofault((void *)stepped_address, 397 + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); 398 + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); 399 + 400 + if (error) { 401 + stepped_opcode = 0; 402 + stepped_address = 0; 403 + } else { 404 + kgdb_single_step = 1; 405 + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); 406 + } 407 + 408 + return error; 409 + } 410 + 411 + /* Undo a single step */ 412 + static void undo_single_step(struct pt_regs *regs) 413 + { 414 + if (stepped_opcode) { 415 + copy_to_kernel_nofault((void *)stepped_address, 416 + (void *)&stepped_opcode, BREAK_INSTR_SIZE); 417 + flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); 418 + } 419 + 420 + stepped_opcode = 0; 421 + stepped_address = 0; 422 + kgdb_single_step = 0; 423 + atomic_set(&kgdb_cpu_doing_single_step, -1); 424 + } 425 + 426 + int kgdb_arch_handle_exception(int vector, int signo, int err_code, 427 + char *remcom_in_buffer, char *remcom_out_buffer, 428 + struct pt_regs *regs) 429 + { 430 + int ret = 0; 431 + 432 + undo_single_step(regs); 433 + regs->csr_prmd |= CSR_PRMD_PWE; 434 + 435 + switch (remcom_in_buffer[0]) { 436 + case 'D': 437 + case 'k': 438 + regs->csr_prmd &= ~CSR_PRMD_PWE; 439 + fallthrough; 440 + case 'c': 441 + 
kgdb_arch_update_addr(regs, remcom_in_buffer); 442 + break; 443 + case 's': 444 + kgdb_arch_update_addr(regs, remcom_in_buffer); 445 + ret = do_single_step(regs); 446 + break; 447 + default: 448 + ret = -1; 449 + } 450 + 451 + return ret; 452 + } 453 + 454 + static struct hw_breakpoint { 455 + unsigned int enabled; 456 + unsigned long addr; 457 + int len; 458 + int type; 459 + struct perf_event * __percpu *pev; 460 + } breakinfo[LOONGARCH_MAX_BRP]; 461 + 462 + static int hw_break_reserve_slot(int breakno) 463 + { 464 + int cpu, cnt = 0; 465 + struct perf_event **pevent; 466 + 467 + for_each_online_cpu(cpu) { 468 + cnt++; 469 + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); 470 + if (dbg_reserve_bp_slot(*pevent)) 471 + goto fail; 472 + } 473 + 474 + return 0; 475 + 476 + fail: 477 + for_each_online_cpu(cpu) { 478 + cnt--; 479 + if (!cnt) 480 + break; 481 + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); 482 + dbg_release_bp_slot(*pevent); 483 + } 484 + 485 + return -1; 486 + } 487 + 488 + static int hw_break_release_slot(int breakno) 489 + { 490 + int cpu; 491 + struct perf_event **pevent; 492 + 493 + if (dbg_is_early) 494 + return 0; 495 + 496 + for_each_online_cpu(cpu) { 497 + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); 498 + if (dbg_release_bp_slot(*pevent)) 499 + /* 500 + * The debugger is responsible for handling the retry on 501 + * remove failure. 
502 + */ 503 + return -1; 504 + } 505 + 506 + return 0; 507 + } 508 + 509 + static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) 510 + { 511 + int i; 512 + 513 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) 514 + if (!breakinfo[i].enabled) 515 + break; 516 + 517 + if (i == LOONGARCH_MAX_BRP) 518 + return -1; 519 + 520 + switch (bptype) { 521 + case BP_HARDWARE_BREAKPOINT: 522 + breakinfo[i].type = HW_BREAKPOINT_X; 523 + break; 524 + case BP_READ_WATCHPOINT: 525 + breakinfo[i].type = HW_BREAKPOINT_R; 526 + break; 527 + case BP_WRITE_WATCHPOINT: 528 + breakinfo[i].type = HW_BREAKPOINT_W; 529 + break; 530 + case BP_ACCESS_WATCHPOINT: 531 + breakinfo[i].type = HW_BREAKPOINT_RW; 532 + break; 533 + default: 534 + return -1; 535 + } 536 + 537 + switch (len) { 538 + case 1: 539 + breakinfo[i].len = HW_BREAKPOINT_LEN_1; 540 + break; 541 + case 2: 542 + breakinfo[i].len = HW_BREAKPOINT_LEN_2; 543 + break; 544 + case 4: 545 + breakinfo[i].len = HW_BREAKPOINT_LEN_4; 546 + break; 547 + case 8: 548 + breakinfo[i].len = HW_BREAKPOINT_LEN_8; 549 + break; 550 + default: 551 + return -1; 552 + } 553 + 554 + breakinfo[i].addr = addr; 555 + if (hw_break_reserve_slot(i)) { 556 + breakinfo[i].addr = 0; 557 + return -1; 558 + } 559 + breakinfo[i].enabled = 1; 560 + 561 + return 0; 562 + } 563 + 564 + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) 565 + { 566 + int i; 567 + 568 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) 569 + if (breakinfo[i].addr == addr && breakinfo[i].enabled) 570 + break; 571 + 572 + if (i == LOONGARCH_MAX_BRP) 573 + return -1; 574 + 575 + if (hw_break_release_slot(i)) { 576 + pr_err("Cannot remove hw breakpoint at %lx\n", addr); 577 + return -1; 578 + } 579 + breakinfo[i].enabled = 0; 580 + 581 + return 0; 582 + } 583 + 584 + static void kgdb_disable_hw_break(struct pt_regs *regs) 585 + { 586 + int i; 587 + int cpu = raw_smp_processor_id(); 588 + struct perf_event *bp; 589 + 590 + for (i = 0; i < 
LOONGARCH_MAX_BRP; i++) { 591 + if (!breakinfo[i].enabled) 592 + continue; 593 + 594 + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); 595 + if (bp->attr.disabled == 1) 596 + continue; 597 + 598 + arch_uninstall_hw_breakpoint(bp); 599 + bp->attr.disabled = 1; 600 + } 601 + 602 + /* Disable hardware debugging while we are in kgdb */ 603 + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); 604 + } 605 + 606 + static void kgdb_remove_all_hw_break(void) 607 + { 608 + int i; 609 + int cpu = raw_smp_processor_id(); 610 + struct perf_event *bp; 611 + 612 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { 613 + if (!breakinfo[i].enabled) 614 + continue; 615 + 616 + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); 617 + if (!bp->attr.disabled) { 618 + arch_uninstall_hw_breakpoint(bp); 619 + bp->attr.disabled = 1; 620 + continue; 621 + } 622 + 623 + if (hw_break_release_slot(i)) 624 + pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr); 625 + breakinfo[i].enabled = 0; 626 + } 627 + 628 + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); 629 + kgdb_watch_activated = 0; 630 + } 631 + 632 + static void kgdb_correct_hw_break(void) 633 + { 634 + int i, activated = 0; 635 + 636 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { 637 + struct perf_event *bp; 638 + int val; 639 + int cpu = raw_smp_processor_id(); 640 + 641 + if (!breakinfo[i].enabled) 642 + continue; 643 + 644 + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); 645 + if (bp->attr.disabled != 1) 646 + continue; 647 + 648 + bp->attr.bp_addr = breakinfo[i].addr; 649 + bp->attr.bp_len = breakinfo[i].len; 650 + bp->attr.bp_type = breakinfo[i].type; 651 + 652 + val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp)); 653 + if (val) 654 + return; 655 + 656 + val = arch_install_hw_breakpoint(bp); 657 + if (!val) 658 + bp->attr.disabled = 0; 659 + activated = 1; 660 + } 661 + 662 + csr_xchg32(activated ? 
CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); 663 + kgdb_watch_activated = activated; 664 + } 665 + 666 + const struct kgdb_arch arch_kgdb_ops = { 667 + .gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */ 668 + .flags = KGDB_HW_BREAKPOINT, 669 + .set_hw_breakpoint = kgdb_set_hw_break, 670 + .remove_hw_breakpoint = kgdb_remove_hw_break, 671 + .disable_hw_break = kgdb_disable_hw_break, 672 + .remove_all_hw_break = kgdb_remove_all_hw_break, 673 + .correct_hw_break = kgdb_correct_hw_break, 674 + }; 675 + 676 + int kgdb_arch_init(void) 677 + { 678 + return register_die_notifier(&kgdb_notifier); 679 + } 680 + 681 + void kgdb_arch_late(void) 682 + { 683 + int i, cpu; 684 + struct perf_event_attr attr; 685 + struct perf_event **pevent; 686 + 687 + hw_breakpoint_init(&attr); 688 + 689 + attr.bp_addr = (unsigned long)kgdb_arch_init; 690 + attr.bp_len = HW_BREAKPOINT_LEN_4; 691 + attr.bp_type = HW_BREAKPOINT_W; 692 + attr.disabled = 1; 693 + 694 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { 695 + if (breakinfo[i].pev) 696 + continue; 697 + 698 + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); 699 + if (IS_ERR((void * __force)breakinfo[i].pev)) { 700 + pr_err("kgdb: Could not allocate hw breakpoints.\n"); 701 + breakinfo[i].pev = NULL; 702 + return; 703 + } 704 + 705 + for_each_online_cpu(cpu) { 706 + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); 707 + if (pevent[0]->destroy) { 708 + pevent[0]->destroy = NULL; 709 + release_bp_slot(*pevent); 710 + } 711 + } 712 + } 713 + } 714 + 715 + void kgdb_arch_exit(void) 716 + { 717 + int i; 718 + 719 + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { 720 + if (breakinfo[i].pev) { 721 + unregister_wide_hw_breakpoint(breakinfo[i].pev); 722 + breakinfo[i].pev = NULL; 723 + } 724 + } 725 + 726 + unregister_die_notifier(&kgdb_notifier); 727 + }
+155
arch/loongarch/kernel/lbt.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Author: Qi Hu <huqi@loongson.cn> 4 + * Huacai Chen <chenhuacai@loongson.cn> 5 + * 6 + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited 7 + */ 8 + #include <asm/asm.h> 9 + #include <asm/asmmacro.h> 10 + #include <asm/asm-extable.h> 11 + #include <asm/asm-offsets.h> 12 + #include <asm/errno.h> 13 + #include <asm/regdef.h> 14 + 15 + #define SCR_REG_WIDTH 8 16 + 17 + .macro EX insn, reg, src, offs 18 + .ex\@: \insn \reg, \src, \offs 19 + _asm_extable .ex\@, .L_lbt_fault 20 + .endm 21 + 22 + /* 23 + * Save a thread's lbt context. 24 + */ 25 + SYM_FUNC_START(_save_lbt) 26 + movscr2gr t1, $scr0 # save scr 27 + stptr.d t1, a0, THREAD_SCR0 28 + movscr2gr t1, $scr1 29 + stptr.d t1, a0, THREAD_SCR1 30 + movscr2gr t1, $scr2 31 + stptr.d t1, a0, THREAD_SCR2 32 + movscr2gr t1, $scr3 33 + stptr.d t1, a0, THREAD_SCR3 34 + 35 + x86mfflag t1, 0x3f # save eflags 36 + stptr.d t1, a0, THREAD_EFLAGS 37 + jr ra 38 + SYM_FUNC_END(_save_lbt) 39 + EXPORT_SYMBOL(_save_lbt) 40 + 41 + /* 42 + * Restore a thread's lbt context. 43 + */ 44 + SYM_FUNC_START(_restore_lbt) 45 + ldptr.d t1, a0, THREAD_SCR0 # restore scr 46 + movgr2scr $scr0, t1 47 + ldptr.d t1, a0, THREAD_SCR1 48 + movgr2scr $scr1, t1 49 + ldptr.d t1, a0, THREAD_SCR2 50 + movgr2scr $scr2, t1 51 + ldptr.d t1, a0, THREAD_SCR3 52 + movgr2scr $scr3, t1 53 + 54 + ldptr.d t1, a0, THREAD_EFLAGS # restore eflags 55 + x86mtflag t1, 0x3f 56 + jr ra 57 + SYM_FUNC_END(_restore_lbt) 58 + EXPORT_SYMBOL(_restore_lbt) 59 + 60 + /* 61 + * Load scr/eflag with zero. 
62 + */ 63 + SYM_FUNC_START(_init_lbt) 64 + movgr2scr $scr0, zero 65 + movgr2scr $scr1, zero 66 + movgr2scr $scr2, zero 67 + movgr2scr $scr3, zero 68 + 69 + x86mtflag zero, 0x3f 70 + jr ra 71 + SYM_FUNC_END(_init_lbt) 72 + 73 + /* 74 + * a0: scr 75 + * a1: eflag 76 + */ 77 + SYM_FUNC_START(_save_lbt_context) 78 + movscr2gr t1, $scr0 # save scr 79 + EX st.d t1, a0, (0 * SCR_REG_WIDTH) 80 + movscr2gr t1, $scr1 81 + EX st.d t1, a0, (1 * SCR_REG_WIDTH) 82 + movscr2gr t1, $scr2 83 + EX st.d t1, a0, (2 * SCR_REG_WIDTH) 84 + movscr2gr t1, $scr3 85 + EX st.d t1, a0, (3 * SCR_REG_WIDTH) 86 + 87 + x86mfflag t1, 0x3f # save eflags 88 + EX st.w t1, a1, 0 89 + li.w a0, 0 # success 90 + jr ra 91 + SYM_FUNC_END(_save_lbt_context) 92 + 93 + /* 94 + * a0: scr 95 + * a1: eflag 96 + */ 97 + SYM_FUNC_START(_restore_lbt_context) 98 + EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr 99 + movgr2scr $scr0, t1 100 + EX ld.d t1, a0, (1 * SCR_REG_WIDTH) 101 + movgr2scr $scr1, t1 102 + EX ld.d t1, a0, (2 * SCR_REG_WIDTH) 103 + movgr2scr $scr2, t1 104 + EX ld.d t1, a0, (3 * SCR_REG_WIDTH) 105 + movgr2scr $scr3, t1 106 + 107 + EX ld.w t1, a1, 0 # restore eflags 108 + x86mtflag t1, 0x3f 109 + li.w a0, 0 # success 110 + jr ra 111 + SYM_FUNC_END(_restore_lbt_context) 112 + 113 + /* 114 + * a0: ftop 115 + */ 116 + SYM_FUNC_START(_save_ftop_context) 117 + x86mftop t1 118 + st.w t1, a0, 0 119 + li.w a0, 0 # success 120 + jr ra 121 + SYM_FUNC_END(_save_ftop_context) 122 + 123 + /* 124 + * a0: ftop 125 + */ 126 + SYM_FUNC_START(_restore_ftop_context) 127 + ld.w t1, a0, 0 128 + andi t1, t1, 0x7 129 + la.pcrel a0, 1f 130 + alsl.d a0, t1, a0, 3 131 + jr a0 132 + 1: 133 + x86mttop 0 134 + b 2f 135 + x86mttop 1 136 + b 2f 137 + x86mttop 2 138 + b 2f 139 + x86mttop 3 140 + b 2f 141 + x86mttop 4 142 + b 2f 143 + x86mttop 5 144 + b 2f 145 + x86mttop 6 146 + b 2f 147 + x86mttop 7 148 + 2: 149 + li.w a0, 0 # success 150 + jr ra 151 + SYM_FUNC_END(_restore_ftop_context) 152 + 153 + .L_lbt_fault: 154 + li.w a0, 
-EFAULT # failure 155 + jr ra
+1 -34
arch/loongarch/kernel/numa.c
··· 67 67 68 68 void __init pcpu_populate_pte(unsigned long addr) 69 69 { 70 - pgd_t *pgd = pgd_offset_k(addr); 71 - p4d_t *p4d = p4d_offset(pgd, addr); 72 - pud_t *pud; 73 - pmd_t *pmd; 74 - 75 - if (p4d_none(*p4d)) { 76 - pud_t *new; 77 - 78 - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 79 - pgd_populate(&init_mm, pgd, new); 80 - #ifndef __PAGETABLE_PUD_FOLDED 81 - pud_init(new); 82 - #endif 83 - } 84 - 85 - pud = pud_offset(p4d, addr); 86 - if (pud_none(*pud)) { 87 - pmd_t *new; 88 - 89 - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 90 - pud_populate(&init_mm, pud, new); 91 - #ifndef __PAGETABLE_PMD_FOLDED 92 - pmd_init(new); 93 - #endif 94 - } 95 - 96 - pmd = pmd_offset(pud, addr); 97 - if (!pmd_present(*pmd)) { 98 - pte_t *new; 99 - 100 - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 101 - pmd_populate_kernel(&init_mm, pmd, new); 102 - } 70 + populate_kernel_pte(addr); 103 71 } 104 72 105 73 void __init setup_per_cpu_areas(void) ··· 438 470 { 439 471 high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); 440 472 memblock_free_all(); 441 - setup_zero_pages(); /* This comes from node 0 */ 442 473 } 443 474 444 475 int pcibus_to_node(struct pci_bus *bus)
+12 -3
arch/loongarch/kernel/process.c
··· 38 38 #include <asm/cpu.h> 39 39 #include <asm/elf.h> 40 40 #include <asm/fpu.h> 41 + #include <asm/lbt.h> 41 42 #include <asm/io.h> 42 43 #include <asm/irq.h> 43 44 #include <asm/irq_regs.h> ··· 83 82 euen = regs->csr_euen & ~(CSR_EUEN_FPEN); 84 83 regs->csr_euen = euen; 85 84 lose_fpu(0); 85 + lose_lbt(0); 86 86 87 87 clear_thread_flag(TIF_LSX_CTX_LIVE); 88 88 clear_thread_flag(TIF_LASX_CTX_LIVE); 89 + clear_thread_flag(TIF_LBT_CTX_LIVE); 89 90 clear_used_math(); 90 91 regs->csr_era = pc; 91 92 regs->regs[3] = sp; ··· 124 121 125 122 preempt_enable(); 126 123 127 - if (used_math()) 128 - memcpy(dst, src, sizeof(struct task_struct)); 129 - else 124 + if (!used_math()) 130 125 memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); 126 + else 127 + memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0)); 128 + 129 + #ifdef CONFIG_CPU_HAS_LBT 130 + memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt)); 131 + #endif 131 132 132 133 return 0; 133 134 } ··· 196 189 ptrace_hw_copy_thread(p); 197 190 clear_tsk_thread_flag(p, TIF_USEDFPU); 198 191 clear_tsk_thread_flag(p, TIF_USEDSIMD); 192 + clear_tsk_thread_flag(p, TIF_USEDLBT); 199 193 clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); 200 194 clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); 195 + clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE); 201 196 202 197 return 0; 203 198 }
+54
arch/loongarch/kernel/ptrace.c
··· 38 38 #include <asm/cpu.h> 39 39 #include <asm/cpu-info.h> 40 40 #include <asm/fpu.h> 41 + #include <asm/lbt.h> 41 42 #include <asm/loongarch.h> 42 43 #include <asm/page.h> 43 44 #include <asm/pgtable.h> ··· 338 337 } 339 338 340 339 #endif /* CONFIG_CPU_HAS_LSX */ 340 + 341 + #ifdef CONFIG_CPU_HAS_LBT 342 + static int lbt_get(struct task_struct *target, 343 + const struct user_regset *regset, 344 + struct membuf to) 345 + { 346 + int r; 347 + 348 + r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0)); 349 + r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1)); 350 + r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2)); 351 + r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3)); 352 + r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32)); 353 + r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32)); 354 + 355 + return r; 356 + } 357 + 358 + static int lbt_set(struct task_struct *target, 359 + const struct user_regset *regset, 360 + unsigned int pos, unsigned int count, 361 + const void *kbuf, const void __user *ubuf) 362 + { 363 + int err = 0; 364 + const int eflags_start = 4 * sizeof(target->thread.lbt.scr0); 365 + const int ftop_start = eflags_start + sizeof(u32); 366 + 367 + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, 368 + &target->thread.lbt.scr0, 369 + 0, 4 * sizeof(target->thread.lbt.scr0)); 370 + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, 371 + &target->thread.lbt.eflags, 372 + eflags_start, ftop_start); 373 + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, 374 + &target->thread.fpu.ftop, 375 + ftop_start, ftop_start + sizeof(u32)); 376 + 377 + return err; 378 + } 379 + #endif /* CONFIG_CPU_HAS_LBT */ 341 380 342 381 #ifdef CONFIG_HAVE_HW_BREAKPOINT 343 382 ··· 843 802 #ifdef CONFIG_CPU_HAS_LASX 844 803 REGSET_LASX, 845 804 #endif 805 + #ifdef CONFIG_CPU_HAS_LBT 806 + REGSET_LBT, 807 + #endif 846 808 
#ifdef CONFIG_HAVE_HW_BREAKPOINT 847 809 REGSET_HW_BREAK, 848 810 REGSET_HW_WATCH, ··· 895 851 .align = 32, 896 852 .regset_get = simd_get, 897 853 .set = simd_set, 854 + }, 855 + #endif 856 + #ifdef CONFIG_CPU_HAS_LBT 857 + [REGSET_LBT] = { 858 + .core_note_type = NT_LOONGARCH_LBT, 859 + .n = 5, 860 + .size = sizeof(u64), 861 + .align = sizeof(u64), 862 + .regset_get = lbt_get, 863 + .set = lbt_set, 898 864 }, 899 865 #endif 900 866 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+2 -6
arch/loongarch/kernel/relocate.c
··· 157 157 *new_addr = (unsigned long)reloc_offset; 158 158 } 159 159 160 - void * __init relocate_kernel(void) 160 + unsigned long __init relocate_kernel(void) 161 161 { 162 162 unsigned long kernel_length; 163 163 unsigned long random_offset = 0; 164 164 void *location_new = _text; /* Default to original kernel start */ 165 - void *kernel_entry = start_kernel; /* Default to original kernel entry point */ 166 165 char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */ 167 166 168 167 strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); ··· 189 190 190 191 reloc_offset += random_offset; 191 192 192 - /* Return the new kernel's entry point */ 193 - kernel_entry = RELOCATED_KASLR(start_kernel); 194 - 195 193 /* The current thread is now within the relocated kernel */ 196 194 __current_thread_info = RELOCATED_KASLR(__current_thread_info); 197 195 ··· 200 204 201 205 relocate_absolute(random_offset); 202 206 203 - return kernel_entry; 207 + return random_offset; 204 208 } 205 209 206 210 /*
+4
arch/loongarch/kernel/setup.c
··· 626 626 #endif 627 627 628 628 paging_init(); 629 + 630 + #ifdef CONFIG_KASAN 631 + kasan_init(); 632 + #endif 629 633 }
+188
arch/loongarch/kernel/signal.c
··· 32 32 #include <asm/cacheflush.h> 33 33 #include <asm/cpu-features.h> 34 34 #include <asm/fpu.h> 35 + #include <asm/lbt.h> 35 36 #include <asm/ucontext.h> 36 37 #include <asm/vdso.h> 37 38 ··· 45 44 /* Make sure we will not lose FPU ownership */ 46 45 #define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) 47 46 #define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) 47 + /* Make sure we will not lose LBT ownership */ 48 + #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) 49 + #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) 48 50 49 51 /* Assembly functions to move context to/from the FPU */ 50 52 extern asmlinkage int ··· 62 58 _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); 63 59 extern asmlinkage int 64 60 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); 61 + 62 + #ifdef CONFIG_CPU_HAS_LBT 63 + extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); 64 + extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); 65 + extern asmlinkage int _save_ftop_context(void __user *ftop); 66 + extern asmlinkage int _restore_ftop_context(void __user *ftop); 67 + #endif 65 68 66 69 struct rt_sigframe { 67 70 struct siginfo rs_info; ··· 86 75 struct _ctx_layout fpu; 87 76 struct _ctx_layout lsx; 88 77 struct _ctx_layout lasx; 78 + struct _ctx_layout lbt; 89 79 struct _ctx_layout end; 90 80 }; 91 81 ··· 227 215 return err; 228 216 } 229 217 218 + #ifdef CONFIG_CPU_HAS_LBT 219 + static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx) 220 + { 221 + int err = 0; 222 + uint64_t __user *regs = (uint64_t *)&ctx->regs; 223 + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; 224 + 225 + err |= __put_user(current->thread.lbt.scr0, &regs[0]); 226 + err |= __put_user(current->thread.lbt.scr1, &regs[1]); 227 + err |= __put_user(current->thread.lbt.scr2, &regs[2]); 228 + err |= 
__put_user(current->thread.lbt.scr3, &regs[3]); 229 + err |= __put_user(current->thread.lbt.eflags, eflags); 230 + 231 + return err; 232 + } 233 + 234 + static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx) 235 + { 236 + int err = 0; 237 + uint64_t __user *regs = (uint64_t *)&ctx->regs; 238 + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; 239 + 240 + err |= __get_user(current->thread.lbt.scr0, &regs[0]); 241 + err |= __get_user(current->thread.lbt.scr1, &regs[1]); 242 + err |= __get_user(current->thread.lbt.scr2, &regs[2]); 243 + err |= __get_user(current->thread.lbt.scr3, &regs[3]); 244 + err |= __get_user(current->thread.lbt.eflags, eflags); 245 + 246 + return err; 247 + } 248 + 249 + static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx) 250 + { 251 + uint32_t __user *ftop = &ctx->ftop; 252 + 253 + return __put_user(current->thread.fpu.ftop, ftop); 254 + } 255 + 256 + static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx) 257 + { 258 + uint32_t __user *ftop = &ctx->ftop; 259 + 260 + return __get_user(current->thread.fpu.ftop, ftop); 261 + } 262 + #endif 263 + 230 264 /* 231 265 * Wrappers for the assembly _{save,restore}_fp_context functions. 232 266 */ ··· 329 271 330 272 return _restore_lasx_context(regs, fcc, fcsr); 331 273 } 274 + 275 + /* 276 + * Wrappers for the assembly _{save,restore}_lbt_context functions. 
277 + */ 278 + #ifdef CONFIG_CPU_HAS_LBT 279 + static int save_hw_lbt_context(struct lbt_context __user *ctx) 280 + { 281 + uint64_t __user *regs = (uint64_t *)&ctx->regs; 282 + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; 283 + 284 + return _save_lbt_context(regs, eflags); 285 + } 286 + 287 + static int restore_hw_lbt_context(struct lbt_context __user *ctx) 288 + { 289 + uint64_t __user *regs = (uint64_t *)&ctx->regs; 290 + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; 291 + 292 + return _restore_lbt_context(regs, eflags); 293 + } 294 + 295 + static int save_hw_ftop_context(struct lbt_context __user *ctx) 296 + { 297 + uint32_t __user *ftop = &ctx->ftop; 298 + 299 + return _save_ftop_context(ftop); 300 + } 301 + 302 + static int restore_hw_ftop_context(struct lbt_context __user *ctx) 303 + { 304 + uint32_t __user *ftop = &ctx->ftop; 305 + 306 + return _restore_ftop_context(ftop); 307 + } 308 + #endif 332 309 333 310 static int fcsr_pending(unsigned int __user *fcsr) 334 311 { ··· 612 519 return err ?: sig; 613 520 } 614 521 522 + #ifdef CONFIG_CPU_HAS_LBT 523 + static int protected_save_lbt_context(struct extctx_layout *extctx) 524 + { 525 + int err = 0; 526 + struct sctx_info __user *info = extctx->lbt.addr; 527 + struct lbt_context __user *lbt_ctx = 528 + (struct lbt_context *)get_ctx_through_ctxinfo(info); 529 + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; 530 + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; 531 + 532 + while (1) { 533 + lock_lbt_owner(); 534 + if (is_lbt_owner()) 535 + err |= save_hw_lbt_context(lbt_ctx); 536 + else 537 + err |= copy_lbt_to_sigcontext(lbt_ctx); 538 + if (is_fpu_owner()) 539 + err |= save_hw_ftop_context(lbt_ctx); 540 + else 541 + err |= copy_ftop_to_sigcontext(lbt_ctx); 542 + unlock_lbt_owner(); 543 + 544 + err |= __put_user(LBT_CTX_MAGIC, &info->magic); 545 + err |= __put_user(extctx->lbt.size, &info->size); 546 + 547 + if (likely(!err)) 548 + break; 549 + /* Touch the LBT context and try again */ 
550 + err = __put_user(0, &regs[0]) | __put_user(0, eflags); 551 + 552 + if (err) 553 + return err; 554 + } 555 + 556 + return err; 557 + } 558 + 559 + static int protected_restore_lbt_context(struct extctx_layout *extctx) 560 + { 561 + int err = 0, tmp __maybe_unused; 562 + struct sctx_info __user *info = extctx->lbt.addr; 563 + struct lbt_context __user *lbt_ctx = 564 + (struct lbt_context *)get_ctx_through_ctxinfo(info); 565 + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; 566 + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; 567 + 568 + while (1) { 569 + lock_lbt_owner(); 570 + if (is_lbt_owner()) 571 + err |= restore_hw_lbt_context(lbt_ctx); 572 + else 573 + err |= copy_lbt_from_sigcontext(lbt_ctx); 574 + if (is_fpu_owner()) 575 + err |= restore_hw_ftop_context(lbt_ctx); 576 + else 577 + err |= copy_ftop_from_sigcontext(lbt_ctx); 578 + unlock_lbt_owner(); 579 + 580 + if (likely(!err)) 581 + break; 582 + /* Touch the LBT context and try again */ 583 + err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags); 584 + 585 + if (err) 586 + return err; 587 + } 588 + 589 + return err; 590 + } 591 + #endif 592 + 615 593 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 616 594 struct extctx_layout *extctx) 617 595 { ··· 702 538 err |= protected_save_lsx_context(extctx); 703 539 else if (extctx->fpu.addr) 704 540 err |= protected_save_fpu_context(extctx); 541 + 542 + #ifdef CONFIG_CPU_HAS_LBT 543 + if (extctx->lbt.addr) 544 + err |= protected_save_lbt_context(extctx); 545 + #endif 705 546 706 547 /* Set the "end" magic */ 707 548 info = (struct sctx_info *)extctx->end.addr; ··· 751 582 sizeof(struct lasx_context))) 752 583 goto invalid; 753 584 extctx->lasx.addr = info; 585 + break; 586 + 587 + case LBT_CTX_MAGIC: 588 + if (size < (sizeof(struct sctx_info) + 589 + sizeof(struct lbt_context))) 590 + goto invalid; 591 + extctx->lbt.addr = info; 754 592 break; 755 593 756 594 default: ··· 811 635 err |= 
protected_restore_lsx_context(&extctx); 812 636 else if (extctx.fpu.addr) 813 637 err |= protected_restore_fpu_context(&extctx); 638 + 639 + #ifdef CONFIG_CPU_HAS_LBT 640 + if (extctx.lbt.addr) 641 + err |= protected_restore_lbt_context(&extctx); 642 + #endif 814 643 815 644 bad: 816 645 return err; ··· 880 699 new_sp = extframe_alloc(extctx, &extctx->fpu, 881 700 sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); 882 701 } 702 + 703 + #ifdef CONFIG_CPU_HAS_LBT 704 + if (cpu_has_lbt && thread_lbt_context_live()) { 705 + new_sp = extframe_alloc(extctx, &extctx->lbt, 706 + sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp); 707 + } 708 + #endif 883 709 884 710 return new_sp; 885 711 }
+10 -8
arch/loongarch/kernel/stacktrace.c
··· 18 18 struct pt_regs dummyregs; 19 19 struct unwind_state state; 20 20 21 - regs = &dummyregs; 21 + if (!regs) { 22 + regs = &dummyregs; 22 23 23 - if (task == current) { 24 - regs->regs[3] = (unsigned long)__builtin_frame_address(0); 25 - regs->csr_era = (unsigned long)__builtin_return_address(0); 26 - } else { 27 - regs->regs[3] = thread_saved_fp(task); 28 - regs->csr_era = thread_saved_ra(task); 24 + if (task == current) { 25 + regs->regs[3] = (unsigned long)__builtin_frame_address(0); 26 + regs->csr_era = (unsigned long)__builtin_return_address(0); 27 + } else { 28 + regs->regs[3] = thread_saved_fp(task); 29 + regs->csr_era = thread_saved_ra(task); 30 + } 31 + regs->regs[1] = 0; 29 32 } 30 33 31 - regs->regs[1] = 0; 32 34 for (unwind_start(&state, task, regs); 33 35 !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { 34 36 addr = unwind_get_return_address(&state);
+47 -3
arch/loongarch/kernel/traps.c
··· 36 36 #include <asm/break.h> 37 37 #include <asm/cpu.h> 38 38 #include <asm/fpu.h> 39 + #include <asm/lbt.h> 39 40 #include <asm/inst.h> 41 + #include <asm/kgdb.h> 40 42 #include <asm/loongarch.h> 41 43 #include <asm/mmu_context.h> 42 44 #include <asm/pgtable.h> ··· 704 702 * pertain to them. 705 703 */ 706 704 switch (bcode) { 705 + case BRK_KDB: 706 + if (kgdb_breakpoint_handler(regs)) 707 + goto out; 708 + else 709 + break; 707 710 case BRK_KPROBE_BP: 708 711 if (kprobe_breakpoint_handler(regs)) 709 712 goto out; ··· 775 768 #ifndef CONFIG_HAVE_HW_BREAKPOINT 776 769 pr_warn("Hardware watch point handler not implemented!\n"); 777 770 #else 771 + if (kgdb_breakpoint_handler(regs)) 772 + goto out; 773 + 778 774 if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) { 779 775 int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1); 780 776 unsigned long pc = instruction_pointer(regs); ··· 976 966 irqentry_exit(regs, state); 977 967 } 978 968 969 + static void init_restore_lbt(void) 970 + { 971 + if (!thread_lbt_context_live()) { 972 + /* First time LBT context user */ 973 + init_lbt(); 974 + set_thread_flag(TIF_LBT_CTX_LIVE); 975 + } else { 976 + if (!is_lbt_owner()) 977 + own_lbt_inatomic(1); 978 + } 979 + 980 + BUG_ON(!is_lbt_enabled()); 981 + } 982 + 979 983 asmlinkage void noinstr do_lbt(struct pt_regs *regs) 980 984 { 981 985 irqentry_state_t state = irqentry_enter(regs); 982 986 983 - local_irq_enable(); 984 - force_sig(SIGILL); 985 - local_irq_disable(); 987 + /* 988 + * BTD (Binary Translation Disable exception) can be triggered 989 + * during FP save/restore if TM (Top Mode) is on, which may 990 + * cause irq_enable during 'switch_to'. To avoid this situation 991 + * (including the user using 'MOVGR2GCSR' to turn on TM, which 992 + * will not trigger the BTE), we need to check PRMD first. 
993 + */ 994 + if (regs->csr_prmd & CSR_PRMD_PIE) 995 + local_irq_enable(); 996 + 997 + if (!cpu_has_lbt) { 998 + force_sig(SIGILL); 999 + goto out; 1000 + } 1001 + BUG_ON(is_lbt_enabled()); 1002 + 1003 + preempt_disable(); 1004 + init_restore_lbt(); 1005 + preempt_enable(); 1006 + 1007 + out: 1008 + if (regs->csr_prmd & CSR_PRMD_PIE) 1009 + local_irq_disable(); 986 1010 987 1011 irqentry_exit(regs, state); 988 1012 }
+2
arch/loongarch/lib/Makefile
··· 6 6 lib-y += delay.o memset.o memcpy.o memmove.o \ 7 7 clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o 8 8 9 + obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o 10 + 9 11 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+44 -43
arch/loongarch/lib/clear_user.S
··· 11 11 #include <asm/cpu.h> 12 12 #include <asm/regdef.h> 13 13 14 - .irp to, 0, 1, 2, 3, 4, 5, 6, 7 15 - .L_fixup_handle_\to\(): 16 - sub.d a0, a2, a0 17 - addi.d a0, a0, (\to) * (-8) 18 - jr ra 19 - .endr 20 - 21 - .irp to, 0, 2, 4 22 - .L_fixup_handle_s\to\(): 23 - addi.d a0, a1, -\to 24 - jr ra 25 - .endr 26 - 27 14 SYM_FUNC_START(__clear_user) 28 15 /* 29 16 * Some CPUs support hardware unaligned access ··· 38 51 2: move a0, a1 39 52 jr ra 40 53 41 - _asm_extable 1b, .L_fixup_handle_s0 54 + _asm_extable 1b, 2b 42 55 SYM_FUNC_END(__clear_user_generic) 43 56 44 57 /* ··· 160 173 jr ra 161 174 162 175 /* fixup and ex_table */ 163 - _asm_extable 0b, .L_fixup_handle_0 164 - _asm_extable 1b, .L_fixup_handle_0 165 - _asm_extable 2b, .L_fixup_handle_1 166 - _asm_extable 3b, .L_fixup_handle_2 167 - _asm_extable 4b, .L_fixup_handle_3 168 - _asm_extable 5b, .L_fixup_handle_4 169 - _asm_extable 6b, .L_fixup_handle_5 170 - _asm_extable 7b, .L_fixup_handle_6 171 - _asm_extable 8b, .L_fixup_handle_7 172 - _asm_extable 9b, .L_fixup_handle_0 173 - _asm_extable 10b, .L_fixup_handle_1 174 - _asm_extable 11b, .L_fixup_handle_2 175 - _asm_extable 12b, .L_fixup_handle_3 176 - _asm_extable 13b, .L_fixup_handle_0 177 - _asm_extable 14b, .L_fixup_handle_1 178 - _asm_extable 15b, .L_fixup_handle_0 179 - _asm_extable 16b, .L_fixup_handle_0 180 - _asm_extable 17b, .L_fixup_handle_s0 181 - _asm_extable 18b, .L_fixup_handle_s0 182 - _asm_extable 19b, .L_fixup_handle_s0 183 - _asm_extable 20b, .L_fixup_handle_s2 184 - _asm_extable 21b, .L_fixup_handle_s0 185 - _asm_extable 22b, .L_fixup_handle_s0 186 - _asm_extable 23b, .L_fixup_handle_s4 187 - _asm_extable 24b, .L_fixup_handle_s0 188 - _asm_extable 25b, .L_fixup_handle_s4 189 - _asm_extable 26b, .L_fixup_handle_s0 190 - _asm_extable 27b, .L_fixup_handle_s4 191 - _asm_extable 28b, .L_fixup_handle_s0 176 + .Llarge_fixup: 177 + sub.d a1, a2, a0 178 + 179 + .Lsmall_fixup: 180 + 29: st.b zero, a0, 0 181 + addi.d a0, a0, 1 182 + addi.d a1, 
a1, -1 183 + bgt a1, zero, 29b 184 + 185 + .Lexit: 186 + move a0, a1 187 + jr ra 188 + 189 + _asm_extable 0b, .Lsmall_fixup 190 + _asm_extable 1b, .Llarge_fixup 191 + _asm_extable 2b, .Llarge_fixup 192 + _asm_extable 3b, .Llarge_fixup 193 + _asm_extable 4b, .Llarge_fixup 194 + _asm_extable 5b, .Llarge_fixup 195 + _asm_extable 6b, .Llarge_fixup 196 + _asm_extable 7b, .Llarge_fixup 197 + _asm_extable 8b, .Llarge_fixup 198 + _asm_extable 9b, .Llarge_fixup 199 + _asm_extable 10b, .Llarge_fixup 200 + _asm_extable 11b, .Llarge_fixup 201 + _asm_extable 12b, .Llarge_fixup 202 + _asm_extable 13b, .Llarge_fixup 203 + _asm_extable 14b, .Llarge_fixup 204 + _asm_extable 15b, .Llarge_fixup 205 + _asm_extable 16b, .Llarge_fixup 206 + _asm_extable 17b, .Lexit 207 + _asm_extable 18b, .Lsmall_fixup 208 + _asm_extable 19b, .Lsmall_fixup 209 + _asm_extable 20b, .Lsmall_fixup 210 + _asm_extable 21b, .Lsmall_fixup 211 + _asm_extable 22b, .Lsmall_fixup 212 + _asm_extable 23b, .Lsmall_fixup 213 + _asm_extable 24b, .Lsmall_fixup 214 + _asm_extable 25b, .Lsmall_fixup 215 + _asm_extable 26b, .Lsmall_fixup 216 + _asm_extable 27b, .Lsmall_fixup 217 + _asm_extable 28b, .Lsmall_fixup 218 + _asm_extable 29b, .Lexit 192 219 SYM_FUNC_END(__clear_user_fast)
+83 -78
arch/loongarch/lib/copy_user.S
··· 11 11 #include <asm/cpu.h> 12 12 #include <asm/regdef.h> 13 13 14 - .irp to, 0, 1, 2, 3, 4, 5, 6, 7 15 - .L_fixup_handle_\to\(): 16 - sub.d a0, a2, a0 17 - addi.d a0, a0, (\to) * (-8) 18 - jr ra 19 - .endr 20 - 21 - .irp to, 0, 2, 4 22 - .L_fixup_handle_s\to\(): 23 - addi.d a0, a2, -\to 24 - jr ra 25 - .endr 26 - 27 14 SYM_FUNC_START(__copy_user) 28 15 /* 29 16 * Some CPUs support hardware unaligned access ··· 41 54 3: move a0, a2 42 55 jr ra 43 56 44 - _asm_extable 1b, .L_fixup_handle_s0 45 - _asm_extable 2b, .L_fixup_handle_s0 57 + _asm_extable 1b, 3b 58 + _asm_extable 2b, 3b 46 59 SYM_FUNC_END(__copy_user_generic) 47 60 48 61 /* ··· 56 69 sltui t0, a2, 9 57 70 bnez t0, .Lsmall 58 71 59 - add.d a3, a1, a2 60 - add.d a2, a0, a2 61 72 0: ld.d t0, a1, 0 62 73 1: st.d t0, a0, 0 74 + add.d a3, a1, a2 75 + add.d a2, a0, a2 63 76 64 77 /* align up destination address */ 65 78 andi t1, a0, 7 ··· 81 94 7: ld.d t5, a1, 40 82 95 8: ld.d t6, a1, 48 83 96 9: ld.d t7, a1, 56 84 - addi.d a1, a1, 64 85 97 10: st.d t0, a0, 0 86 98 11: st.d t1, a0, 8 87 99 12: st.d t2, a0, 16 ··· 89 103 15: st.d t5, a0, 40 90 104 16: st.d t6, a0, 48 91 105 17: st.d t7, a0, 56 106 + addi.d a1, a1, 64 92 107 addi.d a0, a0, 64 93 108 bltu a1, a4, .Lloop64 94 109 ··· 101 114 19: ld.d t1, a1, 8 102 115 20: ld.d t2, a1, 16 103 116 21: ld.d t3, a1, 24 104 - addi.d a1, a1, 32 105 117 22: st.d t0, a0, 0 106 118 23: st.d t1, a0, 8 107 119 24: st.d t2, a0, 16 108 120 25: st.d t3, a0, 24 121 + addi.d a1, a1, 32 109 122 addi.d a0, a0, 32 110 123 111 124 .Llt32: ··· 113 126 bgeu a1, a4, .Llt16 114 127 26: ld.d t0, a1, 0 115 128 27: ld.d t1, a1, 8 116 - addi.d a1, a1, 16 117 129 28: st.d t0, a0, 0 118 130 29: st.d t1, a0, 8 131 + addi.d a1, a1, 16 119 132 addi.d a0, a0, 16 120 133 121 134 .Llt16: ··· 123 136 bgeu a1, a4, .Llt8 124 137 30: ld.d t0, a1, 0 125 138 31: st.d t0, a0, 0 139 + addi.d a1, a1, 8 126 140 addi.d a0, a0, 8 127 141 128 142 .Llt8: ··· 202 214 jr ra 203 215 204 216 /* fixup and ex_table */ 
205 - _asm_extable 0b, .L_fixup_handle_0 206 - _asm_extable 1b, .L_fixup_handle_0 207 - _asm_extable 2b, .L_fixup_handle_0 208 - _asm_extable 3b, .L_fixup_handle_0 209 - _asm_extable 4b, .L_fixup_handle_0 210 - _asm_extable 5b, .L_fixup_handle_0 211 - _asm_extable 6b, .L_fixup_handle_0 212 - _asm_extable 7b, .L_fixup_handle_0 213 - _asm_extable 8b, .L_fixup_handle_0 214 - _asm_extable 9b, .L_fixup_handle_0 215 - _asm_extable 10b, .L_fixup_handle_0 216 - _asm_extable 11b, .L_fixup_handle_1 217 - _asm_extable 12b, .L_fixup_handle_2 218 - _asm_extable 13b, .L_fixup_handle_3 219 - _asm_extable 14b, .L_fixup_handle_4 220 - _asm_extable 15b, .L_fixup_handle_5 221 - _asm_extable 16b, .L_fixup_handle_6 222 - _asm_extable 17b, .L_fixup_handle_7 223 - _asm_extable 18b, .L_fixup_handle_0 224 - _asm_extable 19b, .L_fixup_handle_0 225 - _asm_extable 20b, .L_fixup_handle_0 226 - _asm_extable 21b, .L_fixup_handle_0 227 - _asm_extable 22b, .L_fixup_handle_0 228 - _asm_extable 23b, .L_fixup_handle_1 229 - _asm_extable 24b, .L_fixup_handle_2 230 - _asm_extable 25b, .L_fixup_handle_3 231 - _asm_extable 26b, .L_fixup_handle_0 232 - _asm_extable 27b, .L_fixup_handle_0 233 - _asm_extable 28b, .L_fixup_handle_0 234 - _asm_extable 29b, .L_fixup_handle_1 235 - _asm_extable 30b, .L_fixup_handle_0 236 - _asm_extable 31b, .L_fixup_handle_0 237 - _asm_extable 32b, .L_fixup_handle_0 238 - _asm_extable 33b, .L_fixup_handle_0 239 - _asm_extable 34b, .L_fixup_handle_s0 240 - _asm_extable 35b, .L_fixup_handle_s0 241 - _asm_extable 36b, .L_fixup_handle_s0 242 - _asm_extable 37b, .L_fixup_handle_s0 243 - _asm_extable 38b, .L_fixup_handle_s0 244 - _asm_extable 39b, .L_fixup_handle_s0 245 - _asm_extable 40b, .L_fixup_handle_s0 246 - _asm_extable 41b, .L_fixup_handle_s2 247 - _asm_extable 42b, .L_fixup_handle_s0 248 - _asm_extable 43b, .L_fixup_handle_s0 249 - _asm_extable 44b, .L_fixup_handle_s0 250 - _asm_extable 45b, .L_fixup_handle_s0 251 - _asm_extable 46b, .L_fixup_handle_s0 252 - _asm_extable 
47b, .L_fixup_handle_s4 253 - _asm_extable 48b, .L_fixup_handle_s0 254 - _asm_extable 49b, .L_fixup_handle_s0 255 - _asm_extable 50b, .L_fixup_handle_s0 256 - _asm_extable 51b, .L_fixup_handle_s4 257 - _asm_extable 52b, .L_fixup_handle_s0 258 - _asm_extable 53b, .L_fixup_handle_s0 259 - _asm_extable 54b, .L_fixup_handle_s0 260 - _asm_extable 55b, .L_fixup_handle_s4 261 - _asm_extable 56b, .L_fixup_handle_s0 262 - _asm_extable 57b, .L_fixup_handle_s0 217 + .Llarge_fixup: 218 + sub.d a2, a2, a0 219 + 220 + .Lsmall_fixup: 221 + 58: ld.b t0, a1, 0 222 + 59: st.b t0, a0, 0 223 + addi.d a0, a0, 1 224 + addi.d a1, a1, 1 225 + addi.d a2, a2, -1 226 + bgt a2, zero, 58b 227 + 228 + .Lexit: 229 + move a0, a2 230 + jr ra 231 + 232 + _asm_extable 0b, .Lsmall_fixup 233 + _asm_extable 1b, .Lsmall_fixup 234 + _asm_extable 2b, .Llarge_fixup 235 + _asm_extable 3b, .Llarge_fixup 236 + _asm_extable 4b, .Llarge_fixup 237 + _asm_extable 5b, .Llarge_fixup 238 + _asm_extable 6b, .Llarge_fixup 239 + _asm_extable 7b, .Llarge_fixup 240 + _asm_extable 8b, .Llarge_fixup 241 + _asm_extable 9b, .Llarge_fixup 242 + _asm_extable 10b, .Llarge_fixup 243 + _asm_extable 11b, .Llarge_fixup 244 + _asm_extable 12b, .Llarge_fixup 245 + _asm_extable 13b, .Llarge_fixup 246 + _asm_extable 14b, .Llarge_fixup 247 + _asm_extable 15b, .Llarge_fixup 248 + _asm_extable 16b, .Llarge_fixup 249 + _asm_extable 17b, .Llarge_fixup 250 + _asm_extable 18b, .Llarge_fixup 251 + _asm_extable 19b, .Llarge_fixup 252 + _asm_extable 20b, .Llarge_fixup 253 + _asm_extable 21b, .Llarge_fixup 254 + _asm_extable 22b, .Llarge_fixup 255 + _asm_extable 23b, .Llarge_fixup 256 + _asm_extable 24b, .Llarge_fixup 257 + _asm_extable 25b, .Llarge_fixup 258 + _asm_extable 26b, .Llarge_fixup 259 + _asm_extable 27b, .Llarge_fixup 260 + _asm_extable 28b, .Llarge_fixup 261 + _asm_extable 29b, .Llarge_fixup 262 + _asm_extable 30b, .Llarge_fixup 263 + _asm_extable 31b, .Llarge_fixup 264 + _asm_extable 32b, .Llarge_fixup 265 + _asm_extable 33b, 
.Llarge_fixup 266 + _asm_extable 34b, .Lexit 267 + _asm_extable 35b, .Lexit 268 + _asm_extable 36b, .Lsmall_fixup 269 + _asm_extable 37b, .Lsmall_fixup 270 + _asm_extable 38b, .Lsmall_fixup 271 + _asm_extable 39b, .Lsmall_fixup 272 + _asm_extable 40b, .Lsmall_fixup 273 + _asm_extable 41b, .Lsmall_fixup 274 + _asm_extable 42b, .Lsmall_fixup 275 + _asm_extable 43b, .Lsmall_fixup 276 + _asm_extable 44b, .Lsmall_fixup 277 + _asm_extable 45b, .Lsmall_fixup 278 + _asm_extable 46b, .Lsmall_fixup 279 + _asm_extable 47b, .Lsmall_fixup 280 + _asm_extable 48b, .Lsmall_fixup 281 + _asm_extable 49b, .Lsmall_fixup 282 + _asm_extable 50b, .Lsmall_fixup 283 + _asm_extable 51b, .Lsmall_fixup 284 + _asm_extable 52b, .Lsmall_fixup 285 + _asm_extable 53b, .Lsmall_fixup 286 + _asm_extable 54b, .Lsmall_fixup 287 + _asm_extable 55b, .Lsmall_fixup 288 + _asm_extable 56b, .Lsmall_fixup 289 + _asm_extable 57b, .Lsmall_fixup 290 + _asm_extable 58b, .Lexit 291 + _asm_extable 59b, .Lexit 263 292 SYM_FUNC_END(__copy_user_fast)
+7 -1
arch/loongarch/lib/memcpy.S
··· 10 10 #include <asm/cpu.h> 11 11 #include <asm/regdef.h> 12 12 13 + .section .noinstr.text, "ax" 14 + 13 15 SYM_FUNC_START(memcpy) 14 16 /* 15 17 * Some CPUs support hardware unaligned access ··· 19 17 ALTERNATIVE "b __memcpy_generic", \ 20 18 "b __memcpy_fast", CPU_FEATURE_UAL 21 19 SYM_FUNC_END(memcpy) 22 - _ASM_NOKPROBE(memcpy) 20 + SYM_FUNC_ALIAS(__memcpy, memcpy) 23 21 24 22 EXPORT_SYMBOL(memcpy) 23 + EXPORT_SYMBOL(__memcpy) 24 + 25 + _ASM_NOKPROBE(memcpy) 26 + _ASM_NOKPROBE(__memcpy) 25 27 26 28 /* 27 29 * void *__memcpy_generic(void *dst, const void *src, size_t n)
+13 -7
arch/loongarch/lib/memmove.S
··· 10 10 #include <asm/cpu.h> 11 11 #include <asm/regdef.h> 12 12 13 + .section .noinstr.text, "ax" 14 + 13 15 SYM_FUNC_START(memmove) 14 - blt a0, a1, memcpy /* dst < src, memcpy */ 15 - blt a1, a0, rmemcpy /* src < dst, rmemcpy */ 16 - jr ra /* dst == src, return */ 16 + blt a0, a1, __memcpy /* dst < src, memcpy */ 17 + blt a1, a0, __rmemcpy /* src < dst, rmemcpy */ 18 + jr ra /* dst == src, return */ 17 19 SYM_FUNC_END(memmove) 18 - _ASM_NOKPROBE(memmove) 20 + SYM_FUNC_ALIAS(__memmove, memmove) 19 21 20 22 EXPORT_SYMBOL(memmove) 23 + EXPORT_SYMBOL(__memmove) 21 24 22 - SYM_FUNC_START(rmemcpy) 25 + _ASM_NOKPROBE(memmove) 26 + _ASM_NOKPROBE(__memmove) 27 + 28 + SYM_FUNC_START(__rmemcpy) 23 29 /* 24 30 * Some CPUs support hardware unaligned access 25 31 */ 26 32 ALTERNATIVE "b __rmemcpy_generic", \ 27 33 "b __rmemcpy_fast", CPU_FEATURE_UAL 28 - SYM_FUNC_END(rmemcpy) 29 - _ASM_NOKPROBE(rmemcpy) 34 + SYM_FUNC_END(__rmemcpy) 35 + _ASM_NOKPROBE(__rmemcpy) 30 36 31 37 /* 32 38 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
+7 -1
arch/loongarch/lib/memset.S
··· 16 16 bstrins.d \r0, \r0, 63, 32 17 17 .endm 18 18 19 + .section .noinstr.text, "ax" 20 + 19 21 SYM_FUNC_START(memset) 20 22 /* 21 23 * Some CPUs support hardware unaligned access ··· 25 23 ALTERNATIVE "b __memset_generic", \ 26 24 "b __memset_fast", CPU_FEATURE_UAL 27 25 SYM_FUNC_END(memset) 28 - _ASM_NOKPROBE(memset) 26 + SYM_FUNC_ALIAS(__memset, memset) 29 27 30 28 EXPORT_SYMBOL(memset) 29 + EXPORT_SYMBOL(__memset) 30 + 31 + _ASM_NOKPROBE(memset) 32 + _ASM_NOKPROBE(__memset) 31 33 32 34 /* 33 35 * void *__memset_generic(void *s, int c, size_t n)
+93
arch/loongarch/lib/xor_simd.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * This file contains no code of its own: it defines the building-block
 * macros that xor_template.c expects and includes that template once per
 * enabled SIMD flavor, producing __xor_lsx_{2..5} and __xor_lasx_{2..5}.
 */

#include "xor_simd.h"

/*
 * Process one cache line (64 bytes) per loop. This is assuming all future
 * popular LoongArch cores are similar performance-characteristics-wise to the
 * current models.
 */
#define LINE_WIDTH 64

#ifdef CONFIG_CPU_HAS_LSX

/*
 * Each of LD/ST/XOR expands to one line of inline-assembly text.
 * `reg`, `dj` and `k` are vector register numbers; `base` is the name of an
 * asm operand (the template passes [v1], [v2], ...), and `offset` is a byte
 * offset from that operand.
 */
#define LD(reg, base, offset)	\
	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
/* dj = dj ^ k (destination and first source are the same register) */
#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"

/* Load the in/out line into the accumulators $vr0-$vr3 (4 loads, 16 bytes apart). */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 16)		\
	LD(2, base, 32)		\
	LD(3, base, 48)

/* Load one source line into scratch $vr4-$vr7 and fold it into $vr0-$vr3. */
#define LD_AND_XOR_LINE(base)	\
	LD(4, base, 0)		\
	LD(5, base, 16)		\
	LD(6, base, 32)		\
	LD(7, base, 48)		\
	XOR(0, 4)		\
	XOR(1, 5)		\
	XOR(2, 6)		\
	XOR(3, 7)

/* Write the accumulators $vr0-$vr3 back out. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 16)		\
	ST(2, base, 32)		\
	ST(3, base, 48)

#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
#include "xor_template.c"

/* Drop the LSX definitions so the template can be instantiated again below. */
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX

/*
 * Same scheme as the LSX variant, but using the $xr registers: two loads
 * 32 bytes apart cover one LINE_WIDTH, so only $xr0-$xr1 (accumulators) and
 * $xr2-$xr3 (scratch) are needed.
 */
#define LD(reg, base, offset)	\
	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
/* dj = dj ^ k (destination and first source are the same register) */
#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"

/* Load the in/out line into the accumulators $xr0-$xr1. */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 32)

/* Load one source line into scratch $xr2-$xr3 and fold it into $xr0-$xr1. */
#define LD_AND_XOR_LINE(base)	\
	LD(2, base, 0)		\
	LD(3, base, 32)		\
	XOR(0, 2)		\
	XOR(1, 3)

/* Write the accumulators $xr0-$xr1 back out. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 32)

#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
#include "xor_template.c"

#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LASX */
+38
arch/loongarch/lib/xor_simd.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Simple interface to link xor_simd.c and xor_simd_glue.c
 *
 * Separating these files ensures that no SIMD instructions are run outside of
 * the kfpu critical section.
 *
 * The __xor_<flavor>_N() routines declared here are the raw loops generated
 * from xor_template.c. They take a byte count followed by N buffers; p1 is
 * both read and written, p2..pN are read-only sources. They do not enable
 * the vector unit themselves; xor_simd_glue.c wraps each call in
 * kernel_fpu_begin()/kernel_fpu_end().
 */

#ifndef __LOONGARCH_LIB_XOR_SIMD_H
#define __LOONGARCH_LIB_XOR_SIMD_H

#ifdef CONFIG_CPU_HAS_LSX
/* LSX instantiations (XOR_FUNC_NAME(nr) == __xor_lsx_##nr in xor_simd.c). */
void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2);
void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3,
	const unsigned long * __restrict p4);
void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3,
	const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX
/* LASX instantiations (XOR_FUNC_NAME(nr) == __xor_lasx_##nr in xor_simd.c). */
void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2);
void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3,
	const unsigned long * __restrict p4);
void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
	const unsigned long * __restrict p2, const unsigned long * __restrict p3,
	const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LASX */

#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
+72
arch/loongarch/lib/xor_simd_glue.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Glue layer: defines the exported xor_<flavor>_N() entry points. Each one
 * brackets the corresponding raw __xor_<flavor>_N() routine (declared in
 * the local xor_simd.h) with kernel_fpu_begin()/kernel_fpu_end().
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <asm/fpu.h>
#include <asm/xor_simd.h>
#include "xor_simd.h"

/*
 * MAKE_XOR_GLUE_N(flavor) defines and GPL-exports xor_<flavor>_N(), which
 * forwards its arguments unchanged to __xor_<flavor>_N() inside a
 * kernel_fpu_begin()/kernel_fpu_end() pair. The macros deliberately end
 * without a semicolon; the user supplies it.
 */
#define MAKE_XOR_GLUE_2(flavor)							\
void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,	\
		      const unsigned long * __restrict p2)			\
{										\
	kernel_fpu_begin();							\
	__xor_##flavor##_2(bytes, p1, p2);					\
	kernel_fpu_end();							\
}										\
EXPORT_SYMBOL_GPL(xor_##flavor##_2)

#define MAKE_XOR_GLUE_3(flavor)							\
void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,	\
		      const unsigned long * __restrict p2,			\
		      const unsigned long * __restrict p3)			\
{										\
	kernel_fpu_begin();							\
	__xor_##flavor##_3(bytes, p1, p2, p3);					\
	kernel_fpu_end();							\
}										\
EXPORT_SYMBOL_GPL(xor_##flavor##_3)

#define MAKE_XOR_GLUE_4(flavor)							\
void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,	\
		      const unsigned long * __restrict p2,			\
		      const unsigned long * __restrict p3,			\
		      const unsigned long * __restrict p4)			\
{										\
	kernel_fpu_begin();							\
	__xor_##flavor##_4(bytes, p1, p2, p3, p4);				\
	kernel_fpu_end();							\
}										\
EXPORT_SYMBOL_GPL(xor_##flavor##_4)

#define MAKE_XOR_GLUE_5(flavor)							\
void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,	\
		      const unsigned long * __restrict p2,			\
		      const unsigned long * __restrict p3,			\
		      const unsigned long * __restrict p4,			\
		      const unsigned long * __restrict p5)			\
{										\
	kernel_fpu_begin();							\
	__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);				\
	kernel_fpu_end();							\
}										\
EXPORT_SYMBOL_GPL(xor_##flavor##_5)

/* Emit the 2-, 3-, 4- and 5-buffer wrappers for one SIMD flavor. */
#define MAKE_XOR_GLUES(flavor)		\
	MAKE_XOR_GLUE_2(flavor);	\
	MAKE_XOR_GLUE_3(flavor);	\
	MAKE_XOR_GLUE_4(flavor);	\
	MAKE_XOR_GLUE_5(flavor)

/* Instantiate only the flavors the kernel was configured with. */
#ifdef CONFIG_CPU_HAS_LSX
MAKE_XOR_GLUES(lsx);
#endif

#ifdef CONFIG_CPU_HAS_LASX
MAKE_XOR_GLUES(lasx);
#endif
+110
arch/loongarch/lib/xor_template.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LINE_WIDTH
 * - XOR_FUNC_NAME(nr)
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 * - ST_LINE(buf)
 *
 * Every function below follows the same shape: for each LINE_WIDTH-byte
 * line, load the line from v1, XOR in the matching line of every source
 * buffer, store the result back to v1, then advance all pointers by one
 * line.
 *
 * NOTE(review): the loops are do/while with a pre-decrement of an unsigned
 * counter, so a call with bytes < LINE_WIDTH (lines == 0) still runs the
 * body and then wraps the counter. Callers are presumably required to pass
 * a non-zero multiple of LINE_WIDTH -- confirm against the xor_blocks users.
 *
 * NOTE(review): the asm statements list only "memory" as a clobber; the
 * vector registers named by the LD/XOR/ST macros are not declared. This
 * presumably relies on the compiler never allocating vector registers in
 * this translation unit -- confirm.
 */

/* v1 ^= v2, one LINE_WIDTH-byte line per iteration. */
void XOR_FUNC_NAME(2)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			ST_LINE(v1)
		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
		);

		/* pointers are unsigned long *, so step in words, not bytes */
		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}

/* v1 ^= v2 ^ v3, one LINE_WIDTH-byte line per iteration. */
void XOR_FUNC_NAME(3)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			ST_LINE(v1)
		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}

/* v1 ^= v2 ^ v3 ^ v4, one LINE_WIDTH-byte line per iteration. */
void XOR_FUNC_NAME(4)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3,
		      const unsigned long * __restrict v4)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			LD_AND_XOR_LINE(v4)
			ST_LINE(v1)
		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
		: "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
		v4 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}

/* v1 ^= v2 ^ v3 ^ v4 ^ v5, one LINE_WIDTH-byte line per iteration. */
void XOR_FUNC_NAME(5)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3,
		      const unsigned long * __restrict v4,
		      const unsigned long * __restrict v5)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			LD_AND_XOR_LINE(v4)
			LD_AND_XOR_LINE(v5)
			ST_LINE(v1)
		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
		    [v5] "r"(v5) : "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
		v4 += LINE_WIDTH / sizeof(unsigned long);
		v5 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}
+3
arch/loongarch/mm/Makefile
··· 7 7 fault.o ioremap.o maccess.o mmap.o pgtable.o page.o 8 8 9 9 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 10 + obj-$(CONFIG_KASAN) += kasan_init.o 11 + 12 + KASAN_SANITIZE_kasan_init.o := n
-1
arch/loongarch/mm/cache.c
··· 156 156 157 157 current_cpu_data.cache_leaves_present = leaf; 158 158 current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; 159 - shm_align_mask = PAGE_SIZE - 1; 160 159 } 161 160 162 161 static const pgprot_t protection_map[16] = {
+14 -8
arch/loongarch/mm/fault.c
··· 23 23 #include <linux/kprobes.h> 24 24 #include <linux/perf_event.h> 25 25 #include <linux/uaccess.h> 26 + #include <linux/kfence.h> 26 27 27 28 #include <asm/branch.h> 28 29 #include <asm/mmu_context.h> ··· 31 30 32 31 int show_unhandled_signals = 1; 33 32 34 - static void __kprobes no_context(struct pt_regs *regs, unsigned long address) 33 + static void __kprobes no_context(struct pt_regs *regs, 34 + unsigned long write, unsigned long address) 35 35 { 36 36 const int field = sizeof(unsigned long) * 2; 37 37 38 38 /* Are we prepared to handle this kernel fault? */ 39 39 if (fixup_exception(regs)) 40 + return; 41 + 42 + if (kfence_handle_page_fault(address, write, regs)) 40 43 return; 41 44 42 45 /* ··· 56 51 die("Oops", regs); 57 52 } 58 53 59 - static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) 54 + static void __kprobes do_out_of_memory(struct pt_regs *regs, 55 + unsigned long write, unsigned long address) 60 56 { 61 57 /* 62 58 * We ran out of memory, call the OOM killer, and return the userspace 63 59 * (which will retry the fault, or kill us if we got oom-killed). 64 60 */ 65 61 if (!user_mode(regs)) { 66 - no_context(regs, address); 62 + no_context(regs, write, address); 67 63 return; 68 64 } 69 65 pagefault_out_of_memory(); ··· 75 69 { 76 70 /* Kernel mode? Handle exceptions or die */ 77 71 if (!user_mode(regs)) { 78 - no_context(regs, address); 72 + no_context(regs, write, address); 79 73 return; 80 74 } 81 75 ··· 96 90 97 91 /* Kernel mode? 
Handle exceptions or die */ 98 92 if (!user_mode(regs)) { 99 - no_context(regs, address); 93 + no_context(regs, write, address); 100 94 return; 101 95 } 102 96 ··· 155 149 */ 156 150 if (address & __UA_LIMIT) { 157 151 if (!user_mode(regs)) 158 - no_context(regs, address); 152 + no_context(regs, write, address); 159 153 else 160 154 do_sigsegv(regs, write, address, si_code); 161 155 return; ··· 217 211 218 212 if (fault_signal_pending(fault, regs)) { 219 213 if (!user_mode(regs)) 220 - no_context(regs, address); 214 + no_context(regs, write, address); 221 215 return; 222 216 } 223 217 ··· 238 232 if (unlikely(fault & VM_FAULT_ERROR)) { 239 233 mmap_read_unlock(mm); 240 234 if (fault & VM_FAULT_OOM) { 241 - do_out_of_memory(regs, address); 235 + do_out_of_memory(regs, write, address); 242 236 return; 243 237 } else if (fault & VM_FAULT_SIGSEGV) { 244 238 do_sigsegv(regs, write, address, si_code);
+22 -49
arch/loongarch/mm/init.c
··· 35 35 #include <asm/pgalloc.h> 36 36 #include <asm/tlb.h> 37 37 38 - /* 39 - * We have up to 8 empty zeroed pages so we can map one of the right colour 40 - * when needed. Since page is never written to after the initialization we 41 - * don't have to care about aliases on other CPUs. 42 - */ 43 - unsigned long empty_zero_page, zero_page_mask; 38 + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; 44 39 EXPORT_SYMBOL(empty_zero_page); 45 - EXPORT_SYMBOL(zero_page_mask); 46 - 47 - void setup_zero_pages(void) 48 - { 49 - unsigned int order, i; 50 - struct page *page; 51 - 52 - order = 0; 53 - 54 - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); 55 - if (!empty_zero_page) 56 - panic("Oh boy, that early out of memory?"); 57 - 58 - page = virt_to_page((void *)empty_zero_page); 59 - split_page(page, order); 60 - for (i = 0; i < (1 << order); i++, page++) 61 - mark_page_reserved(page); 62 - 63 - zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; 64 - } 65 40 66 41 void copy_user_highpage(struct page *to, struct page *from, 67 42 unsigned long vaddr, struct vm_area_struct *vma) ··· 81 106 high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); 82 107 83 108 memblock_free_all(); 84 - setup_zero_pages(); /* Setup zeroed pages. 
*/ 85 109 } 86 110 #endif /* !CONFIG_NUMA */ 87 111 ··· 165 191 #endif 166 192 #endif 167 193 168 - static pte_t *fixmap_pte(unsigned long addr) 194 + pte_t * __init populate_kernel_pte(unsigned long addr) 169 195 { 170 - pgd_t *pgd; 171 - p4d_t *p4d; 196 + pgd_t *pgd = pgd_offset_k(addr); 197 + p4d_t *p4d = p4d_offset(pgd, addr); 172 198 pud_t *pud; 173 199 pmd_t *pmd; 174 200 175 - pgd = pgd_offset_k(addr); 176 - p4d = p4d_offset(pgd, addr); 177 - 178 - if (pgd_none(*pgd)) { 179 - pud_t *new __maybe_unused; 180 - 181 - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); 182 - pgd_populate(&init_mm, pgd, new); 201 + if (p4d_none(*p4d)) { 202 + pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 203 + if (!pud) 204 + panic("%s: Failed to allocate memory\n", __func__); 205 + p4d_populate(&init_mm, p4d, pud); 183 206 #ifndef __PAGETABLE_PUD_FOLDED 184 - pud_init(new); 207 + pud_init(pud); 185 208 #endif 186 209 } 187 210 188 211 pud = pud_offset(p4d, addr); 189 212 if (pud_none(*pud)) { 190 - pmd_t *new __maybe_unused; 191 - 192 - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); 193 - pud_populate(&init_mm, pud, new); 213 + pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 214 + if (!pmd) 215 + panic("%s: Failed to allocate memory\n", __func__); 216 + pud_populate(&init_mm, pud, pmd); 194 217 #ifndef __PAGETABLE_PMD_FOLDED 195 - pmd_init(new); 218 + pmd_init(pmd); 196 219 #endif 197 220 } 198 221 199 222 pmd = pmd_offset(pud, addr); 200 - if (pmd_none(*pmd)) { 201 - pte_t *new __maybe_unused; 223 + if (!pmd_present(*pmd)) { 224 + pte_t *pte; 202 225 203 - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); 204 - pmd_populate_kernel(&init_mm, pmd, new); 226 + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 227 + if (!pte) 228 + panic("%s: Failed to allocate memory\n", __func__); 229 + pmd_populate_kernel(&init_mm, pmd, pte); 205 230 } 206 231 207 232 return pte_offset_kernel(pmd, addr); ··· 214 241 215 242 BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); 216 243 217 - ptep = 
fixmap_pte(addr); 244 + ptep = populate_kernel_pte(addr); 218 245 if (!pte_none(*ptep)) { 219 246 pte_ERROR(*ptep); 220 247 return;
+243
arch/loongarch/mm/kasan_init.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 */
#define pr_fmt(fmt) "kasan: " fmt
#include <linux/kasan.h>
#include <linux/memblock.h>
#include <linux/sched/task.h>

#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm-generic/sections.h>

/*
 * Scratch copy of swapper_pg_dir. kasan_init() installs it in
 * LOONGARCH_CSR_PGDH while the shadow entries of swapper_pg_dir are being
 * torn down and rebuilt, then switches back.
 */
static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);

/*
 * "Is this entry still unpopulated?" helpers, one per page-table level.
 *
 * In the early pass an entry counts as none when its raw value is 0 (or
 * pte_none() for PTEs). In the later pass it counts as none when it still
 * refers to the shared kasan_early_shadow_* table/page, i.e. it has not yet
 * been given a private table. Folded levels are never none.
 */
#ifdef __PAGETABLE_PUD_FOLDED
#define __p4d_none(early, p4d) (0)
#else
#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
#endif

#ifdef __PAGETABLE_PMD_FOLDED
#define __pud_none(early, pud) (0)
#else
#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
#endif

#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))

#define __pte_none(early, pte) (early ? pte_none(pte) : \
((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))

/* Set at build time; cleared by kasan_init() once the early mapping is in place. */
bool kasan_early_stage = true;

/*
 * Allocate one page for the shadow page tables / shadow memory and return
 * its physical address. Panics if the allocation fails.
 *
 * NOTE(review): the "zeroed" in the name presumably relies on
 * memblock_alloc_try_nid() returning cleared memory -- confirm.
 */
static phys_addr_t __init kasan_alloc_zeroed_page(int node)
{
	void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
					__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
	if (!p)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
			__func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));

	return __pa(p);
}

/*
 * Return the PTE slot for @addr under @pmdp, installing a PTE table first if
 * the PMD entry is still "none" (see __pmd_none()).
 *
 * early:  point the PMD at the shared kasan_early_shadow_pte table.
 * !early: allocate a private table and seed it with a copy of
 *         kasan_early_shadow_pte.
 */
static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
{
	if (__pmd_none(early, READ_ONCE(*pmdp))) {
		phys_addr_t pte_phys = early ?
				__pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
	}

	return pte_offset_kernel(pmdp, addr);
}

/* Same as kasan_pte_offset(), one level up: PUD entry -> PMD table. */
static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
{
	if (__pud_none(early, READ_ONCE(*pudp))) {
		phys_addr_t pmd_phys = early ?
				__pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
		pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
	}

	return pmd_offset(pudp, addr);
}

/* Same as kasan_pte_offset(), two levels up: P4D entry -> PUD table. */
static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
{
	if (__p4d_none(early, READ_ONCE(*p4dp))) {
		phys_addr_t pud_phys = early ?
			__pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
		p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
	}

	return pud_offset(p4dp, addr);
}

/*
 * Fill PTEs for [addr, end) under @pmdp.
 *
 * early:  every PTE maps the shared kasan_early_shadow_page.
 * !early: every PTE maps a freshly allocated page.
 *
 * The first PTE is always written. The walk then ends at @end or at the
 * first following PTE that is already populated (per __pte_none()),
 * whichever comes first; it does not skip over populated entries.
 */
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
					unsigned long end, int node, bool early)
{
	unsigned long next;
	pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);

	do {
		phys_addr_t page_phys = early ?
					__pa_symbol(kasan_early_shadow_page)
					      : kasan_alloc_zeroed_page(node);
		next = addr + PAGE_SIZE;
		set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
	} while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
}

/*
 * Populate [addr, end) one PMD entry at a time. Like the PTE level, the walk
 * ends early at the first following PMD entry that is already populated.
 */
static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
					unsigned long end, int node, bool early)
{
	unsigned long next;
	pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);

	do {
		next = pmd_addr_end(addr, end);
		kasan_pte_populate(pmdp, addr, next, node, early);
	} while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
}

/* Populate [addr, end) one PUD entry at a time (always runs to @end). */
static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
					unsigned long end, int node, bool early)
{
	unsigned long next;
	pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);

	do {
		next = pud_addr_end(addr, end);
		kasan_pmd_populate(pudp, addr, next, node, early);
	} while (pudp++, addr = next, addr != end);
}

/* Populate [addr, end) one P4D entry at a time (always runs to @end). */
static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
					unsigned long end, int node, bool early)
{
	unsigned long next;
	p4d_t *p4dp = p4d_offset(pgdp, addr);

	do {
		next = p4d_addr_end(addr, end);
		kasan_pud_populate(p4dp, addr, next, node, early);
	} while (p4dp++, addr = next, addr != end);
}

/*
 * Top of the populate walk: iterate the kernel PGD entries covering
 * [addr, end) and descend into each.
 */
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
					int node, bool early)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);

	do {
		next = pgd_addr_end(addr, end);
		kasan_p4d_populate(pgdp, addr, next, node, early);
	} while (pgdp++, addr = next, addr != end);

}

/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
static void __init kasan_map_populate(unsigned long start, unsigned long end,
					int node)
{
	/* widen the range to whole pages before the non-early walk */
	kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
}

/*
 * Build-time sanity checks only: the shadow region must start and end on
 * PGD-entry boundaries, which clear_pgds() and the populate walk rely on.
 */
asmlinkage void __init kasan_early_init(void)
{
	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
}

/* Store a PGD entry with a single WRITE_ONCE(). */
static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
{
	WRITE_ONCE(*pgdp, pgdval);
}

static void __init clear_pgds(unsigned long start, unsigned long end)
{
	/*
	 * Remove references to kasan page tables from
	 * swapper_pg_dir. pgd_clear() can't be used
	 * here because it's nop on 2,3-level pagetable setups
	 */
	for (; start < end; start += PGDIR_SIZE)
		kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
}

/*
 * Build the final KASAN shadow mappings.
 *
 * While swapper_pg_dir is being edited the CPU runs on kasan_pg_dir, a
 * snapshot taken at entry. The sequence is: clear the shadow PGD entries,
 * map the whole shadow range to the shared early shadow page, give the
 * linear mapping and the module area real shadow pages, make the shared
 * early PTE table read-only, then switch back to swapper_pg_dir.
 */
void __init kasan_init(void)
{
	u64 i;
	phys_addr_t pa_start, pa_end;

	/*
	 * PGD was populated as invalid_pmd_table or invalid_pud_table
	 * in pagetable_init() which depends on how many levels of page
	 * table you are using, but we had to clean the pgd of kasan
	 * shadow memory, as the pgd value is non-zero.
	 * The assertion pgd_none is going to be false and the formal populate
	 * afterwards is not going to create any new pgd at all.
	 */
	memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
	csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
	local_flush_tlb_all();

	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);

	/* Maps everything to a single page of zeroes */
	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);

	/* Shadow for VMALLOC_START..KFENCE_AREA_END goes through the generic early-shadow helper. */
	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
					kasan_mem_to_shadow((void *)KFENCE_AREA_END));

	kasan_early_stage = false;

	/* Populate the linear mapping */
	for_each_mem_range(i, &pa_start, &pa_end) {
		void *start = (void *)phys_to_virt(pa_start);
		void *end = (void *)phys_to_virt(pa_end);

		/* an empty or inverted range ends the walk (break, not continue) */
		if (start >= end)
			break;

		kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
			(unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
	}

	/* Populate modules mapping */
	kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
		(unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
	/*
	 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we
	 * should make sure that it maps the zero page read-only.
	 */
	for (i = 0; i < PTRS_PER_PTE; i++)
		set_pte(&kasan_early_shadow_pte[i],
			pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));

	/* wipe anything written to the shared page during early boot, then return to the real PGD */
	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
	csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
	local_flush_tlb_all();

	/* At this point kasan is fully initialized. Enable error messages */
	init_task.kasan_depth = 0;
	pr_info("KernelAddressSanitizer initialized.\n");
}
+6 -7
arch/loongarch/mm/mmap.c
··· 8 8 #include <linux/mm.h> 9 9 #include <linux/mman.h> 10 10 11 - unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ 12 - EXPORT_SYMBOL(shm_align_mask); 11 + #define SHM_ALIGN_MASK (SHMLBA - 1) 13 12 14 - #define COLOUR_ALIGN(addr, pgoff) \ 15 - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ 16 - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) 13 + #define COLOUR_ALIGN(addr, pgoff) \ 14 + ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \ 15 + + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK)) 17 16 18 17 enum mmap_allocation_direction {UP, DOWN}; 19 18 ··· 39 40 * cache aliasing constraints. 40 41 */ 41 42 if ((flags & MAP_SHARED) && 42 - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) 43 + ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK)) 43 44 return -EINVAL; 44 45 return addr; 45 46 } ··· 62 63 } 63 64 64 65 info.length = len; 65 - info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; 66 + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; 66 67 info.align_offset = pgoff << PAGE_SHIFT; 67 68 68 69 if (dir == DOWN) {
+12
arch/loongarch/mm/pgtable.c
··· 9 9 #include <asm/pgtable.h> 10 10 #include <asm/tlbflush.h> 11 11 12 + struct page *dmw_virt_to_page(unsigned long kaddr) 13 + { 14 + return pfn_to_page(virt_to_pfn(kaddr)); 15 + } 16 + EXPORT_SYMBOL_GPL(dmw_virt_to_page); 17 + 18 + struct page *tlb_virt_to_page(unsigned long kaddr) 19 + { 20 + return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); 21 + } 22 + EXPORT_SYMBOL_GPL(tlb_virt_to_page); 23 + 12 24 pgd_t *pgd_alloc(struct mm_struct *mm) 13 25 { 14 26 pgd_t *init, *ret = NULL;
+3
arch/loongarch/vdso/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 # Objects to go into the VDSO. 3 3 4 + KASAN_SANITIZE := n 5 + KCOV_INSTRUMENT := n 6 + 4 7 # Include the generic Makefile to check the built vdso. 5 8 include $(srctree)/lib/vdso/Makefile 6 9
+2
include/linux/kasan.h
··· 54 54 int kasan_populate_early_shadow(const void *shadow_start, 55 55 const void *shadow_end); 56 56 57 + #ifndef __HAVE_ARCH_SHADOW_MAP 57 58 static inline void *kasan_mem_to_shadow(const void *addr) 58 59 { 59 60 return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) 60 61 + KASAN_SHADOW_OFFSET; 61 62 } 63 + #endif 62 64 63 65 int kasan_add_zero_shadow(void *start, unsigned long size); 64 66 void kasan_remove_zero_shadow(void *start, unsigned long size);
+4
include/linux/raid/pq.h
··· 108 108 extern const struct raid6_calls raid6_vpermxor2; 109 109 extern const struct raid6_calls raid6_vpermxor4; 110 110 extern const struct raid6_calls raid6_vpermxor8; 111 + extern const struct raid6_calls raid6_lsx; 112 + extern const struct raid6_calls raid6_lasx; 111 113 112 114 struct raid6_recov_calls { 113 115 void (*data2)(int, size_t, int, int, void **); ··· 125 123 extern const struct raid6_recov_calls raid6_recov_avx512; 126 124 extern const struct raid6_recov_calls raid6_recov_s390xc; 127 125 extern const struct raid6_recov_calls raid6_recov_neon; 126 + extern const struct raid6_recov_calls raid6_recov_lsx; 127 + extern const struct raid6_recov_calls raid6_recov_lasx; 128 128 129 129 extern const struct raid6_calls raid6_neonx1; 130 130 extern const struct raid6_calls raid6_neonx2;
+1
lib/raid6/Makefile
··· 9 9 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o 10 10 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o 11 11 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o 12 + raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o 12 13 13 14 hostprogs += mktables 14 15
+16
lib/raid6/algos.c
··· 73 73 &raid6_neonx2, 74 74 &raid6_neonx1, 75 75 #endif 76 + #ifdef CONFIG_LOONGARCH 77 + #ifdef CONFIG_CPU_HAS_LASX 78 + &raid6_lasx, 79 + #endif 80 + #ifdef CONFIG_CPU_HAS_LSX 81 + &raid6_lsx, 82 + #endif 83 + #endif 76 84 #if defined(__ia64__) 77 85 &raid6_intx32, 78 86 &raid6_intx16, ··· 111 103 #endif 112 104 #if defined(CONFIG_KERNEL_MODE_NEON) 113 105 &raid6_recov_neon, 106 + #endif 107 + #ifdef CONFIG_LOONGARCH 108 + #ifdef CONFIG_CPU_HAS_LASX 109 + &raid6_recov_lasx, 110 + #endif 111 + #ifdef CONFIG_CPU_HAS_LSX 112 + &raid6_recov_lsx, 113 + #endif 114 114 #endif 115 115 &raid6_recov_intx1, 116 116 NULL
+38
lib/raid6/loongarch.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 4 + * 5 + * raid6/loongarch.h 6 + * 7 + * Definitions common to LoongArch RAID-6 code only 8 + */ 9 + 10 + #ifndef _LIB_RAID6_LOONGARCH_H 11 + #define _LIB_RAID6_LOONGARCH_H 12 + 13 + #ifdef __KERNEL__ 14 + 15 + #include <asm/cpu-features.h> 16 + #include <asm/fpu.h> 17 + 18 + #else /* for user-space testing */ 19 + 20 + #include <sys/auxv.h> 21 + 22 + /* have to supply these defines for glibc 2.37- and musl */ 23 + #ifndef HWCAP_LOONGARCH_LSX 24 + #define HWCAP_LOONGARCH_LSX (1 << 4) 25 + #endif 26 + #ifndef HWCAP_LOONGARCH_LASX 27 + #define HWCAP_LOONGARCH_LASX (1 << 5) 28 + #endif 29 + 30 + #define kernel_fpu_begin() 31 + #define kernel_fpu_end() 32 + 33 + #define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) 34 + #define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) 35 + 36 + #endif /* __KERNEL__ */ 37 + 38 + #endif /* _LIB_RAID6_LOONGARCH_H */
+422
lib/raid6/loongarch_simd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) 4 + * 5 + * Copyright 2023 WANG Xuerui <git@xen0n.name> 6 + * 7 + * Based on the generic RAID-6 code (int.uc): 8 + * 9 + * Copyright 2002-2004 H. Peter Anvin 10 + */ 11 + 12 + #include <linux/raid/pq.h> 13 + #include "loongarch.h" 14 + 15 + /* 16 + * The vector algorithms are currently priority 0, which means the generic 17 + * scalar algorithms are not being disabled if vector support is present. 18 + * This is like the similar LoongArch RAID5 XOR code, with the main reason 19 + * repeated here: it cannot be ruled out at this point of time, that some 20 + * future (maybe reduced) models could run the vector algorithms slower than 21 + * the scalar ones, maybe for errata or micro-op reasons. It may be 22 + * appropriate to revisit this after one or two more uarch generations. 23 + */ 24 + 25 + #ifdef CONFIG_CPU_HAS_LSX 26 + #define NSIZE 16 27 + 28 + static int raid6_has_lsx(void) 29 + { 30 + return cpu_has_lsx; 31 + } 32 + 33 + static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) 34 + { 35 + u8 **dptr = (u8 **)ptrs; 36 + u8 *p, *q; 37 + int d, z, z0; 38 + 39 + z0 = disks - 3; /* Highest data disk */ 40 + p = dptr[z0+1]; /* XOR parity */ 41 + q = dptr[z0+2]; /* RS syndrome */ 42 + 43 + kernel_fpu_begin(); 44 + 45 + /* 46 + * $vr0, $vr1, $vr2, $vr3: wp 47 + * $vr4, $vr5, $vr6, $vr7: wq 48 + * $vr8, $vr9, $vr10, $vr11: wd 49 + * $vr12, $vr13, $vr14, $vr15: w2 50 + * $vr16, $vr17, $vr18, $vr19: w1 51 + */ 52 + for (d = 0; d < bytes; d += NSIZE*4) { 53 + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 54 + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 55 + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 56 + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); 57 + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); 58 + asm volatile("vori.b $vr4, $vr0, 0"); 59 + asm 
volatile("vori.b $vr5, $vr1, 0"); 60 + asm volatile("vori.b $vr6, $vr2, 0"); 61 + asm volatile("vori.b $vr7, $vr3, 0"); 62 + for (z = z0-1; z >= 0; z--) { 63 + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 64 + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); 65 + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); 66 + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); 67 + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); 68 + /* wp$$ ^= wd$$; */ 69 + asm volatile("vxor.v $vr0, $vr0, $vr8"); 70 + asm volatile("vxor.v $vr1, $vr1, $vr9"); 71 + asm volatile("vxor.v $vr2, $vr2, $vr10"); 72 + asm volatile("vxor.v $vr3, $vr3, $vr11"); 73 + /* w2$$ = MASK(wq$$); */ 74 + asm volatile("vslti.b $vr12, $vr4, 0"); 75 + asm volatile("vslti.b $vr13, $vr5, 0"); 76 + asm volatile("vslti.b $vr14, $vr6, 0"); 77 + asm volatile("vslti.b $vr15, $vr7, 0"); 78 + /* w1$$ = SHLBYTE(wq$$); */ 79 + asm volatile("vslli.b $vr16, $vr4, 1"); 80 + asm volatile("vslli.b $vr17, $vr5, 1"); 81 + asm volatile("vslli.b $vr18, $vr6, 1"); 82 + asm volatile("vslli.b $vr19, $vr7, 1"); 83 + /* w2$$ &= NBYTES(0x1d); */ 84 + asm volatile("vandi.b $vr12, $vr12, 0x1d"); 85 + asm volatile("vandi.b $vr13, $vr13, 0x1d"); 86 + asm volatile("vandi.b $vr14, $vr14, 0x1d"); 87 + asm volatile("vandi.b $vr15, $vr15, 0x1d"); 88 + /* w1$$ ^= w2$$; */ 89 + asm volatile("vxor.v $vr16, $vr16, $vr12"); 90 + asm volatile("vxor.v $vr17, $vr17, $vr13"); 91 + asm volatile("vxor.v $vr18, $vr18, $vr14"); 92 + asm volatile("vxor.v $vr19, $vr19, $vr15"); 93 + /* wq$$ = w1$$ ^ wd$$; */ 94 + asm volatile("vxor.v $vr4, $vr16, $vr8"); 95 + asm volatile("vxor.v $vr5, $vr17, $vr9"); 96 + asm volatile("vxor.v $vr6, $vr18, $vr10"); 97 + asm volatile("vxor.v $vr7, $vr19, $vr11"); 98 + } 99 + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ 100 + asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); 101 + asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); 102 + asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); 103 + 
asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); 104 + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ 105 + asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); 106 + asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); 107 + asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); 108 + asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); 109 + } 110 + 111 + kernel_fpu_end(); 112 + } 113 + 114 + static void raid6_lsx_xor_syndrome(int disks, int start, int stop, 115 + size_t bytes, void **ptrs) 116 + { 117 + u8 **dptr = (u8 **)ptrs; 118 + u8 *p, *q; 119 + int d, z, z0; 120 + 121 + z0 = stop; /* P/Q right side optimization */ 122 + p = dptr[disks-2]; /* XOR parity */ 123 + q = dptr[disks-1]; /* RS syndrome */ 124 + 125 + kernel_fpu_begin(); 126 + 127 + /* 128 + * $vr0, $vr1, $vr2, $vr3: wp 129 + * $vr4, $vr5, $vr6, $vr7: wq 130 + * $vr8, $vr9, $vr10, $vr11: wd 131 + * $vr12, $vr13, $vr14, $vr15: w2 132 + * $vr16, $vr17, $vr18, $vr19: w1 133 + */ 134 + for (d = 0; d < bytes; d += NSIZE*4) { 135 + /* P/Q data pages */ 136 + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 137 + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 138 + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 139 + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); 140 + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); 141 + asm volatile("vori.b $vr4, $vr0, 0"); 142 + asm volatile("vori.b $vr5, $vr1, 0"); 143 + asm volatile("vori.b $vr6, $vr2, 0"); 144 + asm volatile("vori.b $vr7, $vr3, 0"); 145 + for (z = z0-1; z >= start; z--) { 146 + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 147 + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); 148 + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); 149 + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); 150 + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); 151 + /* wp$$ ^= wd$$; */ 152 + asm volatile("vxor.v $vr0, $vr0, $vr8"); 153 + asm volatile("vxor.v $vr1, $vr1, $vr9"); 154 + asm 
volatile("vxor.v $vr2, $vr2, $vr10"); 155 + asm volatile("vxor.v $vr3, $vr3, $vr11"); 156 + /* w2$$ = MASK(wq$$); */ 157 + asm volatile("vslti.b $vr12, $vr4, 0"); 158 + asm volatile("vslti.b $vr13, $vr5, 0"); 159 + asm volatile("vslti.b $vr14, $vr6, 0"); 160 + asm volatile("vslti.b $vr15, $vr7, 0"); 161 + /* w1$$ = SHLBYTE(wq$$); */ 162 + asm volatile("vslli.b $vr16, $vr4, 1"); 163 + asm volatile("vslli.b $vr17, $vr5, 1"); 164 + asm volatile("vslli.b $vr18, $vr6, 1"); 165 + asm volatile("vslli.b $vr19, $vr7, 1"); 166 + /* w2$$ &= NBYTES(0x1d); */ 167 + asm volatile("vandi.b $vr12, $vr12, 0x1d"); 168 + asm volatile("vandi.b $vr13, $vr13, 0x1d"); 169 + asm volatile("vandi.b $vr14, $vr14, 0x1d"); 170 + asm volatile("vandi.b $vr15, $vr15, 0x1d"); 171 + /* w1$$ ^= w2$$; */ 172 + asm volatile("vxor.v $vr16, $vr16, $vr12"); 173 + asm volatile("vxor.v $vr17, $vr17, $vr13"); 174 + asm volatile("vxor.v $vr18, $vr18, $vr14"); 175 + asm volatile("vxor.v $vr19, $vr19, $vr15"); 176 + /* wq$$ = w1$$ ^ wd$$; */ 177 + asm volatile("vxor.v $vr4, $vr16, $vr8"); 178 + asm volatile("vxor.v $vr5, $vr17, $vr9"); 179 + asm volatile("vxor.v $vr6, $vr18, $vr10"); 180 + asm volatile("vxor.v $vr7, $vr19, $vr11"); 181 + } 182 + 183 + /* P/Q left side optimization */ 184 + for (z = start-1; z >= 0; z--) { 185 + /* w2$$ = MASK(wq$$); */ 186 + asm volatile("vslti.b $vr12, $vr4, 0"); 187 + asm volatile("vslti.b $vr13, $vr5, 0"); 188 + asm volatile("vslti.b $vr14, $vr6, 0"); 189 + asm volatile("vslti.b $vr15, $vr7, 0"); 190 + /* w1$$ = SHLBYTE(wq$$); */ 191 + asm volatile("vslli.b $vr16, $vr4, 1"); 192 + asm volatile("vslli.b $vr17, $vr5, 1"); 193 + asm volatile("vslli.b $vr18, $vr6, 1"); 194 + asm volatile("vslli.b $vr19, $vr7, 1"); 195 + /* w2$$ &= NBYTES(0x1d); */ 196 + asm volatile("vandi.b $vr12, $vr12, 0x1d"); 197 + asm volatile("vandi.b $vr13, $vr13, 0x1d"); 198 + asm volatile("vandi.b $vr14, $vr14, 0x1d"); 199 + asm volatile("vandi.b $vr15, $vr15, 0x1d"); 200 + /* wq$$ = w1$$ ^ w2$$; */ 201 
+ asm volatile("vxor.v $vr4, $vr16, $vr12"); 202 + asm volatile("vxor.v $vr5, $vr17, $vr13"); 203 + asm volatile("vxor.v $vr6, $vr18, $vr14"); 204 + asm volatile("vxor.v $vr7, $vr19, $vr15"); 205 + } 206 + /* 207 + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 208 + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 209 + */ 210 + asm volatile( 211 + "vld $vr20, %0\n\t" 212 + "vld $vr21, %1\n\t" 213 + "vld $vr22, %2\n\t" 214 + "vld $vr23, %3\n\t" 215 + "vld $vr24, %4\n\t" 216 + "vld $vr25, %5\n\t" 217 + "vld $vr26, %6\n\t" 218 + "vld $vr27, %7\n\t" 219 + "vxor.v $vr20, $vr20, $vr0\n\t" 220 + "vxor.v $vr21, $vr21, $vr1\n\t" 221 + "vxor.v $vr22, $vr22, $vr2\n\t" 222 + "vxor.v $vr23, $vr23, $vr3\n\t" 223 + "vxor.v $vr24, $vr24, $vr4\n\t" 224 + "vxor.v $vr25, $vr25, $vr5\n\t" 225 + "vxor.v $vr26, $vr26, $vr6\n\t" 226 + "vxor.v $vr27, $vr27, $vr7\n\t" 227 + "vst $vr20, %0\n\t" 228 + "vst $vr21, %1\n\t" 229 + "vst $vr22, %2\n\t" 230 + "vst $vr23, %3\n\t" 231 + "vst $vr24, %4\n\t" 232 + "vst $vr25, %5\n\t" 233 + "vst $vr26, %6\n\t" 234 + "vst $vr27, %7\n\t" 235 + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), 236 + "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), 237 + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), 238 + "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) 239 + ); 240 + } 241 + 242 + kernel_fpu_end(); 243 + } 244 + 245 + const struct raid6_calls raid6_lsx = { 246 + raid6_lsx_gen_syndrome, 247 + raid6_lsx_xor_syndrome, 248 + raid6_has_lsx, 249 + "lsx", 250 + .priority = 0 /* see the comment near the top of the file for reason */ 251 + }; 252 + 253 + #undef NSIZE 254 + #endif /* CONFIG_CPU_HAS_LSX */ 255 + 256 + #ifdef CONFIG_CPU_HAS_LASX 257 + #define NSIZE 32 258 + 259 + static int raid6_has_lasx(void) 260 + { 261 + return cpu_has_lasx; 262 + } 263 + 264 + static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) 265 + { 266 + u8 **dptr = (u8 **)ptrs; 267 + u8 *p, *q; 268 + int d, z, z0; 269 + 270 + z0 = disks - 3; /* Highest data disk */ 271 + p = dptr[z0+1]; /* XOR parity */ 272 + q = 
dptr[z0+2]; /* RS syndrome */ 273 + 274 + kernel_fpu_begin(); 275 + 276 + /* 277 + * $xr0, $xr1: wp 278 + * $xr2, $xr3: wq 279 + * $xr4, $xr5: wd 280 + * $xr6, $xr7: w2 281 + * $xr8, $xr9: w1 282 + */ 283 + for (d = 0; d < bytes; d += NSIZE*2) { 284 + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 285 + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 286 + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 287 + asm volatile("xvori.b $xr2, $xr0, 0"); 288 + asm volatile("xvori.b $xr3, $xr1, 0"); 289 + for (z = z0-1; z >= 0; z--) { 290 + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 291 + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); 292 + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); 293 + /* wp$$ ^= wd$$; */ 294 + asm volatile("xvxor.v $xr0, $xr0, $xr4"); 295 + asm volatile("xvxor.v $xr1, $xr1, $xr5"); 296 + /* w2$$ = MASK(wq$$); */ 297 + asm volatile("xvslti.b $xr6, $xr2, 0"); 298 + asm volatile("xvslti.b $xr7, $xr3, 0"); 299 + /* w1$$ = SHLBYTE(wq$$); */ 300 + asm volatile("xvslli.b $xr8, $xr2, 1"); 301 + asm volatile("xvslli.b $xr9, $xr3, 1"); 302 + /* w2$$ &= NBYTES(0x1d); */ 303 + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 304 + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 305 + /* w1$$ ^= w2$$; */ 306 + asm volatile("xvxor.v $xr8, $xr8, $xr6"); 307 + asm volatile("xvxor.v $xr9, $xr9, $xr7"); 308 + /* wq$$ = w1$$ ^ wd$$; */ 309 + asm volatile("xvxor.v $xr2, $xr8, $xr4"); 310 + asm volatile("xvxor.v $xr3, $xr9, $xr5"); 311 + } 312 + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ 313 + asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); 314 + asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1])); 315 + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ 316 + asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); 317 + asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); 318 + } 319 + 320 + kernel_fpu_end(); 321 + } 322 + 323 + static void raid6_lasx_xor_syndrome(int disks, int start, int stop, 324 + size_t bytes, void 
**ptrs) 325 + { 326 + u8 **dptr = (u8 **)ptrs; 327 + u8 *p, *q; 328 + int d, z, z0; 329 + 330 + z0 = stop; /* P/Q right side optimization */ 331 + p = dptr[disks-2]; /* XOR parity */ 332 + q = dptr[disks-1]; /* RS syndrome */ 333 + 334 + kernel_fpu_begin(); 335 + 336 + /* 337 + * $xr0, $xr1: wp 338 + * $xr2, $xr3: wq 339 + * $xr4, $xr5: wd 340 + * $xr6, $xr7: w2 341 + * $xr8, $xr9: w1 342 + */ 343 + for (d = 0; d < bytes; d += NSIZE*2) { 344 + /* P/Q data pages */ 345 + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 346 + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 347 + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 348 + asm volatile("xvori.b $xr2, $xr0, 0"); 349 + asm volatile("xvori.b $xr3, $xr1, 0"); 350 + for (z = z0-1; z >= start; z--) { 351 + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 352 + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); 353 + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); 354 + /* wp$$ ^= wd$$; */ 355 + asm volatile("xvxor.v $xr0, $xr0, $xr4"); 356 + asm volatile("xvxor.v $xr1, $xr1, $xr5"); 357 + /* w2$$ = MASK(wq$$); */ 358 + asm volatile("xvslti.b $xr6, $xr2, 0"); 359 + asm volatile("xvslti.b $xr7, $xr3, 0"); 360 + /* w1$$ = SHLBYTE(wq$$); */ 361 + asm volatile("xvslli.b $xr8, $xr2, 1"); 362 + asm volatile("xvslli.b $xr9, $xr3, 1"); 363 + /* w2$$ &= NBYTES(0x1d); */ 364 + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 365 + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 366 + /* w1$$ ^= w2$$; */ 367 + asm volatile("xvxor.v $xr8, $xr8, $xr6"); 368 + asm volatile("xvxor.v $xr9, $xr9, $xr7"); 369 + /* wq$$ = w1$$ ^ wd$$; */ 370 + asm volatile("xvxor.v $xr2, $xr8, $xr4"); 371 + asm volatile("xvxor.v $xr3, $xr9, $xr5"); 372 + } 373 + 374 + /* P/Q left side optimization */ 375 + for (z = start-1; z >= 0; z--) { 376 + /* w2$$ = MASK(wq$$); */ 377 + asm volatile("xvslti.b $xr6, $xr2, 0"); 378 + asm volatile("xvslti.b $xr7, $xr3, 0"); 379 + /* w1$$ = SHLBYTE(wq$$); */ 380 + asm 
volatile("xvslli.b $xr8, $xr2, 1"); 381 + asm volatile("xvslli.b $xr9, $xr3, 1"); 382 + /* w2$$ &= NBYTES(0x1d); */ 383 + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 384 + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 385 + /* wq$$ = w1$$ ^ w2$$; */ 386 + asm volatile("xvxor.v $xr2, $xr8, $xr6"); 387 + asm volatile("xvxor.v $xr3, $xr9, $xr7"); 388 + } 389 + /* 390 + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 391 + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 392 + */ 393 + asm volatile( 394 + "xvld $xr10, %0\n\t" 395 + "xvld $xr11, %1\n\t" 396 + "xvld $xr12, %2\n\t" 397 + "xvld $xr13, %3\n\t" 398 + "xvxor.v $xr10, $xr10, $xr0\n\t" 399 + "xvxor.v $xr11, $xr11, $xr1\n\t" 400 + "xvxor.v $xr12, $xr12, $xr2\n\t" 401 + "xvxor.v $xr13, $xr13, $xr3\n\t" 402 + "xvst $xr10, %0\n\t" 403 + "xvst $xr11, %1\n\t" 404 + "xvst $xr12, %2\n\t" 405 + "xvst $xr13, %3\n\t" 406 + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), 407 + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) 408 + ); 409 + } 410 + 411 + kernel_fpu_end(); 412 + } 413 + 414 + const struct raid6_calls raid6_lasx = { 415 + raid6_lasx_gen_syndrome, 416 + raid6_lasx_xor_syndrome, 417 + raid6_has_lasx, 418 + "lasx", 419 + .priority = 0 /* see the comment near the top of the file for reason */ 420 + }; 421 + #undef NSIZE 422 + #endif /* CONFIG_CPU_HAS_LASX */
+513
lib/raid6/recov_loongarch_simd.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) 4 + * 5 + * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 6 + * 7 + * Originally based on recov_avx2.c and recov_ssse3.c: 8 + * 9 + * Copyright (C) 2012 Intel Corporation 10 + * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> 11 + */ 12 + 13 + #include <linux/raid/pq.h> 14 + #include "loongarch.h" 15 + 16 + /* 17 + * Unlike with the syndrome calculation algorithms, there's no boot-time 18 + * selection of recovery algorithms by benchmarking, so we have to specify 19 + * the priorities and hope the future cores will all have decent vector 20 + * support (i.e. no LASX slower than LSX, or even scalar code). 21 + */ 22 + 23 + #ifdef CONFIG_CPU_HAS_LSX 24 + static int raid6_has_lsx(void) 25 + { 26 + return cpu_has_lsx; 27 + } 28 + 29 + static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, 30 + int failb, void **ptrs) 31 + { 32 + u8 *p, *q, *dp, *dq; 33 + const u8 *pbmul; /* P multiplier table for B data */ 34 + const u8 *qmul; /* Q multiplier table (for both) */ 35 + 36 + p = (u8 *)ptrs[disks - 2]; 37 + q = (u8 *)ptrs[disks - 1]; 38 + 39 + /* 40 + * Compute syndrome with zero for the missing data pages 41 + * Use the dead data pages as temporary storage for 42 + * delta p and delta q 43 + */ 44 + dp = (u8 *)ptrs[faila]; 45 + ptrs[faila] = (void *)raid6_empty_zero_page; 46 + ptrs[disks - 2] = dp; 47 + dq = (u8 *)ptrs[failb]; 48 + ptrs[failb] = (void *)raid6_empty_zero_page; 49 + ptrs[disks - 1] = dq; 50 + 51 + raid6_call.gen_syndrome(disks, bytes, ptrs); 52 + 53 + /* Restore pointer table */ 54 + ptrs[faila] = dp; 55 + ptrs[failb] = dq; 56 + ptrs[disks - 2] = p; 57 + ptrs[disks - 1] = q; 58 + 59 + /* Now, pick the proper data tables */ 60 + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 61 + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 62 + 63 + kernel_fpu_begin(); 64 + 65 + /* 66 + * vr20, vr21: 
qmul 67 + * vr22, vr23: pbmul 68 + */ 69 + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 70 + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 71 + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 72 + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 73 + 74 + while (bytes) { 75 + /* vr4 - vr7: Q */ 76 + asm volatile("vld $vr4, %0" : : "m" (q[0])); 77 + asm volatile("vld $vr5, %0" : : "m" (q[16])); 78 + asm volatile("vld $vr6, %0" : : "m" (q[32])); 79 + asm volatile("vld $vr7, %0" : : "m" (q[48])); 80 + /* vr4 - vr7: Q + Qxy */ 81 + asm volatile("vld $vr8, %0" : : "m" (dq[0])); 82 + asm volatile("vld $vr9, %0" : : "m" (dq[16])); 83 + asm volatile("vld $vr10, %0" : : "m" (dq[32])); 84 + asm volatile("vld $vr11, %0" : : "m" (dq[48])); 85 + asm volatile("vxor.v $vr4, $vr4, $vr8"); 86 + asm volatile("vxor.v $vr5, $vr5, $vr9"); 87 + asm volatile("vxor.v $vr6, $vr6, $vr10"); 88 + asm volatile("vxor.v $vr7, $vr7, $vr11"); 89 + /* vr0 - vr3: P */ 90 + asm volatile("vld $vr0, %0" : : "m" (p[0])); 91 + asm volatile("vld $vr1, %0" : : "m" (p[16])); 92 + asm volatile("vld $vr2, %0" : : "m" (p[32])); 93 + asm volatile("vld $vr3, %0" : : "m" (p[48])); 94 + /* vr0 - vr3: P + Pxy */ 95 + asm volatile("vld $vr8, %0" : : "m" (dp[0])); 96 + asm volatile("vld $vr9, %0" : : "m" (dp[16])); 97 + asm volatile("vld $vr10, %0" : : "m" (dp[32])); 98 + asm volatile("vld $vr11, %0" : : "m" (dp[48])); 99 + asm volatile("vxor.v $vr0, $vr0, $vr8"); 100 + asm volatile("vxor.v $vr1, $vr1, $vr9"); 101 + asm volatile("vxor.v $vr2, $vr2, $vr10"); 102 + asm volatile("vxor.v $vr3, $vr3, $vr11"); 103 + 104 + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ 105 + asm volatile("vsrli.b $vr8, $vr4, 4"); 106 + asm volatile("vsrli.b $vr9, $vr5, 4"); 107 + asm volatile("vsrli.b $vr10, $vr6, 4"); 108 + asm volatile("vsrli.b $vr11, $vr7, 4"); 109 + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ 110 + asm volatile("vandi.b $vr4, $vr4, 0x0f"); 111 + asm volatile("vandi.b $vr5, $vr5, 
0x0f"); 112 + asm volatile("vandi.b $vr6, $vr6, 0x0f"); 113 + asm volatile("vandi.b $vr7, $vr7, 0x0f"); 114 + /* lookup from qmul[0] */ 115 + asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); 116 + asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); 117 + asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); 118 + asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); 119 + /* lookup from qmul[16] */ 120 + asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); 121 + asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); 122 + asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); 123 + asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); 124 + /* vr16 - vr19: B(Q + Qxy) */ 125 + asm volatile("vxor.v $vr16, $vr8, $vr4"); 126 + asm volatile("vxor.v $vr17, $vr9, $vr5"); 127 + asm volatile("vxor.v $vr18, $vr10, $vr6"); 128 + asm volatile("vxor.v $vr19, $vr11, $vr7"); 129 + 130 + /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ 131 + asm volatile("vsrli.b $vr4, $vr0, 4"); 132 + asm volatile("vsrli.b $vr5, $vr1, 4"); 133 + asm volatile("vsrli.b $vr6, $vr2, 4"); 134 + asm volatile("vsrli.b $vr7, $vr3, 4"); 135 + /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ 136 + asm volatile("vandi.b $vr12, $vr0, 0x0f"); 137 + asm volatile("vandi.b $vr13, $vr1, 0x0f"); 138 + asm volatile("vandi.b $vr14, $vr2, 0x0f"); 139 + asm volatile("vandi.b $vr15, $vr3, 0x0f"); 140 + /* lookup from pbmul[0] */ 141 + asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); 142 + asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); 143 + asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); 144 + asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); 145 + /* lookup from pbmul[16] */ 146 + asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); 147 + asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); 148 + asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); 149 + asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); 150 + /* vr4 - vr7: A(P + Pxy) */ 151 + asm volatile("vxor.v $vr4, $vr4, $vr12"); 152 + asm volatile("vxor.v $vr5, 
$vr5, $vr13"); 153 + asm volatile("vxor.v $vr6, $vr6, $vr14"); 154 + asm volatile("vxor.v $vr7, $vr7, $vr15"); 155 + 156 + /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ 157 + asm volatile("vxor.v $vr4, $vr4, $vr16"); 158 + asm volatile("vxor.v $vr5, $vr5, $vr17"); 159 + asm volatile("vxor.v $vr6, $vr6, $vr18"); 160 + asm volatile("vxor.v $vr7, $vr7, $vr19"); 161 + asm volatile("vst $vr4, %0" : "=m" (dq[0])); 162 + asm volatile("vst $vr5, %0" : "=m" (dq[16])); 163 + asm volatile("vst $vr6, %0" : "=m" (dq[32])); 164 + asm volatile("vst $vr7, %0" : "=m" (dq[48])); 165 + 166 + /* vr0 - vr3: P + Pxy + Dx = Dy */ 167 + asm volatile("vxor.v $vr0, $vr0, $vr4"); 168 + asm volatile("vxor.v $vr1, $vr1, $vr5"); 169 + asm volatile("vxor.v $vr2, $vr2, $vr6"); 170 + asm volatile("vxor.v $vr3, $vr3, $vr7"); 171 + asm volatile("vst $vr0, %0" : "=m" (dp[0])); 172 + asm volatile("vst $vr1, %0" : "=m" (dp[16])); 173 + asm volatile("vst $vr2, %0" : "=m" (dp[32])); 174 + asm volatile("vst $vr3, %0" : "=m" (dp[48])); 175 + 176 + bytes -= 64; 177 + p += 64; 178 + q += 64; 179 + dp += 64; 180 + dq += 64; 181 + } 182 + 183 + kernel_fpu_end(); 184 + } 185 + 186 + static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, 187 + void **ptrs) 188 + { 189 + u8 *p, *q, *dq; 190 + const u8 *qmul; /* Q multiplier table */ 191 + 192 + p = (u8 *)ptrs[disks - 2]; 193 + q = (u8 *)ptrs[disks - 1]; 194 + 195 + /* 196 + * Compute syndrome with zero for the missing data page 197 + * Use the dead data page as temporary storage for delta q 198 + */ 199 + dq = (u8 *)ptrs[faila]; 200 + ptrs[faila] = (void *)raid6_empty_zero_page; 201 + ptrs[disks - 1] = dq; 202 + 203 + raid6_call.gen_syndrome(disks, bytes, ptrs); 204 + 205 + /* Restore pointer table */ 206 + ptrs[faila] = dq; 207 + ptrs[disks - 1] = q; 208 + 209 + /* Now, pick the proper data tables */ 210 + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 211 + 212 + kernel_fpu_begin(); 213 + 214 + /* vr22, vr23: qmul */ 215 + asm 
volatile("vld $vr22, %0" : : "m" (qmul[0])); 216 + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 217 + 218 + while (bytes) { 219 + /* vr0 - vr3: P + Dx */ 220 + asm volatile("vld $vr0, %0" : : "m" (p[0])); 221 + asm volatile("vld $vr1, %0" : : "m" (p[16])); 222 + asm volatile("vld $vr2, %0" : : "m" (p[32])); 223 + asm volatile("vld $vr3, %0" : : "m" (p[48])); 224 + /* vr4 - vr7: Qx */ 225 + asm volatile("vld $vr4, %0" : : "m" (dq[0])); 226 + asm volatile("vld $vr5, %0" : : "m" (dq[16])); 227 + asm volatile("vld $vr6, %0" : : "m" (dq[32])); 228 + asm volatile("vld $vr7, %0" : : "m" (dq[48])); 229 + /* vr4 - vr7: Q + Qx */ 230 + asm volatile("vld $vr8, %0" : : "m" (q[0])); 231 + asm volatile("vld $vr9, %0" : : "m" (q[16])); 232 + asm volatile("vld $vr10, %0" : : "m" (q[32])); 233 + asm volatile("vld $vr11, %0" : : "m" (q[48])); 234 + asm volatile("vxor.v $vr4, $vr4, $vr8"); 235 + asm volatile("vxor.v $vr5, $vr5, $vr9"); 236 + asm volatile("vxor.v $vr6, $vr6, $vr10"); 237 + asm volatile("vxor.v $vr7, $vr7, $vr11"); 238 + 239 + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ 240 + asm volatile("vsrli.b $vr8, $vr4, 4"); 241 + asm volatile("vsrli.b $vr9, $vr5, 4"); 242 + asm volatile("vsrli.b $vr10, $vr6, 4"); 243 + asm volatile("vsrli.b $vr11, $vr7, 4"); 244 + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ 245 + asm volatile("vandi.b $vr4, $vr4, 0x0f"); 246 + asm volatile("vandi.b $vr5, $vr5, 0x0f"); 247 + asm volatile("vandi.b $vr6, $vr6, 0x0f"); 248 + asm volatile("vandi.b $vr7, $vr7, 0x0f"); 249 + /* lookup from qmul[0] */ 250 + asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); 251 + asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); 252 + asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); 253 + asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); 254 + /* lookup from qmul[16] */ 255 + asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); 256 + asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); 257 + asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); 258 
+ asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); 259 + /* vr4 - vr7: qmul(Q + Qx) = Dx */ 260 + asm volatile("vxor.v $vr4, $vr4, $vr8"); 261 + asm volatile("vxor.v $vr5, $vr5, $vr9"); 262 + asm volatile("vxor.v $vr6, $vr6, $vr10"); 263 + asm volatile("vxor.v $vr7, $vr7, $vr11"); 264 + asm volatile("vst $vr4, %0" : "=m" (dq[0])); 265 + asm volatile("vst $vr5, %0" : "=m" (dq[16])); 266 + asm volatile("vst $vr6, %0" : "=m" (dq[32])); 267 + asm volatile("vst $vr7, %0" : "=m" (dq[48])); 268 + 269 + /* vr0 - vr3: P + Dx + Dx = P */ 270 + asm volatile("vxor.v $vr0, $vr0, $vr4"); 271 + asm volatile("vxor.v $vr1, $vr1, $vr5"); 272 + asm volatile("vxor.v $vr2, $vr2, $vr6"); 273 + asm volatile("vxor.v $vr3, $vr3, $vr7"); 274 + asm volatile("vst $vr0, %0" : "=m" (p[0])); 275 + asm volatile("vst $vr1, %0" : "=m" (p[16])); 276 + asm volatile("vst $vr2, %0" : "=m" (p[32])); 277 + asm volatile("vst $vr3, %0" : "=m" (p[48])); 278 + 279 + bytes -= 64; 280 + p += 64; 281 + q += 64; 282 + dq += 64; 283 + } 284 + 285 + kernel_fpu_end(); 286 + } 287 + 288 + const struct raid6_recov_calls raid6_recov_lsx = { 289 + .data2 = raid6_2data_recov_lsx, 290 + .datap = raid6_datap_recov_lsx, 291 + .valid = raid6_has_lsx, 292 + .name = "lsx", 293 + .priority = 1, 294 + }; 295 + #endif /* CONFIG_CPU_HAS_LSX */ 296 + 297 + #ifdef CONFIG_CPU_HAS_LASX 298 + static int raid6_has_lasx(void) 299 + { 300 + return cpu_has_lasx; 301 + } 302 + 303 + static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, 304 + int failb, void **ptrs) 305 + { 306 + u8 *p, *q, *dp, *dq; 307 + const u8 *pbmul; /* P multiplier table for B data */ 308 + const u8 *qmul; /* Q multiplier table (for both) */ 309 + 310 + p = (u8 *)ptrs[disks - 2]; 311 + q = (u8 *)ptrs[disks - 1]; 312 + 313 + /* 314 + * Compute syndrome with zero for the missing data pages 315 + * Use the dead data pages as temporary storage for 316 + * delta p and delta q 317 + */ 318 + dp = (u8 *)ptrs[faila]; 319 + ptrs[faila] = (void 
*)raid6_empty_zero_page; 320 + ptrs[disks - 2] = dp; 321 + dq = (u8 *)ptrs[failb]; 322 + ptrs[failb] = (void *)raid6_empty_zero_page; 323 + ptrs[disks - 1] = dq; 324 + 325 + raid6_call.gen_syndrome(disks, bytes, ptrs); 326 + 327 + /* Restore pointer table */ 328 + ptrs[faila] = dp; 329 + ptrs[failb] = dq; 330 + ptrs[disks - 2] = p; 331 + ptrs[disks - 1] = q; 332 + 333 + /* Now, pick the proper data tables */ 334 + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 335 + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 336 + 337 + kernel_fpu_begin(); 338 + 339 + /* 340 + * xr20, xr21: qmul 341 + * xr22, xr23: pbmul 342 + */ 343 + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 344 + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 345 + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 346 + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 347 + asm volatile("xvreplve0.q $xr20, $xr20"); 348 + asm volatile("xvreplve0.q $xr21, $xr21"); 349 + asm volatile("xvreplve0.q $xr22, $xr22"); 350 + asm volatile("xvreplve0.q $xr23, $xr23"); 351 + 352 + while (bytes) { 353 + /* xr0, xr1: Q */ 354 + asm volatile("xvld $xr0, %0" : : "m" (q[0])); 355 + asm volatile("xvld $xr1, %0" : : "m" (q[32])); 356 + /* xr0, xr1: Q + Qxy */ 357 + asm volatile("xvld $xr4, %0" : : "m" (dq[0])); 358 + asm volatile("xvld $xr5, %0" : : "m" (dq[32])); 359 + asm volatile("xvxor.v $xr0, $xr0, $xr4"); 360 + asm volatile("xvxor.v $xr1, $xr1, $xr5"); 361 + /* xr2, xr3: P */ 362 + asm volatile("xvld $xr2, %0" : : "m" (p[0])); 363 + asm volatile("xvld $xr3, %0" : : "m" (p[32])); 364 + /* xr2, xr3: P + Pxy */ 365 + asm volatile("xvld $xr4, %0" : : "m" (dp[0])); 366 + asm volatile("xvld $xr5, %0" : : "m" (dp[32])); 367 + asm volatile("xvxor.v $xr2, $xr2, $xr4"); 368 + asm volatile("xvxor.v $xr3, $xr3, $xr5"); 369 + 370 + /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ 371 + asm volatile("xvsrli.b $xr4, $xr0, 4"); 372 + asm volatile("xvsrli.b $xr5, $xr1, 4"); 373 + /* xr0, 
xr1: lower 4 bits of each byte of (Q + Qxy) */ 374 + asm volatile("xvandi.b $xr0, $xr0, 0x0f"); 375 + asm volatile("xvandi.b $xr1, $xr1, 0x0f"); 376 + /* lookup from qmul[0] */ 377 + asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); 378 + asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); 379 + /* lookup from qmul[16] */ 380 + asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); 381 + asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); 382 + /* xr6, xr7: B(Q + Qxy) */ 383 + asm volatile("xvxor.v $xr6, $xr4, $xr0"); 384 + asm volatile("xvxor.v $xr7, $xr5, $xr1"); 385 + 386 + /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ 387 + asm volatile("xvsrli.b $xr4, $xr2, 4"); 388 + asm volatile("xvsrli.b $xr5, $xr3, 4"); 389 + /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ 390 + asm volatile("xvandi.b $xr0, $xr2, 0x0f"); 391 + asm volatile("xvandi.b $xr1, $xr3, 0x0f"); 392 + /* lookup from pbmul[0] */ 393 + asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); 394 + asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); 395 + /* lookup from pbmul[16] */ 396 + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 397 + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 398 + /* xr0, xr1: A(P + Pxy) */ 399 + asm volatile("xvxor.v $xr0, $xr0, $xr4"); 400 + asm volatile("xvxor.v $xr1, $xr1, $xr5"); 401 + 402 + /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ 403 + asm volatile("xvxor.v $xr0, $xr0, $xr6"); 404 + asm volatile("xvxor.v $xr1, $xr1, $xr7"); 405 + 406 + /* xr2, xr3: P + Pxy + Dx = Dy */ 407 + asm volatile("xvxor.v $xr2, $xr2, $xr0"); 408 + asm volatile("xvxor.v $xr3, $xr3, $xr1"); 409 + 410 + asm volatile("xvst $xr0, %0" : "=m" (dq[0])); 411 + asm volatile("xvst $xr1, %0" : "=m" (dq[32])); 412 + asm volatile("xvst $xr2, %0" : "=m" (dp[0])); 413 + asm volatile("xvst $xr3, %0" : "=m" (dp[32])); 414 + 415 + bytes -= 64; 416 + p += 64; 417 + q += 64; 418 + dp += 64; 419 + dq += 64; 420 + } 421 + 422 + kernel_fpu_end(); 423 + } 424 + 425 + static void 
raid6_datap_recov_lasx(int disks, size_t bytes, int faila, 426 + void **ptrs) 427 + { 428 + u8 *p, *q, *dq; 429 + const u8 *qmul; /* Q multiplier table */ 430 + 431 + p = (u8 *)ptrs[disks - 2]; 432 + q = (u8 *)ptrs[disks - 1]; 433 + 434 + /* 435 + * Compute syndrome with zero for the missing data page 436 + * Use the dead data page as temporary storage for delta q 437 + */ 438 + dq = (u8 *)ptrs[faila]; 439 + ptrs[faila] = (void *)raid6_empty_zero_page; 440 + ptrs[disks - 1] = dq; 441 + 442 + raid6_call.gen_syndrome(disks, bytes, ptrs); 443 + 444 + /* Restore pointer table */ 445 + ptrs[faila] = dq; 446 + ptrs[disks - 1] = q; 447 + 448 + /* Now, pick the proper data tables */ 449 + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 450 + 451 + kernel_fpu_begin(); 452 + 453 + /* xr22, xr23: qmul */ 454 + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); 455 + asm volatile("xvreplve0.q $xr22, $xr22"); 456 + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 457 + asm volatile("xvreplve0.q $xr23, $xr23"); 458 + 459 + while (bytes) { 460 + /* xr0, xr1: P + Dx */ 461 + asm volatile("xvld $xr0, %0" : : "m" (p[0])); 462 + asm volatile("xvld $xr1, %0" : : "m" (p[32])); 463 + /* xr2, xr3: Qx */ 464 + asm volatile("xvld $xr2, %0" : : "m" (dq[0])); 465 + asm volatile("xvld $xr3, %0" : : "m" (dq[32])); 466 + /* xr2, xr3: Q + Qx */ 467 + asm volatile("xvld $xr4, %0" : : "m" (q[0])); 468 + asm volatile("xvld $xr5, %0" : : "m" (q[32])); 469 + asm volatile("xvxor.v $xr2, $xr2, $xr4"); 470 + asm volatile("xvxor.v $xr3, $xr3, $xr5"); 471 + 472 + /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ 473 + asm volatile("xvsrli.b $xr4, $xr2, 4"); 474 + asm volatile("xvsrli.b $xr5, $xr3, 4"); 475 + /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ 476 + asm volatile("xvandi.b $xr2, $xr2, 0x0f"); 477 + asm volatile("xvandi.b $xr3, $xr3, 0x0f"); 478 + /* lookup from qmul[0] */ 479 + asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); 480 + asm volatile("xvshuf.b $xr3, $xr22, $xr22, 
$xr3"); 481 + /* lookup from qmul[16] */ 482 + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 483 + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 484 + /* xr2, xr3: qmul(Q + Qx) = Dx */ 485 + asm volatile("xvxor.v $xr2, $xr2, $xr4"); 486 + asm volatile("xvxor.v $xr3, $xr3, $xr5"); 487 + 488 + /* xr0, xr1: P + Dx + Dx = P */ 489 + asm volatile("xvxor.v $xr0, $xr0, $xr2"); 490 + asm volatile("xvxor.v $xr1, $xr1, $xr3"); 491 + 492 + asm volatile("xvst $xr2, %0" : "=m" (dq[0])); 493 + asm volatile("xvst $xr3, %0" : "=m" (dq[32])); 494 + asm volatile("xvst $xr0, %0" : "=m" (p[0])); 495 + asm volatile("xvst $xr1, %0" : "=m" (p[32])); 496 + 497 + bytes -= 64; 498 + p += 64; 499 + q += 64; 500 + dq += 64; 501 + } 502 + 503 + kernel_fpu_end(); 504 + } 505 + 506 + const struct raid6_recov_calls raid6_recov_lasx = { 507 + .data2 = raid6_2data_recov_lasx, 508 + .datap = raid6_datap_recov_lasx, 509 + .valid = raid6_has_lasx, 510 + .name = "lasx", 511 + .priority = 2, 512 + }; 513 + #endif /* CONFIG_CPU_HAS_LASX */
+12
lib/raid6/test/Makefile
··· 41 41 gcc -c -x c - >/dev/null && rm ./-.o && echo yes) 42 42 endif 43 43 44 + ifeq ($(ARCH),loongarch64) 45 + CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1 46 + CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \ 47 + gcc -c -x assembler - >/dev/null 2>&1 && \ 48 + rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1) 49 + CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \ 50 + gcc -c -x assembler - >/dev/null 2>&1 && \ 51 + rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1) 52 + endif 53 + 44 54 ifeq ($(IS_X86),yes) 45 55 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o 46 56 CFLAGS += -DCONFIG_X86 ··· 64 54 CFLAGS += -DCONFIG_ALTIVEC 65 55 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ 66 56 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o 57 + else ifeq ($(ARCH),loongarch64) 58 + OBJS += loongarch_simd.o recov_loongarch_simd.o 67 59 endif 68 60 69 61 .c.o:
+14 -4
mm/kasan/init.c
··· 139 139 return 0; 140 140 } 141 141 142 + void __weak __meminit pmd_init(void *addr) 143 + { 144 + } 145 + 142 146 static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, 143 147 unsigned long end) 144 148 { ··· 170 166 if (!p) 171 167 return -ENOMEM; 172 168 } else { 173 - pud_populate(&init_mm, pud, 174 - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); 169 + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); 170 + pmd_init(p); 171 + pud_populate(&init_mm, pud, p); 175 172 } 176 173 } 177 174 zero_pmd_populate(pud, addr, next); 178 175 } while (pud++, addr = next, addr != end); 179 176 180 177 return 0; 178 + } 179 + 180 + void __weak __meminit pud_init(void *addr) 181 + { 181 182 } 182 183 183 184 static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, ··· 216 207 if (!p) 217 208 return -ENOMEM; 218 209 } else { 219 - p4d_populate(&init_mm, p4d, 220 - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); 210 + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); 211 + pud_init(p); 212 + p4d_populate(&init_mm, p4d, p); 221 213 } 222 214 } 223 215 zero_pud_populate(p4d, addr, next);
+6
mm/kasan/kasan.h
··· 291 291 292 292 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 293 293 294 + #ifndef __HAVE_ARCH_SHADOW_MAP 294 295 static inline const void *kasan_shadow_to_mem(const void *shadow_addr) 295 296 { 296 297 return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) 297 298 << KASAN_SHADOW_SCALE_SHIFT); 298 299 } 300 + #endif 299 301 300 302 static __always_inline bool addr_has_metadata(const void *addr) 301 303 { 304 + #ifdef __HAVE_ARCH_SHADOW_MAP 305 + return (kasan_mem_to_shadow((void *)addr) != NULL); 306 + #else 302 307 return (kasan_reset_tag(addr) >= 303 308 kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); 309 + #endif 304 310 } 305 311 306 312 /**
+3 -2
mm/kfence/core.c
··· 574 574 */ 575 575 static unsigned long kfence_init_pool(void) 576 576 { 577 - unsigned long addr = (unsigned long)__kfence_pool; 577 + unsigned long addr; 578 578 struct page *pages; 579 579 int i; 580 580 581 581 if (!arch_kfence_init_pool()) 582 - return addr; 582 + return (unsigned long)__kfence_pool; 583 583 584 + addr = (unsigned long)__kfence_pool; 584 585 pages = virt_to_page(__kfence_pool); 585 586 586 587 /*