Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

LoongArch: Select ARCH_SUPPORTS_INT128 if CC_HAS_INT128

This allows compiling a full 128-bit product of two 64-bit integers as a
mul/mulh pair, instead of a nasty long sequence of 20+ instructions.

However, after selecting ARCH_SUPPORTS_INT128, when optimizing for size
the compiler generates calls to __ashlti3, __ashrti3, and __lshrti3 for
shifting __int128 values, causing a link failure:

loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in
function `mul_u64_u32_shr':
<PATH>/include/linux/math64.h:161:(.text+0x5e4): undefined
reference to `__lshrti3'

So provide the implementation of these functions if ARCH_SUPPORTS_INT128.

Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>

authored by

Xi Ruoyao and committed by
Huacai Chen
5125d033 2cce9059

+65
+1
arch/loongarch/Kconfig
··· 57 57 select ARCH_SUPPORTS_ACPI 58 58 select ARCH_SUPPORTS_ATOMIC_RMW 59 59 select ARCH_SUPPORTS_HUGETLBFS 60 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 60 61 select ARCH_SUPPORTS_LTO_CLANG 61 62 select ARCH_SUPPORTS_LTO_CLANG_THIN 62 63 select ARCH_SUPPORTS_NUMA_BALANCING
+6
arch/loongarch/include/asm/asm-prototypes.h
··· 6 6 #include <asm/page.h> 7 7 #include <asm/ftrace.h> 8 8 #include <asm-generic/asm-prototypes.h> 9 + 10 + #ifdef CONFIG_ARCH_SUPPORTS_INT128 11 + __int128_t __ashlti3(__int128_t a, int b); 12 + __int128_t __ashrti3(__int128_t a, int b); 13 + __int128_t __lshrti3(__int128_t a, int b); 14 + #endif
+2
arch/loongarch/lib/Makefile
··· 6 6 lib-y += delay.o memset.o memcpy.o memmove.o \ 7 7 clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o 8 8 9 + obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o 10 + 9 11 obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o 10 12 11 13 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+56
arch/loongarch/lib/tishift.S
/* SPDX-License-Identifier: GPL-2.0-only */

#include <asm/asmmacro.h>
#include <linux/export.h>
#include <linux/linkage.h>

/*
 * Branch-free 128-bit shift helpers: __ashlti3, __ashrti3, __lshrti3.
 *
 * Register usage, as the code below treats it:
 *   a0    low  64 bits of the 128-bit value (in and out)
 *   a1    high 64 bits of the 128-bit value (in and out)
 *   a2    shift count b; only bits 0..6 are examined
 *   t0-t3 scratch
 *
 * Every routine follows the same two-phase scheme:
 *
 *   1. Compute the candidate result words using the 64-bit shift
 *      instructions, which only look at the low six bits of the count
 *      (i.e. they shift by b % 64).
 *   2. Test bit 6 of b (t0 = b & 64) and use maskeqz/masknez to keep the
 *      candidates that belong to the "b < 64" or the "b >= 64" case:
 *        maskeqz rd, rj, rk   ->  rd = (rk == 0) ? 0 : rj
 *        masknez rd, rj, rk   ->  rd = (rk != 0) ? 0 : rj
 *
 * The bits that cross the 64-bit boundary are obtained with a fixed
 * 1-bit shift followed by a shift by (~b % 64).  Together that is a
 * shift by 64 - (b % 64), and it correctly yields zero when b % 64 == 0,
 * which a single shift instruction could not express.
 */

/*
 * __ashlti3: 128-bit left shift.
 *   b <  64:  hi = (hi << b) | (lo >> (64 - b)),  lo = lo << b
 *   b >= 64:  hi = lo << (b - 64),                lo = 0
 */
SYM_FUNC_START(__ashlti3)
	/* t0 != 0 exactly when the count is 64 or more */
	andi	t0, a2, 64
	nor	t3, zero, a2		/* t3 = ~b */

	/* t2 = low-word bits that spill into the high word */
	srli.d	t2, a0, 1
	srl.d	t2, t2, t3

	/* t1 = high word for the b < 64 case */
	sll.d	t1, a1, a2
	or	t1, t2, t1

	/* a0 = lo << (b % 64): low word if b < 64, high word otherwise */
	sll.d	a0, a0, a2

	/* pick the words matching the range of b */
	maskeqz	a1, a0, t0		/* b >= 64: hi = shifted lo, else 0 */
	masknez	a0, a0, t0		/* b >= 64: lo = 0 */
	masknez	t0, t1, t0		/* b <  64: t0 = combined high word */
	or	a1, a1, t0
	jr	ra
SYM_FUNC_END(__ashlti3)
EXPORT_SYMBOL(__ashlti3)

/*
 * __ashrti3: 128-bit arithmetic (sign-propagating) right shift.
 *   b <  64:  lo = (lo >> b) | (hi << (64 - b)),  hi = hi >> b (arithmetic)
 *   b >= 64:  lo = hi >> (b - 64) (arithmetic),   hi = sign fill
 */
SYM_FUNC_START(__ashrti3)
	/* t0 != 0 exactly when the count is 64 or more */
	andi	t0, a2, 64
	nor	t3, zero, a2		/* t3 = ~b */

	/* t1 = low word for the b < 64 case */
	srl.d	t1, a0, a2
	slli.d	t2, a1, 1
	sll.d	t2, t2, t3		/* high-word bits entering the low word */
	or	t1, t2, t1

	/* a2 = hi >> (b % 64) arithmetic; a1 = all copies of the sign bit */
	sra.d	a2, a1, a2
	srai.d	a1, a1, 63

	/* pick the words matching the range of b */
	maskeqz	a0, a2, t0		/* b >= 64: lo = shifted hi, else 0 */
	masknez	a2, a2, t0		/* b <  64: a2 = shifted hi, else 0 */
	maskeqz	a1, a1, t0		/* b >= 64: hi = sign fill, else 0 */
	or	a1, a1, a2
	masknez	t0, t1, t0		/* b <  64: t0 = combined low word */
	or	a0, t0, a0
	jr	ra
SYM_FUNC_END(__ashrti3)
EXPORT_SYMBOL(__ashrti3)

/*
 * __lshrti3: 128-bit logical (zero-filling) right shift.
 *   b <  64:  lo = (lo >> b) | (hi << (64 - b)),  hi = hi >> b
 *   b >= 64:  lo = hi >> (b - 64),                hi = 0
 */
SYM_FUNC_START(__lshrti3)
	/* t0 != 0 exactly when the count is 64 or more */
	andi	t0, a2, 64
	nor	t3, zero, a2		/* t3 = ~b */

	/* t1 = low word for the b < 64 case */
	srl.d	t1, a0, a2
	slli.d	t2, a1, 1
	sll.d	t2, t2, t3		/* high-word bits entering the low word */
	or	t1, t2, t1

	/* a1 = hi >> (b % 64): high word if b < 64, low word otherwise */
	srl.d	a1, a1, a2

	/* pick the words matching the range of b */
	maskeqz	a0, a1, t0		/* b >= 64: lo = shifted hi, else 0 */
	masknez	a1, a1, t0		/* b >= 64: hi = 0 */
	masknez	t0, t1, t0		/* b <  64: t0 = combined low word */
	or	a0, t0, a0
	jr	ra
SYM_FUNC_END(__lshrti3)
EXPORT_SYMBOL(__lshrti3)