um: Use the x86 checksum implementation on 32-bit

When UML is compiled under 32-bit x86, it uses its own copy of
checksum_32.S, which is terribly out-of-date and doesn't support
checksumming unaligned data.
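
For context, the checksum being tested is the 16-bit ones' complement sum over consecutive byte pairs (RFC 1071) used by IP/TCP/UDP. The snippet below is an illustrative reference only, not kernel code (csum_bytewise_ref is a made-up name): accumulating the sum a byte at a time imposes no alignment requirement at all, which is exactly the property the old UML checksum_32.S lacks.

/*
 * Illustrative reference only -- not kernel code.  Computes the 16-bit
 * ones' complement sum of a buffer, pairing bytes in x86's little-endian
 * order (data[0] | data[1] << 8, data[2] | data[3] << 8, ...).  Working
 * bytewise means the buffer may start at any address.
 */
#include <stddef.h>
#include <stdint.h>

static uint16_t csum_bytewise_ref(const uint8_t *data, size_t len)
{
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i < len; i++)
                sum += (i & 1) ? (uint32_t)data[i] << 8 : data[i];

        /* End-around carry: fold the running sum back into 16 bits. */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);

        return (uint16_t)~sum;
}

The kernel's csum_partial() computes the same sum but keeps it as an unfolded 32-bit value; csum_fold() performs the final fold and complement, and that folded result is what the KUnit output below compares.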

This causes the new "checksum" KUnit test to fail:
./tools/testing/kunit/kunit.py run --kconfig_add CONFIG_64BIT=n --cross_compile i686-linux-gnu- checksum
KTAP version 1
# Subtest: checksum
1..3
# test_csum_fixed_random_inputs: ASSERTION FAILED at lib/checksum_kunit.c:243
Expected result == expec, but
result == 33316 (0x8224)
expec == 33488 (0x82d0)
not ok 1 test_csum_fixed_random_inputs
# test_csum_all_carry_inputs: ASSERTION FAILED at lib/checksum_kunit.c:267
Expected result == expec, but
result == 65280 (0xff00)
expec == 0 (0x0)
not ok 2 test_csum_all_carry_inputs
# test_csum_no_carry_inputs: ASSERTION FAILED at lib/checksum_kunit.c:306
Expected result == expec, but
result == 65531 (0xfffb)
expec == 0 (0x0)
not ok 3 test_csum_no_carry_inputs
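
The failing cases live in lib/checksum_kunit.c, which exercises csum_partial() over a range of lengths and offsets. Purely as an illustration of the kind of assertion that produces the "Expected result == expec" lines above (this is not the real test; test_buf, csum_ref() and the suite name are made-up stand-ins for its fixed random data and precomputed expected values):

/*
 * Illustration only -- not lib/checksum_kunit.c.  Shows the shape of an
 * assertion that yields the "Expected result == expec" failures above:
 * the checksum of an unaligned slice of a fixed buffer is compared
 * against an independently computed expected value.
 */
#include <kunit/test.h>
#include <net/checksum.h>       /* csum_partial(), csum_fold() */

static const u8 test_buf[32] = { 0xde, 0xad, 0xbe, 0xef, 0x01, 0x02, 0x03, 0x04 };

/* Bytewise reference, same pairing convention as csum_partial() on x86. */
static __sum16 csum_ref(const u8 *data, int len)
{
        u32 sum = 0;
        int i;

        for (i = 0; i < len; i++)
                sum += (i & 1) ? (u32)data[i] << 8 : data[i];
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (__force __sum16)~sum;
}

static void test_csum_unaligned_slice(struct kunit *test)
{
        /* An offset of 1 forces the unaligned case the old asm got wrong. */
        __sum16 result = csum_fold(csum_partial(test_buf + 1, 11, 0));
        __sum16 expec = csum_ref(test_buf + 1, 11);

        KUNIT_ASSERT_EQ(test, result, expec);
}

static struct kunit_case csum_sketch_cases[] = {
        KUNIT_CASE(test_csum_unaligned_slice),
        {}
};

static struct kunit_suite csum_sketch_suite = {
        .name = "checksum-sketch",
        .test_cases = csum_sketch_cases,
};
kunit_test_suite(csum_sketch_suite);

With the out-of-date UML assembly, the unaligned slice is summed incorrectly, so result and expec disagree in the same way as the log above.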

Sharing the normal implementation in arch/x86/lib both fixes all of
these issues and means any further fixes only need to be done once.

x86_64 already seems to share the same implementation between UML and
"normal" x86.

Signed-off-by: David Gow <davidgow@google.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

Authored by David Gow, committed by Richard Weinberger
ff3f7860 760ee8f8

+2 -215

arch/x86/um/Makefile (+2 -1)
@@ -17,11 +17,12 @@
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o syscalls_32.o
+obj-y += syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
 subarch-y += ../lib/cmpxchg8b_emu.o ../lib/atomic64_386_32.o
+subarch-y += ../lib/checksum_32.o
 subarch-y += ../kernel/sys_ia32.o
 
 else
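
The ../lib/checksum_32.o object pulled in via subarch-y is built from arch/x86/lib/checksum_32.S and provides the same csum_partial() entry point that the deleted UML copy used to. For reference only, its declaration looks roughly like this (see arch/x86/include/asm/checksum_32.h for the authoritative version):

/* Entry point implemented and exported by arch/x86/lib/checksum_32.S;
 * UML on 32-bit x86 now links that object instead of its private copy. */
#include <linux/types.h>        /* __wsum */
#include <linux/linkage.h>      /* asmlinkage */

asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum);
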
arch/x86/um/checksum_32.S (-214, file removed)
@@ -1,214 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * INET         An implementation of the TCP/IP protocol suite for the LINUX
- *              operating system.  INET is implemented using the BSD Socket
- *              interface as the means of communication with the user level.
- *
- *              IP/TCP/UDP checksumming routines
- *
- * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
- *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *              Tom May, <ftom@netcom.com>
- *              Pentium Pro/II routines:
- *              Alexander Kjeldaas <astor@guardian.no>
- *              Finn Arne Gangstad <finnag@guardian.no>
- *              Lots of code moved from tcp.c and ip.c; see those files
- *              for more names.
- *
- * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
- *              handling.
- *              Andi Kleen, add zeroing on error
- *              converted to pure assembler
- */
-
-#include <asm/errno.h>
-#include <asm/asm.h>
-#include <asm/export.h>
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-/*
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
- */
-
-.text
-.align 4
-.globl csum_partial
-
-#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
-
-        /*
-         * Experiments with Ethernet and SLIP connections show that buff
-         * is aligned on either a 2-byte or 4-byte boundary.  We get at
-         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
-         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
-         * alignment for the unrolled loop.
-         */
-csum_partial:
-        pushl %esi
-        pushl %ebx
-        movl 20(%esp),%eax      # Function arg: unsigned int sum
-        movl 16(%esp),%ecx      # Function arg: int len
-        movl 12(%esp),%esi      # Function arg: unsigned char *buff
-        testl $2, %esi          # Check alignment.
-        jz 2f                   # Jump if alignment is ok.
-        subl $2, %ecx           # Alignment uses up two bytes.
-        jae 1f                  # Jump if we had at least two bytes.
-        addl $2, %ecx           # ecx was < 2.  Deal with it.
-        jmp 4f
-1:      movw (%esi), %bx
-        addl $2, %esi
-        addw %bx, %ax
-        adcl $0, %eax
-2:
-        movl %ecx, %edx
-        shrl $5, %ecx
-        jz 2f
-        testl %esi, %esi
-1:      movl (%esi), %ebx
-        adcl %ebx, %eax
-        movl 4(%esi), %ebx
-        adcl %ebx, %eax
-        movl 8(%esi), %ebx
-        adcl %ebx, %eax
-        movl 12(%esi), %ebx
-        adcl %ebx, %eax
-        movl 16(%esi), %ebx
-        adcl %ebx, %eax
-        movl 20(%esi), %ebx
-        adcl %ebx, %eax
-        movl 24(%esi), %ebx
-        adcl %ebx, %eax
-        movl 28(%esi), %ebx
-        adcl %ebx, %eax
-        lea 32(%esi), %esi
-        dec %ecx
-        jne 1b
-        adcl $0, %eax
-2:      movl %edx, %ecx
-        andl $0x1c, %edx
-        je 4f
-        shrl $2, %edx           # This clears CF
-3:      adcl (%esi), %eax
-        lea 4(%esi), %esi
-        dec %edx
-        jne 3b
-        adcl $0, %eax
-4:      andl $3, %ecx
-        jz 7f
-        cmpl $2, %ecx
-        jb 5f
-        movw (%esi),%cx
-        leal 2(%esi),%esi
-        je 6f
-        shll $16,%ecx
-5:      movb (%esi),%cl
-6:      addl %ecx,%eax
-        adcl $0, %eax
-7:
-        popl %ebx
-        popl %esi
-        RET
-
-#else
-
-/* Version for PentiumII/PPro */
-
-csum_partial:
-        pushl %esi
-        pushl %ebx
-        movl 20(%esp),%eax      # Function arg: unsigned int sum
-        movl 16(%esp),%ecx      # Function arg: int len
-        movl 12(%esp),%esi      # Function arg: const unsigned char *buf
-
-        testl $2, %esi
-        jnz 30f
-10:
-        movl %ecx, %edx
-        movl %ecx, %ebx
-        andl $0x7c, %ebx
-        shrl $7, %ecx
-        addl %ebx,%esi
-        shrl $2, %ebx
-        negl %ebx
-        lea 45f(%ebx,%ebx,2), %ebx
-        testl %esi, %esi
-        jmp *%ebx
-
-        # Handle 2-byte-aligned regions
-20:     addw (%esi), %ax
-        lea 2(%esi), %esi
-        adcl $0, %eax
-        jmp 10b
-
-30:     subl $2, %ecx
-        ja 20b
-        je 32f
-        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
-        addl %ebx, %eax
-        adcl $0, %eax
-        jmp 80f
-32:
-        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
-        adcl $0, %eax
-        jmp 80f
-
-40:
-        addl -128(%esi), %eax
-        adcl -124(%esi), %eax
-        adcl -120(%esi), %eax
-        adcl -116(%esi), %eax
-        adcl -112(%esi), %eax
-        adcl -108(%esi), %eax
-        adcl -104(%esi), %eax
-        adcl -100(%esi), %eax
-        adcl -96(%esi), %eax
-        adcl -92(%esi), %eax
-        adcl -88(%esi), %eax
-        adcl -84(%esi), %eax
-        adcl -80(%esi), %eax
-        adcl -76(%esi), %eax
-        adcl -72(%esi), %eax
-        adcl -68(%esi), %eax
-        adcl -64(%esi), %eax
-        adcl -60(%esi), %eax
-        adcl -56(%esi), %eax
-        adcl -52(%esi), %eax
-        adcl -48(%esi), %eax
-        adcl -44(%esi), %eax
-        adcl -40(%esi), %eax
-        adcl -36(%esi), %eax
-        adcl -32(%esi), %eax
-        adcl -28(%esi), %eax
-        adcl -24(%esi), %eax
-        adcl -20(%esi), %eax
-        adcl -16(%esi), %eax
-        adcl -12(%esi), %eax
-        adcl -8(%esi), %eax
-        adcl -4(%esi), %eax
-45:
-        lea 128(%esi), %esi
-        adcl $0, %eax
-        dec %ecx
-        jge 40b
-        movl %edx, %ecx
-50:     andl $3, %ecx
-        jz 80f
-
-        # Handle the last 1-3 bytes without jumping
-        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
-        movl $0xffffff,%ebx     # by the shll and shrl instructions
-        shll $3,%ecx
-        shrl %cl,%ebx
-        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
-        addl %ebx,%eax
-        adcl $0,%eax
-80:
-        popl %ebx
-        popl %esi
-        RET
-
-#endif
-EXPORT_SYMBOL(csum_partial)