Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Implement csum_ipv6_magic in assembly

The generic csum_ipv6_magic() generates pretty poor code, as the disassembly below shows:

00000000 <csum_ipv6_magic>: (PPC32)
0: 81 23 00 00 lwz r9,0(r3)
4: 81 03 00 04 lwz r8,4(r3)
8: 7c e7 4a 14 add r7,r7,r9
c: 7d 29 38 10 subfc r9,r9,r7
10: 7d 4a 51 10 subfe r10,r10,r10
14: 7d 27 42 14 add r9,r7,r8
18: 7d 2a 48 50 subf r9,r10,r9
1c: 80 e3 00 08 lwz r7,8(r3)
20: 7d 08 48 10 subfc r8,r8,r9
24: 7d 4a 51 10 subfe r10,r10,r10
28: 7d 29 3a 14 add r9,r9,r7
2c: 81 03 00 0c lwz r8,12(r3)
30: 7d 2a 48 50 subf r9,r10,r9
34: 7c e7 48 10 subfc r7,r7,r9
38: 7d 4a 51 10 subfe r10,r10,r10
3c: 7d 29 42 14 add r9,r9,r8
40: 7d 2a 48 50 subf r9,r10,r9
44: 80 e4 00 00 lwz r7,0(r4)
48: 7d 08 48 10 subfc r8,r8,r9
4c: 7d 4a 51 10 subfe r10,r10,r10
50: 7d 29 3a 14 add r9,r9,r7
54: 7d 2a 48 50 subf r9,r10,r9
58: 81 04 00 04 lwz r8,4(r4)
5c: 7c e7 48 10 subfc r7,r7,r9
60: 7d 4a 51 10 subfe r10,r10,r10
64: 7d 29 42 14 add r9,r9,r8
68: 7d 2a 48 50 subf r9,r10,r9
6c: 80 e4 00 08 lwz r7,8(r4)
70: 7d 08 48 10 subfc r8,r8,r9
74: 7d 4a 51 10 subfe r10,r10,r10
78: 7d 29 3a 14 add r9,r9,r7
7c: 7d 2a 48 50 subf r9,r10,r9
80: 81 04 00 0c lwz r8,12(r4)
84: 7c e7 48 10 subfc r7,r7,r9
88: 7d 4a 51 10 subfe r10,r10,r10
8c: 7d 29 42 14 add r9,r9,r8
90: 7d 2a 48 50 subf r9,r10,r9
94: 7d 08 48 10 subfc r8,r8,r9
98: 7d 4a 51 10 subfe r10,r10,r10
9c: 7d 29 2a 14 add r9,r9,r5
a0: 7d 2a 48 50 subf r9,r10,r9
a4: 7c a5 48 10 subfc r5,r5,r9
a8: 7c 63 19 10 subfe r3,r3,r3
ac: 7d 29 32 14 add r9,r9,r6
b0: 7d 23 48 50 subf r9,r3,r9
b4: 7c c6 48 10 subfc r6,r6,r9
b8: 7c 63 19 10 subfe r3,r3,r3
bc: 7c 63 48 50 subf r3,r3,r9
c0: 54 6a 80 3e rotlwi r10,r3,16
c4: 7c 63 52 14 add r3,r3,r10
c8: 7c 63 18 f8 not r3,r3
cc: 54 63 84 3e rlwinm r3,r3,16,16,31
d0: 4e 80 00 20 blr

0000000000000000 <.csum_ipv6_magic>: (PPC64)
0: 81 23 00 00 lwz r9,0(r3)
4: 80 03 00 04 lwz r0,4(r3)
8: 81 63 00 08 lwz r11,8(r3)
c: 7c e7 4a 14 add r7,r7,r9
10: 7f 89 38 40 cmplw cr7,r9,r7
14: 7d 47 02 14 add r10,r7,r0
18: 7d 30 10 26 mfocrf r9,1
1c: 55 29 f7 fe rlwinm r9,r9,30,31,31
20: 7d 4a 4a 14 add r10,r10,r9
24: 7f 80 50 40 cmplw cr7,r0,r10
28: 7d 2a 5a 14 add r9,r10,r11
2c: 80 03 00 0c lwz r0,12(r3)
30: 81 44 00 00 lwz r10,0(r4)
34: 7d 10 10 26 mfocrf r8,1
38: 55 08 f7 fe rlwinm r8,r8,30,31,31
3c: 7d 29 42 14 add r9,r9,r8
40: 81 04 00 04 lwz r8,4(r4)
44: 7f 8b 48 40 cmplw cr7,r11,r9
48: 7d 29 02 14 add r9,r9,r0
4c: 7d 70 10 26 mfocrf r11,1
50: 55 6b f7 fe rlwinm r11,r11,30,31,31
54: 7d 29 5a 14 add r9,r9,r11
58: 7f 80 48 40 cmplw cr7,r0,r9
5c: 7d 29 52 14 add r9,r9,r10
60: 7c 10 10 26 mfocrf r0,1
64: 54 00 f7 fe rlwinm r0,r0,30,31,31
68: 7d 69 02 14 add r11,r9,r0
6c: 7f 8a 58 40 cmplw cr7,r10,r11
70: 7c 0b 42 14 add r0,r11,r8
74: 81 44 00 08 lwz r10,8(r4)
78: 7c f0 10 26 mfocrf r7,1
7c: 54 e7 f7 fe rlwinm r7,r7,30,31,31
80: 7c 00 3a 14 add r0,r0,r7
84: 7f 88 00 40 cmplw cr7,r8,r0
88: 7d 20 52 14 add r9,r0,r10
8c: 80 04 00 0c lwz r0,12(r4)
90: 7d 70 10 26 mfocrf r11,1
94: 55 6b f7 fe rlwinm r11,r11,30,31,31
98: 7d 29 5a 14 add r9,r9,r11
9c: 7f 8a 48 40 cmplw cr7,r10,r9
a0: 7d 29 02 14 add r9,r9,r0
a4: 7d 70 10 26 mfocrf r11,1
a8: 55 6b f7 fe rlwinm r11,r11,30,31,31
ac: 7d 29 5a 14 add r9,r9,r11
b0: 7f 80 48 40 cmplw cr7,r0,r9
b4: 7d 29 2a 14 add r9,r9,r5
b8: 7c 10 10 26 mfocrf r0,1
bc: 54 00 f7 fe rlwinm r0,r0,30,31,31
c0: 7d 29 02 14 add r9,r9,r0
c4: 7f 85 48 40 cmplw cr7,r5,r9
c8: 7c 09 32 14 add r0,r9,r6
cc: 7d 50 10 26 mfocrf r10,1
d0: 55 4a f7 fe rlwinm r10,r10,30,31,31
d4: 7c 00 52 14 add r0,r0,r10
d8: 7f 80 30 40 cmplw cr7,r0,r6
dc: 7d 30 10 26 mfocrf r9,1
e0: 55 29 ef fe rlwinm r9,r9,29,31,31
e4: 7c 09 02 14 add r0,r9,r0
e8: 54 03 80 3e rotlwi r3,r0,16
ec: 7c 03 02 14 add r0,r3,r0
f0: 7c 03 00 f8 not r3,r0
f4: 78 63 84 22 rldicl r3,r3,48,48
f8: 4e 80 00 20 blr

This patch implements it in assembly for both PPC32 and PPC64.

Link: https://github.com/linuxppc/linux/issues/9
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Authored by Christophe Leroy and committed by Michael Ellerman
e9c4943a 373e098e

+67
+6
arch/powerpc/include/asm/checksum.h
···
13 13 #include <asm-generic/checksum.h>
14 14 #else
15 15 #include <linux/bitops.h>
16 + #include <linux/in6.h> /* for struct in6_addr, used by the csum_ipv6_magic() prototype added to this header */
16 17 /*
17 18 * Computes the checksum of a memory block at src, length len,
18 19 * and adds in "sum" (32-bit), while copying the block to dst.
···
211 210 {
212 211 return csum_fold(csum_partial(buff, len, 0));
213 212 }
213 +
214 + #define _HAVE_ARCH_IPV6_CSUM
215 + __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
216 + const struct in6_addr *daddr,
217 + __u32 len, __u8 proto, __wsum sum);
214 218
215 219 #endif
216 220 #endif /* __KERNEL__ */
+33
arch/powerpc/lib/checksum_32.S
···
302 302 EX_TABLE(51b, dst_error);
303 303
304 304 EXPORT_SYMBOL(csum_partial_copy_generic)
305 + /* PPC32: adds the four 32-bit words of *saddr, the four words of *daddr, len, proto and sum with carry propagation, folds the total to 16 bits and returns its complement. */
306 + /*
307 + * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
308 + * const struct in6_addr *daddr,
309 + * __u32 len, __u8 proto, __wsum sum)
310 + */
311 + /* In: r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum. Out: r3 = 16-bit result. Scratch: r0, r8-r11. */
312 + _GLOBAL(csum_ipv6_magic)
313 + 	lwz	r8, 0(r3)	/* saddr word 0 */
314 + 	lwz	r9, 4(r3)	/* saddr word 1 */
315 + 	addc	r0, r7, r8	/* r0 = sum + saddr word 0; starts the carry chain */
316 + 	lwz	r10, 8(r3)	/* saddr word 2 (loads are interleaved with the adds; they do not touch the carry) */
317 + 	adde	r0, r0, r9	/* every adde also adds the carry left by the previous addc/adde */
318 + 	lwz	r11, 12(r3)	/* saddr word 3 */
319 + 	adde	r0, r0, r10
320 + 	lwz	r8, 0(r4)	/* r8-r11 are reused for the four daddr words */
321 + 	adde	r0, r0, r11
322 + 	lwz	r9, 4(r4)
323 + 	adde	r0, r0, r8
324 + 	lwz	r10, 8(r4)
325 + 	adde	r0, r0, r9
326 + 	lwz	r11, 12(r4)
327 + 	adde	r0, r0, r10
328 + 	add	r5, r5, r6	/* assumption: len + proto doesn't carry; a plain add also leaves the carry bit of the adde chain untouched */
329 + 	adde	r0, r0, r11
330 + 	adde	r0, r0, r5	/* add the combined len + proto term */
331 + 	addze	r0, r0		/* add the last carry back into the sum (end-around carry) */
332 + 	rotlwi	r3, r0, 16	/* r3 = r0 with its two 16-bit halves swapped */
333 + 	add	r3, r0, r3	/* upper half of r3 = high half + low half, plus the carry out of the lower-half addition */
334 + 	not	r3, r3		/* complement the folded sum */
335 + 	rlwinm	r3, r3, 16, 16, 31	/* rotate the upper half into the low 16 bits and clear everything else */
336 + 	blr
337 + EXPORT_SYMBOL(csum_ipv6_magic)
+28
arch/powerpc/lib/checksum_64.S
···
429 429 	stw	r6,0(r8)
430 430 	blr
431 431 EXPORT_SYMBOL(csum_partial_copy_generic)
432 + /* PPC64: adds *saddr and *daddr (two 64-bit doublewords each), len, proto and sum with carry propagation, folds the 64-bit total to 16 bits and returns its complement. */
433 + /*
434 + * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
435 + * const struct in6_addr *daddr,
436 + * __u32 len, __u8 proto, __wsum sum)
437 + */
438 + /* In: r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum. Out: r3 = 16-bit result. Scratch: r0, r8-r11. */
439 + _GLOBAL(csum_ipv6_magic)
440 + 	ld	r8, 0(r3)	/* saddr, first doubleword */
441 + 	ld	r9, 8(r3)	/* saddr, second doubleword */
442 + 	add	r5, r5, r6	/* r5 = len + proto (plain add: carry bit untouched) */
443 + 	addc	r0, r8, r9	/* start the carry chain with the two saddr doublewords */
444 + 	ld	r10, 0(r4)	/* daddr, first doubleword */
445 + 	ld	r11, 8(r4)	/* daddr, second doubleword */
446 + #ifdef CONFIG_CPU_LITTLE_ENDIAN
447 + 	rotldi	r5, r5, 8	/* the addresses are summed as loaded, i.e. byte-swapped on LE, while len + proto is in CPU order; in a one's-complement sum rotating by one byte is equivalent to the byte swap, and the value is small enough that no bits wrap around */
448 + #endif
449 + 	adde	r0, r0, r10	/* every adde also adds the carry left by the previous addc/adde */
450 + 	add	r5, r5, r7	/* r5 = (len + proto) + sum; sum is added as passed in, without rotation */
451 + 	adde	r0, r0, r11
452 + 	adde	r0, r0, r5
453 + 	addze	r0, r0		/* add the last carry back into the sum (end-around carry) */
454 + 	rotldi	r3, r0, 32	/* fold two 32 bit halves together */
455 + 	add	r3, r0, r3	/* upper 32 bits of r3 = high word + low word, plus the carry out of the low-word addition */
456 + 	srdi	r0, r3, 32	/* r0 = that 32-bit folded sum */
457 + 	rotlwi	r3, r0, 16	/* fold two 16 bit halves together */
458 + 	add	r3, r0, r3	/* upper half of the low word = high half + low half, plus carry */
459 + 	not	r3, r3		/* complement the folded sum */
460 + 	rlwinm	r3, r3, 16, 16, 31	/* rotate that half into the low 16 bits and clear everything else */
461 + 	blr
462 + EXPORT_SYMBOL(csum_ipv6_magic)