Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[IA64] implement csum_ipv6_magic for ia64.

The asm version is 4.4 times faster than the generic C version and
10X smaller in code size.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

Authored by Chen, Kenneth W and committed by Tony Luck
007d77d0 5b4d5681

+59 -2
+53 -2
arch/ia64/lib/ip_fast_csum.S
···
8 8 * in0: address of buffer to checksum (char *)
9 9 * in1: length of the buffer (int)
10 10 *
11 - * Copyright (C) 2002 Intel Corp.
12 - * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
11 + * Copyright (C) 2002, 2006 Intel Corp.
12 + * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
13 13 */
14 14
15 15 #include <asm/asmmacro.h>
···
25 25
26 26 #define in0 r32
27 27 #define in1 r33
28 + #define in2 r34
29 + #define in3 r35
30 + #define in4 r36
28 31 #define ret0 r8
29 32
30 33 GLOBAL_ENTRY(ip_fast_csum)
···
91 88 mov b0=r34
92 89 br.ret.sptk.many b0
93 90 END(ip_fast_csum)
91 +
92 + GLOBAL_ENTRY(csum_ipv6_magic)
93 + ld4 r20=[in0],4
94 + ld4 r21=[in1],4
95 + dep r15=in3,in2,32,16
96 + ;;
97 + ld4 r22=[in0],4
98 + ld4 r23=[in1],4
99 + mux1 r15=r15,@rev
100 + ;;
101 + ld4 r24=[in0],4
102 + ld4 r25=[in1],4
103 + shr.u r15=r15,16
104 + add r16=r20,r21
105 + add r17=r22,r23
106 + ;;
107 + ld4 r26=[in0],4
108 + ld4 r27=[in1],4
109 + add r18=r24,r25
110 + add r8=r16,r17
111 + ;;
112 + add r19=r26,r27
113 + add r8=r8,r18
114 + ;;
115 + add r8=r8,r19
116 + add r15=r15,in4
117 + ;;
118 + add r8=r8,r15
119 + ;;
120 + shr.u r10=r8,32 // now fold sum into short
121 + zxt4 r11=r8
122 + ;;
123 + add r8=r10,r11
124 + ;;
125 + shr.u r10=r8,16 // yeah, keep it rolling
126 + zxt2 r11=r8
127 + ;;
128 + add r8=r10,r11
129 + ;;
130 + shr.u r10=r8,16 // three times lucky
131 + zxt2 r11=r8
132 + ;;
133 + add r8=r10,r11
134 + mov r9=0xffff
135 + ;;
136 + andcm r8=r9,r8
137 + br.ret.sptk.many b0
138 + END(csum_ipv6_magic)
+6
include/asm-ia64/checksum.h
···
70 70 return (__force __sum16)~sum;
71 71 }
72 72
73 + #define _HAVE_ARCH_IPV6_CSUM 1
74 + struct in6_addr;
75 + extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
76 + struct in6_addr *daddr, __u32 len, unsigned short proto,
77 + unsigned int csum);
78 +
73 79 #endif /* _ASM_IA64_CHECKSUM_H */