Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[ARM] Improve csum_fold, cleanup csum_tcpudp_magic()

csum_fold doesn't need two assembly instructions to perform its task,
it can simply add the high and low parts together by rotating by 16
bits, and the carry into the upper-16 bits will automatically happen.

Also, since csum_tcpudp_magic() is just csum_tcpudp_nofold + csum_fold,
implement it in terms of those two functions. Note that ip_fast_csum()
likewise ends with an open-coded fold, so use the real csum_fold() there
too.

Boot tested on Versatile.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Authored by Russell King; committed by Russell King.
7ef416c4 10c03f69

+18 -38
include/asm-arm/checksum.h
···
 csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);

 /*
+ * Fold a partial checksum without adding pseudo headers
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	__asm__(
+	"add	%0, %1, %1, ror #16	@ csum_fold"
+	: "=r" (sum)
+	: "r" (sum)
+	: "cc");
+	return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/*
  * This is a version of ip_compute_csum() optimized for IP headers,
  * which always checksum on 4 octet boundaries.
  */
 static inline __sum16
 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-	unsigned int sum, tmp1;
+	unsigned int tmp1;
+	__wsum sum;

 	__asm__ __volatile__(
 	"ldr	%0, [%1], #4		@ ip_fast_csum		\n\
···
 	subne	%2, %2, #1		@ without destroying	\n\
 	bne	1b			@ the carry flag	\n\
 	adcs	%0, %0, %3		\n\
-	adc	%0, %0, #0		\n\
-	adds	%0, %0, %0, lsl #16	\n\
-	addcs	%0, %0, #0x10000	\n\
-	mvn	%0, %0			\n\
-	mov	%0, %0, lsr #16"
+	adc	%0, %0, #0"
 	: "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (tmp1)
 	: "1" (iph), "2" (ihl)
 	: "cc", "memory");
-	return (__force __sum16)sum;
-}
-
-/*
- * Fold a partial checksum without adding pseudo headers
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-	"adds	%0, %1, %1, lsl #16	@ csum_fold	\n\
-	addcs	%0, %0, #0x10000"
-	: "=r" (sum)
-	: "r" (sum)
-	: "cc");
-	return (__force __sum16)(~(__force u32)sum >> 16);
+	return csum_fold(sum);
 }

 static inline __wsum
···
 csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
 		  unsigned short proto, __wsum sum)
 {
-	__asm__(
-	"adds	%0, %1, %2		@ csum_tcpudp_magic	\n\
-	adcs	%0, %0, %3		\n"
-#ifdef __ARMEB__
-	"adcs	%0, %0, %4		\n"
-#else
-	"adcs	%0, %0, %4, lsl #8	\n"
-#endif
-	"adcs	%0, %0, %5		\n\
-	adc	%0, %0, #0		\n\
-	adds	%0, %0, %0, lsl #16	\n\
-	addcs	%0, %0, #0x10000	\n\
-	mvn	%0, %0"
-	: "=&r"(sum)
-	: "r" (sum), "r" (daddr), "r" (saddr), "r" (len), "Ir" (htons(proto))
-	: "cc");
-	return (__force __sum16)((__force u32)sum >> 16);
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }