Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: Checksum/byteorder/swab routines

TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>

+217
+18
arch/arc/include/asm/byteorder.h
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef __ASM_ARC_BYTEORDER_H
#define __ASM_ARC_BYTEORDER_H

/*
 * ARC cores can be built either big- or little-endian; pick up the
 * matching set of generic byte-order definitions based on the kernel
 * configuration.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#include <linux/byteorder/big_endian.h>
#else
#include <linux/byteorder/little_endian.h>
#endif

#endif /* __ASM_ARC_BYTEORDER_H */
+101
arch/arc/include/asm/checksum.h
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Joern Rennecke <joern.rennecke@embecosm.com>: Jan 2012
 *  -Insn Scheduling improvements to csum core routines.
 *      = csum_fold( ) largely derived from ARM version.
 *      = ip_fast_csum( ) to have modulo scheduling
 *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
 *   worked around by adding memory clobber to ip_fast_csum( )
 *
 * vineetg: May 2010
 *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
 */

#ifndef _ASM_ARC_CHECKSUM_H
#define _ASM_ARC_CHECKSUM_H

/*
 * Fold a partial checksum
 *
 * The 2 swords comprising the 32bit sum are added, any carry to 16th bit
 * added back and final sword result inverted.
 */
static inline __sum16 csum_fold(__wsum s)
{
	/* Rotate by 16 so the two half-words line up for a single add. */
	unsigned r = s << 16 | s >> 16;	/* ror */

	/*
	 * ~s - ror(s, 16) leaves ~(low + high), with the end-around carry
	 * already folded in, in the upper half-word; the shift extracts it
	 * already inverted, saving a separate complement at the end.
	 */
	s = ~s;
	s -= r;
	return s >> 16;
}

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * @iph: start of the IP header (4-byte aligned words)
 * @ihl: header length in 32-bit words (>= 2; IHL minimum is 5 for real
 *       IP headers — NOTE(review): asm assumes at least 2 words, confirm
 *       callers never pass less)
 *
 * Strategy: the first two words are loaded up-front with ld.ab
 * (post-increment load), the remaining count is halved into lp_count so
 * the ARC zero-overhead loop (lp) can pipeline two loads per iteration,
 * carrying via the flag-setting adc.f chain; bcc handles an odd word
 * count.  Trailing add.cs folds the final carry.  The "memory" clobber
 * primes gcc's alias analysis (see changelog above re gcc 4.4.x).
 */
static inline __sum16
ip_fast_csum(const void *iph, unsigned int ihl)
{
	const void *ptr = iph;
	unsigned int tmp, tmp2, sum;

	__asm__(
	"	ld.ab  %0, [%3, 4]		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	sub    %1, %4, 2		\n"
	"	lsr.f  lp_count, %1, 1		\n"
	"	bcc    0f			\n"
	"	add.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"0:	lp     1f			\n"
	"	ld.ab  %1, [%3, 4]		\n"
	"	adc.f  %0, %0, %2		\n"
	"	ld.ab  %2, [%3, 4]		\n"
	"	adc.f  %0, %0, %1		\n"
	"1:	adc.f  %0, %0, %2		\n"
	"	add.cs %0,%0,1			\n"
	: "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr)
	: "r"(ihl)
	: "cc", "lp_count", "memory");

	return csum_fold(sum);
}

/*
 * TCP pseudo Header is 12 bytes:
 * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2]
 *
 * The four 32-bit pieces are accumulated with an explicit carry chain
 * (adc.f), and the final adc folds the last carry back into the sum.
 * On little-endian, @len is shifted up a byte (len << 8) to land in the
 * big-endian byte position of the pseudo header, matching htons(proto);
 * on big-endian it is used as-is.
 */
static inline __wsum
csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
		   unsigned short proto, __wsum sum)
{
	__asm__ __volatile__(
	"	add.f %0, %0, %1	\n"
	"	adc.f %0, %0, %2	\n"
	"	adc.f %0, %0, %3	\n"
	"	adc.f %0, %0, %4	\n"
	"	adc   %0, %0, 0		\n"
	: "+&r"(sum)
	: "r"(saddr), "r"(daddr),
#ifdef CONFIG_CPU_BIG_ENDIAN
	  "r"(len),
#else
	  "r"(len << 8),
#endif
	  "r"(htons(proto))
	: "cc");

	return sum;
}

/* Tell asm-generic/checksum.h which routines are overridden above. */
#define csum_fold csum_fold
#define ip_fast_csum ip_fast_csum
#define csum_tcpudp_nofold csum_tcpudp_nofold

#include <asm-generic/checksum.h>

#endif /* _ASM_ARC_CHECKSUM_H */
+98
arch/arc/include/asm/swab.h
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * vineetg: May 2011
 *  -Support single cycle endian-swap insn in ARC700 4.10
 *
 * vineetg: June 2009
 *  -Better htonl implementation (5 instead of 9 ALU instructions)
 *  -Hardware assisted single cycle bswap (Use Case of ARC custom instrn)
 */

#ifndef __ASM_ARC_SWAB_H
#define __ASM_ARC_SWAB_H

#include <linux/types.h>

/* Native single cycle endian swap insn */
#ifdef CONFIG_ARC_HAS_SWAPE

#define __arch_swab32(x)		\
({					\
	unsigned int tmp = x;		\
	__asm__(			\
	"	swape	%0, %1	\n"	\
	: "=r" (tmp)			\
	: "r" (tmp));			\
	tmp;				\
})

#else

/* Several ways of Endian-Swap Emulation for ARC
 * 0: kernel generic
 * 1: ARC optimised "C"
 * 2: ARC Custom instruction
 */
#define ARC_BSWAP_TYPE	1

#if (ARC_BSWAP_TYPE == 1)		/******* Software only ********/

/* The kernel default implementation of htonl is
 * return  x<<24 | x>>24 |
 *	 (x & (__u32)0x0000ff00UL)<<8 | (x & (__u32)0x00ff0000UL)>>8;
 *
 * This generates 9 instructions on ARC (excluding the ld/st)
 *
 * 8051fd8c:	ld     r3,[r7,20]	; Mem op : Get the value to be swapped
 * 8051fd98:	asl    r5,r3,24		; get 3rd Byte
 * 8051fd9c:	lsr    r2,r3,24		; get 0th Byte
 * 8051fda0:	and    r4,r3,0xff00
 * 8051fda8:	asl    r4,r4,8		; get 1st Byte
 * 8051fdac:	and    r3,r3,0x00ff0000
 * 8051fdb4:	or     r2,r2,r5		; combine 0th and 3rd Bytes
 * 8051fdb8:	lsr    r3,r3,8		; 2nd Byte at correct place in Dst Reg
 * 8051fdbc:	or     r2,r2,r4		; combine 0,3 Bytes with 1st Byte
 * 8051fdc0:	or     r2,r2,r3		; combine 0,3,1 Bytes with 2nd Byte
 * 8051fdc4:	st     r2,[r1,20]	; Mem op : save result back to mem
 *
 * Joern suggested a better "C" algorithm which is great since
 * (1) It is portable to any architecture
 * (2) At the same time it takes advantage of ARC ISA (rotate intrns)
 */

#define __arch_swab32(x)					\
({	unsigned long __in = (x), __tmp;			\
	__tmp = __in << 8 | __in >> 24; /* ror tmp,in,24 */	\
	__in = __in << 24 | __in >> 8; /* ror in,in,8 */	\
	__tmp ^= __in;						\
	__tmp &= 0xff00ff;					\
	__tmp ^ __in;	/* statement-expression result */	\
})

#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bswap instruction */

#define __arch_swab32(x)						\
({									\
	unsigned int tmp = x;						\
	__asm__(							\
	"	.extInstruction	bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP	\n"\
	"	bswap  %0, %1						\n"\
	: "=r" (tmp)							\
	: "r" (tmp));							\
	tmp;								\
})

#endif /* ARC_BSWAP_TYPE=zzz */

#endif /* CONFIG_ARC_HAS_SWAPE */

/*
 * Have the generic code (see linux/swab.h) synthesize the 64-bit swap
 * from two 32-bit swaps rather than open-coding it here.
 */
#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
#define __SWAB_64_THRU_32__
#endif

#endif /* __ASM_ARC_SWAB_H */