Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.12 496 lines 10 kB view raw
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

/*
 * Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
 * preprocessed by cpp.  All routines read their arguments from the stack
 * and return their result in %eax.
 */

#include <linux/config.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	/*
	 * csum_partial, generic version.
	 *
	 * Stack layout after the two pushes below:
	 *	12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum
	 * Result:  32-bit partial sum in %eax.
	 * Saved and restored: %esi, %ebx.  Scratch: %ecx, %edx, flags.
	 *
	 * Register roles:
	 *	%eax = running sum		%esi = read cursor
	 *	%ecx = bytes left / loop count	%edx = saved byte count
	 *	%ebx = load temporary
	 *
	 * An odd buff is handled by adding the first byte, rotating the
	 * accumulator by 8 bits, summing the rest from the now even address,
	 * and rotating by 8 bits again at label 7.
	 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2bytes.

	# buf is odd
	dec %ecx
	jl 8f			# len was 0: return sum unchanged
	movzbl (%esi), %ebx
	/*
	 * Add the leading byte and fold the carry straight back in.  The
	 * roll below overwrites CF, so a carry out of bit 31 that is not
	 * consumed here would be lost.  This matches the PPro variant.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# keep the byte count for the tail code
	shrl $5, %ecx		# number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clears CF before the adcl chain
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi	# lea and dec leave CF alone, so the
	dec %ecx		# carry chain continues across iterations
	jne 1b
	adcl $0, %eax		# fold the final carry
2:	movl %edx, %ecx
	andl $0x1c, %edx	# whole dwords left after the 32-byte blocks
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# exactly one byte
	movw (%esi),%cx
	leal 2(%esi),%esi	# leal keeps the cmpl flags for the je below
	je 6f			# exactly two bytes
	shll $16,%ecx		# three bytes: the word moves to the high half
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testl $1, 12(%esp)	# was buff odd?
	jz 8f
	roll $8, %eax		# second 8-bit rotate for the odd case
8:
	popl %ebx
	popl %esi
	ret

#else

/* Version for PentiumII/PPro */

	/*
	 * csum_partial, PentiumII/PPro version.
	 *
	 * Stack layout after the two pushes below:
	 *	12(%esp) = buf, 16(%esp) = len, 20(%esp) = sum
	 * Result:  32-bit partial sum in %eax.
	 * Saved and restored: %esi, %ebx.  Scratch: %ecx, %edx, flags.
	 *
	 * The dword loop is a 32-entry adcl block (labels 40..45) that is
	 * entered through a computed jump so that the first pass only runs
	 * the entries needed for the (len & 0x7c) leftover bytes.  The jump
	 * target is 45f - 3 * dwords, so it relies on every
	 * "adcl disp8(%esi), %eax" entry assembling to 3 bytes.  Do not
	 * change the instructions between 40 and 45.
	 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# keep the byte count for the tail code
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes covered by the partial first pass
	shrl $7, %ecx		# number of full 128-byte blocks
	addl %ebx,%esi		# block entries use negative offsets
	shrl $2, %ebx
	negl %ebx		# ebx = -(dwords in the partial pass)
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF before entering the adcl chain
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f			# len was 0: return sum unchanged
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b			# more than two bytes left
	je 32f			# exactly two bytes left
	addl $2, %ecx
	jz 80f			# nothing left
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	/*
	 * NOTE(review): the andl below loads a full dword even though only
	 * 1-3 bytes remain, so it reads up to 3 bytes past buf + len.  The
	 * extra bytes are masked out of the sum.
	 */
	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testl $1, 12(%esp)	# was buf odd?
	jz 90f
	roll $8, %eax		# second 8-bit rotate for the odd case
90:
	popl %ebx
	popl %esi
	ret

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

/*
 * SRC(insn) / DST(insn): emit insn at local label 9999 and add an
 * __ex_table entry that pairs the address of insn with the fixup label
 * 6001 (source access) or 6002 (destination access).
 */
#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

.align 4
.globl csum_partial_copy_generic

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * ARGBASE: offset from %esp to the return address once the prologue has
 * run (one 4-byte local plus three pushed registers).
 * FP: offset from %esp to that 4-byte local, used to keep the byte count.
 */
#define ARGBASE 16
#define FP	12

	/*
	 * csum_partial_copy_generic, generic version.
	 *
	 * Stack arguments relative to ARGBASE(%esp):
	 *	+4 src, +8 dst, +12 len, +16 sum, +20 src_err_ptr,
	 *	+24 dst_err_ptr
	 * Result:  32-bit partial sum in %eax.
	 * Saved and restored: %edi, %esi, %ebx.  Scratch: %ecx, %edx, flags.
	 *
	 * Only bit 1 of dst is tested for alignment; src is not tested.
	 *
	 * Faults:
	 *	on a SRC access: -EFAULT is stored through src_err_ptr, len
	 *	bytes at dst are zeroed, and %eax is 0 on return;
	 *	on a DST access: -EFAULT is stored through dst_err_ptr and
	 *	the routine returns with %eax as it was at the fault.
	 */
csum_partial_copy_generic:
	subl  $4,%esp		# room for the saved byte count
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx			# number of 32-byte blocks
	jz 2f
	testl %esi, %esi		# clears CF before the adcl chain
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi		# lea and dec leave CF alone, so the
	lea 32(%edi), %edi		# carry chain continues across
	dec %ecx			# iterations
	jne 1b
	adcl $0, %eax			# fold the final carry
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx		# whole dwords left after the blocks
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f				# exactly one byte
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi		# leal keeps the cmpl flags
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f				# exactly two bytes
	shll $16,%ecx			# three bytes: word to the high half
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret

#else

/* Version for PentiumII/PPro */

/*
 * ROUND1(x) / ROUND(x): load the dword at x(%esi), add it to %eax (addl
 * for ROUND1, adcl for ROUND) and store it to x(%edi).  The computed jump
 * in csum_partial_copy_generic steps back 8 bytes per ROUND, so it relies
 * on each ROUND assembling to exactly 8 bytes.
 */
#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

/* ARGBASE: offset from %esp to the return address after three pushes. */
#define ARGBASE 12

	/*
	 * csum_partial_copy_generic, PentiumII/PPro version.
	 *
	 * Stack arguments relative to ARGBASE(%esp):
	 *	+4 src, +8 dst, +12 len, +16 sum, +20 src_err_ptr,
	 *	+24 dst_err_ptr
	 * Result:  32-bit partial sum in %eax.
	 * Saved and restored: %ebx, %edi, %esi.  Scratch: %ecx, %edx, flags.
	 *
	 * No alignment handling is done for src or dst.
	 *
	 * The dword loop is a block of 16 ROUNDs entered through a computed
	 * jump, so the first pass only runs the ROUNDs needed for the
	 * (len & 0x3c) leftover bytes.
	 *
	 * Faults:
	 *	on a SRC access: -EFAULT is stored through src_err_ptr, len
	 *	bytes at dst are zeroed, and %eax is 0 on return;
	 *	on a DST access: -EFAULT is stored through dst_err_ptr and
	 *	the routine returns with %eax as it was at the fault.
	 */
csum_partial_copy_generic:
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	/* NOTE(review): the lea into %edx below overwrites this value unread. */
	movl %esi, %edx
	shrl $6, %ecx			# number of full 64-byte blocks
	andl $0x3c, %ebx		# bytes covered by the partial pass
	negl %ebx
	subl %ebx, %esi			# advance both cursors by those bytes;
	subl %ebx, %edi			# the ROUNDs use negative offsets
	lea  -1(%esi),%edx
	andl $-32,%edx			# round (esi - 1) down to 32 bytes
	lea 3f(%ebx,%ebx), %ebx		# 3f - 8 * (dwords in the partial pass)
	testl %esi, %esi		# clears CF before entering the chain
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	/*
	 * Two byte loads from the source whose results are discarded
	 * (presumably to touch the data ahead of the ROUNDs; confirm).
	 */
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax			# fold the final carry of this pass
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %edx
	jb 5f				# exactly one byte
SRC(	movw (%esi), %dx         )
	leal 2(%esi), %esi		# leal keeps the cmpl flags
DST(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f				# exactly two bytes
	shll $16,%edx			# three bytes: word to the high half
5:
SRC(	movb (%esi), %dl         )
DST(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl	ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp  7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret

#undef ROUND
#undef ROUND1

#endif