Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.0 703 lines 10 kB view raw
/*
 * AES-NI + SSE2 implementation of AEGIS-256
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * run through the C preprocessor.
 *
 * The exported functions take their arguments in %rdi, %rsi, %rdx, %rcx
 * (see the STATEP/LEN/SRC/DST aliases below).  Only %r8-%r10 and
 * %xmm0-%xmm10 are used as scratch in addition to the argument registers.
 *
 * The state is six 16-byte blocks stored back to back at STATEP
 * (offsets 0x00..0x50); it is always accessed with unaligned moves.
 */

#include <linux/linkage.h>
#include <asm/frame.h>

/* The six 128-bit state blocks. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5
/* Current message block and scratch registers. */
#define MSG	%xmm6
#define T0	%xmm7
#define T1	%xmm8
#define T2	%xmm9
#define T3	%xmm10

/* Argument registers as used by the ad/enc/dec entry points. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx

.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
.align 16
.Laegis256_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis256_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/* Byte indices 0..15; compared against LEN to build the tail mask. */
.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
.align 16
.Laegis256_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text

/*
 * __load_partial: internal ABI
 *
 * Loads the low bits of LEN worth of bytes from SRC into MSG, zero padded.
 * The length is decomposed by its bits (1, 2, 4, 8); the chunk for each set
 * bit is read from the offset formed by the higher bits of LEN, so the
 * pieces are gathered from the highest offset down to offset 0.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *
 * NOTE(review): the offset masks keep bit 4 (0x10) of LEN, so this looks
 * like it expects LEN < 16 from its callers - confirm against the glue code.
 */
__load_partial:
	xor %r9d, %r9d
	pxor MSG, MSG

	/* bit 0: one trailing byte at offset (LEN & ~1) */
	mov LEN, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	add SRC, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (LEN & ~3), placed below what we have */
	mov LEN, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	add SRC, %r8
	shl $0x10, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (LEN & ~7), placed below what we have */
	mov LEN, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	add SRC, %r8
	shl $32, %r9
	mov (%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG

	/* bit 3: eight bytes at offset (LEN & 0x10); earlier data moves up */
	mov LEN, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	add SRC, %r8
	pslldq $8, MSG
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)

/*
 * __store_partial: internal ABI
 *
 * Writes the first LEN bytes of T0 to DST in chunks of 8, 4, 2 and 1 bytes.
 * LEN and DST themselves are left untouched; working copies live in
 * %r8 (remaining bytes) and %r9 (write cursor).
 *
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0  (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov LEN, %r8
	mov DST, %r9

	movq T0, %r10

	cmp $8, %r8
	jl .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0		/* bring the upper half down ... */
	movq T0, %r10		/* ... and continue with it in %r10 */

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jl .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jl .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jl .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)

/*
 * update: one state update step without message injection.
 *
 * Each register becomes AESENC(itself) with the next register as round key;
 * STATE4 uses the old STATE5 (saved in T0).  As a result the logical state
 * block j ends up in the register that previously held block j - 1 (mod 6),
 * i.e. the register roles rotate by one position per update.  The numbered
 * updateN / cryptN / state_storeN macros below encode that rotation so the
 * data never has to be moved between registers.
 *
 * changed: T0
 */
.macro update
	movdqa STATE5, T0
	aesenc STATE0, STATE5
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc STATE4, STATE3
	aesenc     T0, STATE4
.endm

/*
 * updateN m: perform an update starting from the register layout reached
 * after N previous updates and xor the message block \m into the register
 * that holds logical block 0 afterwards.
 */
.macro update0 m
	update
	pxor \m, STATE5
.endm

.macro update1 m
	update
	pxor \m, STATE4
.endm

.macro update2 m
	update
	pxor \m, STATE3
.endm

.macro update3 m
	update
	pxor \m, STATE2
.endm

.macro update4 m
	update
	pxor \m, STATE1
.endm

.macro update5 m
	update
	pxor \m, STATE0
.endm

/* Load the six state blocks from STATEP in their canonical layout. */
.macro state_load
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4
	movdqu 0x50(STATEP), STATE5
.endm

/*
 * state_store s0..s5: write the state back to STATEP.
 *
 * The arguments name the register layout *before* the most recent update;
 * the store order (\s5 first) applies the one-position rotation that the
 * update introduced.
 */
.macro state_store s0 s1 s2 s3 s4 s5
	movdqu \s5, 0x00(STATEP)
	movdqu \s0, 0x10(STATEP)
	movdqu \s1, 0x20(STATEP)
	movdqu \s2, 0x30(STATEP)
	movdqu \s3, 0x40(STATEP)
	movdqu \s4, 0x50(STATEP)
.endm

/* state_storeN: store the state after updateN was the last update. */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store1
	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store2
	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store3
	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro state_store4
	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro state_store5
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm

/*
 * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
 *
 * %rdi = state, %rsi = key (two 16-byte blocks), %rdx = iv (two 16-byte
 * blocks).  The key is read with aligned loads (movdqa) and therefore must
 * be 16-byte aligned; the IV is read with unaligned loads.
 */
ENTRY(crypto_aegis256_aesni_init)
	FRAME_BEGIN

	/* load key: */
	movdqa 0x00(%rsi), MSG
	movdqa 0x10(%rsi), T1
	movdqa MSG, STATE4
	movdqa T1, STATE5

	/* load IV: */
	movdqu 0x00(%rdx), T2
	movdqu 0x10(%rdx), T3
	pxor MSG, T2
	pxor T1, T3
	movdqa T2, STATE0
	movdqa T3, STATE1

	/* load the constants: */
	movdqa .Laegis256_const_0(%rip), STATE3
	movdqa .Laegis256_const_1(%rip), STATE2
	pxor STATE3, STATE4
	pxor STATE2, STATE5

	/*
	 * update 16 times, cycling through key block 0, key block 1,
	 * key block 0 ^ IV block 0 and key block 1 ^ IV block 1:
	 */
	update0 MSG
	update1 T1
	update2 T2
	update3 T3
	update4 MSG
	update5 T1
	update0 T2
	update1 T3
	update2 MSG
	update3 T1
	update4 T2
	update5 T3
	update0 MSG
	update1 T1
	update2 T2
	update3 T3

	/* the last update was update3, so store with the matching rotation */
	state_store3

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_init)

/*
 * ad_block a i: absorb the i-th 16-byte block of the current 0x60-byte
 * group.  \a selects the load flavour (a = movdqa, u = movdqu).  Leaves the
 * loop through .Lad_out_\i once fewer than 16 bytes remain.
 */
.macro ad_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	update\i MSG
	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Lad_out_\i
.endm

/*
 * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs all complete 16-byte blocks of associated data.  If length is
 * below 16 the function returns without reading or writing the state; any
 * trailing bytes beyond the last complete block are not consumed here.
 */
ENTRY(crypto_aegis256_aesni_ad)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lad_out

	state_load

	/* pick the aligned or unaligned loop based on the source pointer */
	mov SRC, %r8
	and $0xf, %r8
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5

	/* six updates bring the registers back to the canonical layout */
	add $0x60, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5

	add $0x60, SRC
	jmp .Lad_u_loop

.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_ad)

/*
 * crypt m s0..s5: xor the keystream block into \m, where
 *   keystream = s1 ^ s4 ^ s5 ^ (s2 & s3)
 * and s0..s5 name the registers holding logical state blocks 0..5.
 * (\s0 is accepted for symmetry but not used.)
 *
 * changed: T3
 */
.macro crypt m s0 s1 s2 s3 s4 s5
	pxor \s1, \m
	pxor \s4, \m
	pxor \s5, \m
	movdqa \s2, T3
	pand \s3, T3
	pxor T3, \m
.endm

/* cryptN m: crypt with the register layout reached after N updates. */
.macro crypt0 m
	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt1 m
	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt2 m
	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt3 m
	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro crypt4 m
	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro crypt5 m
	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm

/*
 * encrypt_block a i: encrypt the i-th 16-byte block of the current group.
 * The ciphertext is produced in T0 and stored; the state is updated with
 * the plaintext kept in MSG.
 */
.macro encrypt_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	crypt\i T0
	movdq\a T0, (\i * 0x10)(DST)

	update\i MSG

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Lenc_out_\i
.endm

/*
 * decrypt_block a i: decrypt the i-th 16-byte block of the current group.
 * MSG is turned into plaintext in place, stored, and then used for the
 * state update.
 */
.macro decrypt_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	crypt\i MSG
	movdq\a MSG, (\i * 0x10)(DST)

	update\i MSG

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Ldec_out_\i
.endm

/*
 * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts all complete 16-byte blocks.  Returns without touching the state
 * if length is below 16; a trailing partial block is left to
 * crypto_aegis256_aesni_enc_tail.
 */
ENTRY(crypto_aegis256_aesni_enc)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lenc_out

	state_load

	/* the aligned loop needs both src and dst to be 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	and $0xf, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Lenc_u_loop

.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc)

/*
 * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts one final partial block: the input is zero padded to 16 bytes by
 * __load_partial, only `length` ciphertext bytes are written, and the state
 * is updated with the zero-padded plaintext.
 */
ENTRY(crypto_aegis256_aesni_enc_tail)
	FRAME_BEGIN

	state_load

	/* encrypt message: */
	call __load_partial

	movdqa MSG, T0
	crypt0 T0

	call __store_partial

	update0 MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc_tail)

/*
 * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts all complete 16-byte blocks.  Returns without touching the state
 * if length is below 16; a trailing partial block is left to
 * crypto_aegis256_aesni_dec_tail.
 */
ENTRY(crypto_aegis256_aesni_dec)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Ldec_out

	state_load

	/* the aligned loop needs both src and dst to be 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Ldec_u_loop

.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec)

/*
 * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts one final partial block.  After the keystream is applied, the
 * bytes at index >= length hold keystream rather than zero padding, so they
 * are masked off before the block is fed into the state update.
 */
ENTRY(crypto_aegis256_aesni_dec_tail)
	FRAME_BEGIN

	state_load

	/* decrypt message: */
	call __load_partial

	crypt0 MSG

	/* __store_partial consumes T0, so hand it a copy of the plaintext */
	movdqa MSG, T0
	call __store_partial

	/* mask with byte count: */
	movq LEN, T0
	/* four byte-interleaves replicate the low byte of LEN to all 16 lanes */
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis256_counter(%rip), T1
	pcmpgtb T1, T0		/* lane = 0xff where LEN > byte index */
	pand T0, MSG

	update0 MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec_tail)

/*
 * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * %rdi = state, %rsi = tag_xor, %rdx = assoclen, %rcx = cryptlen.
 * Runs seven updates with the length block and xors the resulting tag
 * (xor of all six state blocks) into the 16 bytes at tag_xor.  The state in
 * memory is not written back.
 */
ENTRY(crypto_aegis256_aesni_final)
	FRAME_BEGIN

	state_load

	/* prepare length block: low qword = assoclen, high qword = cryptlen */
	movq %rdx, MSG
	movq %rcx, T0
	pslldq $8, T0
	pxor T0, MSG
	psllq $3, MSG /* multiply by 8 (to get bit count) */

	pxor STATE3, MSG

	/* update state: */
	update0 MSG
	update1 MSG
	update2 MSG
	update3 MSG
	update4 MSG
	update5 MSG
	update0 MSG

	/* xor tag: */
	movdqu (%rsi), MSG

	/* all six blocks are combined, so the register rotation is irrelevant */
	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG
	pxor STATE5, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_final)