Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.1 622 lines 13 kB view raw
/*
 * AVX2 implementation of MORUS-1280
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/frame.h>

/*
 * Pack four 2-bit element selectors into one 8-bit shuffle immediate;
 * i0 occupies the two lowest bits, i3 the two highest.
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))

/* Element rotations by one, two and three positions, respectively. */
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)

/* The five 256-bit state words live in %ymm0..%ymm4. */
#define STATE0		%ymm0
#define STATE0_LOW	%xmm0
#define STATE1		%ymm1
#define STATE2		%ymm2
#define STATE3		%ymm3
#define STATE4		%ymm4

/*
 * KEY and MSG are two names for the same register (%ymm5): a routine
 * holds either the key or a message block there, never both.
 */
#define KEY		%ymm5
#define MSG		%ymm5
#define MSG_LOW		%xmm5

/* Scratch registers. */
#define T0		%ymm6
#define T0_LOW		%xmm6
#define T1		%ymm7

/* 32-byte constant: the first 32 Fibonacci numbers, each reduced mod 256. */
.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
.p2align 5
.Lmorus1280_const:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/* 32-byte table in which byte number i holds the value i. */
.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
.p2align 5
.Lmorus1280_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f

.text

/*
 * morus1280_round: one state-word update.
 *
 *   s0 ^= (s1 & s2) ^ s3
 *   s0  = each 64-bit lane of s0 rotated left by b bits
 *   s3  = 64-bit lanes of s3 permuted according to immediate w
 *
 * s4 is accepted so that call sites can list all five state words, but
 * the macro body never references it.  T0 is clobbered.
 */
.macro morus1280_round s0, s1, s2, s3, s4, b, w
	vpand	\s1, \s2, T0
	vpxor	T0, \s0, \s0
	vpxor	\s3, \s0, \s0
	/* rotate left by b: (s0 << b) ^ (s0 >> (64 - b)) */
	vpsllq	$\b, \s0, T0
	vpsrlq	$(64 - \b), \s0, \s0
	vpxor	T0, \s0, \s0
	vpermq	$\w, \s3, \s3
.endm

/*
 * __morus1280_update: internal ABI
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update:
	/*
	 * Five rounds; the message block is XORed into STATE1..STATE4
	 * between consecutive rounds.  MSG itself is left unmodified.
	 */
	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	vpxor MSG, STATE1, STATE1
	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	vpxor MSG, STATE2, STATE2
	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	vpxor MSG, STATE3, STATE3
	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	vpxor MSG, STATE4, STATE4
	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update)

/*
 * __morus1280_update_zero: internal ABI
 *
 * Same five rounds as __morus1280_update, but without mixing in a
 * message block (equivalent to an all-zero MSG).  %ymm5 (MSG/KEY) is
 * neither read nor written here.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update_zero:
	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update_zero)

/*
 * __load_partial: internal ABI
 *
 * Load the first (%rcx & 0x1F) bytes at %rsi into MSG, zero-padding the
 * remainder.  The block is assembled from the tail towards the head:
 * a 1-, 2-, 4-, 8- and 16-byte piece is read for each bit set in the
 * count, and every piece is read from inside the first 'bytes' bytes.
 * Only the low five bits of %rcx are ever examined.
 *
 * The legacy (non-VEX) SSE encodings used below are deliberate: they
 * leave bits 255:128 of the %ymm register untouched, which the code
 * relies on after rotating data into the upper lane.  Do not convert
 * them to their VEX forms.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   %r8
 *   %r9
 */
__load_partial:
	xor %r9d, %r9d
	vpxor MSG, MSG, MSG

	/* bit 0: one trailing byte at offset (bytes & 0x1E) */
	mov %rcx, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov %rcx, %r8
	and $0x1E, %r8
	add %rsi, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (bytes & 0x1C), placed below the byte */
	mov %rcx, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov %rcx, %r8
	and $0x1C, %r8
	add %rsi, %r8
	shl $16, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (bytes & 0x18) */
	mov %rcx, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov %rcx, %r8
	and $0x18, %r8
	add %rsi, %r8
	shl $32, %r9
	mov (%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 gathered bytes become the low quadword of MSG */
	movq %r9, MSG_LOW

	/* bit 3: eight bytes at offset (bytes & 0x10) */
	mov %rcx, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov %rcx, %r8
	and $0x10, %r8
	add %rsi, %r8
	pshufd $MASK2, MSG_LOW, MSG_LOW	/* swap the two quadwords */
	pinsrq $0, (%r8), MSG_LOW

.Lld_partial_8:
	/* bit 4: sixteen bytes at offset 0 */
	mov %rcx, %r8
	and $0x10, %r8
	jz .Lld_partial_16

	vpermq $MASK2, MSG, MSG		/* swap the two 128-bit lanes */
	movdqu (%rsi), MSG_LOW		/* legacy load: upper lane preserved */

.Lld_partial_16:
	ret
ENDPROC(__load_partial)

/*
 * __store_partial: internal ABI
 *
 * Store the first 'bytes' bytes of T0 to dst, as a 16-, 8-, 4-, 2- and
 * 1-byte piece as needed.  The count is taken from %ecx: it originates
 * from an 'unsigned int' argument, so bits 63:32 of %rcx are not
 * guaranteed to be zero and must not be consulted.  The callers in this
 * file pass the length of a final partial block, i.e. fewer than 32
 * bytes.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes (low 32 bits)
 *   T0   - message block
 * changed:
 *   %r8
 *   %r9
 *   %r10
 *   T0   (lanes are rotated when 16 or more bytes are stored)
 */
__store_partial:
	mov %ecx, %r8d			/* remaining count, zero-extended */
	mov %rdx, %r9			/* running destination pointer */

	cmp $16, %r8
	jb .Lst_partial_16

	movdqu T0_LOW, (%r9)
	vpermq $MASK2, T0, T0		/* bring the upper lane down */

	sub $16, %r8
	add $16, %r9

.Lst_partial_16:
	movq T0_LOW, %r10

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	pextrq $1, T0_LOW, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)

/*
 * void crypto_morus1280_avx2_init(void *state, const void *key,
 *                                 const void *iv);
 *
 * %rdi = state (5 x 32 bytes, written), %rsi = key (32 bytes read),
 * %rdx = iv (16 bytes read).
 */
ENTRY(crypto_morus1280_avx2_init)
	FRAME_BEGIN

	/* load IV into the low lane; the upper lane stays zero: */
	vpxor STATE0, STATE0, STATE0
	movdqu (%rdx), STATE0_LOW
	/* load key: */
	vmovdqu (%rsi), KEY
	vmovdqa KEY, STATE1
	/* load all ones: */
	vpcmpeqd STATE2, STATE2, STATE2
	/* load all zeros: */
	vpxor STATE3, STATE3, STATE3
	/* load the constant: */
	vmovdqa .Lmorus1280_const(%rip), STATE4

	/*
	 * update 16 times with zero (KEY stays live in %ymm5 because
	 * __morus1280_update_zero only touches STATE[0-4] and T0):
	 */
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero

	/* xor-in the key again after updates: */
	vpxor KEY, STATE1, STATE1

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_init)

/*
 * void crypto_morus1280_avx2_ad(void *state, const void *data,
 *                               unsigned int length);
 *
 * Absorb every complete 32-byte block of 'data' into the state.  A
 * trailing partial block is ignored; nothing happens (and the state is
 * not touched) when length < 32.
 */
ENTRY(crypto_morus1280_avx2_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rdx are
	 * undefined on entry; zero-extend before using it as 64-bit.
	 */
	mov %edx, %edx

	cmp $32, %rdx
	jb .Lad_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* pick the aligned-load loop only if data is 32-byte aligned */
	mov %rsi, %r8
	and $0x1F, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	vmovdqa (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_a_loop

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	vmovdqu (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_u_loop

.Lad_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_ad)

/*
 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Encrypt every complete 32-byte block: for each block,
 *   dst = src ^ STATE0 ^ vpermq(MASK3, STATE1) ^ (STATE2 & STATE3)
 * and then the state is updated with the plaintext block.  A trailing
 * partial block is left for crypto_morus1280_avx2_enc_tail.
 * T0 and T1 are used as scratch.
 */
ENTRY(crypto_morus1280_avx2_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rcx are
	 * undefined on entry; zero-extend before using it as 64-bit.
	 */
	mov %ecx, %ecx

	cmp $32, %rcx
	jb .Lenc_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* aligned loop requires both src and dst to be 32-byte aligned */
	mov %rsi, %r8
	or  %rdx, %r8
	and $0x1F, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	vmovdqa (%rsi), MSG
	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqa T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_a_loop

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	vmovdqu (%rsi), MSG
	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqu T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc)

/*
 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Encrypt a final partial block.  The plaintext is loaded zero-padded
 * to 32 bytes, only 'length' ciphertext bytes are written, and the
 * state is updated with the zero-padded plaintext.  The length is only
 * consumed by __load_partial (low five bits) and __store_partial (low
 * 32 bits), so the undefined upper half of %rcx is never used.
 */
ENTRY(crypto_morus1280_avx2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* encrypt message: */
	call __load_partial

	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0

	call __store_partial

	call __morus1280_update

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc_tail)

/*
 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Decrypt every complete 32-byte block: for each block,
 *   dst = src ^ STATE0 ^ vpermq(MASK3, STATE1) ^ (STATE2 & STATE3)
 * and then the state is updated with the recovered plaintext (which is
 * computed in place in MSG).  A trailing partial block is left for
 * crypto_morus1280_avx2_dec_tail.  T0 is used as scratch.
 */
ENTRY(crypto_morus1280_avx2_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so bits 63:32 of %rcx are
	 * undefined on entry; zero-extend before using it as 64-bit.
	 */
	mov %ecx, %ecx

	cmp $32, %rcx
	jb .Ldec_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* aligned loop requires both src and dst to be 32-byte aligned */
	mov %rsi, %r8
	or  %rdx, %r8
	and $0x1F, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	vmovdqa (%rsi), MSG
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqa MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_a_loop

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	vmovdqu (%rsi), MSG
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqu MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec)

/*
 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Decrypt a final partial block.  After the keystream XOR, the bytes of
 * MSG at positions >= length hold keystream rather than plaintext, so
 * they are masked back to zero before the state update.  The length is
 * consumed only through its low bits (__load_partial, the byte
 * broadcast) or its low 32 bits (__store_partial).
 */
ENTRY(crypto_morus1280_avx2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* decrypt message: */
	call __load_partial

	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqa MSG, T0

	/* writes 'length' bytes of T0; T0 is clobbered, MSG is not */
	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of the length and
	 * compare it against the index table, giving 0xFF in every byte
	 * whose index is below the length and 0x00 elsewhere.
	 */
	movq %rcx, T0_LOW
	vpbroadcastb T0_LOW, T0
	vmovdqa .Lmorus1280_counter(%rip), T1
	vpcmpgtb T1, T0, T0
	vpand T0, MSG, MSG

	call __morus1280_update

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec_tail)

/*
 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Run the finalization rounds and XOR the resulting 32-byte tag block
 * into the buffer at tag_xor.  The updated state is NOT written back
 * to memory.  Both lengths are genuine 64-bit arguments, so %rdx and
 * %rcx are used at full width here.
 */
ENTRY(crypto_morus1280_avx2_final)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* xor state[0] into state[4]: */
	vpxor STATE0, STATE4, STATE4

	/* prepare length block: assoclen | cryptlen | 0 | 0 (64-bit lanes) */
	vpxor MSG, MSG, MSG
	vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
	vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
	vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */

	/* update state (10 times with the length block): */
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update

	/* xor tag: */
	vmovdqu (%rsi), MSG

	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_final)