Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: x86/aes-xts - eliminate a few more instructions

- For conditionally subtracting 16 from LEN when decrypting a message
whose length isn't a multiple of 16, use the cmovnz instruction.

- Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN.

- Remove an unnecessary test instruction.

This results in slightly shorter code, both source and binary.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Eric Biggers and committed by Herbert Xu.
e619723a 2717e01f

+13 -26
+13 -26
arch/x86/crypto/aes-xts-avx-x86_64.S
··· 559 559 .macro _aes_xts_crypt enc 560 560 _define_aliases 561 561 562 - // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). 563 - movl 480(KEY), KEYLEN 564 - 565 562 .if !\enc 566 563 // When decrypting a message whose length isn't a multiple of the AES 567 564 // block length, exclude the last full block from the main loop by 568 565 // subtracting 16 from LEN. This is needed because ciphertext stealing 569 566 // decryption uses the last two tweaks in reverse order. We'll handle 570 567 // the last full block and the partial block specially at the end. 568 + lea -16(LEN), %rax 571 569 test $15, LEN 572 - jnz .Lneed_cts_dec\@ 573 - .Lxts_init\@: 570 + cmovnz %rax, LEN 574 571 .endif 572 + 573 + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). 574 + movl 480(KEY), KEYLEN 575 575 576 576 // Setup the pointer to the round keys and cache as many as possible. 577 577 _setup_round_keys \enc ··· 661 661 RET 662 662 663 663 .Lhandle_remainder\@: 664 - add $4*VL, LEN // Undo the extra sub from earlier. 665 664 666 665 // En/decrypt any remaining full blocks, one vector at a time. 667 666 .if VL > 16 668 - sub $VL, LEN 667 + add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL. 669 668 jl .Lvec_at_a_time_done\@ 670 669 .Lvec_at_a_time\@: 671 670 _vmovdqu (SRC), V0 ··· 676 677 sub $VL, LEN 677 678 jge .Lvec_at_a_time\@ 678 679 .Lvec_at_a_time_done\@: 679 - add $VL-16, LEN // Undo the extra sub from earlier. 680 + add $VL-16, LEN // Undo extra sub of VL, then sub 16. 680 681 .else 681 - sub $16, LEN 682 + add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16. 682 683 .endif 683 684 684 685 // En/decrypt any remaining full blocks, one at a time. ··· 693 694 sub $16, LEN 694 695 jge .Lblock_at_a_time\@ 695 696 .Lblock_at_a_time_done\@: 696 - add $16, LEN // Undo the extra sub from earlier. 697 + add $16, LEN // Undo the extra sub of 16. 698 + // Now 0 <= LEN <= 15. If LEN is zero, we're done. 
699 + jz .Ldone\@ 697 700 698 - .Lfull_blocks_done\@: 699 - // Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to 700 - // process the last 16 + LEN bytes. If LEN is zero, we're done. 701 - test LEN, LEN 702 - jnz .Lcts\@ 703 - jmp .Ldone\@ 704 - 705 - .if !\enc 706 - .Lneed_cts_dec\@: 707 - sub $16, LEN 708 - jmp .Lxts_init\@ 709 - .endif 710 - 711 - .Lcts\@: 712 - // Do ciphertext stealing (CTS) to en/decrypt the last full block and 713 - // the partial block. TWEAK0_XMM contains the next tweak. 701 + // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN. 702 + // Do ciphertext stealing to process the last 16 + LEN bytes. 714 703 715 704 .if \enc 716 705 // If encrypting, the main loop already encrypted the last full block to