crypto: aesni_intel - fix accessing of unaligned memory

The new XTS code for aesni_intel uses input buffers directly as memory operands
for pxor instructions, which causes a crash if those buffers are not aligned to
16 bytes.

This patch changes the XTS code to handle unaligned memory correctly, by loading
memory with movdqu instead.

Reported-by: Dave Jones <davej@redhat.com>
Tested-by: Dave Jones <davej@redhat.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Jussi Kivilinna and committed by Herbert Xu (commit fe6510b5, parent 68be0b1a).

Changed files (+32 −16):
  arch/x86/crypto/aesni-intel_asm.S (+32 −16)
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2681,56 +2681,68 @@
 	addq %rcx, KEYP

 	movdqa IV, STATE1
-	pxor 0x00(INP), STATE1
+	movdqu 0x00(INP), INC
+	pxor INC, STATE1
 	movdqu IV, 0x00(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE2
-	pxor 0x10(INP), STATE2
+	movdqu 0x10(INP), INC
+	pxor INC, STATE2
 	movdqu IV, 0x10(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE3
-	pxor 0x20(INP), STATE3
+	movdqu 0x20(INP), INC
+	pxor INC, STATE3
 	movdqu IV, 0x20(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE4
-	pxor 0x30(INP), STATE4
+	movdqu 0x30(INP), INC
+	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)

 	call *%r11

-	pxor 0x00(OUTP), STATE1
+	movdqu 0x00(OUTP), INC
+	pxor INC, STATE1
 	movdqu STATE1, 0x00(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE1
-	pxor 0x40(INP), STATE1
+	movdqu 0x40(INP), INC
+	pxor INC, STATE1
 	movdqu IV, 0x40(OUTP)

-	pxor 0x10(OUTP), STATE2
+	movdqu 0x10(OUTP), INC
+	pxor INC, STATE2
 	movdqu STATE2, 0x10(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE2
-	pxor 0x50(INP), STATE2
+	movdqu 0x50(INP), INC
+	pxor INC, STATE2
 	movdqu IV, 0x50(OUTP)

-	pxor 0x20(OUTP), STATE3
+	movdqu 0x20(OUTP), INC
+	pxor INC, STATE3
 	movdqu STATE3, 0x20(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE3
-	pxor 0x60(INP), STATE3
+	movdqu 0x60(INP), INC
+	pxor INC, STATE3
 	movdqu IV, 0x60(OUTP)

-	pxor 0x30(OUTP), STATE4
+	movdqu 0x30(OUTP), INC
+	pxor INC, STATE4
 	movdqu STATE4, 0x30(OUTP)

 	_aesni_gf128mul_x_ble()
 	movdqa IV, STATE4
-	pxor 0x70(INP), STATE4
+	movdqu 0x70(INP), INC
+	pxor INC, STATE4
 	movdqu IV, 0x70(OUTP)

 	_aesni_gf128mul_x_ble()
@@ -2738,16 +2750,20 @@

 	call *%r11

-	pxor 0x40(OUTP), STATE1
+	movdqu 0x40(OUTP), INC
+	pxor INC, STATE1
 	movdqu STATE1, 0x40(OUTP)

-	pxor 0x50(OUTP), STATE2
+	movdqu 0x50(OUTP), INC
+	pxor INC, STATE2
 	movdqu STATE2, 0x50(OUTP)

-	pxor 0x60(OUTP), STATE3
+	movdqu 0x60(OUTP), INC
+	pxor INC, STATE3
 	movdqu STATE3, 0x60(OUTP)

-	pxor 0x70(OUTP), STATE4
+	movdqu 0x70(OUTP), INC
+	pxor INC, STATE4
 	movdqu STATE4, 0x70(OUTP)

 	ret