Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()

Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0. This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Eric Biggers and committed by Herbert Xu.
Commit: ea9459ef (parent: 1d27e1f5)

Diffstat: +83 −195 (83 lines added, 195 lines removed)
arch/x86/crypto/aesni-intel_asm.S
··· 2825 2825 .previous 2826 2826 2827 2827 /* 2828 - * _aesni_gf128mul_x_ble: internal ABI 2829 - * Multiply in GF(2^128) for XTS IVs 2828 + * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs 2830 2829 * input: 2831 2830 * IV: current IV 2832 2831 * GF128MUL_MASK == mask with 0x87 and 0x01 2833 2832 * output: 2834 2833 * IV: next IV 2835 2834 * changed: 2836 - * CTR: == temporary value 2835 + * KEY: == temporary value 2837 2836 */ 2838 - #define _aesni_gf128mul_x_ble() \ 2839 - pshufd $0x13, IV, KEY; \ 2840 - paddq IV, IV; \ 2841 - psrad $31, KEY; \ 2842 - pand GF128MUL_MASK, KEY; \ 2843 - pxor KEY, IV; 2837 + .macro _aesni_gf128mul_x_ble 2838 + pshufd $0x13, IV, KEY 2839 + paddq IV, IV 2840 + psrad $31, KEY 2841 + pand GF128MUL_MASK, KEY 2842 + pxor KEY, IV 2843 + .endm 2844 2844 2845 - /* 2846 - * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, 2847 - * const u8 *src, unsigned int len, le128 *iv) 2848 - */ 2849 - SYM_FUNC_START(aesni_xts_enc) 2845 + .macro _aesni_xts_crypt enc 2850 2846 FRAME_BEGIN 2851 2847 #ifndef __x86_64__ 2852 2848 pushl IVP ··· 2861 2865 movups (IVP), IV 2862 2866 2863 2867 mov 480(KEYP), KLEN 2868 + .if !\enc 2869 + add $240, KEYP 2864 2870 2865 - .Lxts_enc_loop4: 2871 + test $15, LEN 2872 + jz .Lxts_loop4\@ 2873 + sub $16, LEN 2874 + .endif 2875 + 2876 + .Lxts_loop4\@: 2866 2877 sub $64, LEN 2867 - jl .Lxts_enc_1x 2878 + jl .Lxts_1x\@ 2868 2879 2869 2880 movdqa IV, STATE1 2870 2881 movdqu 0x00(INP), IN 2871 2882 pxor IN, STATE1 2872 2883 movdqu IV, 0x00(OUTP) 2873 2884 2874 - _aesni_gf128mul_x_ble() 2885 + _aesni_gf128mul_x_ble 2875 2886 movdqa IV, STATE2 2876 2887 movdqu 0x10(INP), IN 2877 2888 pxor IN, STATE2 2878 2889 movdqu IV, 0x10(OUTP) 2879 2890 2880 - _aesni_gf128mul_x_ble() 2891 + _aesni_gf128mul_x_ble 2881 2892 movdqa IV, STATE3 2882 2893 movdqu 0x20(INP), IN 2883 2894 pxor IN, STATE3 2884 2895 movdqu IV, 0x20(OUTP) 2885 2896 2886 - _aesni_gf128mul_x_ble() 2897 + _aesni_gf128mul_x_ble 2887 2898 movdqa IV, 
STATE4 2888 2899 movdqu 0x30(INP), IN 2889 2900 pxor IN, STATE4 2890 2901 movdqu IV, 0x30(OUTP) 2891 2902 2903 + .if \enc 2892 2904 call _aesni_enc4 2905 + .else 2906 + call _aesni_dec4 2907 + .endif 2893 2908 2894 2909 movdqu 0x00(OUTP), IN 2895 2910 pxor IN, STATE1 ··· 2918 2911 pxor IN, STATE4 2919 2912 movdqu STATE4, 0x30(OUTP) 2920 2913 2921 - _aesni_gf128mul_x_ble() 2914 + _aesni_gf128mul_x_ble 2922 2915 2923 2916 add $64, INP 2924 2917 add $64, OUTP 2925 2918 test LEN, LEN 2926 - jnz .Lxts_enc_loop4 2919 + jnz .Lxts_loop4\@ 2927 2920 2928 - .Lxts_enc_ret_iv: 2921 + .Lxts_ret_iv\@: 2929 2922 movups IV, (IVP) 2930 2923 2931 - .Lxts_enc_ret: 2924 + .Lxts_ret\@: 2932 2925 #ifndef __x86_64__ 2933 2926 popl KLEN 2934 2927 popl KEYP ··· 2938 2931 FRAME_END 2939 2932 RET 2940 2933 2941 - .Lxts_enc_1x: 2934 + .Lxts_1x\@: 2942 2935 add $64, LEN 2943 - jz .Lxts_enc_ret_iv 2936 + jz .Lxts_ret_iv\@ 2937 + .if \enc 2944 2938 sub $16, LEN 2945 - jl .Lxts_enc_cts4 2939 + jl .Lxts_cts4\@ 2940 + .endif 2946 2941 2947 - .Lxts_enc_loop1: 2942 + .Lxts_loop1\@: 2948 2943 movdqu (INP), STATE 2944 + .if \enc 2949 2945 pxor IV, STATE 2950 2946 call _aesni_enc1 2951 - pxor IV, STATE 2952 - _aesni_gf128mul_x_ble() 2953 - 2954 - test LEN, LEN 2955 - jz .Lxts_enc_out 2956 - 2947 + .else 2957 2948 add $16, INP 2958 2949 sub $16, LEN 2959 - jl .Lxts_enc_cts1 2950 + jl .Lxts_cts1\@ 2951 + pxor IV, STATE 2952 + call _aesni_dec1 2953 + .endif 2954 + pxor IV, STATE 2955 + _aesni_gf128mul_x_ble 2956 + 2957 + test LEN, LEN 2958 + jz .Lxts_out\@ 2959 + 2960 + .if \enc 2961 + add $16, INP 2962 + sub $16, LEN 2963 + jl .Lxts_cts1\@ 2964 + .endif 2960 2965 2961 2966 movdqu STATE, (OUTP) 2962 2967 add $16, OUTP 2963 - jmp .Lxts_enc_loop1 2968 + jmp .Lxts_loop1\@ 2964 2969 2965 - .Lxts_enc_out: 2970 + .Lxts_out\@: 2966 2971 movdqu STATE, (OUTP) 2967 - jmp .Lxts_enc_ret_iv 2972 + jmp .Lxts_ret_iv\@ 2968 2973 2969 - .Lxts_enc_cts4: 2974 + .if \enc 2975 + .Lxts_cts4\@: 2970 2976 movdqa STATE4, STATE 
2971 2977 sub $16, OUTP 2978 + .Lxts_cts1\@: 2979 + .else 2980 + .Lxts_cts1\@: 2981 + movdqa IV, STATE4 2982 + _aesni_gf128mul_x_ble 2972 2983 2973 - .Lxts_enc_cts1: 2984 + pxor IV, STATE 2985 + call _aesni_dec1 2986 + pxor IV, STATE 2987 + .endif 2974 2988 #ifndef __x86_64__ 2975 2989 lea .Lcts_permute_table, T1 2976 2990 #else ··· 3017 2989 pblendvb IN2, IN1 3018 2990 movaps IN1, STATE 3019 2991 2992 + .if \enc 3020 2993 pxor IV, STATE 3021 2994 call _aesni_enc1 3022 2995 pxor IV, STATE 2996 + .else 2997 + pxor STATE4, STATE 2998 + call _aesni_dec1 2999 + pxor STATE4, STATE 3000 + .endif 3023 3001 3024 3002 movups STATE, (OUTP) 3025 - jmp .Lxts_enc_ret 3003 + jmp .Lxts_ret\@ 3004 + .endm 3005 + 3006 + /* 3007 + * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, 3008 + * const u8 *src, unsigned int len, le128 *iv) 3009 + */ 3010 + SYM_FUNC_START(aesni_xts_enc) 3011 + _aesni_xts_crypt 1 3026 3012 SYM_FUNC_END(aesni_xts_enc) 3027 3013 3028 3014 /* ··· 3044 3002 * const u8 *src, unsigned int len, le128 *iv) 3045 3003 */ 3046 3004 SYM_FUNC_START(aesni_xts_dec) 3047 - FRAME_BEGIN 3048 - #ifndef __x86_64__ 3049 - pushl IVP 3050 - pushl LEN 3051 - pushl KEYP 3052 - pushl KLEN 3053 - movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 3054 - movl (FRAME_OFFSET+24)(%esp), OUTP # dst 3055 - movl (FRAME_OFFSET+28)(%esp), INP # src 3056 - movl (FRAME_OFFSET+32)(%esp), LEN # len 3057 - movl (FRAME_OFFSET+36)(%esp), IVP # iv 3058 - movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK 3059 - #else 3060 - movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK 3061 - #endif 3062 - movups (IVP), IV 3063 - 3064 - mov 480(KEYP), KLEN 3065 - add $240, KEYP 3066 - 3067 - test $15, LEN 3068 - jz .Lxts_dec_loop4 3069 - sub $16, LEN 3070 - 3071 - .Lxts_dec_loop4: 3072 - sub $64, LEN 3073 - jl .Lxts_dec_1x 3074 - 3075 - movdqa IV, STATE1 3076 - movdqu 0x00(INP), IN 3077 - pxor IN, STATE1 3078 - movdqu IV, 0x00(OUTP) 3079 - 3080 - _aesni_gf128mul_x_ble() 3081 - movdqa IV, STATE2 3082 - movdqu 
0x10(INP), IN 3083 - pxor IN, STATE2 3084 - movdqu IV, 0x10(OUTP) 3085 - 3086 - _aesni_gf128mul_x_ble() 3087 - movdqa IV, STATE3 3088 - movdqu 0x20(INP), IN 3089 - pxor IN, STATE3 3090 - movdqu IV, 0x20(OUTP) 3091 - 3092 - _aesni_gf128mul_x_ble() 3093 - movdqa IV, STATE4 3094 - movdqu 0x30(INP), IN 3095 - pxor IN, STATE4 3096 - movdqu IV, 0x30(OUTP) 3097 - 3098 - call _aesni_dec4 3099 - 3100 - movdqu 0x00(OUTP), IN 3101 - pxor IN, STATE1 3102 - movdqu STATE1, 0x00(OUTP) 3103 - 3104 - movdqu 0x10(OUTP), IN 3105 - pxor IN, STATE2 3106 - movdqu STATE2, 0x10(OUTP) 3107 - 3108 - movdqu 0x20(OUTP), IN 3109 - pxor IN, STATE3 3110 - movdqu STATE3, 0x20(OUTP) 3111 - 3112 - movdqu 0x30(OUTP), IN 3113 - pxor IN, STATE4 3114 - movdqu STATE4, 0x30(OUTP) 3115 - 3116 - _aesni_gf128mul_x_ble() 3117 - 3118 - add $64, INP 3119 - add $64, OUTP 3120 - test LEN, LEN 3121 - jnz .Lxts_dec_loop4 3122 - 3123 - .Lxts_dec_ret_iv: 3124 - movups IV, (IVP) 3125 - 3126 - .Lxts_dec_ret: 3127 - #ifndef __x86_64__ 3128 - popl KLEN 3129 - popl KEYP 3130 - popl LEN 3131 - popl IVP 3132 - #endif 3133 - FRAME_END 3134 - RET 3135 - 3136 - .Lxts_dec_1x: 3137 - add $64, LEN 3138 - jz .Lxts_dec_ret_iv 3139 - 3140 - .Lxts_dec_loop1: 3141 - movdqu (INP), STATE 3142 - 3143 - add $16, INP 3144 - sub $16, LEN 3145 - jl .Lxts_dec_cts1 3146 - 3147 - pxor IV, STATE 3148 - call _aesni_dec1 3149 - pxor IV, STATE 3150 - _aesni_gf128mul_x_ble() 3151 - 3152 - test LEN, LEN 3153 - jz .Lxts_dec_out 3154 - 3155 - movdqu STATE, (OUTP) 3156 - add $16, OUTP 3157 - jmp .Lxts_dec_loop1 3158 - 3159 - .Lxts_dec_out: 3160 - movdqu STATE, (OUTP) 3161 - jmp .Lxts_dec_ret_iv 3162 - 3163 - .Lxts_dec_cts1: 3164 - movdqa IV, STATE4 3165 - _aesni_gf128mul_x_ble() 3166 - 3167 - pxor IV, STATE 3168 - call _aesni_dec1 3169 - pxor IV, STATE 3170 - 3171 - #ifndef __x86_64__ 3172 - lea .Lcts_permute_table, T1 3173 - #else 3174 - lea .Lcts_permute_table(%rip), T1 3175 - #endif 3176 - add LEN, INP /* rewind input pointer */ 3177 - add $16, LEN 
/* # bytes in final block */ 3178 - movups (INP), IN1 3179 - 3180 - mov T1, IVP 3181 - add $32, IVP 3182 - add LEN, T1 3183 - sub LEN, IVP 3184 - add OUTP, LEN 3185 - 3186 - movups (T1), %xmm4 3187 - movaps STATE, IN2 3188 - pshufb %xmm4, STATE 3189 - movups STATE, (LEN) 3190 - 3191 - movups (IVP), %xmm0 3192 - pshufb %xmm0, IN1 3193 - pblendvb IN2, IN1 3194 - movaps IN1, STATE 3195 - 3196 - pxor STATE4, STATE 3197 - call _aesni_dec1 3198 - pxor STATE4, STATE 3199 - 3200 - movups STATE, (OUTP) 3201 - jmp .Lxts_dec_ret 3005 + _aesni_xts_crypt 0 3202 3006 SYM_FUNC_END(aesni_xts_dec)