
x86/asm/crypto: Create stack frames in crypto functions

The crypto code has several callable non-leaf functions which don't
honor CONFIG_FRAME_POINTER, which can result in bad stack traces.

Create stack frames for them when CONFIG_FRAME_POINTER is enabled.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bernd Petrovitsch <bernd@petrovitsch.priv.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chris J Arges <chris.j.arges@canonical.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/6c20192bcf1102ae18ae5a242cabf30ce9b29895.1453405861.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Josh Poimboeuf, committed by Ingo Molnar (8691ccd7 68874ac3)
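For context on what the new macros do (this is not part of the commit's diff): FRAME_BEGIN and FRAME_END come from the <asm/frame.h> header introduced earlier in this series, and FRAME_OFFSET accounts for the extra word FRAME_BEGIN pushes. A rough sketch of the assembly-side definitions, not the verbatim header:

        #ifdef CONFIG_FRAME_POINTER
        /* open a standard stack frame: save the caller's frame pointer,
           then point the frame pointer at our own frame */
        .macro FRAME_BEGIN
                push %_ASM_BP                   /* %ebp on 32-bit, %rbp on 64-bit */
                _ASM_MOV %_ASM_SP, %_ASM_BP
        .endm

        /* tear the frame down again, immediately before ret */
        .macro FRAME_END
                pop %_ASM_BP
        .endm

        /* how far stack-passed arguments moved because of the extra push */
        #define FRAME_OFFSET    __ASM_SEL(4, 8)

        #else /* !CONFIG_FRAME_POINTER */
        #define FRAME_BEGIN
        #define FRAME_END
        #define FRAME_OFFSET    0
        #endif

With CONFIG_FRAME_POINTER disabled, all three collapse to nothing, so the generated code is unchanged in that configuration.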

11 changed files with 148 additions and 27 deletions
arch/x86/crypto/aesni-intel_asm.S  +46 -27
···
 
 #include <linux/linkage.h>
 #include <asm/inst.h>
+#include <asm/frame.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
···
  * unsigned int key_len)
  */
 ENTRY(aesni_set_key)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl KEYP
-        movl 8(%esp), KEYP              # ctx
-        movl 12(%esp), UKEYP            # in_key
-        movl 16(%esp), %edx             # key_len
+        movl (FRAME_OFFSET+8)(%esp), KEYP       # ctx
+        movl (FRAME_OFFSET+12)(%esp), UKEYP     # in_key
+        movl (FRAME_OFFSET+16)(%esp), %edx      # key_len
 #endif
         movups (UKEYP), %xmm0           # user key (first 16 bytes)
         movaps %xmm0, (KEYP)
···
 #ifndef __x86_64__
         popl KEYP
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_set_key)
 
···
  * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_enc)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl KEYP
         pushl KLEN
-        movl 12(%esp), KEYP
-        movl 16(%esp), OUTP
-        movl 20(%esp), INP
+        movl (FRAME_OFFSET+12)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+16)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+20)(%esp), INP       # src
 #endif
         movl 480(KEYP), KLEN            # key length
         movups (INP), STATE             # input
···
         popl KLEN
         popl KEYP
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_enc)
 
···
  * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_dec)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl KEYP
         pushl KLEN
-        movl 12(%esp), KEYP
-        movl 16(%esp), OUTP
-        movl 20(%esp), INP
+        movl (FRAME_OFFSET+12)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+16)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+20)(%esp), INP       # src
 #endif
         mov 480(KEYP), KLEN             # key length
         add $240, KEYP
···
         popl KLEN
         popl KEYP
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_dec)
 
···
  * size_t len)
  */
 ENTRY(aesni_ecb_enc)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl LEN
         pushl KEYP
         pushl KLEN
-        movl 16(%esp), KEYP
-        movl 20(%esp), OUTP
-        movl 24(%esp), INP
-        movl 28(%esp), LEN
+        movl (FRAME_OFFSET+16)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+20)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+24)(%esp), INP       # src
+        movl (FRAME_OFFSET+28)(%esp), LEN       # len
 #endif
         test LEN, LEN                   # check length
         jz .Lecb_enc_ret
···
         popl KEYP
         popl LEN
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_ecb_enc)
 
···
  * size_t len);
  */
 ENTRY(aesni_ecb_dec)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl LEN
         pushl KEYP
         pushl KLEN
-        movl 16(%esp), KEYP
-        movl 20(%esp), OUTP
-        movl 24(%esp), INP
-        movl 28(%esp), LEN
+        movl (FRAME_OFFSET+16)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+20)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+24)(%esp), INP       # src
+        movl (FRAME_OFFSET+28)(%esp), LEN       # len
 #endif
         test LEN, LEN
         jz .Lecb_dec_ret
···
         popl KEYP
         popl LEN
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_ecb_dec)
 
···
  * size_t len, u8 *iv)
  */
 ENTRY(aesni_cbc_enc)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl IVP
         pushl LEN
         pushl KEYP
         pushl KLEN
-        movl 20(%esp), KEYP
-        movl 24(%esp), OUTP
-        movl 28(%esp), INP
-        movl 32(%esp), LEN
-        movl 36(%esp), IVP
+        movl (FRAME_OFFSET+20)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+24)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+28)(%esp), INP       # src
+        movl (FRAME_OFFSET+32)(%esp), LEN       # len
+        movl (FRAME_OFFSET+36)(%esp), IVP       # iv
 #endif
         cmp $16, LEN
         jb .Lcbc_enc_ret
···
         popl LEN
         popl IVP
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_cbc_enc)
 
···
  * size_t len, u8 *iv)
  */
 ENTRY(aesni_cbc_dec)
+        FRAME_BEGIN
 #ifndef __x86_64__
         pushl IVP
         pushl LEN
         pushl KEYP
         pushl KLEN
-        movl 20(%esp), KEYP
-        movl 24(%esp), OUTP
-        movl 28(%esp), INP
-        movl 32(%esp), LEN
-        movl 36(%esp), IVP
+        movl (FRAME_OFFSET+20)(%esp), KEYP      # ctx
+        movl (FRAME_OFFSET+24)(%esp), OUTP      # dst
+        movl (FRAME_OFFSET+28)(%esp), INP       # src
+        movl (FRAME_OFFSET+32)(%esp), LEN       # len
+        movl (FRAME_OFFSET+36)(%esp), IVP       # iv
 #endif
         cmp $16, LEN
         jb .Lcbc_dec_just_ret
···
         popl LEN
         popl IVP
 #endif
+        FRAME_END
         ret
 ENDPROC(aesni_cbc_dec)
 
···
  * size_t len, u8 *iv)
  */
 ENTRY(aesni_ctr_enc)
+        FRAME_BEGIN
         cmp $16, LEN
         jb .Lctr_enc_just_ret
         mov 480(KEYP), KLEN
···
 .Lctr_enc_ret:
         movups IV, (IVP)
 .Lctr_enc_just_ret:
+        FRAME_END
         ret
 ENDPROC(aesni_ctr_enc)
 
···
  * bool enc, u8 *iv)
  */
 ENTRY(aesni_xts_crypt8)
+        FRAME_BEGIN
         cmpb $0, %cl
         movl $0, %ecx
         movl $240, %r10d
···
         pxor INC, STATE4
         movdqu STATE4, 0x70(OUTP)
 
+        FRAME_END
         ret
 ENDPROC(aesni_xts_crypt8)
 
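The FRAME_OFFSET adjustments above only matter on the 32-bit path, where arguments are passed on the stack: FRAME_BEGIN pushes the caller's frame pointer, so every %esp-relative argument offset grows by one word. As an illustration (not part of the patch), this is the stack aesni_enc sees on 32-bit with CONFIG_FRAME_POINTER=y, so FRAME_OFFSET = 4:

        #   (%esp)  saved KLEN              <- pushl KLEN
        #  4(%esp)  saved KEYP              <- pushl KEYP
        #  8(%esp)  caller's %ebp           <- pushed by FRAME_BEGIN
        # 12(%esp)  return address
        # 16(%esp)  ctx  = (FRAME_OFFSET+12)(%esp)
        # 20(%esp)  dst  = (FRAME_OFFSET+16)(%esp)
        # 24(%esp)  src  = (FRAME_OFFSET+20)(%esp)

With frame pointers off, FRAME_OFFSET is 0 and the offsets fall back to the 12/16/20 the code used before the patch. The 64-bit functions take their arguments in registers, so they only need the FRAME_BEGIN/FRAME_END pair.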
arch/x86/crypto/camellia-aesni-avx-asm_64.S  +15
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
···
  * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
  *   7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
  */
+        FRAME_BEGIN
 
         leaq 8 * 16(%rax), %rcx;
 
···
                      %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
                      %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
 
+        FRAME_END
         ret;
 
 .align 8
···
  * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
  *   7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
  */
+        FRAME_BEGIN
 
         leaq 8 * 16(%rax), %rcx;
 
···
                      %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
                      %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
 
+        FRAME_END
         ret;
 
 .align 8
···
  * %rsi: dst (16 blocks)
  * %rdx: src (16 blocks)
  */
+        FRAME_BEGIN
 
         inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
                      %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
···
                       %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
                       %xmm8, %rsi);
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ecb_enc_16way)
 
···
  * %rsi: dst (16 blocks)
  * %rdx: src (16 blocks)
  */
+        FRAME_BEGIN
 
         cmpl $16, key_length(CTX);
         movl $32, %r8d;
···
                       %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
                       %xmm8, %rsi);
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ecb_dec_16way)
 
···
  * %rsi: dst (16 blocks)
  * %rdx: src (16 blocks)
  */
+        FRAME_BEGIN
 
         cmpl $16, key_length(CTX);
         movl $32, %r8d;
···
                       %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
                       %xmm8, %rsi);
 
+        FRAME_END
         ret;
 ENDPROC(camellia_cbc_dec_16way)
 
···
  * %rdx: src (16 blocks)
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         subq $(16 * 16), %rsp;
         movq %rsp, %rax;
···
                       %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
                       %xmm8, %rsi);
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ctr_16way)
 
···
  * %r8: index for input whitening key
  * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
  */
+        FRAME_BEGIN
 
         subq $(16 * 16), %rsp;
         movq %rsp, %rax;
···
                       %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
                       %xmm8, %rsi);
 
+        FRAME_END
         ret;
 ENDPROC(camellia_xts_crypt_16way)
 
arch/x86/crypto/camellia-aesni-avx2-asm_64.S  +15
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
···
  * %ymm0..%ymm15: 32 encrypted blocks, order swapped:
  *   7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
  */
+        FRAME_BEGIN
 
         leaq 8 * 32(%rax), %rcx;
 
···
                      %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
                      %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
 
+        FRAME_END
         ret;
 
 .align 8
···
  * %ymm0..%ymm15: 16 plaintext blocks, order swapped:
  *   7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
  */
+        FRAME_BEGIN
 
         leaq 8 * 32(%rax), %rcx;
 
···
                      %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
                      %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
 
+        FRAME_END
         ret;
 
 .align 8
···
  * %rsi: dst (32 blocks)
  * %rdx: src (32 blocks)
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ecb_enc_32way)
 
···
  * %rsi: dst (32 blocks)
  * %rdx: src (32 blocks)
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ecb_dec_32way)
 
···
  * %rsi: dst (32 blocks)
  * %rdx: src (32 blocks)
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(camellia_cbc_dec_32way)
 
···
  * %rdx: src (32 blocks)
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(camellia_ctr_32way)
 
···
  * %r8: index for input whitening key
  * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(camellia_xts_crypt_32way)
 
arch/x86/crypto/cast5-avx-x86_64-asm_64.S  +9
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 
 .file "cast5-avx-x86_64-asm_64.S"
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
         vmovdqu RR4, (6*4*4)(%r11);
         vmovdqu RL4, (7*4*4)(%r11);
 
+        FRAME_END
         ret;
 ENDPROC(cast5_ecb_enc_16way)
 
···
  * %rdx: src
  */
 
+        FRAME_BEGIN
         movq %rsi, %r11;
 
         vmovdqu (0*4*4)(%rdx), RL1;
···
         vmovdqu RR4, (6*4*4)(%r11);
         vmovdqu RL4, (7*4*4)(%r11);
 
+        FRAME_END
         ret;
 ENDPROC(cast5_ecb_dec_16way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(cast5_cbc_dec_16way)
 
···
  * %rdx: src
  * %rcx: iv (big endian, 64bit)
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(cast5_ctr_16way)
arch/x86/crypto/cast6-avx-x86_64-asm_64.S  +13
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "glue_helper-asm-avx.S"
 
 .file "cast6-avx-x86_64-asm_64.S"
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
 
         store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(cast6_ecb_enc_8way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
 
         store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(cast6_ecb_dec_8way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(cast6_cbc_dec_8way)
 
···
  * %rdx: src
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(cast6_ctr_8way)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(cast6_xts_enc_8way)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(cast6_xts_dec_8way)
arch/x86/crypto/ghash-clmulni-intel_asm.S  +5
···
 
 #include <linux/linkage.h>
 #include <asm/inst.h>
+#include <asm/frame.h>
 
 .data
 
···
 
 /* void clmul_ghash_mul(char *dst, const u128 *shash) */
 ENTRY(clmul_ghash_mul)
+        FRAME_BEGIN
         movups (%rdi), DATA
         movups (%rsi), SHASH
         movaps .Lbswap_mask, BSWAP
···
         call __clmul_gf128mul_ble
         PSHUFB_XMM BSWAP DATA
         movups DATA, (%rdi)
+        FRAME_END
         ret
 ENDPROC(clmul_ghash_mul)
 
···
  * const u128 *shash);
  */
 ENTRY(clmul_ghash_update)
+        FRAME_BEGIN
         cmp $16, %rdx
         jb .Lupdate_just_ret    # check length
         movaps .Lbswap_mask, BSWAP
···
         PSHUFB_XMM BSWAP DATA
         movups DATA, (%rdi)
 .Lupdate_just_ret:
+        FRAME_END
         ret
 ENDPROC(clmul_ghash_update)
arch/x86/crypto/serpent-avx-x86_64-asm_64.S  +13
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "glue_helper-asm-avx.S"
 
 .file "serpent-avx-x86_64-asm_64.S"
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
···
 
         store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ecb_enc_8way_avx)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
···
 
         store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ecb_dec_8way_avx)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
···
 
         store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_cbc_dec_8way_avx)
 
···
  * %rdx: src
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
                       RD2, RK0, RK1, RK2);
···
 
         store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ctr_8way_avx)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         /* regs <= src, dst <= IVs, regs <= regs xor IVs */
         load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_xts_enc_8way_avx)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         /* regs <= src, dst <= IVs, regs <= regs xor IVs */
         load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+        FRAME_END
         ret;
 ENDPROC(serpent_xts_dec_8way_avx)
arch/x86/crypto/serpent-avx2-asm_64.S  +13
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "glue_helper-asm-avx2.S"
 
 .file "serpent-avx2-asm_64.S"
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ecb_enc_16way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ecb_dec_16way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_cbc_dec_16way)
 
···
  * %rdx: src (16 blocks)
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_ctr_16way)
 
···
  * %rdx: src (16 blocks)
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_xts_enc_16way)
 
···
  * %rdx: src (16 blocks)
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         vzeroupper;
 
···
 
         vzeroupper;
 
+        FRAME_END
         ret;
 ENDPROC(serpent_xts_dec_16way)
arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S  +3
···
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "sha1_mb_mgr_datastruct.S"
 
 
···
 # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
 # arg 1 : rcx : state
 ENTRY(sha1_mb_mgr_flush_avx2)
+        FRAME_BEGIN
         push %rbx
 
         # If bit (32+3) is set, then all lanes are empty
···
 
 return:
         pop %rbx
+        FRAME_END
         ret
 
 return_null:
arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S  +3
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "sha1_mb_mgr_datastruct.S"
 
 
···
 # arg 1 : rcx : state
 # arg 2 : rdx : job
 ENTRY(sha1_mb_mgr_submit_avx2)
+        FRAME_BEGIN
         push %rbx
         push %r12
 
···
 return:
         pop %r12
         pop %rbx
+        FRAME_END
         ret
 
 return_null:
arch/x86/crypto/twofish-avx-x86_64-asm_64.S  +13
···
  */
 
 #include <linux/linkage.h>
+#include <asm/frame.h>
 #include "glue_helper-asm-avx.S"
 
 .file "twofish-avx-x86_64-asm_64.S"
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
 
         store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
+        FRAME_END
         ret;
 ENDPROC(twofish_ecb_enc_8way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
 
         store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(twofish_ecb_dec_8way)
 
···
  * %rsi: dst
  * %rdx: src
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(twofish_cbc_dec_8way)
 
···
  * %rdx: src
  * %rcx: iv (little endian, 128bit)
  */
+        FRAME_BEGIN
 
         pushq %r12;
 
···
 
         popq %r12;
 
+        FRAME_END
         ret;
 ENDPROC(twofish_ctr_8way)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
+        FRAME_END
         ret;
 ENDPROC(twofish_xts_enc_8way)
 
···
  * %rdx: src
  * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
  */
+        FRAME_BEGIN
 
         movq %rsi, %r11;
 
···
         /* dst <= regs xor IVs(in dst) */
         store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+        FRAME_END
         ret;
 ENDPROC(twofish_xts_dec_8way)