Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xtensa: new fast_alloca handler

Instead of emulating the movsp instruction in the kernel, use the window
underflow handler to load the missing register window and retry the
failed movsp.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Chris Zankel <chris@zankel.net>

authored by

Max Filippov and committed by
Chris Zankel
fff96d69 99d5040e

+40 -152
+40 -152
arch/xtensa/kernel/entry.S
··· 31 31 /* Unimplemented features. */ 32 32 33 33 #undef KERNEL_STACK_OVERFLOW_CHECK 34 - #undef ALLOCA_EXCEPTION_IN_IRAM 35 34 36 35 /* Not well tested. 37 36 * ··· 818 819 * 819 820 * The ALLOCA handler is entered when user code executes the MOVSP 820 821 * instruction and the caller's frame is not in the register file. 821 - * In this case, the caller frame's a0..a3 are on the stack just 822 - * below sp (a1), and this handler moves them. 823 822 * 824 - * For "MOVSP <ar>,<as>" without destination register a1, this routine 825 - * simply moves the value from <as> to <ar> without moving the save area. 823 + * This algorithm was taken from Ross Morley's RTOS Porting Layer: 824 + * 825 + * /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S 826 + * 827 + * It leverages the existing window spill/fill routines and their support for 828 + * double exceptions. The 'movsp' instruction will only cause an exception if 829 + * the next window needs to be loaded. In fact this ALLOCA exception may be 830 + * replaced at some point by changing the hardware to do an underflow exception 831 + * of the proper size instead. 832 + * 833 + * This algorithm simply backs out the register changes started by the user 834 + * exception handler, makes it appear that we have started a window underflow 835 + * by rotating the window back and then setting the old window base (OWB) in 836 + * the 'ps' register with the rolled back window base. The 'movsp' instruction 837 + * will be re-executed and this time since the next window frame is in the 838 + * active AR registers it won't cause an exception. 839 + * 840 + * If the WindowUnderflow code gets a TLB miss the page will get mapped 841 + * and the partial WindowUnderflow will be handled in the double exception 842 + * handler. 
826 843 * 827 844 * Entry condition: 828 845 * ··· 853 838 * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception 854 839 */ 855 840 856 - #if XCHAL_HAVE_BE 857 - #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4 858 - #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4 859 - #else 860 - #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4 861 - #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4 862 - #endif 863 - 864 841 ENTRY(fast_alloca) 842 + rsr a0, windowbase 843 + rotw -1 844 + rsr a2, ps 845 + extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH 846 + xor a3, a3, a4 847 + l32i a4, a6, PT_AREG0 848 + l32i a1, a6, PT_DEPC 849 + rsr a6, depc 850 + wsr a1, depc 851 + slli a3, a3, PS_OWB_SHIFT 852 + xor a2, a2, a3 853 + wsr a2, ps 854 + rsync 865 855 866 - /* We shouldn't be in a double exception. */ 867 - 868 - l32i a0, a2, PT_DEPC 869 - _bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double 870 - 871 - rsr a0, depc # get a2 872 - s32i a4, a2, PT_AREG4 # save a4 and 873 - s32i a3, a2, PT_AREG3 874 - s32i a0, a2, PT_AREG2 # a2 to stack 875 - 876 - /* Exit critical section. */ 877 - 878 - movi a0, 0 879 - rsr a3, excsave1 880 - s32i a0, a3, EXC_TABLE_FIXUP 881 - 882 - rsr a4, epc1 # get exception address 883 - 884 - #ifdef ALLOCA_EXCEPTION_IN_IRAM 885 - #error iram not supported 886 - #else 887 - /* Note: l8ui not allowed in IRAM/IROM!! 
*/ 888 - l8ui a0, a4, 1 # read as(src) from MOVSP instruction 889 - #endif 890 - movi a3, .Lmovsp_src 891 - _EXTUI_MOVSP_SRC(a0) # extract source register number 892 - addx8 a3, a0, a3 893 - jx a3 894 - 895 - .Lunhandled_double: 896 - wsr a0, excsave1 897 - movi a0, unrecoverable_exception 898 - callx0 a0 899 - 900 - .align 8 901 - .Lmovsp_src: 902 - l32i a3, a2, PT_AREG0; _j 1f; .align 8 903 - mov a3, a1; _j 1f; .align 8 904 - l32i a3, a2, PT_AREG2; _j 1f; .align 8 905 - l32i a3, a2, PT_AREG3; _j 1f; .align 8 906 - l32i a3, a2, PT_AREG4; _j 1f; .align 8 907 - mov a3, a5; _j 1f; .align 8 908 - mov a3, a6; _j 1f; .align 8 909 - mov a3, a7; _j 1f; .align 8 910 - mov a3, a8; _j 1f; .align 8 911 - mov a3, a9; _j 1f; .align 8 912 - mov a3, a10; _j 1f; .align 8 913 - mov a3, a11; _j 1f; .align 8 914 - mov a3, a12; _j 1f; .align 8 915 - mov a3, a13; _j 1f; .align 8 916 - mov a3, a14; _j 1f; .align 8 917 - mov a3, a15; _j 1f; .align 8 918 - 919 - 1: 920 - 921 - #ifdef ALLOCA_EXCEPTION_IN_IRAM 922 - #error iram not supported 923 - #else 924 - l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction 925 - #endif 926 - addi a4, a4, 3 # step over movsp 927 - _EXTUI_MOVSP_DST(a0) # extract destination register 928 - wsr a4, epc1 # save new epc_1 929 - 930 - _bnei a0, 1, 1f # no 'movsp a1, ax': jump 931 - 932 - /* Move the save area. This implies the use of the L32E 933 - * and S32E instructions, because this move must be done with 934 - * the user's PS.RING privilege levels, not with ring 0 935 - * (kernel's) privileges currently active with PS.EXCM 936 - * set. Note that we have stil registered a fixup routine with the 937 - * double exception vector in case a double exception occurs. 938 - */ 939 - 940 - /* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. 
*/ 941 - 942 - l32e a0, a1, -16 943 - l32e a4, a1, -12 944 - s32e a0, a3, -16 945 - s32e a4, a3, -12 946 - l32e a0, a1, -8 947 - l32e a4, a1, -4 948 - s32e a0, a3, -8 949 - s32e a4, a3, -4 950 - 951 - /* Restore stack-pointer and all the other saved registers. */ 952 - 953 - mov a1, a3 954 - 955 - l32i a4, a2, PT_AREG4 956 - l32i a3, a2, PT_AREG3 957 - l32i a0, a2, PT_AREG0 958 - l32i a2, a2, PT_AREG2 959 - rfe 960 - 961 - /* MOVSP <at>,<as> was invoked with <at> != a1. 962 - * Because the stack pointer is not being modified, 963 - * we should be able to just modify the pointer 964 - * without moving any save area. 965 - * The processor only traps these occurrences if the 966 - * caller window isn't live, so unfortunately we can't 967 - * use this as an alternate trap mechanism. 968 - * So we just do the move. This requires that we 969 - * resolve the destination register, not just the source, 970 - * so there's some extra work. 971 - * (PERHAPS NOT REALLY NEEDED, BUT CLEANER...) 972 - */ 973 - 974 - /* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. 
*/ 975 - 976 - 1: movi a4, .Lmovsp_dst 977 - addx8 a4, a0, a4 978 - jx a4 979 - 980 - .align 8 981 - .Lmovsp_dst: 982 - s32i a3, a2, PT_AREG0; _j 1f; .align 8 983 - mov a1, a3; _j 1f; .align 8 984 - s32i a3, a2, PT_AREG2; _j 1f; .align 8 985 - s32i a3, a2, PT_AREG3; _j 1f; .align 8 986 - s32i a3, a2, PT_AREG4; _j 1f; .align 8 987 - mov a5, a3; _j 1f; .align 8 988 - mov a6, a3; _j 1f; .align 8 989 - mov a7, a3; _j 1f; .align 8 990 - mov a8, a3; _j 1f; .align 8 991 - mov a9, a3; _j 1f; .align 8 992 - mov a10, a3; _j 1f; .align 8 993 - mov a11, a3; _j 1f; .align 8 994 - mov a12, a3; _j 1f; .align 8 995 - mov a13, a3; _j 1f; .align 8 996 - mov a14, a3; _j 1f; .align 8 997 - mov a15, a3; _j 1f; .align 8 998 - 999 - 1: l32i a4, a2, PT_AREG4 1000 - l32i a3, a2, PT_AREG3 1001 - l32i a0, a2, PT_AREG0 1002 - l32i a2, a2, PT_AREG2 1003 - rfe 1004 - 856 + _bbci.l a4, 31, 4f 857 + rotw -1 858 + _bbci.l a8, 30, 8f 859 + rotw -1 860 + j _WindowUnderflow12 861 + 8: j _WindowUnderflow8 862 + 4: j _WindowUnderflow4 1005 863 ENDPROC(fast_alloca) 1006 864 1007 865 /*