Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

microblaze: Add libgcc functions directly to the kernel

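Replace the libgcc functions that were previously linked in from the toolchain with in-kernel implementations: C for the 64-bit shifts and optimized assembly for multiply, divide and modulo.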

Signed-off-by: Michal Simek <monstr@monstr.eu>

+607 -28
-3
arch/microblaze/Makefile
··· 42 42 LDFLAGS := 43 43 LDFLAGS_vmlinux := 44 44 45 - LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) 46 - 47 45 head-y := arch/microblaze/kernel/head.o 48 46 libs-y += arch/microblaze/lib/ 49 - libs-y += $(LIBGCC) 50 47 core-y += arch/microblaze/kernel/ 51 48 core-y += arch/microblaze/mm/ 52 49 core-y += arch/microblaze/platform/
-25
arch/microblaze/kernel/microblaze_ksyms.c
··· 21 21 #include <linux/ftrace.h> 22 22 #include <linux/uaccess.h> 23 23 24 - /* 25 - * libgcc functions - functions that are used internally by the 26 - * compiler... (prototypes are not correct though, but that 27 - * doesn't really matter since they're not versioned). 28 - */ 29 - extern void __ashldi3(void); 30 - EXPORT_SYMBOL(__ashldi3); 31 - extern void __ashrdi3(void); 32 - EXPORT_SYMBOL(__ashrdi3); 33 - extern void __divsi3(void); 34 - EXPORT_SYMBOL(__divsi3); 35 - extern void __lshrdi3(void); 36 - EXPORT_SYMBOL(__lshrdi3); 37 - extern void __modsi3(void); 38 - EXPORT_SYMBOL(__modsi3); 39 - extern void __mulsi3(void); 40 - EXPORT_SYMBOL(__mulsi3); 41 - extern void __muldi3(void); 42 - EXPORT_SYMBOL(__muldi3); 43 - extern void __ucmpdi2(void); 44 - EXPORT_SYMBOL(__ucmpdi2); 45 - extern void __udivsi3(void); 46 - EXPORT_SYMBOL(__udivsi3); 47 - extern void __umodsi3(void); 48 - EXPORT_SYMBOL(__umodsi3); 49 24 extern char *_ebss; 50 25 EXPORT_SYMBOL_GPL(_ebss); 51 26 #ifdef CONFIG_FUNCTION_TRACER
+10
arch/microblaze/lib/Makefile
··· 11 11 endif 12 12 13 13 lib-y += uaccess_old.o 14 + 15 + lib-y += ashldi3.o 16 + lib-y += ashrdi3.o 17 + lib-y += divsi3.o 18 + lib-y += lshrdi3.o 19 + lib-y += modsi3.o 20 + lib-y += muldi3.o 21 + lib-y += mulsi3.o 22 + lib-y += udivsi3.o 23 + lib-y += umodsi3.o
+29
arch/microblaze/lib/ashldi3.c
··· 1 + #include <linux/module.h> 2 + 3 + #include "libgcc.h" 4 + /* Shift the 64-bit value u left by b bits, working on its two int halves; b == 0 returns u unchanged. */ 5 + long long __ashldi3(long long u, word_type b) 6 + { 7 + DWunion uu, w; 8 + word_type bm; 9 + 10 + if (b == 0) 11 + return u; 12 + 13 + uu.ll = u; 14 + bm = 32 - b; 15 + /* bm <= 0 means b >= 32: the low half moves into the high half and the low half becomes 0 */ 16 + if (bm <= 0) { 17 + w.s.low = 0; 18 + w.s.high = (unsigned int) uu.s.low << -bm; 19 + } else { 20 + const unsigned int carries = (unsigned int) uu.s.low >> bm; 21 + /* carries holds the top b bits of the low half, which are ORed into the high half */ 22 + w.s.low = (unsigned int) uu.s.low << b; 23 + w.s.high = ((unsigned int) uu.s.high << b) | carries; 24 + } 25 + 26 + return w.ll; 27 + } 28 + 29 + EXPORT_SYMBOL(__ashldi3);
+31
arch/microblaze/lib/ashrdi3.c
··· 1 + #include <linux/module.h> 2 + 3 + #include "libgcc.h" 4 + /* Shift the 64-bit value u right by b bits; the high half is shifted as a signed int, so the result is expected to keep the sign of u. b == 0 returns u unchanged. */ 5 + long long __ashrdi3(long long u, word_type b) 6 + { 7 + DWunion uu, w; 8 + word_type bm; 9 + 10 + if (b == 0) 11 + return u; 12 + 13 + uu.ll = u; 14 + bm = 32 - b; 15 + 16 + if (bm <= 0) { 17 + /* b >= 32: the high half becomes all sign bits (1..1 or 0..0) and the low half is taken from the old high half */ 18 + w.s.high = 19 + uu.s.high >> 31; 20 + w.s.low = uu.s.high >> -bm; 21 + } else { 22 + const unsigned int carries = (unsigned int) uu.s.high << bm; 23 + /* carries holds the low b bits of the high half, moved to the top of the low half */ 24 + w.s.high = uu.s.high >> b; 25 + w.s.low = ((unsigned int) uu.s.low >> b) | carries; 26 + } 27 + 28 + return w.ll; 29 + } 30 + 31 + EXPORT_SYMBOL(__ashrdi3);
+73
arch/microblaze/lib/divsi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Signed divide operation for 32 bit integers. 5 + * Input : Dividend in Reg r5 6 + * Divisor in Reg r6 7 + * Output: Result in Reg r3 (0 when the divisor or the dividend is 0) 8 + */ 9 + .text 10 + .globl __divsi3 11 + .type __divsi3, @function 12 + .ent __divsi3 13 + __divsi3: 14 + .frame r1, 0, r15 15 + 16 + addik r1, r1, -16 17 + swi r28, r1, 0 18 + swi r29, r1, 4 19 + swi r30, r1, 8 20 + swi r31, r1, 12 21 + 22 + beqi r6, div_by_zero /* div_by_zero - division error */ 23 + beqi r5, result_is_zero /* result is zero */ 24 + bgeid r5, r5_pos 25 + xor r28, r5, r6 /* delay slot: r28 gets the sign of the result */ 26 + rsubi r5, r5, 0 /* make r5 positive */ 27 + r5_pos: 28 + bgei r6, r6_pos 29 + rsubi r6, r6, 0 /* make r6 positive */ 30 + r6_pos: 31 + addik r30, r0, 0 /* clear mod */ 32 + addik r3, r0, 0 /* clear div */ 33 + addik r29, r0, 32 /* initialize the loop count */ 34 + 35 + /* first part: find the first '1' in r5 */ 36 + div0: 37 + blti r5, div2 /* this traps r5 == 0x80000000 */ 38 + div1: 39 + add r5, r5, r5 /* left shift logical r5 */ 40 + bgtid r5, div1 41 + addik r29, r29, -1 42 + div2: 43 + /* left shift logical r5 to get the '1' into the carry */ 44 + add r5, r5, r5 45 + addc r30, r30, r30 /* move that bit into the mod register */ 46 + rsub r31, r6, r30 /* try to subtract (r30 - r6) */ 47 + blti r31, mod_too_small 48 + /* move r31 to mod since the result was not negative */ 49 + or r30, r0, r31 50 + addik r3, r3, 1 51 + mod_too_small: 52 + addik r29, r29, -1 53 + beqi r29, loop_end 54 + add r3, r3, r3 /* shift in the '1' into div */ 55 + bri div2 /* div2 */ 56 + loop_end: 57 + bgei r28, return_here 58 + brid return_here 59 + rsubi r3, r3, 0 /* negate the result */ 60 + div_by_zero: 61 + result_is_zero: 62 + or r3, r0, r0 /* set result to 0 */ 63 + return_here: 64 + /* restore r28-r31, which were saved on entry, then return and release the 16-byte frame */ 65 + lwi r28, r1, 0 66 + lwi r29, r1, 4 67 + lwi r30, r1, 8 68 + lwi r31, r1, 12 69 + rtsd r15, 8 70 + addik r1, r1, 16 
71 + 72 + .size __divsi3, . - __divsi3 73 + .end __divsi3
+25
arch/microblaze/lib/libgcc.h
··· 1 + #ifndef __ASM_LIBGCC_H 2 + #define __ASM_LIBGCC_H 3 + 4 + #include <asm/byteorder.h> 5 + /* integer type declared with the compiler's word mode */ 6 + typedef int word_type __attribute__ ((mode (__word__))); 7 + /* the two int halves of a double word, declared in the order that matches the byte order */ 8 + #ifdef __BIG_ENDIAN 9 + struct DWstruct { 10 + int high, low; 11 + }; 12 + #elif defined(__LITTLE_ENDIAN) 13 + struct DWstruct { 14 + int low, high; 15 + }; 16 + #else 17 + #error I feel sick. 18 + #endif 19 + /* access a long long either as a whole (ll) or as its high/low halves (s) */ 20 + typedef union { 21 + struct DWstruct s; 22 + long long ll; 23 + } DWunion; 24 + 25 + #endif /* __ASM_LIBGCC_H */
+29
arch/microblaze/lib/lshrdi3.c
··· 1 + #include <linux/module.h> 2 + 3 + #include "libgcc.h" 4 + /* Shift the 64-bit value u right by b bits with zero fill (the high half is shifted as unsigned int); b == 0 returns u unchanged. */ 5 + long long __lshrdi3(long long u, word_type b) 6 + { 7 + DWunion uu, w; 8 + word_type bm; 9 + 10 + if (b == 0) 11 + return u; 12 + 13 + uu.ll = u; 14 + bm = 32 - b; 15 + /* bm <= 0 means b >= 32: the high half moves into the low half and the high half becomes 0 */ 16 + if (bm <= 0) { 17 + w.s.high = 0; 18 + w.s.low = (unsigned int) uu.s.high >> -bm; 19 + } else { 20 + const unsigned int carries = (unsigned int) uu.s.high << bm; 21 + /* carries holds the low b bits of the high half, moved to the top of the low half */ 22 + w.s.high = (unsigned int) uu.s.high >> b; 23 + w.s.low = ((unsigned int) uu.s.low >> b) | carries; 24 + } 25 + 26 + return w.ll; 27 + } 28 + 29 + EXPORT_SYMBOL(__lshrdi3);
+73
arch/microblaze/lib/modsi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Signed modulo operation for 32 bit integers. 5 + * Input : op1 in Reg r5 6 + * op2 in Reg r6 7 + * Output: op1 mod op2 in Reg r3 (0 when op1 or op2 is 0) 8 + */ 9 + 10 + .text 11 + .globl __modsi3 12 + .type __modsi3, @function 13 + .ent __modsi3 14 + 15 + __modsi3: 16 + .frame r1, 0, r15 17 + 18 + addik r1, r1, -16 19 + swi r28, r1, 0 20 + swi r29, r1, 4 21 + swi r30, r1, 8 22 + swi r31, r1, 12 23 + 24 + beqi r6, div_by_zero /* div_by_zero - division error */ 25 + beqi r5, result_is_zero /* result is zero */ 26 + bgeid r5, r5_pos 27 + /* delay slot: r28 gets the sign of the result [depends only on the first arg] */ 28 + add r28, r5, r0 29 + rsubi r5, r5, 0 /* make r5 positive */ 30 + r5_pos: 31 + bgei r6, r6_pos 32 + rsubi r6, r6, 0 /* make r6 positive */ 33 + r6_pos: 34 + addik r3, r0, 0 /* clear mod */ 35 + addik r30, r0, 0 /* clear div */ 36 + addik r29, r0, 32 /* initialize the loop count */ 37 + /* first part: find the first '1' in r5. NOTE(review): there is no 'blti r5, div2' guard before this loop, so r5 == 0x80000000 looks like it would shift to 0 and keep looping - confirm */ 38 + div1: 39 + add r5, r5, r5 /* left shift logical r5 */ 40 + bgeid r5, div1 41 + addik r29, r29, -1 42 + div2: 43 + /* left shift logical r5 to get the '1' into the carry */ 44 + add r5, r5, r5 45 + addc r3, r3, r3 /* move that bit into the mod register */ 46 + rsub r31, r6, r3 /* try to subtract (r3 - r6) */ 47 + blti r31, mod_too_small 48 + /* move r31 to mod since the result was not negative */ 49 + or r3, r0, r31 50 + addik r30, r30, 1 51 + mod_too_small: 52 + addik r29, r29, -1 53 + beqi r29, loop_end 54 + add r30, r30, r30 /* shift in the '1' into div */ 55 + bri div2 /* div2 */ 56 + loop_end: 57 + bgei r28, return_here 58 + brid return_here 59 + rsubi r3, r3, 0 /* negate the result */ 60 + div_by_zero: 61 + result_is_zero: 62 + or r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */ 63 + return_here: 64 + /* restore r28-r31, which were saved on entry, then return and release the 16-byte frame */ 65 + lwi r28, r1, 0 66 + lwi r29, r1, 4 67 + lwi r30, r1, 8 68 + lwi r31, r1, 12 69 + rtsd r15, 8 70 + addik r1, 
r1, 16 71 + 72 + .size __modsi3, . - __modsi3 73 + .end __modsi3
+121
arch/microblaze/lib/muldi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Multiply operation for 64 bit integers, for devices with hard multiply 5 + * Input : Operand1[H] in Reg r5 6 + * Operand1[L] in Reg r6 7 + * Operand2[H] in Reg r7 8 + * Operand2[L] in Reg r8 9 + * Output: Result[H] in Reg r3 10 + * Result[L] in Reg r4 11 + * 12 + * Explanation: 13 + * 14 + * Both input numbers are split into four 16 bit pieces as follows 15 + * op1 = A B C D 16 + * op2 = E F G H 17 + * result = D * H 18 + * + (C * H + D * G) << 16 19 + * + (B * H + C * G + D * F) << 32 20 + * + (A * H + B * G + C * F + D * E) << 48 21 + * 22 + * Only 64 bits of the output are considered 23 + */ 24 + 25 + .text 26 + .globl __muldi3 27 + .type __muldi3, @function 28 + .ent __muldi3 29 + 30 + __muldi3: 31 + addi r1, r1, -40 32 + 33 + /* Save the input operands on the caller's stack (offsets 44-56 lie above the 40 byte frame allocated here) */ 34 + swi r5, r1, 44 35 + swi r6, r1, 48 36 + swi r7, r1, 52 37 + swi r8, r1, 56 38 + 39 + /* Store all the callee saved registers */ 40 + sw r20, r1, r0 41 + swi r21, r1, 4 42 + swi r22, r1, 8 43 + swi r23, r1, 12 44 + swi r24, r1, 16 45 + swi r25, r1, 20 46 + swi r26, r1, 24 47 + swi r27, r1, 28 48 + 49 + /* Load all the 16 bit values for A thru H. NOTE(review): these halfword offsets assume the most significant half is stored first (big-endian) - confirm */ 50 + lhui r20, r1, 44 /* A */ 51 + lhui r21, r1, 46 /* B */ 52 + lhui r22, r1, 48 /* C */ 53 + lhui r23, r1, 50 /* D */ 54 + lhui r24, r1, 52 /* E */ 55 + lhui r25, r1, 54 /* F */ 56 + lhui r26, r1, 56 /* G */ 57 + lhui r27, r1, 58 /* H */ 58 + 59 + /* D * H ==> LSB of the result on stack ==> Store1 */ 60 + mul r9, r23, r27 61 + swi r9, r1, 36 /* Pos2 and Pos3 */ 62 + 63 + /* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ 64 + /* Store the carry generated in position 2 for Pos 3 */ 65 + lhui r11, r1, 36 /* Pos2 */ 66 + mul r9, r22, r27 /* C * H */ 67 + mul r10, r23, r26 /* D * G */ 68 + add r9, r9, r10 69 + addc r12, r0, r0 70 + add r9, r9, r11 71 + addc r12, r12, r0 /* Store the Carry */ 72 + shi r9, r1, 36 /* Store Pos2 */ 73 + swi r9, r1, 32 74 + lhui r11, r1, 32 75 + shi 
r11, r1, 34 /* Store Pos1 */ 76 + 77 + /* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ 78 + mul r9, r21, r27 /* B * H */ 79 + mul r10, r22, r26 /* C * G */ 80 + mul r7, r23, r25 /* D * F */ 81 + add r9, r9, r11 82 + add r9, r9, r10 83 + add r9, r9, r7 84 + swi r9, r1, 32 /* Pos0 and Pos1 */ 85 + 86 + /* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ 87 + lhui r11, r1, 32 /* Pos0 */ 88 + mul r9, r20, r27 /* A * H */ 89 + mul r10, r21, r26 /* B * G */ 90 + mul r7, r22, r25 /* C * F */ 91 + mul r8, r23, r24 /* D * E */ 92 + add r9, r9, r11 93 + add r9, r9, r10 94 + add r9, r9, r7 95 + add r9, r9, r8 96 + sext16 r9, r9 /* Sign extend the MSB */ 97 + shi r9, r1, 32 98 + 99 + /* Move results to r3 and r4 */ 100 + lhui r3, r1, 32 101 + add r3, r3, r12 102 + shi r3, r1, 32 103 + lwi r3, r1, 32 /* Hi Part */ 104 + lwi r4, r1, 36 /* Lo Part */ 105 + 106 + /* Restore Callee saved registers */ 107 + lw r20, r1, r0 108 + lwi r21, r1, 4 109 + lwi r22, r1, 8 110 + lwi r23, r1, 12 111 + lwi r24, r1, 16 112 + lwi r25, r1, 20 113 + lwi r26, r1, 24 114 + lwi r27, r1, 28 115 + 116 + /* Restore Frame and return */ 117 + rtsd r15, 8 118 + addi r1, r1, 40 119 + 120 + .size __muldi3, . - __muldi3 121 + .end __muldi3
+46
arch/microblaze/lib/mulsi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Multiply operation for 32 bit integers, done by shifting and adding. 5 + * Input : Operand1 in Reg r5 6 + * Operand2 in Reg r6 7 + * Output: Result [op1 * op2] in Reg r3 8 + */ 9 + .text 10 + .globl __mulsi3 11 + .type __mulsi3, @function 12 + .ent __mulsi3 13 + 14 + __mulsi3: 15 + .frame r1, 0, r15 16 + add r3, r0, r0 /* clear the result accumulator */ 17 + beqi r5, result_is_zero /* multiply by zero */ 18 + beqi r6, result_is_zero /* multiply by zero */ 19 + bgeid r5, r5_pos 20 + xor r4, r5, r6 /* delay slot: r4 gets the sign of the result */ 21 + rsubi r5, r5, 0 /* make r5 positive */ 22 + r5_pos: 23 + bgei r6, r6_pos 24 + rsubi r6, r6, 0 /* make r6 positive */ 25 + r6_pos: 26 + bri l1 27 + l2: 28 + add r5, r5, r5 /* double r5 for the next bit of r6 */ 29 + l1: 30 + srl r6, r6 /* shift the lowest bit of r6 out into the carry */ 31 + addc r7, r0, r0 /* r7 = that bit */ 32 + beqi r7, l2 /* bit clear: nothing to add for this position */ 33 + bneid r6, l2 /* bit set: loop again while r6 still has bits left */ 34 + add r3, r3, r5 /* delay slot: add the shifted r5 into the result */ 35 + blti r4, negateresult 36 + rtsd r15, 8 37 + nop 38 + negateresult: 39 + rtsd r15, 8 40 + rsub r3, r3, r0 /* delay slot: negate the result */ 41 + result_is_zero: 42 + rtsd r15, 8 43 + addi r3, r0, 0 44 + 45 + .size __mulsi3, . - __mulsi3 46 + .end __mulsi3
+84
arch/microblaze/lib/udivsi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Unsigned divide operation. 5 + * Input : Dividend in Reg r5 6 + * Divisor in Reg r6 7 + * Output: Result in Reg r3 (0 when the divisor or the dividend is 0) 8 + */ 9 + 10 + .text 11 + .globl __udivsi3 12 + .type __udivsi3, @function 13 + .ent __udivsi3 14 + 15 + __udivsi3: 16 + 17 + .frame r1, 0, r15 18 + 19 + addik r1, r1, -12 20 + swi r29, r1, 0 21 + swi r30, r1, 4 22 + swi r31, r1, 8 23 + 24 + beqi r6, div_by_zero /* div_by_zero - division error */ 25 + beqid r5, result_is_zero /* result is zero */ 26 + addik r30, r0, 0 /* clear mod */ 27 + addik r29, r0, 32 /* initialize the loop count */ 28 + 29 + /* check if r6 and r5 are equal - if yes, return 1 */ 30 + rsub r18, r5, r6 31 + beqid r18, return_here 32 + addik r3, r0, 1 33 + 34 + /* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */ 35 + xor r18, r5, r6 36 + bgeid r18, 16 /* bit 31 equal in both: jump 16 bytes ahead, presumably to the rsub compare below */ 37 + add r3, r0, r0 /* we would anyways clear r3 */ 38 + blti r6, return_here /* r6[bit 31 = 1] hence is greater */ 39 + bri checkr6 40 + rsub r18, r6, r5 /* microblazecmp */ 41 + blti r18, return_here 42 + 43 + /* if r6 [bit 31] is set, then return result as 1 */ 44 + checkr6: 45 + bgti r6, div0 46 + brid return_here 47 + addik r3, r0, 1 48 + 49 + /* first part: find the first '1' in r5 */ 50 + div0: 51 + blti r5, div2 52 + div1: 53 + add r5, r5, r5 /* left shift logical r5 */ 54 + bgtid r5, div1 55 + addik r29, r29, -1 56 + div2: 57 + /* left shift logical r5 to get the '1' into the carry */ 58 + add r5, r5, r5 59 + addc r30, r30, r30 /* move that bit into the mod register */ 60 + rsub r31, r6, r30 /* try to subtract (r30 - r6) */ 61 + blti r31, mod_too_small 62 + /* move r31 to mod since the result was not negative */ 63 + or r30, r0, r31 64 + addik r3, r3, 1 65 + mod_too_small: 66 + addik r29, r29, -1 67 + beqi r29, loop_end 68 + add r3, r3, r3 /* shift in the '1' into div */ 69 + bri div2 /* div2 */ 70 + loop_end: 71 + bri return_here 72 + div_by_zero: 73 + result_is_zero: 74 + or r3, r0, r0 /* set 
result to 0 */ 75 + return_here: 76 + /* restore r29-r31, which were saved on entry, then return and release the 12-byte frame */ 77 + lwi r29, r1, 0 78 + lwi r30, r1, 4 79 + lwi r31, r1, 8 80 + rtsd r15, 8 81 + addik r1, r1, 12 82 + 83 + .size __udivsi3, . - __udivsi3 84 + .end __udivsi3
+86
arch/microblaze/lib/umodsi3.S
··· 1 + #include <linux/linkage.h> 2 + 3 + /* 4 + * Unsigned modulo operation for 32 bit integers. 5 + * Input : op1 in Reg r5 6 + * op2 in Reg r6 7 + * Output: op1 mod op2 in Reg r3 (0 when op1 or op2 is 0) 8 + */ 9 + 10 + .text 11 + .globl __umodsi3 12 + .type __umodsi3, @function 13 + .ent __umodsi3 14 + 15 + __umodsi3: 16 + .frame r1, 0, r15 17 + 18 + addik r1, r1, -12 19 + swi r29, r1, 0 20 + swi r30, r1, 4 21 + swi r31, r1, 8 22 + 23 + beqi r6, div_by_zero /* div_by_zero - division error */ 24 + beqid r5, result_is_zero /* result is zero */ 25 + addik r3, r0, 0 /* clear mod */ 26 + addik r30, r0, 0 /* clear div */ 27 + addik r29, r0, 32 /* initialize the loop count */ 28 + 29 + /* check if r6 and r5 are equal - if yes, return 0 */ 30 + rsub r18, r5, r6 31 + beqi r18, return_here 32 + 33 + /* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */ 34 + xor r18, r5, r6 35 + bgeid r18, 16 /* bit 31 equal in both: jump 16 bytes ahead, presumably to the rsub compare below */ 36 + addik r3, r5, 0 37 + blti r6, return_here 38 + bri $lcheckr6 39 + rsub r18, r5, r6 /* microblazecmp */ 40 + bgti r18, return_here 41 + 42 + /* if r6 [bit 31] is set, then return result as r5-r6 */ 43 + $lcheckr6: 44 + bgtid r6, div0 45 + addik r3, r0, 0 46 + addik r18, r0, 0x7fffffff 47 + and r5, r5, r18 48 + and r6, r6, r18 49 + brid return_here 50 + rsub r3, r6, r5 51 + /* first part: find the first '1' in r5 */ 52 + div0: 53 + blti r5, div2 54 + div1: 55 + add r5, r5, r5 /* left shift logical r5 */ 56 + bgeid r5, div1 57 + addik r29, r29, -1 58 + div2: 59 + /* left shift logical r5 to get the '1' into the carry */ 60 + add r5, r5, r5 61 + addc r3, r3, r3 /* move that bit into the mod register */ 62 + rsub r31, r6, r3 /* try to subtract (r3 - r6) */ 63 + blti r31, mod_too_small 64 + /* move r31 to mod since the result was not negative */ 65 + or r3, r0, r31 66 + addik r30, r30, 1 67 + mod_too_small: 68 + addik r29, r29, -1 69 + beqi r29, loop_end 70 + add r30, r30, r30 /* shift in the '1' into div */ 71 + bri div2 /* div2 */ 72 + loop_end: 73 + bri return_here 74 + 
div_by_zero: 75 + result_is_zero: 76 + or r3, r0, r0 /* set result to 0 */ 77 + return_here: 78 + /* restore r29-r31, which were saved on entry, then return and release the 12-byte frame */ 79 + lwi r29, r1, 0 80 + lwi r30, r1, 4 81 + lwi r31, r1, 8 82 + rtsd r15, 8 83 + addik r1, r1, 12 84 + 85 + .size __umodsi3, . - __umodsi3 86 + .end __umodsi3