Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'sparc64-optimized-fls'

Vijay Kumar says:

====================
sparc64: Optimize fls and __fls

SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v3->v4:
- Fixed a typo.
v2->v3:
- Using ENTRY(), ENDPROC() for assembler functions.
- Removed BITS_PER_LONG from __fls.
- Using generic fls64().
- Replaced lzcnt instruction with .word directive.
v1->v2:
- Fixed delay slot issue.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+175 -2
+3 -2
arch/sparc/include/asm/bitops_64.h
··· 22 22 void clear_bit(unsigned long nr, volatile unsigned long *addr); 23 23 void change_bit(unsigned long nr, volatile unsigned long *addr); 24 24 25 + int fls(unsigned int word); 26 + int __fls(unsigned long word); 27 + 25 28 #include <asm-generic/bitops/non-atomic.h> 26 29 27 - #include <asm-generic/bitops/fls.h> 28 - #include <asm-generic/bitops/__fls.h> 29 30 #include <asm-generic/bitops/fls64.h> 30 31 31 32 #ifdef __KERNEL__
+2
arch/sparc/kernel/head_64.S
··· 640 640 nop 641 641 call niagara4_patch_pageops 642 642 nop 643 + call niagara4_patch_fls 644 + nop 643 645 644 646 ba,a,pt %xcc, 80f 645 647 nop
+3
arch/sparc/lib/Makefile
··· 16 16 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o 17 17 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o 18 18 lib-$(CONFIG_SPARC64) += multi3.o 19 + lib-$(CONFIG_SPARC64) += fls.o 20 + lib-$(CONFIG_SPARC64) += fls64.o 21 + obj-$(CONFIG_SPARC64) += NG4fls.o 19 22 20 23 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o 21 24 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
+30
arch/sparc/lib/NG4fls.S
/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
 *
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */

#include <linux/linkage.h>

/* Raw encoding of "lzcnt %o0, %g2", emitted as a data word rather than
 * through the mnemonic.  The operands are fixed by the encoding: the
 * source is always %o0 and the destination is always %g2.
 */
#define LZCNT_O0_G2	.word	0x85b002e8

	.text
	.register	%g2, #scratch
	.register	%g3, #scratch

/* NG4fls: lzcnt-based fls.
 *
 * In:    %o0 = word
 * Out:   %o0 = 64 - lzcnt(%o0)
 * Clobb: %g2, %g3
 *
 * Leaf routine: no register window is allocated, the return is retl.
 */
ENTRY(NG4fls)
	mov	64, %g3
	LZCNT_O0_G2			/* %g2 = lzcnt(%o0) */
	retl
	 sub	%g3, %g2, %o0		/* delay slot: %o0 = 64 - %g2 */
ENDPROC(NG4fls)

/* __NG4fls: lzcnt-based __fls.
 *
 * In:    %o0 = word
 * Out:   %o0 = 63 - lzcnt(%o0); a zero word is returned unchanged (0)
 * Clobb: %g2, %g3 (%g3 only when word is non-zero)
 */
ENTRY(__NG4fls)
	brz,pn	%o0, 1f			/* zero word: skip the subtraction */
	 LZCNT_O0_G2			/* delay slot (not annulled): %g2 = lzcnt(%o0) */
	mov	63, %g3
	sub	%g3, %g2, %o0		/* %o0 = 63 - %g2 */
1:
	retl
	 nop
ENDPROC(__NG4fls)
+9
arch/sparc/lib/NG4patch.S
··· 3 3 * Copyright (C) 2012 David S. Miller <davem@davemloft.net> 4 4 */ 5 5 6 + #include <linux/linkage.h> 7 + 6 8 #define BRANCH_ALWAYS 0x10680000 7 9 #define NOP 0x01000000 8 10 #define NG_DO_PATCH(OLD, NEW) \ ··· 54 52 retl 55 53 nop 56 54 .size niagara4_patch_pageops,.-niagara4_patch_pageops 55 + 56 + ENTRY(niagara4_patch_fls) 57 + NG_DO_PATCH(fls, NG4fls) 58 + NG_DO_PATCH(__fls, __NG4fls) 59 + retl 60 + nop 61 + ENDPROC(niagara4_patch_fls)
+67
arch/sparc/lib/fls.S
/* fls.S: SPARC default fls definition.
 *
 * SPARC default fls definition, which follows the same algorithm as
 * in generic fls(). This function will be boot time patched on T4
 * and onward.
 */

#include <linux/linkage.h>
#include <asm/export.h>

	.text
	.register	%g2, #scratch
	.register	%g3, #scratch

/* int fls(unsigned int word)
 *
 * In:    %o0 = word
 * Out:   %o0 = 1-based position of the most significant set bit in the
 *              low 32 bits of word, or 0 when word is zero
 * Clobb: %o1, %g2, %g3, integer condition codes
 *
 * Register roles:
 *   %o1 = running result; starts at 32 and is reduced by 16/8/4/2/1
 *   %o0 = word, shifted left each time the tested top bits are all clear
 *   %g2 = copy of the shifted word used for the mask tests
 *   %g3 = mask of the top bits currently being tested
 *
 * Instructions indented by one extra space sit in a branch delay slot.
 */
ENTRY(fls)
	brz,pn	%o0, 6f			/* word == 0: return 0 */
	 clr	%o1
	sethi	%hi(0xffff0000), %g3
	mov	%o0, %g2
	btst	%g3, %o0		/* any bit set in the top 16? */
	be,pt	%icc, 8f		/* no: go shift by 16 */
	 mov	32, %o1
	sethi	%hi(0xff000000), %g3
	btst	%g3, %g2		/* any bit set in the top 8? */
	bne,pt	%icc, 3f
	 sethi	%hi(0xf0000000), %g3
	sll	%o0, 8, %o0
1:
	/* Top 8 bits were clear; %o0 has already been shifted by 8. */
	sub	%o1, 8, %o1
	sra	%o0, 0, %o0		/* sign-extend the low 32 bits */
	mov	%o0, %g2
2:
	sethi	%hi(0xf0000000), %g3
3:
	btst	%g3, %g2		/* any bit set in the top 4? */
	bne,pt	%icc, 4f
	 sethi	%hi(0xc0000000), %g3
	sll	%o0, 4, %o0
	sub	%o1, 4, %o1
	sra	%o0, 0, %o0
	mov	%o0, %g2
4:
	btst	%g3, %g2		/* any bit set in the top 2? */
	be,a,pt	%icc, 7f		/* no: shift by 2 (slot annulled if not taken) */
	 sll	%o0, 2, %o0
5:
	/* Last step: subtract 1 from the result when bit 31 is clear. */
	not	%o0
	srl	%o0, 31, %o0		/* %o0 = 1 if bit 31 of the word was 0 */
	sub	%o1, %o0, %o1
6:
	retl
	 sra	%o1, 0, %o0		/* return the result as a sign-extended int */
7:
	sub	%o1, 2, %o1
	ba,pt	%xcc, 5b
	 sra	%o0, 0, %o0
8:
	/* Top 16 bits were clear: shift by 16 and continue with result 16. */
	sll	%o0, 16, %o0
	sethi	%hi(0xff000000), %g3
	sra	%o0, 0, %o0
	mov	%o0, %g2
	btst	%g3, %g2		/* any bit set in the top 8 now? */
	bne,pt	%icc, 2b
	 mov	16, %o1
	ba,pt	%xcc, 1b		/* no: shift by 8 and rejoin the main path */
	 sll	%o0, 8, %o0
ENDPROC(fls)
EXPORT_SYMBOL(fls)
+61
arch/sparc/lib/fls64.S
/* fls64.S: SPARC default __fls definition.
 *
 * SPARC default __fls definition, which follows the same algorithm as
 * in generic __fls(). This function will be boot time patched on T4
 * and onward.
 */

#include <linux/linkage.h>
#include <asm/export.h>

	.text
	.register	%g2, #scratch
	.register	%g3, #scratch

/* int __fls(unsigned long word)
 *
 * In:    %o0 = word (64 bits)
 * Out:   %o0 = 0-based position of the most significant set bit
 *              (this code returns 0 for a zero word)
 * Clobb: %g1, %g2, %g3
 *
 * Register roles:
 *   %g1 = running result; starts at 63 and is reduced by 32/16/8/4/2/1
 *   %o0 = word, shifted left each time the tested top bits are all clear
 *   %g2 = mask of the top bits being tested, built from an all-ones value
 *   %g3 = same purpose as %g2, used for the final single-bit test
 *
 * Instructions indented by one extra space sit in a branch delay slot;
 * none of the branches here is annulled, so the slots always execute.
 */
ENTRY(__fls)
	mov	-1, %g2
	sllx	%g2, 32, %g2		/* mask: top 32 bits */
	and	%o0, %g2, %g2
	brnz,pt	%g2, 1f
	 mov	63, %g1
	sllx	%o0, 32, %o0		/* top 32 clear: shift up, result = 31 */
	mov	31, %g1
1:
	mov	-1, %g2
	sllx	%g2, 48, %g2		/* mask: top 16 bits */
	and	%o0, %g2, %g2
	brnz,pt	%g2, 2f
	 mov	-1, %g2			/* all-ones seed for the next mask */
	sllx	%o0, 16, %o0
	sub	%g1, 16, %g1
2:
	mov	-1, %g2
	sllx	%g2, 56, %g2		/* mask: top 8 bits */
	and	%o0, %g2, %g2
	brnz,pt	%g2, 3f
	 mov	-1, %g2
	sllx	%o0, 8, %o0
	sub	%g1, 8, %g1
3:
	sllx	%g2, 60, %g2		/* mask: top 4 bits */
	and	%o0, %g2, %g2
	brnz,pt	%g2, 4f
	 mov	-1, %g2
	sllx	%o0, 4, %o0
	sub	%g1, 4, %g1
4:
	sllx	%g2, 62, %g2		/* mask: top 2 bits */
	and	%o0, %g2, %g2
	brnz,pt	%g2, 5f
	 mov	-1, %g3			/* all-ones seed for the bit-63 mask */
	sllx	%o0, 2, %o0
	sub	%g1, 2, %g1
5:
	/* Last step: subtract 1 from the result when bit 63 is clear. */
	clr	%g2
	sllx	%g3, 63, %g3		/* mask: bit 63 only */
	and	%o0, %g3, %o0
	movre	%o0, 1, %g2		/* %g2 = 1 if bit 63 was 0 */
	sub	%g1, %g2, %g1
	retl
	 sra	%g1, 0, %o0		/* return the result as a sign-extended int */
ENDPROC(__fls)
EXPORT_SYMBOL(__fls)