Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: ARCv2: jump label: implement jump label patching

Implement jump label patching for ARC. Jump labels provide
an interface to generate dynamic branches using
self-modifying code.

This allows us to implement conditional branches where
changing branch direction is expensive but branch selection
is basically 'free'.

This implementation uses 32-bit NOP and BRANCH instructions
which are forced to be 4-byte aligned to guarantee that they don't
cross an L1 cache line boundary and can be updated atomically.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

authored by

Eugeniy Paltsev and committed by
Vineet Gupta
f091d5a4 2f4ecf68

+253
+8
arch/arc/Kconfig
··· 46 46 select OF_EARLY_FLATTREE 47 47 select PCI_SYSCALL if PCI 48 48 select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING 49 + select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 49 50 50 51 config ARCH_HAS_CACHE_LINE_SIZE 51 52 def_bool y ··· 526 525 config ARC_DBG_TLB_PARANOIA 527 526 bool "Paranoia Checks in Low Level TLB Handlers" 528 527 528 + config ARC_DBG_JUMP_LABEL 529 + bool "Paranoid checks in Static Keys (jump labels) code" 530 + depends on JUMP_LABEL 531 + default y if STATIC_KEYS_SELFTEST 532 + help 533 + Enable paranoid checks and self-test of both ARC-specific and generic 534 + part of static keys (jump labels) related code. 529 535 endif 530 536 531 537 config ARC_BUILTIN_DTB_NAME
+2
arch/arc/include/asm/cache.h
··· 25 25 26 26 #ifndef __ASSEMBLY__ 27 27 28 + #include <linux/build_bug.h> 29 + 28 30 /* Uncached access macros */ 29 31 #define arc_read_uncached_32(ptr) \ 30 32 ({ \
+72
arch/arc/include/asm/jump_label.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARC_JUMP_LABEL_H
#define _ASM_ARC_JUMP_LABEL_H

#ifndef __ASSEMBLY__

#include <linux/stringify.h>
#include <linux/types.h>

/* Size in bytes of the single 32-bit instruction patched at a jump site */
#define JUMP_LABEL_NOP_SIZE 4

/*
 * NOTE about '.balign 4':
 *
 * To make atomic update of a patched instruction possible we need to
 * guarantee that this instruction doesn't cross an L1 cache line boundary.
 *
 * As of today we simply align the instruction which can be patched to 4
 * bytes using the ".balign 4" directive. In that case the patched
 * instruction is aligned with one 16-bit NOP_S if this is required.
 * However the 'align by 4' directive is much stricter than is actually
 * required: it's enough that our 32-bit instruction doesn't cross an L1
 * cache line boundary / L1 I$ fetch block boundary, which can be achieved
 * by using the ".bundle_align_mode" assembler directive. That would save
 * us from adding useless NOP_S padding in most of the cases.
 *
 * TODO: switch to the ".bundle_align_mode" directive when it is
 * supported by the ARC toolchain.
 */

/*
 * Fast-path form: emits a NOP at the jump site and falls through to
 * 'return false'. Runtime patching (arch_jump_label_transform in
 * arch/arc/kernel/jump_label.c) replaces the NOP with a branch to
 * l_yes when the key is enabled. The __jump_table entry records
 * (patch address, target, key) for the generic jump label code.
 */
static __always_inline bool arch_static_branch(struct static_key *key,
					       bool branch)
{
	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
		 "1:							\n"
		 "nop							\n"
		 ".pushsection __jump_table, \"aw\"			\n"
		 ".word 1b, %l[l_yes], %c0				\n"
		 ".popsection						\n"
		 : : "i" (&((char *)key)[branch]) : : l_yes);

	return false;
l_yes:
	return true;
}

/*
 * Inverted form: emits an unconditional branch to l_yes; the runtime
 * patching code replaces it with a NOP when the key is disabled.
 */
static __always_inline bool arch_static_branch_jump(struct static_key *key,
						    bool branch)
{
	asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"	\n"
		 "1:							\n"
		 "b %l[l_yes]						\n"
		 ".pushsection __jump_table, \"aw\"			\n"
		 ".word 1b, %l[l_yes], %c0				\n"
		 ".popsection						\n"
		 : : "i" (&((char *)key)[branch]) : : l_yes);

	return false;
l_yes:
	return true;
}

/* Patch-site addresses and key references are 32-bit words on ARC */
typedef u32 jump_label_t;

/* In-memory layout of one __jump_table entry emitted by the asm above */
struct jump_entry {
	jump_label_t code;	/* address of the patchable instruction (1b) */
	jump_label_t target;	/* branch destination (l_yes) */
	jump_label_t key;	/* &static_key, low bit encodes 'branch' */
};

#endif  /* __ASSEMBLY__ */
#endif
+1
arch/arc/kernel/Makefile
··· 20 20 obj-$(CONFIG_KGDB) += kgdb.o 21 21 obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o 22 22 obj-$(CONFIG_PERF_EVENTS) += perf_event.o 23 + obj-$(CONFIG_JUMP_LABEL) += jump_label.o 23 24 24 25 obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o 25 26 CFLAGS_fpu.o += -mdpfp
+170
arch/arc/kernel/jump_label.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Jump label (static key) support for ARCv2: generates 32-bit NOP /
 * unconditional-branch encodings and patches them in place at jump sites.
 */

#include <linux/kernel.h>
#include <linux/jump_label.h>

#include "asm/cacheflush.h"

#define JUMPLABEL_ERR	"ARC: jump_label: ERROR: "

/* Halt system on fatal error to make debug easier */
#define arc_jl_fatal(format...)						\
({									\
	pr_err(JUMPLABEL_ERR format);					\
	BUG();								\
})

/* Encoding of a single 32-bit NOP instruction */
static inline u32 arc_gen_nop(void)
{
	/* 1x 32bit NOP in middle endian */
	return 0x7000264a;
}

/*
 * Atomic update of patched instruction is only available if this
 * instruction doesn't cross L1 cache line boundary. You can read about
 * the way we achieve this in arc/include/asm/jump_label.h
 */
static inline void instruction_align_assert(void *addr, int len)
{
	unsigned long a = (unsigned long)addr;

	/* Fatal if [addr, addr+len) straddles two L1 cache lines */
	if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT))
		arc_jl_fatal("instruction (addr %px) cross L1 cache line border",
			     addr);
}

/*
 * ARCv2 'Branch unconditionally' instruction:
 * 00000ssssssssss1SSSSSSSSSSNRtttt
 * s S[n:0]   lower bits signed immediate (number is bitfield size)
 * S S[m:n+1] upper bits signed immediate (number is bitfield size)
 * t S[24:21] upper bits signed immediate (branch unconditionally far)
 * N N        <.d> delay slot mode
 * R R        Reserved
 */
static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target)
{
	u32 instruction_l, instruction_r;
	/* Branch offsets are relative to the 32-bit-aligned PC (PCL) */
	u32 pcl = pc & GENMASK(31, 2);
	u32 u_offset = target - pcl;
	u32 s, S, t;

	/*
	 * Offset in a 32-bit branch instruction must fit into s25.
	 * Something is terribly broken if we get such a huge offset within
	 * one function.
	 */
	if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214)
		arc_jl_fatal("gen branch with offset (%d) not fit in s25",
			     (s32)u_offset);

	/*
	 * All instructions are aligned by 2 bytes so we should never get an
	 * offset here which is not 2-byte aligned.
	 */
	if (u_offset & 0x1)
		arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes",
			     (s32)u_offset);

	/* Split the (implicitly halved) signed offset into its bitfields */
	s = (u_offset >> 1) & GENMASK(9, 0);
	S = (u_offset >> 11) & GENMASK(9, 0);
	t = (u_offset >> 21) & GENMASK(3, 0);

	/* 00000ssssssssss1 */
	instruction_l = (s << 1) | 0x1;
	/* SSSSSSSSSSNRtttt */
	instruction_r = (S << 6) | t;

	/* Assemble the two 16-bit halves in middle-endian order */
	return (instruction_r << 16) | (instruction_l & GENMASK(15, 0));
}

/*
 * Patch the instruction at entry->code to either a branch to
 * entry->target (type == JUMP_LABEL_JMP) or a NOP, then flush the
 * icache range so the core fetches the updated instruction.
 */
void arch_jump_label_transform(struct jump_entry *entry,
			       enum jump_label_type type)
{
	jump_label_t *instr_addr = (jump_label_t *)entry->code;
	u32 instr;

	instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE);

	if (type == JUMP_LABEL_JMP)
		instr = arc_gen_branch(entry->code, entry->target);
	else
		instr = arc_gen_nop();

	/* Single aligned 32-bit store: the patch is atomic w.r.t. fetch */
	WRITE_ONCE(*instr_addr, instr);
	flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
}

/* Boot-time variant: nothing to do, the compiler already emitted the NOP */
void arch_jump_label_transform_static(struct jump_entry *entry,
				      enum jump_label_type type)
{
	/*
	 * We use only one NOP type (1x, 4 byte) in arch_static_branch, so
	 * there's no need to patch an identical NOP over the top of it here.
	 * The generic code calls 'arch_jump_label_transform' if the NOP needs
	 * to be replaced by a branch, so 'arch_jump_label_transform_static' is
	 * never called with type other than JUMP_LABEL_NOP.
	 */
	BUG_ON(type != JUMP_LABEL_NOP);
}

#ifdef CONFIG_ARC_DBG_JUMP_LABEL
#define SELFTEST_MSG	"ARC: instruction generation self-test: "

/* One encoding self-test case: inputs plus the known-good encoding */
struct arc_gen_branch_testdata {
	jump_label_t pc;
	jump_label_t target_address;
	u32 expected_instr;
};

/* Return 0 if arc_gen_branch() reproduces the expected encoding */
static __init int branch_gen_test(const struct arc_gen_branch_testdata *test)
{
	u32 instr_got;

	instr_got = arc_gen_branch(test->pc, test->target_address);
	if (instr_got == test->expected_instr)
		return 0;

	pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n",
	       test->pc, test->target_address,
	       test->expected_instr, instr_got);

	return -EFAULT;
}

/*
 * Offset field in branch instruction is not continuous. Test all
 * available offset field and sign combinations. Test data is generated
 * from real working code.
 */
static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = {
	{0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */
	{0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */
	{0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */
	{0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */
	{0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big  (-30892) offs */
	{0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */
	{0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */
	{0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */
	{0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */
	{0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */
	{0x9000bb38, 0x9003b340, 0x17c00009}, /* big  (+194568) offs */
	{0x90008f44, 0x90578d80, 0xb7c2063d}  /* huge (+5701180) offs */
};

/* Run every encoding test case at early boot; fatal-log on mismatch */
static __init int instr_gen_test(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++)
		if (branch_gen_test(&arcgenbr_test_data[i]))
			return -EFAULT;

	pr_info(SELFTEST_MSG "OK\n");

	return 0;
}
early_initcall(instr_gen_test);

#endif /* CONFIG_ARC_DBG_JUMP_LABEL */