Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: memset: Limit excessive `noreorder' assembly mode use

Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro). No change in machine code produced.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
Fix conflict with commit 932afdeec18b ("MIPS: Add Kconfig variable for
CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>

Authored by Maciej W. Rozycki and committed by Paul Burton.
68dec269 2f7619ae

+24 -24
arch/mips/lib/memset.S
··· 78 78 #endif 79 79 .endm 80 80 81 - .set noreorder 82 81 .align 5 83 82 84 83 /* ··· 93 94 .endif 94 95 95 96 sltiu t0, a2, STORSIZE /* very small region? */ 97 + .set noreorder 96 98 bnez t0, .Lsmall_memset\@ 97 99 andi t0, a0, STORMASK /* aligned? */ 100 + .set reorder 98 101 99 102 #ifdef CONFIG_CPU_MICROMIPS 100 103 move t8, a1 /* used by 'swp' instruction */ 101 104 move t9, a1 102 105 #endif 106 + .set noreorder 103 107 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS 104 108 beqz t0, 1f 105 109 PTR_SUBU t0, STORSIZE /* alignment in bytes */ ··· 113 111 PTR_SUBU t0, AT /* alignment in bytes */ 114 112 .set at 115 113 #endif 114 + .set reorder 116 115 117 116 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR 118 117 R10KCBARRIER(0(ra)) ··· 128 125 #else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ 129 126 #define STORE_BYTE(N) \ 130 127 EX(sb, a1, N(a0), .Lbyte_fixup\@); \ 128 + .set noreorder; \ 131 129 beqz t0, 0f; \ 132 - PTR_ADDU t0, 1; 130 + PTR_ADDU t0, 1; \ 131 + .set reorder; 133 132 134 133 PTR_ADDU a2, t0 /* correct size */ 135 134 PTR_ADDU t0, 1 ··· 153 148 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ 154 149 1: ori t1, a2, 0x3f /* # of full blocks */ 155 150 xori t1, 0x3f 151 + andi t0, a2, 0x40-STORSIZE 156 152 beqz t1, .Lmemset_partial\@ /* no block to fill */ 157 - andi t0, a2, 0x40-STORSIZE 158 153 159 154 PTR_ADDU t1, a0 /* end address */ 160 - .set reorder 161 155 1: PTR_ADDIU a0, 64 162 156 R10KCBARRIER(0(ra)) 163 157 f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode 164 158 bne t1, a0, 1b 165 - .set noreorder 166 159 167 160 .Lmemset_partial\@: 168 161 R10KCBARRIER(0(ra)) ··· 176 173 PTR_SUBU t1, AT 177 174 .set at 178 175 #endif 176 + PTR_ADDU a0, t0 /* dest ptr */ 179 177 jr t1 180 - PTR_ADDU a0, t0 /* dest ptr */ 181 178 182 - .set push 183 - .set noreorder 184 - .set nomacro 185 179 /* ... but first do longs ... 
*/ 186 180 f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode 187 - 2: .set pop 188 - andi a2, STORMASK /* At most one long to go */ 181 + 2: andi a2, STORMASK /* At most one long to go */ 189 182 183 + .set noreorder 190 184 beqz a2, 1f 191 185 #ifdef CONFIG_CPU_HAS_LOAD_STORE_LR 192 186 PTR_ADDU a0, a2 /* What's left */ 187 + .set reorder 193 188 R10KCBARRIER(0(ra)) 194 189 #ifdef __MIPSEB__ 195 190 EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@) ··· 196 195 #endif 197 196 #else 198 197 PTR_SUBU t0, $0, a2 198 + .set reorder 199 199 move a2, zero /* No remaining longs */ 200 200 PTR_ADDIU t0, 1 201 201 STORE_BYTE(0) ··· 212 210 #endif 213 211 0: 214 212 #endif 215 - 1: jr ra 216 - move a2, zero 213 + 1: move a2, zero 214 + jr ra 217 215 218 216 .Lsmall_memset\@: 217 + PTR_ADDU t1, a0, a2 219 218 beqz a2, 2f 220 - PTR_ADDU t1, a0, a2 221 219 222 220 1: PTR_ADDIU a0, 1 /* fill bytewise */ 223 221 R10KCBARRIER(0(ra)) 222 + .set noreorder 224 223 bne t1, a0, 1b 225 224 EX(sb, a1, -1(a0), .Lsmall_fixup\@) 225 + .set reorder 226 226 227 - 2: jr ra /* done */ 228 - move a2, zero 227 + 2: move a2, zero 228 + jr ra /* done */ 229 229 .if __memset == 1 230 230 END(memset) 231 231 .set __memset, 0 ··· 241 237 * a2 = a2 - t0 + 1 242 238 */ 243 239 PTR_SUBU a2, t0 240 + PTR_ADDIU a2, 1 244 241 jr ra 245 - PTR_ADDIU a2, 1 246 242 #endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ 247 243 248 244 .Lfirst_fixup\@: 249 245 /* unset_bytes already in a2 */ 250 246 jr ra 251 - nop 252 247 253 248 .Lfwd_fixup\@: 254 249 /* ··· 258 255 andi a2, 0x3f 259 256 LONG_L t0, THREAD_BUADDR(t0) 260 257 LONG_ADDU a2, t1 258 + LONG_SUBU a2, t0 261 259 jr ra 262 - LONG_SUBU a2, t0 263 260 264 261 .Lpartial_fixup\@: 265 262 /* ··· 270 267 andi a2, STORMASK 271 268 LONG_L t0, THREAD_BUADDR(t0) 272 269 LONG_ADDU a2, a0 270 + LONG_SUBU a2, t0 273 271 jr ra 274 - LONG_SUBU a2, t0 275 272 276 273 .Llast_fixup\@: 277 274 /* unset_bytes already in a2 */ 278 275 jr ra 279 - nop 280 276 281 277 .Lsmall_fixup\@: 282 278 
/* 283 279 * unset_bytes = end_addr - current_addr + 1 284 280 * a2 = t1 - a0 + 1 285 281 */ 286 - .set reorder 287 282 PTR_SUBU a2, t1, a0 288 283 PTR_ADDIU a2, 1 289 284 jr ra 290 - .set noreorder 291 285 292 286 .endm 293 287 ··· 298 298 299 299 LEAF(memset) 300 300 EXPORT_SYMBOL(memset) 301 + move v0, a0 /* result */ 301 302 beqz a1, 1f 302 - move v0, a0 /* result */ 303 303 304 304 andi a1, 0xff /* spread fillword */ 305 305 LONG_SLL t1, a1, 8