Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>

/*
 * Save the FP/SIMD state (V0-V31, then FPSR/FPCR) to the buffer at \state.
 *   \state: x register pointing at the save area; clobbered (the final
 *           stp writeback advances it by 16 * 30 bytes)
 *   \tmpnr: number of a scratch general-purpose register
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// FPSR at original offset 16 * 32
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]	// FPCR immediately after FPSR
.endm
33
/*
 * Write \state (an x register holding the desired FPCR value) to FPCR,
 * using \tmp (another x register) as scratch.
 */
.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm
45
/*
 * Restore the FP/SIMD state saved by fpsimd_save (V0-V31, then FPSR/FPCR)
 * from the buffer at \state.  Mirrors fpsimd_save's layout, including the
 * final ldp writeback that advances \state by 16 * 30 bytes.
 *
 * Clobbers \state.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	ldr	w\tmpnr, [\state, #16 * 2]	// FPSR at original offset 16 * 32
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state	// only writes FPCR if it changed
.endm
69
/* Sanity-check macros to help avoid encoding garbage instructions */

/* Build-time check that \nr names a general-purpose register (x0-x30) */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

/* Build-time check that \znr names an SVE vector register (z0-z31) */
.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

/* Build-time check that \pnr names an SVE predicate register (p0-p15) */
.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

/* Build-time check that \n lies in [\min, \max] */
.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

/*
 * Build-time check that \v names a valid SME ZA vector select register
 * (the instruction encodings below only accept w12-w15).
 */
.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm
101
/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */

/*
 * The VL-scaled 9-bit signed immediate is split across the encoding as
 * imm9l (bits 12:10, from offset bits 2:0) and imm9h (bits 21:16, from
 * offset bits 8:3) — hence the two mask-and-shift terms in each .inst.
 */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
152
/* RDVL X\nx, #\imm  — read the current SVE vector length, scaled by \imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)	// 6-bit signed multiplier
.endm

/* RDFFR (unpredicated): RDFFR P\np.B  — read the first-fault register */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B  — write the first-fault register from P\np */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B  — set every element of P\np to false */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm
182
/* SME instruction encodings for non-SME-capable assemblers */
/* (pre binutils 2.38/LLVM 13) */

/* RDSVL X\nx, #\imm  — read the streaming SVE vector length, scaled by \imm */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)	// 6-bit signed multiplier
.endm

/*
 * STR (vector from ZA array):
 * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * \nw must be w12-w15 (enforced by _sme_check_wv); only its low two bits
 * are encoded.  The same 3-bit \offset selects both the ZA row offset and
 * the VL-scaled memory offset.
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * LDR (vector to ZA array):
 * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * Same operand rules as _sme_str_zav above.
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * Zero the entire ZA array
 * ZERO ZA
 */
.macro zero_za
	.inst	0xc00800ff
.endm
230
/*
 * __for — recursive helper for _for below.  Splits the [\from, \to]
 * range in half at each level (rather than recursing linearly) to keep
 * the assembler's macro recursion depth logarithmic.  The % prefix
 * forces numeric evaluation of the expressions (requires .altmacro).
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

/*
 * _for var, from, to, insn — expand \insn once for each integer value of
 * \var in [\from, \to].  A temporary one-argument macro (_for__body)
 * binds \var for each expansion; altmacro mode is enabled only around
 * the range walk and disabled while \insn itself is expanded.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
253
/*
 * Update ZCR_EL1.LEN with the new VQ.
 *   \xvqminus1: x register holding the new (VQ - 1) value
 *   \xtmp, \xtmp2: scratch x registers
 * The write is skipped when LEN already matches, since writes may be
 * self-synchronising and therefore expensive.
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_ZCR_EL1
	bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f
	msr_s		SYS_ZCR_EL1, \xtmp2	// self-synchronising
921:
.endm

/*
 * Update SMCR_EL1.LEN with the new VQ — same scheme as sve_load_vq,
 * applied to the streaming-mode vector length.
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_SMCR_EL1
	bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f
	msr_s		SYS_SMCR_EL1, \xtmp2	// self-synchronising
921:
.endm
275
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b	// self-move zeroes bits above 128
.endm

/* Flush all 32 Z registers (keep the low 128 bits, zero the rest) */
.macro sve_flush_z
	_for n, 0, 31, _sve_flush_z \n
.endm

/* Clear all 16 predicate registers */
.macro sve_flush_p
	_for n, 0, 15, _sve_pfalse \n
.endm

/* Clear the first-fault register (p0 must already be all-false) */
.macro sve_flush_ffr
	_sve_wrffr 0
.endm
291
/*
 * Save the SVE state to memory.
 *   \nxbase:   number of an x register; Z regs go at VL-scaled offsets
 *              -34..-3, P regs at -16..-1, FFR (if saved) at offset 0
 *   \xpfpsr:   x register pointing at the FPSR/FPCR save slot
 *   \save_ffr: x register; non-zero means save FFR (via p0), zero means
 *              just zero the FFR slot
 *   \nxtmp:    number of a scratch general-purpose register
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
	_for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
	_for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		cbz		\save_ffr, 921f
		_sve_rdffr	0			// FFR -> p0
		_sve_str_p	0, \nxbase		// store FFR at offset 0
		_sve_ldr_p	0, \nxbase, -16		// reload p0's saved value
		b		922f
921:
		str		xzr, [x\nxbase]		// Zero out FFR
922:
		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm

/*
 * Load the SVE state from memory; layout mirrors sve_save above.
 * FFR is restored through p0 first, then all P regs (including p0) are
 * reloaded from their saved values.
 *   \restore_ffr: x register; non-zero means restore FFR
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	_for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		cbz		\restore_ffr, 921f
		_sve_ldr_p	0, \nxbase		// saved FFR -> p0
		_sve_wrffr	0
921:
	_for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm
322
/*
 * Save the ZA array, one horizontal vector per iteration.
 *   \nxbase: number of the x register holding the destination pointer
 *            (clobbered: advanced by \xvl per row)
 *   \xvl:    x register holding the streaming vector length in bytes,
 *            which is also the number of ZA rows to walk
 *   \nw:     number of the w register used as the row counter — must be
 *            12-15 (checked inside _sme_str_zav); clobbered
 */
.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_str_zav	\nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl	// next row's buffer slot
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw			// done after \xvl rows
	bne	423b
.endm

/*
 * Load the ZA array, one horizontal vector per iteration; same operand
 * rules and clobbers as sme_save_za above.
 */
.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_ldr_zav	\nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl	// next row's buffer slot
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw			// done after \xvl rows
	bne	423b
.endm
343.endm