/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
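//
// Zvkb supplies the vror.vi vector rotates used below; every other
// instruction in this file requires only the base 'V' extension.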

#include <linux/linkage.h>

.text
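// Allow the assembler to accept Zvkb instructions regardless of the base
// -march the kernel is built with; the caller is responsible for verifying at
// runtime that the CPU actually implements Zvkb before entering this code.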
.option arch, +zvkb

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define IVP		a4

#define CONSTS0		a5
#define CONSTS1		a6
#define CONSTS2		a7
#define CONSTS3		t0
#define TMP		t1
#define VL		t2
#define STRIDE		t3
#define NROUNDS		t4
#define KEY0		s0
#define KEY1		s1
#define KEY2		s2
#define KEY3		s3
#define KEY4		s4
#define KEY5		s5
#define KEY6		s6
#define KEY7		s7
#define COUNTER		s8
#define NONCE0		s9
#define NONCE1		s10
#define NONCE2		s11
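
// Together, the CONSTS*, KEY*, COUNTER, and NONCE* registers hold one copy of
// the 16-word ChaCha state matrix in RFC 7539 order: words 0-3 are the
// "expand 32-byte k" constants, words 4-11 the key, word 12 the block
// counter, and words 13-15 the nonce.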

.macro	chacha_round	a0, b0, c0, d0, a1, b1, c1, d1, \
			a2, b2, c2, d2, a3, b3, c3, d3
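	// Each invocation performs four independent quarter-rounds, one per
	// (a, b, c, d) group, interleaved to expose instruction-level
	// parallelism.  Zvkb has no immediate-form rotate-left, so each
	// rol(x, n) is implemented as a vror.vi rotate-right by 32 - n.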
	// a += b; d ^= a; d = rol(d, 16);
	vadd.vv		\a0, \a0, \b0
	vadd.vv		\a1, \a1, \b1
	vadd.vv		\a2, \a2, \b2
	vadd.vv		\a3, \a3, \b3
	vxor.vv		\d0, \d0, \a0
	vxor.vv		\d1, \d1, \a1
	vxor.vv		\d2, \d2, \a2
	vxor.vv		\d3, \d3, \a3
	vror.vi		\d0, \d0, 32 - 16
	vror.vi		\d1, \d1, 32 - 16
	vror.vi		\d2, \d2, 32 - 16
	vror.vi		\d3, \d3, 32 - 16

	// c += d; b ^= c; b = rol(b, 12);
	vadd.vv		\c0, \c0, \d0
	vadd.vv		\c1, \c1, \d1
	vadd.vv		\c2, \c2, \d2
	vadd.vv		\c3, \c3, \d3
	vxor.vv		\b0, \b0, \c0
	vxor.vv		\b1, \b1, \c1
	vxor.vv		\b2, \b2, \c2
	vxor.vv		\b3, \b3, \c3
	vror.vi		\b0, \b0, 32 - 12
	vror.vi		\b1, \b1, 32 - 12
	vror.vi		\b2, \b2, 32 - 12
	vror.vi		\b3, \b3, 32 - 12

	// a += b; d ^= a; d = rol(d, 8);
	vadd.vv		\a0, \a0, \b0
	vadd.vv		\a1, \a1, \b1
	vadd.vv		\a2, \a2, \b2
	vadd.vv		\a3, \a3, \b3
	vxor.vv		\d0, \d0, \a0
	vxor.vv		\d1, \d1, \a1
	vxor.vv		\d2, \d2, \a2
	vxor.vv		\d3, \d3, \a3
	vror.vi		\d0, \d0, 32 - 8
	vror.vi		\d1, \d1, 32 - 8
	vror.vi		\d2, \d2, 32 - 8
	vror.vi		\d3, \d3, 32 - 8

	// c += d; b ^= c; b = rol(b, 7);
	vadd.vv		\c0, \c0, \d0
	vadd.vv		\c1, \c1, \d1
	vadd.vv		\c2, \c2, \d2
	vadd.vv		\c3, \c3, \d3
	vxor.vv		\b0, \b0, \c0
	vxor.vv		\b1, \b1, \c1
	vxor.vv		\b2, \b2, \c2
	vxor.vv		\b3, \b3, \c3
	vror.vi		\b0, \b0, 32 - 7
	vror.vi		\b1, \b1, 32 - 7
	vror.vi		\b2, \b2, 32 - 7
	vror.vi		\b3, \b3, 32 - 7
.endm
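
// Note that chacha_round touches only the registers passed to it; both
// invocations below pass v0-v15, leaving v16-v31 free to hold input data.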

// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
//		      const u32 iv[4]);
//
// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
// The counter is treated as 32-bit, following the RFC 7539 convention.
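//
// A minimal usage sketch (hypothetical caller; the actual glue code is not
// part of this file).  Per the loads below, iv[0] is the initial block
// counter and iv[1..3] are the nonce words:
//
//	u32 iv[4] = { 0, nonce[0], nonce[1], nonce[2] };
//	chacha20_zvkb(key, src, dst, nbytes, iv);	// nbytes % 64 == 0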
SYM_FUNC_START(chacha20_zvkb)
	srli		LEN, LEN, 6	// Bytes to blocks

	addi		sp, sp, -96
	sd		s0, 0(sp)
	sd		s1, 8(sp)
	sd		s2, 16(sp)
	sd		s3, 24(sp)
	sd		s4, 32(sp)
	sd		s5, 40(sp)
	sd		s6, 48(sp)
	sd		s7, 56(sp)
	sd		s8, 64(sp)
	sd		s9, 72(sp)
	sd		s10, 80(sp)
	sd		s11, 88(sp)

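	// Byte stride between the same word of consecutive ChaCha blocks for
	// the strided segment loads and stores: one 64-byte block.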
	li		STRIDE, 64

	// Set up the initial state matrix in scalar registers.
	li		CONSTS0, 0x61707865	// "expa" little endian
	li		CONSTS1, 0x3320646e	// "nd 3" little endian
	li		CONSTS2, 0x79622d32	// "2-by" little endian
	li		CONSTS3, 0x6b206574	// "te k" little endian
	lw		KEY0, 0(KEYP)
	lw		KEY1, 4(KEYP)
	lw		KEY2, 8(KEYP)
	lw		KEY3, 12(KEYP)
	lw		KEY4, 16(KEYP)
	lw		KEY5, 20(KEYP)
	lw		KEY6, 24(KEYP)
	lw		KEY7, 28(KEYP)
	lw		COUNTER, 0(IVP)
	lw		NONCE0, 4(IVP)
	lw		NONCE1, 8(IVP)
	lw		NONCE2, 12(IVP)

.Lblock_loop:
	// Set vl to the number of blocks to process in this iteration.
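	// With e32/m1, vl is capped at VLEN/32, so a VLEN=128 implementation
	// processes up to 4 blocks per iteration; wider vectors process more.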
	vsetvli		VL, LEN, e32, m1, ta, ma

	// Set up the initial state matrix for the next VL blocks in v0-v15.
	// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
	// Note that only the counter word, at index 12, differs across blocks.
	vmv.v.x		v0, CONSTS0
	vmv.v.x		v1, CONSTS1
	vmv.v.x		v2, CONSTS2
	vmv.v.x		v3, CONSTS3
	vmv.v.x		v4, KEY0
	vmv.v.x		v5, KEY1
	vmv.v.x		v6, KEY2
	vmv.v.x		v7, KEY3
	vmv.v.x		v8, KEY4
	vmv.v.x		v9, KEY5
	vmv.v.x		v10, KEY6
	vmv.v.x		v11, KEY7
	vid.v		v12
	vadd.vx		v12, v12, COUNTER
	vmv.v.x		v13, NONCE0
	vmv.v.x		v14, NONCE1
	vmv.v.x		v15, NONCE2

	// Load the first half of the input data for each block into v16-v23.
	// v{16+i} holds the i'th 32-bit word for all blocks.
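	// The strided segment load gathers eight consecutive 32-bit words
	// from each block, 64 bytes apart, effectively transposing the first
	// half of every block so that word i of each block lands in v{16+i}.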
	vlsseg8e32.v	v16, (INP), STRIDE

	li		NROUNDS, 20
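	// ChaCha20 runs 10 "double rounds": each iteration below does a
	// column round (a quarter-round on each column of the 4x4 state
	// matrix), then a diagonal round (a quarter-round on each diagonal).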
.Lnext_doubleround:
	addi		NROUNDS, NROUNDS, -2
	// column round
	chacha_round	v0, v4, v8, v12, v1, v5, v9, v13, \
			v2, v6, v10, v14, v3, v7, v11, v15
	// diagonal round
	chacha_round	v0, v5, v10, v15, v1, v6, v11, v12, \
			v2, v7, v8, v13, v3, v4, v9, v14
	bnez		NROUNDS, .Lnext_doubleround

	// Load the second half of the input data for each block into v24-v31.
	// v{24+i} holds the {8+i}'th 32-bit word for all blocks.
	addi		TMP, INP, 32
	vlsseg8e32.v	v24, (TMP), STRIDE

	// Finalize the first half of the keystream for each block.
	vadd.vx		v0, v0, CONSTS0
	vadd.vx		v1, v1, CONSTS1
	vadd.vx		v2, v2, CONSTS2
	vadd.vx		v3, v3, CONSTS3
	vadd.vx		v4, v4, KEY0
	vadd.vx		v5, v5, KEY1
	vadd.vx		v6, v6, KEY2
	vadd.vx		v7, v7, KEY3

	// Encrypt/decrypt the first half of the data for each block.
	vxor.vv		v16, v16, v0
	vxor.vv		v17, v17, v1
	vxor.vv		v18, v18, v2
	vxor.vv		v19, v19, v3
	vxor.vv		v20, v20, v4
	vxor.vv		v21, v21, v5
	vxor.vv		v22, v22, v6
	vxor.vv		v23, v23, v7

	// Store the first half of the output data for each block.
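	// (The strided segment store mirrors the load above, re-interleaving
	// word i of every block back into its 64-byte block in memory.)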
	vssseg8e32.v	v16, (OUTP), STRIDE

	// Finalize the second half of the keystream for each block.
	vadd.vx		v8, v8, KEY4
	vadd.vx		v9, v9, KEY5
	vadd.vx		v10, v10, KEY6
	vadd.vx		v11, v11, KEY7
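	// v0 is free for scratch again now that the first half of the
	// keystream has been consumed; reuse it for the per-block counter
	// offsets 0..VL-1, which word 12 needs on top of COUNTER.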
	vid.v		v0
	vadd.vx		v12, v12, COUNTER
	vadd.vx		v13, v13, NONCE0
	vadd.vx		v14, v14, NONCE1
	vadd.vx		v15, v15, NONCE2
	vadd.vv		v12, v12, v0

	// Encrypt/decrypt the second half of the data for each block.
	vxor.vv		v24, v24, v8
	vxor.vv		v25, v25, v9
	vxor.vv		v26, v26, v10
	vxor.vv		v27, v27, v11
	vxor.vv		v28, v28, v12
	vxor.vv		v29, v29, v13
	vxor.vv		v30, v30, v14
	vxor.vv		v31, v31, v15

	// Store the second half of the output data for each block.
	addi		TMP, OUTP, 32
	vssseg8e32.v	v24, (TMP), STRIDE

	// Update the counter, the remaining number of blocks, and the input
	// and output pointers according to the number of blocks processed
	// (VL).
	add		COUNTER, COUNTER, VL
	sub		LEN, LEN, VL
	slli		TMP, VL, 6
	add		OUTP, OUTP, TMP
	add		INP, INP, TMP
	bnez		LEN, .Lblock_loop

	ld		s0, 0(sp)
	ld		s1, 8(sp)
	ld		s2, 16(sp)
	ld		s3, 24(sp)
	ld		s4, 32(sp)
	ld		s5, 40(sp)
	ld		s6, 48(sp)
	ld		s7, 56(sp)
	ld		s8, 64(sp)
	ld		s9, 72(sp)
	ld		s10, 80(sp)
	ld		s11, 88(sp)
	addi		sp, sp, 96
	ret
SYM_FUNC_END(chacha20_zvkb)