Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/***************************************************************************
3* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
4* *
5***************************************************************************/
6
7.file "twofish-x86_64-asm.S"
8.text
9
10#include <linux/linkage.h>
11#include <linux/cfi_types.h>
12#include <asm/asm-offsets.h>
13
/*
 * Byte offsets of the four 32-bit words a, b, c and d. They are passed to
 * the whitening macros to select the matching whitening key words.
 */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12
18
/*
 * Layout of the crypto context, as byte offsets from its base address.
 * Each S-box table holds 256 32-bit words (1024 bytes); the round keys
 * start right after the 8 whitening key words (k = w + 8 * 4).
 */
#define s0	0	/* S0 table */
#define s1	1024	/* S1 table */
#define s2	2048	/* S2 table */
#define s3	3072	/* S3 table */
#define w	4096	/* 8 whitening key words */
#define k	4128	/* round key words 1-32 */
27
/*
 * Register aliases that allow macro substitution. Each logical register Rn
 * is spelled four ways so that the round macros can build the operand they
 * need by token pasting (a ## D, a ## B, a ## H):
 *
 *	Rn	full 64-bit register
 *	RnD	low 32 bits
 *	RnB	bits 0-7
 *	RnH	bits 8-15
 */

/* R0: the a register family */
#define R0	%rax
#define R0D	%eax
#define R0B	%al
#define R0H	%ah

/* R1: the b register family */
#define R1	%rbx
#define R1D	%ebx
#define R1B	%bl
#define R1H	%bh

/* R2: the c register family */
#define R2	%rcx
#define R2D	%ecx
#define R2B	%cl
#define R2H	%ch

/* R3: the d register family */
#define R3	%rdx
#define R3D	%edx
#define R3B	%dl
#define R3H	%dh
49
50
/*
 * Input whitening: XOR the register reg with the whitening key words found
 * at byte offset w + key_off from ctx_base. The operand width follows reg;
 * the callers in this file pass 64-bit registers, covering two key words.
 */
#define input_whitening(reg, ctx_base, key_off) \
	xor	w+key_off(ctx_base),	reg;
54
/*
 * Output whitening: XOR the register reg with the whitening key words found
 * at byte offset w + 16 + key_off from ctx_base, i.e. the four key words
 * that follow the ones used for input whitening. The operand width follows
 * reg; the callers in this file pass 64-bit registers.
 */
#define output_whitening(reg, ctx_base, key_off) \
	xor	w+16+key_off(ctx_base),	reg;
58
59
/*
 * encrypt_round(a, b, c, d, koff) - one encryption round
 *
 * On entry:
 *	a	register holding a, rotated by 16 bits
 *	b	register holding b
 *	c	register holding c
 *	d	register holding d, already rotated left by 1
 *	koff	byte offset, relative to k, of this round's two key words
 *	%r11	base address of the crypto context
 *
 * On exit:
 *	a	a with the 16-bit rotation removed (one ror 16)
 *	b	b rotated left by 1 (ror 16 followed by ror 15)
 *	c	(c ^ (x + y + key[0])) rotated left by 15
 *	d	d ^ (x + 2 * y + key[1])
 * where x is the XOR of the four table lookups indexed by the bytes of a
 * and y is the XOR of the four lookups indexed by the bytes of b.
 *
 * The exit layout of (c, d) matches the entry layout of (a, b), and vice
 * versa, so consecutive rounds simply swap the register pairs.
 *
 * Clobbers %rdi (table index), %r8d (y) and %r9d (x).
 * Operations on a and b are interleaved to increase performance; the
 * instruction order must be kept as is.
 */
#define encrypt_round(a, b, c, d, koff) \
	/* first byte pair: b bytes 0/1, a bytes 2/3 */ \
	movzx	b ## B,			%edi; \
	mov	s1(%r11,%rdi,4),	%r8d; \
	movzx	a ## B,			%edi; \
	mov	s2(%r11,%rdi,4),	%r9d; \
	movzx	b ## H,			%edi; \
	ror	$16,			b ## D; \
	xor	s2(%r11,%rdi,4),	%r8d; \
	movzx	a ## H,			%edi; \
	ror	$16,			a ## D; \
	xor	s3(%r11,%rdi,4),	%r9d; \
	/* second byte pair: b bytes 2/3, a bytes 0/1 (offset 0 = s0 table) */ \
	movzx	b ## B,			%edi; \
	xor	s3(%r11,%rdi,4),	%r8d; \
	movzx	a ## B,			%edi; \
	xor	(%r11,%rdi,4),		%r9d; \
	movzx	b ## H,			%edi; \
	ror	$15,			b ## D; \
	xor	(%r11,%rdi,4),		%r8d; \
	movzx	a ## H,			%edi; \
	xor	s1(%r11,%rdi,4),	%r9d; \
	/* %r9d = x + y, %r8d = x + 2 * y, then add the round keys */ \
	add	%r8d,			%r9d; \
	add	%r9d,			%r8d; \
	add	k+koff(%r11),		%r9d; \
	xor	%r9d,			c ## D; \
	rol	$15,			c ## D; \
	add	k+4+koff(%r11),		%r8d; \
	xor	%r8d,			d ## D;
94
/*
 * encrypt_last_round(a, b, c, d, koff) - final encryption round
 *
 * On entry:
 *	a	register holding a, rotated by 16 bits
 *	b	register holding b
 *	c	register holding c
 *	d	register holding d, already rotated left by 1
 *	koff	byte offset, relative to k, of this round's two key words
 *	%r11	base address of the crypto context
 *
 * On exit:
 *	%r10	b in the upper 32 bits, unrotated a in the lower 32 bits,
 *		ready for the output whitening
 *	c	(c ^ (x + y + key[0])) rotated right by 1
 *	d	d ^ (x + 2 * y + key[1])
 * where x and y are the XORed table lookups for a and b respectively.
 * The a and b registers themselves are left in an intermediate rotation
 * and carry no result; their values are the ones captured in %r10.
 *
 * Clobbers %rdi (table index), %r8d (y), %r9d (x) and %r10.
 * Operations on a and b are interleaved to increase performance; the
 * instruction order must be kept as is.
 */
#define encrypt_last_round(a, b, c, d, koff) \
	/* capture b in the upper half of %r10 before it gets rotated */ \
	mov	b ## D,			%r10d; \
	shl	$32,			%r10; \
	/* first byte pair: b bytes 0/1, a bytes 2/3 */ \
	movzx	b ## B,			%edi; \
	mov	s1(%r11,%rdi,4),	%r8d; \
	movzx	a ## B,			%edi; \
	mov	s2(%r11,%rdi,4),	%r9d; \
	movzx	b ## H,			%edi; \
	ror	$16,			b ## D; \
	xor	s2(%r11,%rdi,4),	%r8d; \
	movzx	a ## H,			%edi; \
	ror	$16,			a ## D; \
	xor	s3(%r11,%rdi,4),	%r9d; \
	/* second byte pair: b bytes 2/3, a bytes 0/1 (offset 0 = s0 table) */ \
	movzx	b ## B,			%edi; \
	xor	s3(%r11,%rdi,4),	%r8d; \
	movzx	a ## B,			%edi; \
	xor	(%r11,%rdi,4),		%r9d; \
	/* a is unrotated now: merge it into the empty lower half of %r10 */ \
	xor	a,			%r10; \
	movzx	b ## H,			%edi; \
	xor	(%r11,%rdi,4),		%r8d; \
	movzx	a ## H,			%edi; \
	xor	s1(%r11,%rdi,4),	%r9d; \
	/* %r9d = x + y, %r8d = x + 2 * y, then add the round keys */ \
	add	%r8d,			%r9d; \
	add	%r9d,			%r8d; \
	add	k+koff(%r11),		%r9d; \
	xor	%r9d,			c ## D; \
	ror	$1,			c ## D; \
	add	k+4+koff(%r11),		%r8d; \
	xor	%r8d,			d ## D
132
/*
 * decrypt_round(a, b, c, d, koff) - one decryption round
 *
 * On entry:
 *	a	register holding a
 *	b	register holding b, rotated by 16 bits
 *	c	register holding c, already rotated left by 1
 *	d	register holding d
 *	koff	byte offset, relative to k, of this round's two key words
 *	%r11	base address of the crypto context
 *
 * On exit:
 *	a	a rotated left by 1 (ror 16 followed by ror 15)
 *	b	b with the 16-bit rotation removed (one ror 16)
 *	c	c ^ (x + y + key[0])
 *	d	(d ^ (x + 2 * y + key[1])) rotated left by 15
 * where x is the XOR of the four table lookups indexed by the bytes of a
 * and y is the XOR of the four lookups indexed by the bytes of b.
 *
 * The exit layout of (c, d) matches the entry layout of (a, b), and vice
 * versa, so consecutive rounds simply swap the register pairs.
 *
 * Clobbers %rdi (table index), %r8d (y) and %r9d (x).
 * Operations on a and b are interleaved to increase performance; the
 * instruction order must be kept as is.
 */
#define decrypt_round(a, b, c, d, koff) \
	/* first byte pair: a bytes 0/1, b bytes 2/3 (offset 0 = s0 table) */ \
	movzx	a ## B,			%edi; \
	mov	(%r11,%rdi,4),		%r9d; \
	movzx	b ## B,			%edi; \
	mov	s3(%r11,%rdi,4),	%r8d; \
	movzx	a ## H,			%edi; \
	ror	$16,			a ## D; \
	xor	s1(%r11,%rdi,4),	%r9d; \
	movzx	b ## H,			%edi; \
	ror	$16,			b ## D; \
	xor	(%r11,%rdi,4),		%r8d; \
	/* second byte pair: a bytes 2/3, b bytes 0/1 */ \
	movzx	a ## B,			%edi; \
	xor	s2(%r11,%rdi,4),	%r9d; \
	movzx	b ## B,			%edi; \
	xor	s1(%r11,%rdi,4),	%r8d; \
	movzx	a ## H,			%edi; \
	ror	$15,			a ## D; \
	xor	s3(%r11,%rdi,4),	%r9d; \
	movzx	b ## H,			%edi; \
	xor	s2(%r11,%rdi,4),	%r8d; \
	/* %r9d = x + y, %r8d = x + 2 * y, then add the round keys */ \
	add	%r8d,			%r9d; \
	add	%r9d,			%r8d; \
	add	k+koff(%r11),		%r9d; \
	xor	%r9d,			c ## D; \
	add	k+4+koff(%r11),		%r8d; \
	xor	%r8d,			d ## D; \
	rol	$15,			d ## D;
167
/*
 * decrypt_last_round(a, b, c, d, koff) - final decryption round
 *
 * On entry:
 *	a	register holding a
 *	b	register holding b, rotated by 16 bits (the layout left
 *		behind by decrypt_round; the lookups below index s3/s0
 *		with its low two bytes exactly as decrypt_round does)
 *	c	register holding c, already rotated left by 1
 *	d	register holding d
 *	koff	byte offset, relative to k, of this round's two key words
 *	%r11	base address of the crypto context
 *
 * On exit:
 *	%r10	unrotated b in the upper 32 bits, a in the lower 32 bits,
 *		ready for the final whitening
 *	c	c ^ (x + y + key[0])
 *	d	(d ^ (x + 2 * y + key[1])) rotated right by 1
 * where x and y are the XORed table lookups for a and b respectively.
 * The a register itself is left rotated by 16 and carries no result; its
 * value is the one captured in %r10.
 *
 * Clobbers %rdi (table index), %r8d (y), %r9d (x) and %r10.
 * Operations on a and b are interleaved to increase performance; the
 * instruction order must be kept as is.
 */
#define decrypt_last_round(a, b, c, d, koff) \
	/* first byte pair: a bytes 0/1, b bytes 2/3 (offset 0 = s0 table) */ \
	movzx	a ## B,			%edi; \
	mov	(%r11,%rdi,4),		%r9d; \
	movzx	b ## B,			%edi; \
	mov	s3(%r11,%rdi,4),	%r8d; \
	movzx	b ## H,			%edi; \
	ror	$16,			b ## D; \
	xor	(%r11,%rdi,4),		%r8d; \
	movzx	a ## H,			%edi; \
	/* b is unrotated and a still untouched: pack both into %r10 */ \
	mov	b ## D,			%r10d; \
	shl	$32,			%r10; \
	xor	a,			%r10; \
	ror	$16,			a ## D; \
	xor	s1(%r11,%rdi,4),	%r9d; \
	/* second byte pair: a bytes 2/3, b bytes 0/1 */ \
	movzx	b ## B,			%edi; \
	xor	s1(%r11,%rdi,4),	%r8d; \
	movzx	a ## B,			%edi; \
	xor	s2(%r11,%rdi,4),	%r9d; \
	movzx	b ## H,			%edi; \
	xor	s2(%r11,%rdi,4),	%r8d; \
	movzx	a ## H,			%edi; \
	xor	s3(%r11,%rdi,4),	%r9d; \
	/* %r9d = x + y, %r8d = x + 2 * y, then add the round keys */ \
	add	%r8d,			%r9d; \
	add	%r9d,			%r8d; \
	add	k+koff(%r11),		%r9d; \
	xor	%r9d,			c ## D; \
	add	k+4+koff(%r11),		%r8d; \
	xor	%r8d,			d ## D; \
	ror	$1,			d ## D;
205
/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * In:	%rdi	ctx address
 *	%rsi	output address
 *	%rdx	input address
 * Out:	16 bytes stored at (%rsi); %eax = 1
 *
 * Uses %rax, %rcx, %rdx, %rdi and %r8-%r11 as scratch; %rbx is saved on
 * entry and restored before returning.
 */
SYM_TYPED_FUNC_START(twofish_enc_blk)
	pushq	R1

	/*
	 * Keep the ctx address in %r11. This frees %rdi, a non-REX register,
	 * to serve as the target of the 8-bit high register (%ah, %bh, ...)
	 * moves in the round macros.
	 */
	mov	%rdi,	%r11

	/* Load the block (R3 is %rdx, the input address) and whiten it. */
	movq	(R3),	R1
	movq	8(R3),	R3
	input_whitening(R1, %r11, a_offset)
	input_whitening(R3, %r11, c_offset)

	/*
	 * Split the two 64-bit halves into the layout encrypt_round expects:
	 * R0 = a rotated by 16, R1 = b, R2 = c, R3 = d rotated left by 1.
	 */
	mov	R1D,	R0D
	rol	$16,	R0D
	shr	$32,	R1
	mov	R3D,	R2D
	shr	$32,	R3
	rol	$1,	R3D

	/* 16 rounds; the register pairs swap roles after every round. */
	encrypt_round(R0, R1, R2, R3, 0*8)
	encrypt_round(R2, R3, R0, R1, 1*8)
	encrypt_round(R0, R1, R2, R3, 2*8)
	encrypt_round(R2, R3, R0, R1, 3*8)
	encrypt_round(R0, R1, R2, R3, 4*8)
	encrypt_round(R2, R3, R0, R1, 5*8)
	encrypt_round(R0, R1, R2, R3, 6*8)
	encrypt_round(R2, R3, R0, R1, 7*8)
	encrypt_round(R0, R1, R2, R3, 8*8)
	encrypt_round(R2, R3, R0, R1, 9*8)
	encrypt_round(R0, R1, R2, R3, 10*8)
	encrypt_round(R2, R3, R0, R1, 11*8)
	encrypt_round(R0, R1, R2, R3, 12*8)
	encrypt_round(R2, R3, R0, R1, 13*8)
	encrypt_round(R0, R1, R2, R3, 14*8)
	encrypt_last_round(R2, R3, R0, R1, 15*8)

	/* %r10 was packed by the last round: whiten, store output bytes 0-7. */
	output_whitening(%r10, %r11, a_offset)
	movq	%r10,	(%rsi)

	/* Pack R1 (upper half) with R0 (lower half): output bytes 8-15. */
	shl	$32,	R1
	xor	R0,	R1
	output_whitening(R1, %r11, c_offset)
	movq	R1,	8(%rsi)

	popq	R1
	movl	$1,	%eax
	RET
SYM_FUNC_END(twofish_enc_blk)
258
/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * In:	%rdi	ctx address
 *	%rsi	output address
 *	%rdx	input address
 * Out:	16 bytes stored at (%rsi); %eax = 1
 *
 * Uses %rax, %rcx, %rdx, %rdi and %r8-%r11 as scratch; %rbx is saved on
 * entry and restored before returning.
 */
SYM_TYPED_FUNC_START(twofish_dec_blk)
	pushq	R1

	/*
	 * Keep the ctx address in %r11. This frees %rdi, a non-REX register,
	 * to serve as the target of the 8-bit high register (%ah, %bh, ...)
	 * moves in the round macros.
	 */
	mov	%rdi,	%r11

	/*
	 * Load the block (R3 is %rdx, the input address) and strip the
	 * output whitening that encryption applied last.
	 */
	movq	(R3),	R1
	movq	8(R3),	R3
	output_whitening(R1, %r11, a_offset)
	output_whitening(R3, %r11, c_offset)

	/*
	 * Split the two 64-bit halves into the layout decrypt_round expects:
	 * R0 = a, R1 = b rotated by 16, R2 = c rotated left by 1, R3 = d.
	 */
	mov	R1D,	R0D
	shr	$32,	R1
	rol	$16,	R1D
	mov	R3D,	R2D
	shr	$32,	R3
	rol	$1,	R2D

	/* 16 rounds with the round keys taken in reverse order. */
	decrypt_round(R0, R1, R2, R3, 15*8)
	decrypt_round(R2, R3, R0, R1, 14*8)
	decrypt_round(R0, R1, R2, R3, 13*8)
	decrypt_round(R2, R3, R0, R1, 12*8)
	decrypt_round(R0, R1, R2, R3, 11*8)
	decrypt_round(R2, R3, R0, R1, 10*8)
	decrypt_round(R0, R1, R2, R3, 9*8)
	decrypt_round(R2, R3, R0, R1, 8*8)
	decrypt_round(R0, R1, R2, R3, 7*8)
	decrypt_round(R2, R3, R0, R1, 6*8)
	decrypt_round(R0, R1, R2, R3, 5*8)
	decrypt_round(R2, R3, R0, R1, 4*8)
	decrypt_round(R0, R1, R2, R3, 3*8)
	decrypt_round(R2, R3, R0, R1, 2*8)
	decrypt_round(R0, R1, R2, R3, 1*8)
	decrypt_last_round(R2, R3, R0, R1, 0*8)

	/* %r10 was packed by the last round: whiten, store output bytes 0-7. */
	input_whitening(%r10, %r11, a_offset)
	movq	%r10,	(%rsi)

	/* Pack R1 (upper half) with R0 (lower half): output bytes 8-15. */
	shl	$32,	R1
	xor	R0,	R1
	input_whitening(R1, %r11, c_offset)
	movq	R1,	8(%rsi)

	popq	R1
	movl	$1,	%eax
	RET
SYM_FUNC_END(twofish_dec_blk)