Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#! /usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3
4# This code is taken from CRYPTOGAMs[1] and is included here using the option
5# in the license to distribute the code under the GPL. Therefore this program
6# is free software; you can redistribute it and/or modify it under the terms of
7# the GNU General Public License version 2 as published by the Free Software
8# Foundation.
9#
10# [1] https://www.openssl.org/~appro/cryptogams/
11
12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13# All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19# * Redistributions of source code must retain copyright notices,
20# this list of conditions and the following disclaimer.
21#
22# * Redistributions in binary form must reproduce the above
23# copyright notice, this list of conditions and the following
24# disclaimer in the documentation and/or other materials
25# provided with the distribution.
26#
27# * Neither the name of the CRYPTOGAMS nor the names of its
28# copyright holder and contributors may be used to endorse or
29# promote products derived from this software without specific
30# prior written permission.
31#
32# ALTERNATIVELY, provided that this notice is retained in full, this
33# product may be distributed under the terms of the GNU General Public
34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35# those given above.
36#
37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49# ====================================================================
50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51# project. The module is, however, dual licensed under OpenSSL and
52# CRYPTOGAMS licenses depending on where you obtain it. For further
53# details see https://www.openssl.org/~appro/cryptogams/.
54# ====================================================================
55#
56# This module implements support for AES instructions as per PowerISA
57# specification version 2.07, first implemented by POWER8 processor.
58# The module is endian-agnostic in sense that it supports both big-
59# and little-endian cases. Data alignment in parallelizable modes is
60# handled with VSX loads and stores, which implies MSR.VSX flag being
61# set. It should also be noted that ISA specification doesn't prohibit
62# alignment exceptions for these instructions on page boundaries.
63# Initially alignment was handled in pure AltiVec/VMX way [when data
64# is aligned programmatically, which in turn guarantees exception-
65# free execution], but it turned out to hamper performance when vcipher
66# instructions are interleaved. It's reckoned that eventual
67# misalignment penalties at page boundaries are on average lower
68# than additional overhead in pure AltiVec approach.
69#
70# May 2016
71#
72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73# systems were measured.
74#
75######################################################################
76# Current large-block performance in cycles per byte processed with
77# 128-bit key (less is better).
78#
79# CBC en-/decrypt CTR XTS
80# POWER8[le] 3.96/0.72 0.74 1.1
81# POWER8[be] 3.75/0.65 0.66 1.0
82
# First command-line argument: the target flavour. It must contain "64" or
# "32" (selects word size and the matching load/store/compare/shift
# mnemonics); a trailing "le" selects little-endian output.
83$flavour = shift;
84
85if ($flavour =~ /64/) {
86 $SIZE_T =8;
87 $LRSAVE =2*$SIZE_T;
88 $STU ="stdu";
89 $POP ="ld";
90 $PUSH ="std";
91 $UCMP ="cmpld";
92 $SHL ="sldi";
93} elsif ($flavour =~ /32/) {
94 $SIZE_T =4;
95 $LRSAVE =$SIZE_T;
96 $STU ="stwu";
97 $POP ="lwz";
98 $PUSH ="stw";
99 $UCMP ="cmplw";
100 $SHL ="slwi";
101} else { die "nonsense $flavour"; }
102
103$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
104
# Look for the ppc-xlate.pl translator in this script's own directory first,
# then under ../../perlasm/ relative to it.
105$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
106( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
107( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
108die "can't locate ppc-xlate.pl";
109
# Pipe everything printed to STDOUT through the translator; the next
# command-line argument is handed to it after the flavour.
# Low-precedence "or" is required here: with "||" the die would bind to the
# (always true) command string and a failed open would be silently ignored.
110open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
111
# Stack frame size used by the generated code and the symbol prefix of the
# emitted functions.
112$FRAME=8*$SIZE_T;
113$prefix="aes_p10";
114
# Register names used by the assembly templates below.
115$sp="r1";
116$vrsave="r12";
117
118#########################################################################
119{{{ # Key setup procedures #
# Appends the following assembly text to $code:
#   rcon / Lconsts            - constant table, plus a helper that obtains its
#                               own address via bcl/mflr and returns the
#                               address of rcon in $ptr.
#   ${prefix}_set_encrypt_key - key expansion for 128/192/256-bit keys
#                               (Loop128 / L192 / L256 paths).
#   ${prefix}_set_decrypt_key - calls Lset_encrypt_key, then the Ldeckey loop
#                               swaps 16-byte round keys between the two ends
#                               of the schedule, working toward the middle.
#
# Return value (r3) of set_encrypt_key as emitted below:
#   -1  $inp or $out is zero
#   -2  $bits is below 128, above 256, or has any of the low six bits set
#    0  success
# On success the round count (10, 12 or 14) is stored with stw at 0($out);
# set_decrypt_key treats that location as 240 bytes past the first round key.
#
# The ".long ... ?rev" / "?asis" table rows and the "?"/"le?" instruction
# prefixes are not valid assembly by themselves; they are rewritten by the
# post-processing loop at the end of this file.
#
# Scalar arguments and scratch registers: r3..r8.
120my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector working registers: v0..v6.
121my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers used to assemble possibly unaligned stores: v7..v11.
122my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
123
124$code.=<<___;
125.machine "any"
126
127.text
128
129.align 7
130rcon:
131.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
134.long 0,0,0,0 ?asis
135Lconsts:
136 mflr r0
137 bcl 20,31,\$+4
138 mflr $ptr #vvvvv "distance between . and rcon
139 addi $ptr,$ptr,-0x48
140 mtlr r0
141 blr
142 .long 0
143 .byte 0,12,0x14,0,0,0,0,0
144.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
145
146.globl .${prefix}_set_encrypt_key
147Lset_encrypt_key:
148 mflr r11
149 $PUSH r11,$LRSAVE($sp)
150
151 li $ptr,-1
152 ${UCMP}i $inp,0
153 beq- Lenc_key_abort # if ($inp==0) return -1;
154 ${UCMP}i $out,0
155 beq- Lenc_key_abort # if ($out==0) return -1;
156 li $ptr,-2
157 cmpwi $bits,128
158 blt- Lenc_key_abort
159 cmpwi $bits,256
160 bgt- Lenc_key_abort
161 andi. r0,$bits,0x3f
162 bne- Lenc_key_abort
163
164 lis r0,0xfff0
165 mfspr $vrsave,256
166 mtspr 256,r0
167
168 bl Lconsts
169 mtlr r11
170
171 neg r9,$inp
172 lvx $in0,0,$inp
173 addi $inp,$inp,15 # 15 is not typo
174 lvsr $key,0,r9 # borrow $key
175 li r8,0x20
176 cmpwi $bits,192
177 lvx $in1,0,$inp
178 le?vspltisb $mask,0x0f # borrow $mask
179 lvx $rcon,0,$ptr
180 le?vxor $key,$key,$mask # adjust for byte swap
181 lvx $mask,r8,$ptr
182 addi $ptr,$ptr,0x10
183 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
184 li $cnt,8
185 vxor $zero,$zero,$zero
186 mtctr $cnt
187
188 ?lvsr $outperm,0,$out
189 vspltisb $outmask,-1
190 lvx $outhead,0,$out
191 ?vperm $outmask,$zero,$outmask,$outperm
192
193 blt Loop128
194 addi $inp,$inp,8
195 beq L192
196 addi $inp,$inp,8
197 b L256
198
199.align 4
200Loop128:
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
207 stvx $stage,0,$out
208 addi $out,$out,16
209
210 vxor $in0,$in0,$tmp
211 vsldoi $tmp,$zero,$tmp,12 # >>32
212 vxor $in0,$in0,$tmp
213 vsldoi $tmp,$zero,$tmp,12 # >>32
214 vxor $in0,$in0,$tmp
215 vadduwm $rcon,$rcon,$rcon
216 vxor $in0,$in0,$key
217 bdnz Loop128
218
219 lvx $rcon,0,$ptr # last two round keys
220
221 vperm $key,$in0,$in0,$mask # rotate-n-splat
222 vsldoi $tmp,$zero,$in0,12 # >>32
223 vperm $outtail,$in0,$in0,$outperm # rotate
224 vsel $stage,$outhead,$outtail,$outmask
225 vmr $outhead,$outtail
226 vcipherlast $key,$key,$rcon
227 stvx $stage,0,$out
228 addi $out,$out,16
229
230 vxor $in0,$in0,$tmp
231 vsldoi $tmp,$zero,$tmp,12 # >>32
232 vxor $in0,$in0,$tmp
233 vsldoi $tmp,$zero,$tmp,12 # >>32
234 vxor $in0,$in0,$tmp
235 vadduwm $rcon,$rcon,$rcon
236 vxor $in0,$in0,$key
237
238 vperm $key,$in0,$in0,$mask # rotate-n-splat
239 vsldoi $tmp,$zero,$in0,12 # >>32
240 vperm $outtail,$in0,$in0,$outperm # rotate
241 vsel $stage,$outhead,$outtail,$outmask
242 vmr $outhead,$outtail
243 vcipherlast $key,$key,$rcon
244 stvx $stage,0,$out
245 addi $out,$out,16
246
247 vxor $in0,$in0,$tmp
248 vsldoi $tmp,$zero,$tmp,12 # >>32
249 vxor $in0,$in0,$tmp
250 vsldoi $tmp,$zero,$tmp,12 # >>32
251 vxor $in0,$in0,$tmp
252 vxor $in0,$in0,$key
253 vperm $outtail,$in0,$in0,$outperm # rotate
254 vsel $stage,$outhead,$outtail,$outmask
255 vmr $outhead,$outtail
256 stvx $stage,0,$out
257
258 addi $inp,$out,15 # 15 is not typo
259 addi $out,$out,0x50
260
261 li $rounds,10
262 b Ldone
263
264.align 4
265L192:
266 lvx $tmp,0,$inp
267 li $cnt,4
268 vperm $outtail,$in0,$in0,$outperm # rotate
269 vsel $stage,$outhead,$outtail,$outmask
270 vmr $outhead,$outtail
271 stvx $stage,0,$out
272 addi $out,$out,16
273 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
274 vspltisb $key,8 # borrow $key
275 mtctr $cnt
276 vsububm $mask,$mask,$key # adjust the mask
277
278Loop192:
279 vperm $key,$in1,$in1,$mask # roate-n-splat
280 vsldoi $tmp,$zero,$in0,12 # >>32
281 vcipherlast $key,$key,$rcon
282
283 vxor $in0,$in0,$tmp
284 vsldoi $tmp,$zero,$tmp,12 # >>32
285 vxor $in0,$in0,$tmp
286 vsldoi $tmp,$zero,$tmp,12 # >>32
287 vxor $in0,$in0,$tmp
288
289 vsldoi $stage,$zero,$in1,8
290 vspltw $tmp,$in0,3
291 vxor $tmp,$tmp,$in1
292 vsldoi $in1,$zero,$in1,12 # >>32
293 vadduwm $rcon,$rcon,$rcon
294 vxor $in1,$in1,$tmp
295 vxor $in0,$in0,$key
296 vxor $in1,$in1,$key
297 vsldoi $stage,$stage,$in0,8
298
299 vperm $key,$in1,$in1,$mask # rotate-n-splat
300 vsldoi $tmp,$zero,$in0,12 # >>32
301 vperm $outtail,$stage,$stage,$outperm # rotate
302 vsel $stage,$outhead,$outtail,$outmask
303 vmr $outhead,$outtail
304 vcipherlast $key,$key,$rcon
305 stvx $stage,0,$out
306 addi $out,$out,16
307
308 vsldoi $stage,$in0,$in1,8
309 vxor $in0,$in0,$tmp
310 vsldoi $tmp,$zero,$tmp,12 # >>32
311 vperm $outtail,$stage,$stage,$outperm # rotate
312 vsel $stage,$outhead,$outtail,$outmask
313 vmr $outhead,$outtail
314 vxor $in0,$in0,$tmp
315 vsldoi $tmp,$zero,$tmp,12 # >>32
316 vxor $in0,$in0,$tmp
317 stvx $stage,0,$out
318 addi $out,$out,16
319
320 vspltw $tmp,$in0,3
321 vxor $tmp,$tmp,$in1
322 vsldoi $in1,$zero,$in1,12 # >>32
323 vadduwm $rcon,$rcon,$rcon
324 vxor $in1,$in1,$tmp
325 vxor $in0,$in0,$key
326 vxor $in1,$in1,$key
327 vperm $outtail,$in0,$in0,$outperm # rotate
328 vsel $stage,$outhead,$outtail,$outmask
329 vmr $outhead,$outtail
330 stvx $stage,0,$out
331 addi $inp,$out,15 # 15 is not typo
332 addi $out,$out,16
333 bdnz Loop192
334
335 li $rounds,12
336 addi $out,$out,0x20
337 b Ldone
338
339.align 4
340L256:
341 lvx $tmp,0,$inp
342 li $cnt,7
343 li $rounds,14
344 vperm $outtail,$in0,$in0,$outperm # rotate
345 vsel $stage,$outhead,$outtail,$outmask
346 vmr $outhead,$outtail
347 stvx $stage,0,$out
348 addi $out,$out,16
349 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
350 mtctr $cnt
351
352Loop256:
353 vperm $key,$in1,$in1,$mask # rotate-n-splat
354 vsldoi $tmp,$zero,$in0,12 # >>32
355 vperm $outtail,$in1,$in1,$outperm # rotate
356 vsel $stage,$outhead,$outtail,$outmask
357 vmr $outhead,$outtail
358 vcipherlast $key,$key,$rcon
359 stvx $stage,0,$out
360 addi $out,$out,16
361
362 vxor $in0,$in0,$tmp
363 vsldoi $tmp,$zero,$tmp,12 # >>32
364 vxor $in0,$in0,$tmp
365 vsldoi $tmp,$zero,$tmp,12 # >>32
366 vxor $in0,$in0,$tmp
367 vadduwm $rcon,$rcon,$rcon
368 vxor $in0,$in0,$key
369 vperm $outtail,$in0,$in0,$outperm # rotate
370 vsel $stage,$outhead,$outtail,$outmask
371 vmr $outhead,$outtail
372 stvx $stage,0,$out
373 addi $inp,$out,15 # 15 is not typo
374 addi $out,$out,16
375 bdz Ldone
376
377 vspltw $key,$in0,3 # just splat
378 vsldoi $tmp,$zero,$in1,12 # >>32
379 vsbox $key,$key
380
381 vxor $in1,$in1,$tmp
382 vsldoi $tmp,$zero,$tmp,12 # >>32
383 vxor $in1,$in1,$tmp
384 vsldoi $tmp,$zero,$tmp,12 # >>32
385 vxor $in1,$in1,$tmp
386
387 vxor $in1,$in1,$key
388 b Loop256
389
390.align 4
391Ldone:
392 lvx $in1,0,$inp # redundant in aligned case
393 vsel $in1,$outhead,$in1,$outmask
394 stvx $in1,0,$inp
395 li $ptr,0
396 mtspr 256,$vrsave
397 stw $rounds,0($out)
398
399Lenc_key_abort:
400 mr r3,$ptr
401 blr
402 .long 0
403 .byte 0,12,0x14,1,0,0,3,0
404 .long 0
405.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
406
407.globl .${prefix}_set_decrypt_key
408 $STU $sp,-$FRAME($sp)
409 mflr r10
410 $PUSH r10,$FRAME+$LRSAVE($sp)
411 bl Lset_encrypt_key
412 mtlr r10
413
414 cmpwi r3,0
415 bne- Ldec_key_abort
416
417 slwi $cnt,$rounds,4
418 subi $inp,$out,240 # first round key
419 srwi $rounds,$rounds,1
420 add $out,$inp,$cnt # last round key
421 mtctr $rounds
422
423Ldeckey:
424 lwz r0, 0($inp)
425 lwz r6, 4($inp)
426 lwz r7, 8($inp)
427 lwz r8, 12($inp)
428 addi $inp,$inp,16
429 lwz r9, 0($out)
430 lwz r10,4($out)
431 lwz r11,8($out)
432 lwz r12,12($out)
433 stw r0, 0($out)
434 stw r6, 4($out)
435 stw r7, 8($out)
436 stw r8, 12($out)
437 subi $out,$out,16
438 stw r9, -16($inp)
439 stw r10,-12($inp)
440 stw r11,-8($inp)
441 stw r12,-4($inp)
442 bdnz Ldeckey
443
444 xor r3,r3,r3 # return value
445Ldec_key_abort:
446 addi $sp,$sp,$FRAME
447 blr
448 .long 0
449 .byte 0,12,4,1,0x80,0,3,0
450 .long 0
451.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
452___
453}}}
454#########################################################################
455{{{ # Single block en- and decrypt procedures #
# gen_block($dir) appends the assembly text of one single-block routine,
# .${prefix}_${dir}crypt, to $code.
#   $dir - "en" or "de". For "de" the cipher mnemonics get an "n" infix
#          (vncipher / vncipherlast); any other value yields vcipher /
#          vcipherlast.
# The emitted routine takes its arguments in r3..r5 (input block, output
# block, key schedule), reads the round count from offset 240 of the key
# schedule and uses r6/r7 as scratch. Input, output and round keys are
# accessed with lvx/stvx plus permutes (see the "align" comments in the
# template).
#
# The sub is declared without a prototype: it takes one argument, so the
# former empty "()" prototype was wrong and only worked because every call
# used the prototype-bypassing "&" form.
456sub gen_block {
457my $dir = shift;
458my $n = $dir eq "de" ? "n" : "";
459my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
460
461$code.=<<___;
462.globl .${prefix}_${dir}crypt
463 lwz $rounds,240($key)
464 lis r0,0xfc00
465 mfspr $vrsave,256
466 li $idx,15 # 15 is not typo
467 mtspr 256,r0
468
469 lvx v0,0,$inp
470 neg r11,$out
471 lvx v1,$idx,$inp
472 lvsl v2,0,$inp # inpperm
473 le?vspltisb v4,0x0f
474 ?lvsl v3,0,r11 # outperm
475 le?vxor v2,v2,v4
476 li $idx,16
477 vperm v0,v0,v1,v2 # align [and byte swap in LE]
478 lvx v1,0,$key
479 ?lvsl v5,0,$key # keyperm
480 srwi $rounds,$rounds,1
481 lvx v2,$idx,$key
482 addi $idx,$idx,16
483 subi $rounds,$rounds,1
484 ?vperm v1,v1,v2,v5 # align round key
485
486 vxor v0,v0,v1
487 lvx v1,$idx,$key
488 addi $idx,$idx,16
489 mtctr $rounds
490
491Loop_${dir}c:
492 ?vperm v2,v2,v1,v5
493 v${n}cipher v0,v0,v2
494 lvx v2,$idx,$key
495 addi $idx,$idx,16
496 ?vperm v1,v1,v2,v5
497 v${n}cipher v0,v0,v1
498 lvx v1,$idx,$key
499 addi $idx,$idx,16
500 bdnz Loop_${dir}c
501
502 ?vperm v2,v2,v1,v5
503 v${n}cipher v0,v0,v2
504 lvx v2,$idx,$key
505 ?vperm v1,v1,v2,v5
506 v${n}cipherlast v0,v0,v1
507
508 vspltisb v2,-1
509 vxor v1,v1,v1
510 li $idx,15 # 15 is not typo
511 ?vperm v2,v1,v2,v3 # outmask
512 le?vxor v3,v3,v4
513 lvx v1,0,$out # outhead
514 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
515 vsel v1,v1,v0,v2
516 lvx v4,$idx,$out
517 stvx v1,0,$out
518 vsel v0,v0,v4,v2
519 stvx v0,$idx,$out
520
521 mtspr 256,$vrsave
522 blr
523 .long 0
524 .byte 0,12,0x14,0,0,0,3,0
525 .long 0
526.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
527___
528}
# Emit the encrypt routine, then the decrypt routine.
529gen_block("en");
530gen_block("de");
531}}}
532
# Post-process the accumulated assembly text line by line and print it to
# STDOUT (which is piped into the translator):
#   1. `...` spans are replaced by the result of eval'ing their contents;
#   2. until the "Lconsts:" label is seen, .long/.byte rows carrying a
#      trailing "?xxx" tag are re-emitted as explicit .byte lists, adjusted
#      for little-endian flavours according to the tag;
#   3. "le?" / "be?" / "?" instruction prefixes are resolved for the
#      selected endianness.
# $consts stays true only while we are still inside the constants table.
533my $consts=1;
534foreach(split("\n",$code)) {
535 s/\`([^\`]*)\`/eval($1)/geo;
536
537 # constants table endian-specific conversion
538 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
539 my $conv=$3;
540 my @bytes=();
541
542 # convert to endian-agnostic format
 # (.long values are split into four bytes, most significant first)
543 if ($1 eq "long") {
544 foreach (split(/,\s*/,$2)) {
545 my $l = /^0/?oct:int;
546 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
547 }
548 } else {
549 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
550 }
551
552 # little-endian conversion
 # (?inv xors every byte with 0xf, ?rev reverses the byte order of the
 # row; any other tag, e.g. ?asis, leaves the bytes untouched)
553 if ($flavour =~ /le$/o) {
554 SWITCH: for($conv) {
555 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
556 /\?rev/ && do { @bytes=reverse(@bytes); last; };
557 }
558 }
559
560 #emit
561 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
562 next;
563 }
564 $consts=0 if (m/Lconsts:/o); # end of table
565
566 # instructions prefixed with '?' are endian-specific and need
567 # to be adjusted accordingly...
 # Only the first alternative that matches is applied to a line.
568 if ($flavour =~ /le$/o) { # little-endian
569 s/le\?//o or
570 s/be\?/#be#/o or
571 s/\?lvsr/lvsl/o or
572 s/\?lvsl/lvsr/o or
573 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
574 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
575 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
576 } else { # big-endian
577 s/le\?/#le#/o or
578 s/be\?//o or
579 s/\?([a-z]+)/$1/o;
580 }
581
582 print $_,"\n";
583}
584
# STDOUT is a pipe to the translator: closing it flushes buffered output and
# waits for the child, so a write error or a failing translator must abort
# the build instead of leaving truncated output behind.
585close STDOUT or die "error closing STDOUT: $!";