Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#! /usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3
4# This code is taken from CRYPTOGAMs[1] and is included here using the option
5# in the license to distribute the code under the GPL. Therefore this program
6# is free software; you can redistribute it and/or modify it under the terms of
7# the GNU General Public License version 2 as published by the Free Software
8# Foundation.
9#
10# [1] https://www.openssl.org/~appro/cryptogams/
11
12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13# All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19# * Redistributions of source code must retain copyright notices,
20# this list of conditions and the following disclaimer.
21#
22# * Redistributions in binary form must reproduce the above
23# copyright notice, this list of conditions and the following
24# disclaimer in the documentation and/or other materials
25# provided with the distribution.
26#
27# * Neither the name of the CRYPTOGAMS nor the names of its
28# copyright holder and contributors may be used to endorse or
29# promote products derived from this software without specific
30# prior written permission.
31#
32# ALTERNATIVELY, provided that this notice is retained in full, this
33# product may be distributed under the terms of the GNU General Public
34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35# those given above.
36#
37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49# ====================================================================
50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51# project. The module is, however, dual licensed under OpenSSL and
52# CRYPTOGAMS licenses depending on where you obtain it. For further
53# details see https://www.openssl.org/~appro/cryptogams/.
54# ====================================================================
55#
56# This module implements support for AES instructions as per PowerISA
57# specification version 2.07, first implemented by POWER8 processor.
58# The module is endian-agnostic in sense that it supports both big-
59# and little-endian cases. Data alignment in parallelizable modes is
60# handled with VSX loads and stores, which implies MSR.VSX flag being
61# set. It should also be noted that ISA specification doesn't prohibit
62# alignment exceptions for these instructions on page boundaries.
63# Initially alignment was handled in pure AltiVec/VMX way [when data
64# is aligned programmatically, which in turn guarantees exception-
65# free execution], but it turned to hamper performance when vcipher
66# instructions are interleaved. It's reckoned that eventual
67# misalignment penalties at page boundaries are in average lower
68# than additional overhead in pure AltiVec approach.
69#
70# May 2016
71#
72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73# systems were measured.
74#
75######################################################################
76# Current large-block performance in cycles per byte processed with
77# 128-bit key (less is better).
78#
79# CBC en-/decrypt CTR XTS
80# POWER8[le] 3.96/0.72 0.74 1.1
81# POWER8[be] 3.75/0.65 0.66 1.0
82
# Command line: first argument selects the target flavour (e.g.
# "linux64le", "linux32", "aix64"), second names the output file that is
# handed on to the ppc-xlate.pl translator.
$flavour = shift;

# Per-flavour ABI parameters: pointer size, link-register save slot, and
# the width-dependent mnemonics interpolated into the assembly below.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero (conveniently equal to $SIZE_T) when generating for
# little-endian; heredoc lines guarded with "le?" key off this.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl translator: next to this script, or in the
# usual OpenSSL perlasm / kernel tree locations.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
( $xlate="${dir}../../../arch/powerpc/crypto/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all generated code through the translator.  NOTE: low-precedence
# "or" is required here -- with "||" the check binds to shift's return
# value instead of open()'s, so a failed pipe-open went undetected.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;		# minimal stack frame (red-zone header)
$prefix="aes_p8";		# symbol prefix of every generated entry point

$sp="r1";			# stack pointer
$vrsave="r12";			# scratch GPR used to save/restore VRSAVE
118
119#########################################################################
{{{	# Key setup procedures						#
# Emits aes_p8_set_encrypt_key(inp, bits, out) and
# aes_p8_set_decrypt_key(inp, bits, out).  r3..r5 are the C arguments;
# r6..r8 are scratch (table pointer, loop count, round count).
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# v0..v6: working vectors of the key expansion proper.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# v7..v11: staging vectors for the unaligned store of each round key
# (classic lvx/vperm/vsel/stvx read-modify-write sequence).
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# Everything inside the heredoc is assembly text passed verbatim to
# ppc-xlate.pl; "?"-prefixed mnemonics and "le?"/"?rev"/"?asis" tags are
# xlate directives resolved per target endianness, not Perl.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
# NOTE(review): set_decrypt_key runs set_encrypt_key and then swaps the
# round keys end-for-end in place -- decryption uses the same schedule
# traversed in reverse.
}}}
456#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) emits aes_p8_encrypt or aes_p8_decrypt: exactly one
# 16-byte block, unaligned in/out/key tolerated, endian-agnostic.  For
# $dir eq "de" the interpolated $n becomes "n", turning every
# v${n}cipher(last) in the heredoc into vncipher(last).
sub gen_block () {
my $dir = shift;			# "en" or "de"
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));	# C args + key-offset scratch

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
# Instantiate both directions.
&gen_block("en");
&gen_block("de");
}}}
534#########################################################################
535{{{ # CBC en- and decrypt procedures #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));	# C args of aes_p8_cbc_encrypt + scratch
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));	# round-key pair, data block, scratch
# v4..v10: IV, look-ahead input block, and the unaligned-I/O permute
# vectors shared by the encrypt and (scalar) decrypt paths.
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
# aes_p8_cbc_encrypt(inp, out, len, key, ivp, enc): one 16-byte block
# per iteration; lengths below 128 bytes decrypt here too, larger
# decrypts branch to the 8x-interleaved path emitted further down.
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
700#########################################################################
{{	# Optimized CBC decrypt procedure			#
# Register map for the 8x-interleaved decrypt path (body follows in the
# next heredoc): $key_ points at the stack copy of the round keys,
# $x00..$x70 hold the constant offsets 0x00..0x70 used for block loads.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
709
710$code.=<<___;
711.align 5
712_aesp8_cbc_decrypt8x:
713 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
714 li r10,`$FRAME+8*16+15`
715 li r11,`$FRAME+8*16+31`
716 stvx v20,r10,$sp # ABI says so
717 addi r10,r10,32
718 stvx v21,r11,$sp
719 addi r11,r11,32
720 stvx v22,r10,$sp
721 addi r10,r10,32
722 stvx v23,r11,$sp
723 addi r11,r11,32
724 stvx v24,r10,$sp
725 addi r10,r10,32
726 stvx v25,r11,$sp
727 addi r11,r11,32
728 stvx v26,r10,$sp
729 addi r10,r10,32
730 stvx v27,r11,$sp
731 addi r11,r11,32
732 stvx v28,r10,$sp
733 addi r10,r10,32
734 stvx v29,r11,$sp
735 addi r11,r11,32
736 stvx v30,r10,$sp
737 stvx v31,r11,$sp
738 li r0,-1
739 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
740 li $x10,0x10
741 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
742 li $x20,0x20
743 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
744 li $x30,0x30
745 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
746 li $x40,0x40
747 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
748 li $x50,0x50
749 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
750 li $x60,0x60
751 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
752 li $x70,0x70
753 mtspr 256,r0
754
755 subi $rounds,$rounds,3 # -4 in total
756 subi $len,$len,128 # bias
757
758 lvx $rndkey0,$x00,$key # load key schedule
759 lvx v30,$x10,$key
760 addi $key,$key,0x20
761 lvx v31,$x00,$key
762 ?vperm $rndkey0,$rndkey0,v30,$keyperm
763 addi $key_,$sp,$FRAME+15
764 mtctr $rounds
765
766Load_cbc_dec_key:
767 ?vperm v24,v30,v31,$keyperm
768 lvx v30,$x10,$key
769 addi $key,$key,0x20
770 stvx v24,$x00,$key_ # off-load round[1]
771 ?vperm v25,v31,v30,$keyperm
772 lvx v31,$x00,$key
773 stvx v25,$x10,$key_ # off-load round[2]
774 addi $key_,$key_,0x20
775 bdnz Load_cbc_dec_key
776
777 lvx v26,$x10,$key
778 ?vperm v24,v30,v31,$keyperm
779 lvx v27,$x20,$key
780 stvx v24,$x00,$key_ # off-load round[3]
781 ?vperm v25,v31,v26,$keyperm
782 lvx v28,$x30,$key
783 stvx v25,$x10,$key_ # off-load round[4]
784 addi $key_,$sp,$FRAME+15 # rewind $key_
785 ?vperm v26,v26,v27,$keyperm
786 lvx v29,$x40,$key
787 ?vperm v27,v27,v28,$keyperm
788 lvx v30,$x50,$key
789 ?vperm v28,v28,v29,$keyperm
790 lvx v31,$x60,$key
791 ?vperm v29,v29,v30,$keyperm
792 lvx $out0,$x70,$key # borrow $out0
793 ?vperm v30,v30,v31,$keyperm
794 lvx v24,$x00,$key_ # pre-load round[1]
795 ?vperm v31,v31,$out0,$keyperm
796 lvx v25,$x10,$key_ # pre-load round[2]
797
798 #lvx $inptail,0,$inp # "caller" already did this
799 #addi $inp,$inp,15 # 15 is not typo
800 subi $inp,$inp,15 # undo "caller"
801
802 le?li $idx,8
803 lvx_u $in0,$x00,$inp # load first 8 "words"
804 le?lvsl $inpperm,0,$idx
805 le?vspltisb $tmp,0x0f
806 lvx_u $in1,$x10,$inp
807 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
808 lvx_u $in2,$x20,$inp
809 le?vperm $in0,$in0,$in0,$inpperm
810 lvx_u $in3,$x30,$inp
811 le?vperm $in1,$in1,$in1,$inpperm
812 lvx_u $in4,$x40,$inp
813 le?vperm $in2,$in2,$in2,$inpperm
814 vxor $out0,$in0,$rndkey0
815 lvx_u $in5,$x50,$inp
816 le?vperm $in3,$in3,$in3,$inpperm
817 vxor $out1,$in1,$rndkey0
818 lvx_u $in6,$x60,$inp
819 le?vperm $in4,$in4,$in4,$inpperm
820 vxor $out2,$in2,$rndkey0
821 lvx_u $in7,$x70,$inp
822 addi $inp,$inp,0x80
823 le?vperm $in5,$in5,$in5,$inpperm
824 vxor $out3,$in3,$rndkey0
825 le?vperm $in6,$in6,$in6,$inpperm
826 vxor $out4,$in4,$rndkey0
827 le?vperm $in7,$in7,$in7,$inpperm
828 vxor $out5,$in5,$rndkey0
829 vxor $out6,$in6,$rndkey0
830 vxor $out7,$in7,$rndkey0
831
832 mtctr $rounds
833 b Loop_cbc_dec8x
834.align 5
835Loop_cbc_dec8x:
836 vncipher $out0,$out0,v24
837 vncipher $out1,$out1,v24
838 vncipher $out2,$out2,v24
839 vncipher $out3,$out3,v24
840 vncipher $out4,$out4,v24
841 vncipher $out5,$out5,v24
842 vncipher $out6,$out6,v24
843 vncipher $out7,$out7,v24
844 lvx v24,$x20,$key_ # round[3]
845 addi $key_,$key_,0x20
846
847 vncipher $out0,$out0,v25
848 vncipher $out1,$out1,v25
849 vncipher $out2,$out2,v25
850 vncipher $out3,$out3,v25
851 vncipher $out4,$out4,v25
852 vncipher $out5,$out5,v25
853 vncipher $out6,$out6,v25
854 vncipher $out7,$out7,v25
855 lvx v25,$x10,$key_ # round[4]
856 bdnz Loop_cbc_dec8x
857
858 subic $len,$len,128 # $len-=128
859 vncipher $out0,$out0,v24
860 vncipher $out1,$out1,v24
861 vncipher $out2,$out2,v24
862 vncipher $out3,$out3,v24
863 vncipher $out4,$out4,v24
864 vncipher $out5,$out5,v24
865 vncipher $out6,$out6,v24
866 vncipher $out7,$out7,v24
867
868 subfe. r0,r0,r0 # borrow?-1:0
869 vncipher $out0,$out0,v25
870 vncipher $out1,$out1,v25
871 vncipher $out2,$out2,v25
872 vncipher $out3,$out3,v25
873 vncipher $out4,$out4,v25
874 vncipher $out5,$out5,v25
875 vncipher $out6,$out6,v25
876 vncipher $out7,$out7,v25
877
878 and r0,r0,$len
879 vncipher $out0,$out0,v26
880 vncipher $out1,$out1,v26
881 vncipher $out2,$out2,v26
882 vncipher $out3,$out3,v26
883 vncipher $out4,$out4,v26
884 vncipher $out5,$out5,v26
885 vncipher $out6,$out6,v26
886 vncipher $out7,$out7,v26
887
888 add $inp,$inp,r0 # $inp is adjusted in such
889 # way that at exit from the
890 # loop inX-in7 are loaded
891 # with last "words"
892 vncipher $out0,$out0,v27
893 vncipher $out1,$out1,v27
894 vncipher $out2,$out2,v27
895 vncipher $out3,$out3,v27
896 vncipher $out4,$out4,v27
897 vncipher $out5,$out5,v27
898 vncipher $out6,$out6,v27
899 vncipher $out7,$out7,v27
900
901 addi $key_,$sp,$FRAME+15 # rewind $key_
902 vncipher $out0,$out0,v28
903 vncipher $out1,$out1,v28
904 vncipher $out2,$out2,v28
905 vncipher $out3,$out3,v28
906 vncipher $out4,$out4,v28
907 vncipher $out5,$out5,v28
908 vncipher $out6,$out6,v28
909 vncipher $out7,$out7,v28
910 lvx v24,$x00,$key_ # re-pre-load round[1]
911
912 vncipher $out0,$out0,v29
913 vncipher $out1,$out1,v29
914 vncipher $out2,$out2,v29
915 vncipher $out3,$out3,v29
916 vncipher $out4,$out4,v29
917 vncipher $out5,$out5,v29
918 vncipher $out6,$out6,v29
919 vncipher $out7,$out7,v29
920 lvx v25,$x10,$key_ # re-pre-load round[2]
921
922 vncipher $out0,$out0,v30
923 vxor $ivec,$ivec,v31 # xor with last round key
924 vncipher $out1,$out1,v30
925 vxor $in0,$in0,v31
926 vncipher $out2,$out2,v30
927 vxor $in1,$in1,v31
928 vncipher $out3,$out3,v30
929 vxor $in2,$in2,v31
930 vncipher $out4,$out4,v30
931 vxor $in3,$in3,v31
932 vncipher $out5,$out5,v30
933 vxor $in4,$in4,v31
934 vncipher $out6,$out6,v30
935 vxor $in5,$in5,v31
936 vncipher $out7,$out7,v30
937 vxor $in6,$in6,v31
938
939 vncipherlast $out0,$out0,$ivec
940 vncipherlast $out1,$out1,$in0
941 lvx_u $in0,$x00,$inp # load next input block
942 vncipherlast $out2,$out2,$in1
943 lvx_u $in1,$x10,$inp
944 vncipherlast $out3,$out3,$in2
945 le?vperm $in0,$in0,$in0,$inpperm
946 lvx_u $in2,$x20,$inp
947 vncipherlast $out4,$out4,$in3
948 le?vperm $in1,$in1,$in1,$inpperm
949 lvx_u $in3,$x30,$inp
950 vncipherlast $out5,$out5,$in4
951 le?vperm $in2,$in2,$in2,$inpperm
952 lvx_u $in4,$x40,$inp
953 vncipherlast $out6,$out6,$in5
954 le?vperm $in3,$in3,$in3,$inpperm
955 lvx_u $in5,$x50,$inp
956 vncipherlast $out7,$out7,$in6
957 le?vperm $in4,$in4,$in4,$inpperm
958 lvx_u $in6,$x60,$inp
959 vmr $ivec,$in7
960 le?vperm $in5,$in5,$in5,$inpperm
961 lvx_u $in7,$x70,$inp
962 addi $inp,$inp,0x80
963
964 le?vperm $out0,$out0,$out0,$inpperm
965 le?vperm $out1,$out1,$out1,$inpperm
966 stvx_u $out0,$x00,$out
967 le?vperm $in6,$in6,$in6,$inpperm
968 vxor $out0,$in0,$rndkey0
969 le?vperm $out2,$out2,$out2,$inpperm
970 stvx_u $out1,$x10,$out
971 le?vperm $in7,$in7,$in7,$inpperm
972 vxor $out1,$in1,$rndkey0
973 le?vperm $out3,$out3,$out3,$inpperm
974 stvx_u $out2,$x20,$out
975 vxor $out2,$in2,$rndkey0
976 le?vperm $out4,$out4,$out4,$inpperm
977 stvx_u $out3,$x30,$out
978 vxor $out3,$in3,$rndkey0
979 le?vperm $out5,$out5,$out5,$inpperm
980 stvx_u $out4,$x40,$out
981 vxor $out4,$in4,$rndkey0
982 le?vperm $out6,$out6,$out6,$inpperm
983 stvx_u $out5,$x50,$out
984 vxor $out5,$in5,$rndkey0
985 le?vperm $out7,$out7,$out7,$inpperm
986 stvx_u $out6,$x60,$out
987 vxor $out6,$in6,$rndkey0
988 stvx_u $out7,$x70,$out
989 addi $out,$out,0x80
990 vxor $out7,$in7,$rndkey0
991
992 mtctr $rounds
993 beq Loop_cbc_dec8x # did $len-=128 borrow?
994
995 addic. $len,$len,128
996 beq Lcbc_dec8x_done
997 nop
998 nop
999
1000Loop_cbc_dec8x_tail: # up to 7 "words" tail...
1001 vncipher $out1,$out1,v24
1002 vncipher $out2,$out2,v24
1003 vncipher $out3,$out3,v24
1004 vncipher $out4,$out4,v24
1005 vncipher $out5,$out5,v24
1006 vncipher $out6,$out6,v24
1007 vncipher $out7,$out7,v24
1008 lvx v24,$x20,$key_ # round[3]
1009 addi $key_,$key_,0x20
1010
1011 vncipher $out1,$out1,v25
1012 vncipher $out2,$out2,v25
1013 vncipher $out3,$out3,v25
1014 vncipher $out4,$out4,v25
1015 vncipher $out5,$out5,v25
1016 vncipher $out6,$out6,v25
1017 vncipher $out7,$out7,v25
1018 lvx v25,$x10,$key_ # round[4]
1019 bdnz Loop_cbc_dec8x_tail
1020
1021 vncipher $out1,$out1,v24
1022 vncipher $out2,$out2,v24
1023 vncipher $out3,$out3,v24
1024 vncipher $out4,$out4,v24
1025 vncipher $out5,$out5,v24
1026 vncipher $out6,$out6,v24
1027 vncipher $out7,$out7,v24
1028
1029 vncipher $out1,$out1,v25
1030 vncipher $out2,$out2,v25
1031 vncipher $out3,$out3,v25
1032 vncipher $out4,$out4,v25
1033 vncipher $out5,$out5,v25
1034 vncipher $out6,$out6,v25
1035 vncipher $out7,$out7,v25
1036
1037 vncipher $out1,$out1,v26
1038 vncipher $out2,$out2,v26
1039 vncipher $out3,$out3,v26
1040 vncipher $out4,$out4,v26
1041 vncipher $out5,$out5,v26
1042 vncipher $out6,$out6,v26
1043 vncipher $out7,$out7,v26
1044
1045 vncipher $out1,$out1,v27
1046 vncipher $out2,$out2,v27
1047 vncipher $out3,$out3,v27
1048 vncipher $out4,$out4,v27
1049 vncipher $out5,$out5,v27
1050 vncipher $out6,$out6,v27
1051 vncipher $out7,$out7,v27
1052
1053 vncipher $out1,$out1,v28
1054 vncipher $out2,$out2,v28
1055 vncipher $out3,$out3,v28
1056 vncipher $out4,$out4,v28
1057 vncipher $out5,$out5,v28
1058 vncipher $out6,$out6,v28
1059 vncipher $out7,$out7,v28
1060
1061 vncipher $out1,$out1,v29
1062 vncipher $out2,$out2,v29
1063 vncipher $out3,$out3,v29
1064 vncipher $out4,$out4,v29
1065 vncipher $out5,$out5,v29
1066 vncipher $out6,$out6,v29
1067 vncipher $out7,$out7,v29
1068
1069 vncipher $out1,$out1,v30
1070 vxor $ivec,$ivec,v31 # last round key
1071 vncipher $out2,$out2,v30
1072 vxor $in1,$in1,v31
1073 vncipher $out3,$out3,v30
1074 vxor $in2,$in2,v31
1075 vncipher $out4,$out4,v30
1076 vxor $in3,$in3,v31
1077 vncipher $out5,$out5,v30
1078 vxor $in4,$in4,v31
1079 vncipher $out6,$out6,v30
1080 vxor $in5,$in5,v31
1081 vncipher $out7,$out7,v30
1082 vxor $in6,$in6,v31
1083
1084 cmplwi $len,32 # switch($len)
1085 blt Lcbc_dec8x_one
1086 nop
1087 beq Lcbc_dec8x_two
1088 cmplwi $len,64
1089 blt Lcbc_dec8x_three
1090 nop
1091 beq Lcbc_dec8x_four
1092 cmplwi $len,96
1093 blt Lcbc_dec8x_five
1094 nop
1095 beq Lcbc_dec8x_six
1096
1097Lcbc_dec8x_seven:
1098 vncipherlast $out1,$out1,$ivec
1099 vncipherlast $out2,$out2,$in1
1100 vncipherlast $out3,$out3,$in2
1101 vncipherlast $out4,$out4,$in3
1102 vncipherlast $out5,$out5,$in4
1103 vncipherlast $out6,$out6,$in5
1104 vncipherlast $out7,$out7,$in6
1105 vmr $ivec,$in7
1106
1107 le?vperm $out1,$out1,$out1,$inpperm
1108 le?vperm $out2,$out2,$out2,$inpperm
1109 stvx_u $out1,$x00,$out
1110 le?vperm $out3,$out3,$out3,$inpperm
1111 stvx_u $out2,$x10,$out
1112 le?vperm $out4,$out4,$out4,$inpperm
1113 stvx_u $out3,$x20,$out
1114 le?vperm $out5,$out5,$out5,$inpperm
1115 stvx_u $out4,$x30,$out
1116 le?vperm $out6,$out6,$out6,$inpperm
1117 stvx_u $out5,$x40,$out
1118 le?vperm $out7,$out7,$out7,$inpperm
1119 stvx_u $out6,$x50,$out
1120 stvx_u $out7,$x60,$out
1121 addi $out,$out,0x70
1122 b Lcbc_dec8x_done
1123
1124.align 5
1125Lcbc_dec8x_six:
1126 vncipherlast $out2,$out2,$ivec
1127 vncipherlast $out3,$out3,$in2
1128 vncipherlast $out4,$out4,$in3
1129 vncipherlast $out5,$out5,$in4
1130 vncipherlast $out6,$out6,$in5
1131 vncipherlast $out7,$out7,$in6
1132 vmr $ivec,$in7
1133
1134 le?vperm $out2,$out2,$out2,$inpperm
1135 le?vperm $out3,$out3,$out3,$inpperm
1136 stvx_u $out2,$x00,$out
1137 le?vperm $out4,$out4,$out4,$inpperm
1138 stvx_u $out3,$x10,$out
1139 le?vperm $out5,$out5,$out5,$inpperm
1140 stvx_u $out4,$x20,$out
1141 le?vperm $out6,$out6,$out6,$inpperm
1142 stvx_u $out5,$x30,$out
1143 le?vperm $out7,$out7,$out7,$inpperm
1144 stvx_u $out6,$x40,$out
1145 stvx_u $out7,$x50,$out
1146 addi $out,$out,0x60
1147 b Lcbc_dec8x_done
1148
1149.align 5
1150Lcbc_dec8x_five:
1151 vncipherlast $out3,$out3,$ivec
1152 vncipherlast $out4,$out4,$in3
1153 vncipherlast $out5,$out5,$in4
1154 vncipherlast $out6,$out6,$in5
1155 vncipherlast $out7,$out7,$in6
1156 vmr $ivec,$in7
1157
1158 le?vperm $out3,$out3,$out3,$inpperm
1159 le?vperm $out4,$out4,$out4,$inpperm
1160 stvx_u $out3,$x00,$out
1161 le?vperm $out5,$out5,$out5,$inpperm
1162 stvx_u $out4,$x10,$out
1163 le?vperm $out6,$out6,$out6,$inpperm
1164 stvx_u $out5,$x20,$out
1165 le?vperm $out7,$out7,$out7,$inpperm
1166 stvx_u $out6,$x30,$out
1167 stvx_u $out7,$x40,$out
1168 addi $out,$out,0x50
1169 b Lcbc_dec8x_done
1170
1171.align 5
1172Lcbc_dec8x_four:
1173 vncipherlast $out4,$out4,$ivec
1174 vncipherlast $out5,$out5,$in4
1175 vncipherlast $out6,$out6,$in5
1176 vncipherlast $out7,$out7,$in6
1177 vmr $ivec,$in7
1178
1179 le?vperm $out4,$out4,$out4,$inpperm
1180 le?vperm $out5,$out5,$out5,$inpperm
1181 stvx_u $out4,$x00,$out
1182 le?vperm $out6,$out6,$out6,$inpperm
1183 stvx_u $out5,$x10,$out
1184 le?vperm $out7,$out7,$out7,$inpperm
1185 stvx_u $out6,$x20,$out
1186 stvx_u $out7,$x30,$out
1187 addi $out,$out,0x40
1188 b Lcbc_dec8x_done
1189
1190.align 5
1191Lcbc_dec8x_three:
1192 vncipherlast $out5,$out5,$ivec
1193 vncipherlast $out6,$out6,$in5
1194 vncipherlast $out7,$out7,$in6
1195 vmr $ivec,$in7
1196
1197 le?vperm $out5,$out5,$out5,$inpperm
1198 le?vperm $out6,$out6,$out6,$inpperm
1199 stvx_u $out5,$x00,$out
1200 le?vperm $out7,$out7,$out7,$inpperm
1201 stvx_u $out6,$x10,$out
1202 stvx_u $out7,$x20,$out
1203 addi $out,$out,0x30
1204 b Lcbc_dec8x_done
1205
1206.align 5
1207Lcbc_dec8x_two:
1208 vncipherlast $out6,$out6,$ivec
1209 vncipherlast $out7,$out7,$in6
1210 vmr $ivec,$in7
1211
1212 le?vperm $out6,$out6,$out6,$inpperm
1213 le?vperm $out7,$out7,$out7,$inpperm
1214 stvx_u $out6,$x00,$out
1215 stvx_u $out7,$x10,$out
1216 addi $out,$out,0x20
1217 b Lcbc_dec8x_done
1218
1219.align 5
1220Lcbc_dec8x_one:
1221 vncipherlast $out7,$out7,$ivec
1222 vmr $ivec,$in7
1223
1224 le?vperm $out7,$out7,$out7,$inpperm
1225 stvx_u $out7,0,$out
1226 addi $out,$out,0x10
1227
1228Lcbc_dec8x_done:
1229 le?vperm $ivec,$ivec,$ivec,$inpperm
1230 stvx_u $ivec,0,$ivp # write [unaligned] iv
1231
1232 li r10,`$FRAME+15`
1233 li r11,`$FRAME+31`
1234 stvx $inpperm,r10,$sp # wipe copies of round keys
1235 addi r10,r10,32
1236 stvx $inpperm,r11,$sp
1237 addi r11,r11,32
1238 stvx $inpperm,r10,$sp
1239 addi r10,r10,32
1240 stvx $inpperm,r11,$sp
1241 addi r11,r11,32
1242 stvx $inpperm,r10,$sp
1243 addi r10,r10,32
1244 stvx $inpperm,r11,$sp
1245 addi r11,r11,32
1246 stvx $inpperm,r10,$sp
1247 addi r10,r10,32
1248 stvx $inpperm,r11,$sp
1249 addi r11,r11,32
1250
1251 mtspr 256,$vrsave
1252 lvx v20,r10,$sp # ABI says so
1253 addi r10,r10,32
1254 lvx v21,r11,$sp
1255 addi r11,r11,32
1256 lvx v22,r10,$sp
1257 addi r10,r10,32
1258 lvx v23,r11,$sp
1259 addi r11,r11,32
1260 lvx v24,r10,$sp
1261 addi r10,r10,32
1262 lvx v25,r11,$sp
1263 addi r11,r11,32
1264 lvx v26,r10,$sp
1265 addi r10,r10,32
1266 lvx v27,r11,$sp
1267 addi r11,r11,32
1268 lvx v28,r10,$sp
1269 addi r10,r10,32
1270 lvx v29,r11,$sp
1271 addi r11,r11,32
1272 lvx v30,r10,$sp
1273 lvx v31,r11,$sp
1274 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1275 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1276 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1277 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1278 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1279 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1280 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1281 blr
1282 .long 0
1283 .byte 0,12,0x14,0,0x80,6,6,0
1284 .long 0
1285.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1286___
1287}} }}}
1288
1289#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
# GPR map (ABI argument order): $inp=r3 input, $out=r4 output,
# $len=r5 block count, $key=r6 key schedule, $ivp=r7 IV/counter,
# plus scratch $x10=r8, $rounds=r9, $idx=r10.
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
# VR map: v0-v3 round-key/data pipeline, v4-v11 IV, unaligned-access
# permutes and the 128-bit increment constant $one.
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
	map("v$_",(4..11));
# $dat aliases $tmp (v3); they are never live simultaneously.
my $dat=$tmp;
# Single/few-block CTR path. Arguments per the register map above;
# $len is a whole-block count (no partial blocks here). Handles
# unaligned input/output via lvsl/lvsr + vperm; branches to the bulk
# 8x routine when $len >= 8.
# NOTE(review): upstream CRYPTOGAMS places ".align 5" after this
# .globl — absent here; confirm intentional for the kernel build.
$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
	${UCMP}i $len,1
	bltlr-

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	li $idx,15
	vxor $rndkey0,$rndkey0,$rndkey0
	le?vspltisb $tmp,0x0f

	lvx $ivec,0,$ivp # load [unaligned] iv
	lvsl $inpperm,0,$ivp
	lvx $inptail,$idx,$ivp
	vspltisb $one,1
	le?vxor $inpperm,$inpperm,$tmp
	vperm $ivec,$ivec,$inptail,$inpperm
	vsldoi $one,$rndkey0,$one,1 # $one = 128-bit value 1 (increment)

	neg r11,$inp
	?lvsl $keyperm,0,$key # prepare for unaligned key
	lwz $rounds,240($key)

	lvsr $inpperm,0,r11 # prepare for unaligned load
	lvx $inptail,0,$inp
	addi $inp,$inp,15 # 15 is not typo
	le?vxor $inpperm,$inpperm,$tmp

	srwi $rounds,$rounds,1 # loop does two rounds per iteration
	li $idx,16
	subi $rounds,$rounds,1

	${UCMP}i $len,8
	bge _aesp8_ctr32_encrypt8x # bulk path for >= 8 blocks

	?lvsr $outperm,0,$out # prepare for unaligned store
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp

	lvx $rndkey0,0,$key
	mtctr $rounds
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$ivec,$rndkey0 # counter ^ round[0]
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	b Loop_ctr32_enc

.align 5
Loop_ctr32_enc:
	# Two AES rounds per iteration; round keys stream through the
	# $rndkey0/$rndkey1 rotating pair, aligned on the fly via $keyperm.
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_ctr32_enc

	vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
	vmr $dat,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	subic. $len,$len,1 # blocks--

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	vperm $dat,$dat,$inptail,$inpperm # align input block
	li $idx,16
	?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx $rndkey0,0,$key
	vxor $dat,$dat,$rndkey1 # last round key
	vcipherlast $inout,$inout,$dat # final round folds in plaintext

	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	vperm $inout,$inout,$inout,$outperm
	vsel $dat,$outhead,$inout,$outmask # merge with previous store head
	mtctr $rounds
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr $outhead,$inout
	vxor $inout,$ivec,$rndkey0 # start next block: counter ^ round[0]
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	stvx $dat,0,$out
	addi $out,$out,16
	bne Loop_ctr32_enc

	# Flush the last partial store vector (unaligned-store tail fixup).
	addi $out,$out,-1
	lvx $inout,0,$out # redundant in aligned case
	vsel $inout,$outhead,$inout,$outmask
	stvx $inout,0,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,6,0
	.long 0
___
1421#########################################################################
1422{{ # Optimized CTR procedure #
1423my $key_="r11";
1424my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1425my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1426my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1427my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1428 # v26-v31 last 6 round keys
1429my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1430my ($two,$three,$four)=($outhead,$outperm,$outmask);
1431
# Bulk 8x entry: allocate a frame, save non-volatile v20-v31 and
# r26-r31 plus vrsave (ABI), then copy the aligned key schedule onto
# the stack ($key_ points into the frame) with the last rounds kept
# resident in v26-v31.
$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
	$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li r10,`$FRAME+8*16+15`
	li r11,`$FRAME+8*16+31`
	stvx v20,r10,$sp # ABI says so
	addi r10,r10,32
	stvx v21,r11,$sp
	addi r11,r11,32
	stvx v22,r10,$sp
	addi r10,r10,32
	stvx v23,r11,$sp
	addi r11,r11,32
	stvx v24,r10,$sp
	addi r10,r10,32
	stvx v25,r11,$sp
	addi r11,r11,32
	stvx v26,r10,$sp
	addi r10,r10,32
	stvx v27,r11,$sp
	addi r11,r11,32
	stvx v28,r10,$sp
	addi r10,r10,32
	stvx v29,r11,$sp
	addi r11,r11,32
	stvx v30,r10,$sp
	stvx v31,r11,$sp
	li r0,-1
	stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
	li $x10,0x10
	$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li $x20,0x20
	$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li $x30,0x30
	$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li $x40,0x40
	$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li $x50,0x50
	$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li $x60,0x60
	$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li $x70,0x70
	mtspr 256,r0 # enable all vector registers

	subi $rounds,$rounds,3 # -4 in total

	lvx $rndkey0,$x00,$key # load key schedule
	lvx v30,$x10,$key
	addi $key,$key,0x20
	lvx v31,$x00,$key
	?vperm $rndkey0,$rndkey0,v30,$keyperm
	addi $key_,$sp,$FRAME+15
	mtctr $rounds

Load_ctr32_enc_key:
	# Align middle round keys and stage them on the stack two at a time.
	?vperm v24,v30,v31,$keyperm
	lvx v30,$x10,$key
	addi $key,$key,0x20
	stvx v24,$x00,$key_ # off-load round[1]
	?vperm v25,v31,v30,$keyperm
	lvx v31,$x00,$key
	stvx v25,$x10,$key_ # off-load round[2]
	addi $key_,$key_,0x20
	bdnz Load_ctr32_enc_key

	lvx v26,$x10,$key
	?vperm v24,v30,v31,$keyperm
	lvx v27,$x20,$key
	stvx v24,$x00,$key_ # off-load round[3]
	?vperm v25,v31,v26,$keyperm
	lvx v28,$x30,$key
	stvx v25,$x10,$key_ # off-load round[4]
	addi $key_,$sp,$FRAME+15 # rewind $key_
	?vperm v26,v26,v27,$keyperm
	lvx v29,$x40,$key
	?vperm v27,v27,v28,$keyperm
	lvx v30,$x50,$key
	?vperm v28,v28,v29,$keyperm
	lvx v31,$x60,$key
	?vperm v29,v29,v30,$keyperm
	lvx $out0,$x70,$key # borrow $out0
	?vperm v30,v30,v31,$keyperm
	lvx v24,$x00,$key_ # pre-load round[1]
	?vperm v31,v31,$out0,$keyperm
	lvx v25,$x10,$key_ # pre-load round[2]
	# Build 8 consecutive counter values in $out0..$out7 (all adds are
	# 128-bit vadduqm per the kernel CTR semantics), xor each with
	# round key 0, and leave the 9th counter in $ivec for the restart.
	vadduqm $two,$one,$one
	subi $inp,$inp,15 # undo "caller"
	$SHL $len,$len,4 # convert block count to bytes

	vadduqm $out1,$ivec,$one # counter values ...
	vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
	vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
	le?li $idx,8
	vadduqm $out3,$out1,$two
	vxor $out1,$out1,$rndkey0
	le?lvsl $inpperm,0,$idx
	vadduqm $out4,$out2,$two
	vxor $out2,$out2,$rndkey0
	le?vspltisb $tmp,0x0f
	vadduqm $out5,$out3,$two
	vxor $out3,$out3,$rndkey0
	le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
	vadduqm $out6,$out4,$two
	vxor $out4,$out4,$rndkey0
	vadduqm $out7,$out5,$two
	vxor $out5,$out5,$rndkey0
	vadduqm $ivec,$out6,$two # next counter value
	vxor $out6,$out6,$rndkey0
	vxor $out7,$out7,$rndkey0

	mtctr $rounds
	b Loop_ctr32_enc8x
.align 5
Loop_ctr32_enc8x:
	# Middle rounds, 8 lanes wide, two rounds per iteration with
	# v24/v25 refilled from the stack copy of the key schedule.
	vcipher $out0,$out0,v24
	vcipher $out1,$out1,v24
	vcipher $out2,$out2,v24
	vcipher $out3,$out3,v24
	vcipher $out4,$out4,v24
	vcipher $out5,$out5,v24
	vcipher $out6,$out6,v24
	vcipher $out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx v24,$x20,$key_ # round[3]
	addi $key_,$key_,0x20

	vcipher $out0,$out0,v25
	vcipher $out1,$out1,v25
	vcipher $out2,$out2,v25
	vcipher $out3,$out3,v25
	vcipher $out4,$out4,v25
	vcipher $out5,$out5,v25
	vcipher $out6,$out6,v25
	vcipher $out7,$out7,v25
	lvx v25,$x10,$key_ # round[4]
	bdnz Loop_ctr32_enc8x
1570
	# Final 6 rounds (v24..v30 resident), interleaved with loading and
	# byte-swapping the next 8 input blocks and with the borrow-mask
	# bookkeeping that decides whether this is the last full pass.
	subic r11,$len,256 # $len-256, borrow $key_
	vcipher $out0,$out0,v24
	vcipher $out1,$out1,v24
	vcipher $out2,$out2,v24
	vcipher $out3,$out3,v24
	vcipher $out4,$out4,v24
	vcipher $out5,$out5,v24
	vcipher $out6,$out6,v24
	vcipher $out7,$out7,v24

	subfe r0,r0,r0 # borrow?-1:0
	vcipher $out0,$out0,v25
	vcipher $out1,$out1,v25
	vcipher $out2,$out2,v25
	vcipher $out3,$out3,v25
	vcipher $out4,$out4,v25
	vcipher $out5,$out5,v25
	vcipher $out6,$out6,v25
	vcipher $out7,$out7,v25

	# r0 = ($len<256) ? $len-256 : 0 — later added to $inp so the
	# final-iteration loads do not run past the end of the input.
	and r0,r0,r11
	addi $key_,$sp,$FRAME+15 # rewind $key_
	vcipher $out0,$out0,v26
	vcipher $out1,$out1,v26
	vcipher $out2,$out2,v26
	vcipher $out3,$out3,v26
	vcipher $out4,$out4,v26
	vcipher $out5,$out5,v26
	vcipher $out6,$out6,v26
	vcipher $out7,$out7,v26
	lvx v24,$x00,$key_ # re-pre-load round[1]

	subic $len,$len,129 # $len-=129
	vcipher $out0,$out0,v27
	addi $len,$len,1 # $len-=128 really
	vcipher $out1,$out1,v27
	vcipher $out2,$out2,v27
	vcipher $out3,$out3,v27
	vcipher $out4,$out4,v27
	vcipher $out5,$out5,v27
	vcipher $out6,$out6,v27
	vcipher $out7,$out7,v27
	lvx v25,$x10,$key_ # re-pre-load round[2]

	vcipher $out0,$out0,v28
	lvx_u $in0,$x00,$inp # load input
	vcipher $out1,$out1,v28
	lvx_u $in1,$x10,$inp
	vcipher $out2,$out2,v28
	lvx_u $in2,$x20,$inp
	vcipher $out3,$out3,v28
	lvx_u $in3,$x30,$inp
	vcipher $out4,$out4,v28
	lvx_u $in4,$x40,$inp
	vcipher $out5,$out5,v28
	lvx_u $in5,$x50,$inp
	vcipher $out6,$out6,v28
	lvx_u $in6,$x60,$inp
	vcipher $out7,$out7,v28
	lvx_u $in7,$x70,$inp
	addi $inp,$inp,0x80

	vcipher $out0,$out0,v29
	le?vperm $in0,$in0,$in0,$inpperm
	vcipher $out1,$out1,v29
	le?vperm $in1,$in1,$in1,$inpperm
	vcipher $out2,$out2,v29
	le?vperm $in2,$in2,$in2,$inpperm
	vcipher $out3,$out3,v29
	le?vperm $in3,$in3,$in3,$inpperm
	vcipher $out4,$out4,v29
	le?vperm $in4,$in4,$in4,$inpperm
	vcipher $out5,$out5,v29
	le?vperm $in5,$in5,$in5,$inpperm
	vcipher $out6,$out6,v29
	le?vperm $in6,$in6,$in6,$inpperm
	vcipher $out7,$out7,v29
	le?vperm $in7,$in7,$in7,$inpperm

	add $inp,$inp,r0 # $inp is adjusted in such
	# way that at exit from the
	# loop inX-in7 are loaded
	# with last "words"
	subfe. r0,r0,r0 # borrow?-1:0
	vcipher $out0,$out0,v30
	vxor $in0,$in0,v31 # xor with last round key
	vcipher $out1,$out1,v30
	vxor $in1,$in1,v31
	vcipher $out2,$out2,v30
	vxor $in2,$in2,v31
	vcipher $out3,$out3,v30
	vxor $in3,$in3,v31
	vcipher $out4,$out4,v30
	vxor $in4,$in4,v31
	vcipher $out5,$out5,v30
	vxor $in5,$in5,v31
	vcipher $out6,$out6,v30
	vxor $in6,$in6,v31
	vcipher $out7,$out7,v30
	vxor $in7,$in7,v31

	bne Lctr32_enc8x_break # did $len-129 borrow?
1673
	# Full 8-block pass: final round folds the (keyed) plaintext in via
	# vcipherlast, interleaved with computing the next 8 counter values
	# and their round[0] xor, then stores and loops back to the middle.
	vcipherlast $in0,$out0,$in0
	vcipherlast $in1,$out1,$in1
	vadduqm $out1,$ivec,$one # counter values ...
	vcipherlast $in2,$out2,$in2
	vadduqm $out2,$ivec,$two
	vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
	vcipherlast $in3,$out3,$in3
	vadduqm $out3,$out1,$two
	vxor $out1,$out1,$rndkey0
	vcipherlast $in4,$out4,$in4
	vadduqm $out4,$out2,$two
	vxor $out2,$out2,$rndkey0
	vcipherlast $in5,$out5,$in5
	vadduqm $out5,$out3,$two
	vxor $out3,$out3,$rndkey0
	vcipherlast $in6,$out6,$in6
	vadduqm $out6,$out4,$two
	vxor $out4,$out4,$rndkey0
	vcipherlast $in7,$out7,$in7
	vadduqm $out7,$out5,$two
	vxor $out5,$out5,$rndkey0
	le?vperm $in0,$in0,$in0,$inpperm
	vadduqm $ivec,$out6,$two # next counter value
	vxor $out6,$out6,$rndkey0
	le?vperm $in1,$in1,$in1,$inpperm
	vxor $out7,$out7,$rndkey0
	mtctr $rounds

	vcipher $out0,$out0,v24
	stvx_u $in0,$x00,$out
	le?vperm $in2,$in2,$in2,$inpperm
	vcipher $out1,$out1,v24
	stvx_u $in1,$x10,$out
	le?vperm $in3,$in3,$in3,$inpperm
	vcipher $out2,$out2,v24
	stvx_u $in2,$x20,$out
	le?vperm $in4,$in4,$in4,$inpperm
	vcipher $out3,$out3,v24
	stvx_u $in3,$x30,$out
	le?vperm $in5,$in5,$in5,$inpperm
	vcipher $out4,$out4,v24
	stvx_u $in4,$x40,$out
	le?vperm $in6,$in6,$in6,$inpperm
	vcipher $out5,$out5,v24
	stvx_u $in5,$x50,$out
	le?vperm $in7,$in7,$in7,$inpperm
	vcipher $out6,$out6,v24
	stvx_u $in6,$x60,$out
	vcipher $out7,$out7,v24
	stvx_u $in7,$x70,$out
	addi $out,$out,0x80

	b Loop_ctr32_enc8x_middle
1727
.align 5
Lctr32_enc8x_break:
	# Tail dispatch: $len is now (remaining_bytes - 128), so it is
	# -0x70..0x00 for 1..8 remaining blocks; compare/branch to the
	# matching partial-store case (nops keep branch targets aligned).
	cmpwi $len,-0x60
	blt Lctr32_enc8x_one
	nop
	beq Lctr32_enc8x_two
	cmpwi $len,-0x40
	blt Lctr32_enc8x_three
	nop
	beq Lctr32_enc8x_four
	cmpwi $len,-0x20
	blt Lctr32_enc8x_five
	nop
	beq Lctr32_enc8x_six
	cmpwi $len,0x00
	blt Lctr32_enc8x_seven
1744
# Tail cases for N = 8..1 remaining blocks. The last N inputs always
# end up in $in(8-N)..$in7 (see the $inp adjustment in the main loop),
# and combine with keystream lanes $out0..$out(N-1); results are
# byte-swapped on LE and stored with stvx_u before jumping to done.
Lctr32_enc8x_eight:
	vcipherlast $out0,$out0,$in0
	vcipherlast $out1,$out1,$in1
	vcipherlast $out2,$out2,$in2
	vcipherlast $out3,$out3,$in3
	vcipherlast $out4,$out4,$in4
	vcipherlast $out5,$out5,$in5
	vcipherlast $out6,$out6,$in6
	vcipherlast $out7,$out7,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x40,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x50,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x60,$out
	stvx_u $out7,$x70,$out
	addi $out,$out,0x80
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_seven:
	vcipherlast $out0,$out0,$in1
	vcipherlast $out1,$out1,$in2
	vcipherlast $out2,$out2,$in3
	vcipherlast $out3,$out3,$in4
	vcipherlast $out4,$out4,$in5
	vcipherlast $out5,$out5,$in6
	vcipherlast $out6,$out6,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x40,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x50,$out
	stvx_u $out6,$x60,$out
	addi $out,$out,0x70
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_six:
	vcipherlast $out0,$out0,$in2
	vcipherlast $out1,$out1,$in3
	vcipherlast $out2,$out2,$in4
	vcipherlast $out3,$out3,$in5
	vcipherlast $out4,$out4,$in6
	vcipherlast $out5,$out5,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x40,$out
	stvx_u $out5,$x50,$out
	addi $out,$out,0x60
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_five:
	vcipherlast $out0,$out0,$in3
	vcipherlast $out1,$out1,$in4
	vcipherlast $out2,$out2,$in5
	vcipherlast $out3,$out3,$in6
	vcipherlast $out4,$out4,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	stvx_u $out4,$x40,$out
	addi $out,$out,0x50
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_four:
	vcipherlast $out0,$out0,$in4
	vcipherlast $out1,$out1,$in5
	vcipherlast $out2,$out2,$in6
	vcipherlast $out3,$out3,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	stvx_u $out3,$x30,$out
	addi $out,$out,0x40
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_three:
	vcipherlast $out0,$out0,$in5
	vcipherlast $out1,$out1,$in6
	vcipherlast $out2,$out2,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	stvx_u $out2,$x20,$out
	addi $out,$out,0x30
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_two:
	vcipherlast $out0,$out0,$in6
	vcipherlast $out1,$out1,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	stvx_u $out1,$x10,$out
	addi $out,$out,0x20
	b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_one:
	vcipherlast $out0,$out0,$in7

	le?vperm $out0,$out0,$out0,$inpperm
	stvx_u $out0,0,$out
	addi $out,$out,0x10
1898
Lctr32_enc8x_done:
	# Epilogue: overwrite the stack copies of the round keys with
	# $inpperm (non-secret) before restoring non-volatile registers
	# and tearing down the frame.
	li r10,`$FRAME+15`
	li r11,`$FRAME+31`
	stvx $inpperm,r10,$sp # wipe copies of round keys
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32

	mtspr 256,$vrsave
	lvx v20,r10,$sp # ABI says so
	addi r10,r10,32
	lvx v21,r11,$sp
	addi r11,r11,32
	lvx v22,r10,$sp
	addi r10,r10,32
	lvx v23,r11,$sp
	addi r11,r11,32
	lvx v24,r10,$sp
	addi r10,r10,32
	lvx v25,r11,$sp
	addi r11,r11,32
	lvx v26,r10,$sp
	addi r10,r10,32
	lvx v27,r11,$sp
	addi r11,r11,32
	lvx v28,r10,$sp
	addi r10,r10,32
	lvx v29,r11,$sp
	addi r11,r11,32
	lvx v30,r10,$sp
	lvx v31,r11,$sp
	$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long 0
	.byte 0,12,0x14,0,0x80,6,6,0
	.long 0
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}
1956
1957#########################################################################
1958{{{ # XTS procedures #
1959# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1960# const AES_KEY *key1, const AES_KEY *key2, #
1961# [const] unsigned char iv[16]); #
1962# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1963# input tweak value is assumed to be encrypted already, and last tweak #
1964# value, one suitable for consecutive call on same chunk of data, is #
1965# written back to original buffer. In addition, in "tweak chaining" #
1966# mode only complete input blocks are processed. #
1967
1968my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1969my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1970my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1971my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1972my $taillen = $key2;
1973
1974 ($inp,$idx) = ($idx,$inp); # reassign
1975
1976$code.=<<___;
1977.globl .${prefix}_xts_encrypt
1978 mr $inp,r3 # reassign
1979 li r3,-1
1980 ${UCMP}i $len,16
1981 bltlr-
1982
1983 lis r0,0xfff0
1984 mfspr r12,256 # save vrsave
1985 li r11,0
1986 mtspr 256,r0
1987
1988 vspltisb $seven,0x07 # 0x070707..07
1989 le?lvsl $leperm,r11,r11
1990 le?vspltisb $tmp,0x0f
1991 le?vxor $leperm,$leperm,$seven
1992
1993 li $idx,15
1994 lvx $tweak,0,$ivp # load [unaligned] iv
1995 lvsl $inpperm,0,$ivp
1996 lvx $inptail,$idx,$ivp
1997 le?vxor $inpperm,$inpperm,$tmp
1998 vperm $tweak,$tweak,$inptail,$inpperm
1999
2000 neg r11,$inp
2001 lvsr $inpperm,0,r11 # prepare for unaligned load
2002 lvx $inout,0,$inp
2003 addi $inp,$inp,15 # 15 is not typo
2004 le?vxor $inpperm,$inpperm,$tmp
2005
2006 ${UCMP}i $key2,0 # key2==NULL?
2007 beq Lxts_enc_no_key2
2008
2009 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2010 lwz $rounds,240($key2)
2011 srwi $rounds,$rounds,1
2012 subi $rounds,$rounds,1
2013 li $idx,16
2014
2015 lvx $rndkey0,0,$key2
2016 lvx $rndkey1,$idx,$key2
2017 addi $idx,$idx,16
2018 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2019 vxor $tweak,$tweak,$rndkey0
2020 lvx $rndkey0,$idx,$key2
2021 addi $idx,$idx,16
2022 mtctr $rounds
2023
2024Ltweak_xts_enc:
2025 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2026 vcipher $tweak,$tweak,$rndkey1
2027 lvx $rndkey1,$idx,$key2
2028 addi $idx,$idx,16
2029 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2030 vcipher $tweak,$tweak,$rndkey0
2031 lvx $rndkey0,$idx,$key2
2032 addi $idx,$idx,16
2033 bdnz Ltweak_xts_enc
2034
2035 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2036 vcipher $tweak,$tweak,$rndkey1
2037 lvx $rndkey1,$idx,$key2
2038 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2039 vcipherlast $tweak,$tweak,$rndkey0
2040
2041 li $ivp,0 # don't chain the tweak
2042 b Lxts_enc
2043
2044Lxts_enc_no_key2:
2045 li $idx,-16
2046 and $len,$len,$idx # in "tweak chaining"
2047 # mode only complete
2048 # blocks are processed
2049Lxts_enc:
2050 lvx $inptail,0,$inp
2051 addi $inp,$inp,16
2052
2053 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2054 lwz $rounds,240($key1)
2055 srwi $rounds,$rounds,1
2056 subi $rounds,$rounds,1
2057 li $idx,16
2058
2059 vslb $eighty7,$seven,$seven # 0x808080..80
2060 vor $eighty7,$eighty7,$seven # 0x878787..87
2061 vspltisb $tmp,1 # 0x010101..01
2062 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2063
2064 ${UCMP}i $len,96
2065 bge _aesp8_xts_encrypt6x
2066
2067 andi. $taillen,$len,15
2068 subic r0,$len,32
2069 subi $taillen,$taillen,16
2070 subfe r0,r0,r0
2071 and r0,r0,$taillen
2072 add $inp,$inp,r0
2073
2074 lvx $rndkey0,0,$key1
2075 lvx $rndkey1,$idx,$key1
2076 addi $idx,$idx,16
2077 vperm $inout,$inout,$inptail,$inpperm
2078 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2079 vxor $inout,$inout,$tweak
2080 vxor $inout,$inout,$rndkey0
2081 lvx $rndkey0,$idx,$key1
2082 addi $idx,$idx,16
2083 mtctr $rounds
2084 b Loop_xts_enc
2085
2086.align 5
2087Loop_xts_enc:
2088 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2089 vcipher $inout,$inout,$rndkey1
2090 lvx $rndkey1,$idx,$key1
2091 addi $idx,$idx,16
2092 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2093 vcipher $inout,$inout,$rndkey0
2094 lvx $rndkey0,$idx,$key1
2095 addi $idx,$idx,16
2096 bdnz Loop_xts_enc
2097
2098 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2099 vcipher $inout,$inout,$rndkey1
2100 lvx $rndkey1,$idx,$key1
2101 li $idx,16
2102 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2103 vxor $rndkey0,$rndkey0,$tweak
2104 vcipherlast $output,$inout,$rndkey0
2105
2106 le?vperm $tmp,$output,$output,$leperm
2107 be?nop
2108 le?stvx_u $tmp,0,$out
2109 be?stvx_u $output,0,$out
2110 addi $out,$out,16
2111
2112 subic. $len,$len,16
2113 beq Lxts_enc_done
2114
2115 vmr $inout,$inptail
2116 lvx $inptail,0,$inp
2117 addi $inp,$inp,16
2118 lvx $rndkey0,0,$key1
2119 lvx $rndkey1,$idx,$key1
2120 addi $idx,$idx,16
2121
2122 subic r0,$len,32
2123 subfe r0,r0,r0
2124 and r0,r0,$taillen
2125 add $inp,$inp,r0
2126
2127 vsrab $tmp,$tweak,$seven # next tweak value
2128 vaddubm $tweak,$tweak,$tweak
2129 vsldoi $tmp,$tmp,$tmp,15
2130 vand $tmp,$tmp,$eighty7
2131 vxor $tweak,$tweak,$tmp
2132
2133 vperm $inout,$inout,$inptail,$inpperm
2134 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2135 vxor $inout,$inout,$tweak
2136 vxor $output,$output,$rndkey0 # just in case $len<16
2137 vxor $inout,$inout,$rndkey0
2138 lvx $rndkey0,$idx,$key1
2139 addi $idx,$idx,16
2140
2141 mtctr $rounds
2142 ${UCMP}i $len,16
2143 bge Loop_xts_enc
2144
2145 vxor $output,$output,$tweak
2146 lvsr $inpperm,0,$len # $inpperm is no longer needed
2147 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2148 vspltisb $tmp,-1
2149 vperm $inptail,$inptail,$tmp,$inpperm
2150 vsel $inout,$inout,$output,$inptail
2151
2152 subi r11,$out,17
2153 subi $out,$out,16
2154 mtctr $len
2155 li $len,16
2156Loop_xts_enc_steal:
2157 lbzu r0,1(r11)
2158 stb r0,16(r11)
2159 bdnz Loop_xts_enc_steal
2160
2161 mtctr $rounds
2162 b Loop_xts_enc # one more time...
2163
2164Lxts_enc_done:
2165 ${UCMP}i $ivp,0
2166 beq Lxts_enc_ret
2167
2168 vsrab $tmp,$tweak,$seven # next tweak value
2169 vaddubm $tweak,$tweak,$tweak
2170 vsldoi $tmp,$tmp,$tmp,15
2171 vand $tmp,$tmp,$eighty7
2172 vxor $tweak,$tweak,$tmp
2173
2174 le?vperm $tweak,$tweak,$tweak,$leperm
2175 stvx_u $tweak,0,$ivp
2176
2177Lxts_enc_ret:
2178 mtspr 256,r12 # restore vrsave
2179 li r3,0
2180 blr
2181 .long 0
2182 .byte 0,12,0x04,0,0x80,6,6,0
2183 .long 0
2184.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2185
2186.globl .${prefix}_xts_decrypt
2187 mr $inp,r3 # reassign
2188 li r3,-1
2189 ${UCMP}i $len,16
2190 bltlr-
2191
2192 lis r0,0xfff8
2193 mfspr r12,256 # save vrsave
2194 li r11,0
2195 mtspr 256,r0
2196
2197 andi. r0,$len,15
2198 neg r0,r0
2199 andi. r0,r0,16
2200 sub $len,$len,r0
2201
2202 vspltisb $seven,0x07 # 0x070707..07
2203 le?lvsl $leperm,r11,r11
2204 le?vspltisb $tmp,0x0f
2205 le?vxor $leperm,$leperm,$seven
2206
2207 li $idx,15
2208 lvx $tweak,0,$ivp # load [unaligned] iv
2209 lvsl $inpperm,0,$ivp
2210 lvx $inptail,$idx,$ivp
2211 le?vxor $inpperm,$inpperm,$tmp
2212 vperm $tweak,$tweak,$inptail,$inpperm
2213
2214 neg r11,$inp
2215 lvsr $inpperm,0,r11 # prepare for unaligned load
2216 lvx $inout,0,$inp
2217 addi $inp,$inp,15 # 15 is not typo
2218 le?vxor $inpperm,$inpperm,$tmp
2219
2220 ${UCMP}i $key2,0 # key2==NULL?
2221 beq Lxts_dec_no_key2
2222
2223 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2224 lwz $rounds,240($key2)
2225 srwi $rounds,$rounds,1
2226 subi $rounds,$rounds,1
2227 li $idx,16
2228
2229 lvx $rndkey0,0,$key2
2230 lvx $rndkey1,$idx,$key2
2231 addi $idx,$idx,16
2232 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2233 vxor $tweak,$tweak,$rndkey0
2234 lvx $rndkey0,$idx,$key2
2235 addi $idx,$idx,16
2236 mtctr $rounds
2237
2238Ltweak_xts_dec:
2239 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2240 vcipher $tweak,$tweak,$rndkey1
2241 lvx $rndkey1,$idx,$key2
2242 addi $idx,$idx,16
2243 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2244 vcipher $tweak,$tweak,$rndkey0
2245 lvx $rndkey0,$idx,$key2
2246 addi $idx,$idx,16
2247 bdnz Ltweak_xts_dec
2248
2249 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2250 vcipher $tweak,$tweak,$rndkey1
2251 lvx $rndkey1,$idx,$key2
2252 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2253 vcipherlast $tweak,$tweak,$rndkey0
2254
2255 li $ivp,0 # don't chain the tweak
2256 b Lxts_dec
2257
2258Lxts_dec_no_key2:
2259 neg $idx,$len
2260 andi. $idx,$idx,15
2261 add $len,$len,$idx # in "tweak chaining"
2262 # mode only complete
2263 # blocks are processed
2264Lxts_dec:
2265 lvx $inptail,0,$inp
2266 addi $inp,$inp,16
2267
2268 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2269 lwz $rounds,240($key1)
2270 srwi $rounds,$rounds,1
2271 subi $rounds,$rounds,1
2272 li $idx,16
2273
2274 vslb $eighty7,$seven,$seven # 0x808080..80
2275 vor $eighty7,$eighty7,$seven # 0x878787..87
2276 vspltisb $tmp,1 # 0x010101..01
2277 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2278
2279 ${UCMP}i $len,96
2280 bge _aesp8_xts_decrypt6x
2281
2282 lvx $rndkey0,0,$key1
2283 lvx $rndkey1,$idx,$key1
2284 addi $idx,$idx,16
2285 vperm $inout,$inout,$inptail,$inpperm
2286 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2287 vxor $inout,$inout,$tweak
2288 vxor $inout,$inout,$rndkey0
2289 lvx $rndkey0,$idx,$key1
2290 addi $idx,$idx,16
2291 mtctr $rounds
2292
2293 ${UCMP}i $len,16
2294 blt Ltail_xts_dec
2295 be?b Loop_xts_dec
2296
2297.align 5
2298Loop_xts_dec:
2299 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2300 vncipher $inout,$inout,$rndkey1
2301 lvx $rndkey1,$idx,$key1
2302 addi $idx,$idx,16
2303 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2304 vncipher $inout,$inout,$rndkey0
2305 lvx $rndkey0,$idx,$key1
2306 addi $idx,$idx,16
2307 bdnz Loop_xts_dec
2308
2309 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2310 vncipher $inout,$inout,$rndkey1
2311 lvx $rndkey1,$idx,$key1
2312 li $idx,16
2313 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2314 vxor $rndkey0,$rndkey0,$tweak
2315 vncipherlast $output,$inout,$rndkey0
2316
2317 le?vperm $tmp,$output,$output,$leperm
2318 be?nop
2319 le?stvx_u $tmp,0,$out
2320 be?stvx_u $output,0,$out
2321 addi $out,$out,16
2322
2323 subic. $len,$len,16
2324 beq Lxts_dec_done
2325
2326 vmr $inout,$inptail
2327 lvx $inptail,0,$inp
2328 addi $inp,$inp,16
2329 lvx $rndkey0,0,$key1
2330 lvx $rndkey1,$idx,$key1
2331 addi $idx,$idx,16
2332
2333 vsrab $tmp,$tweak,$seven # next tweak value
2334 vaddubm $tweak,$tweak,$tweak
2335 vsldoi $tmp,$tmp,$tmp,15
2336 vand $tmp,$tmp,$eighty7
2337 vxor $tweak,$tweak,$tmp
2338
2339 vperm $inout,$inout,$inptail,$inpperm
2340 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2341 vxor $inout,$inout,$tweak
2342 vxor $inout,$inout,$rndkey0
2343 lvx $rndkey0,$idx,$key1
2344 addi $idx,$idx,16
2345
2346 mtctr $rounds
2347 ${UCMP}i $len,16
2348 bge Loop_xts_dec
2349
2350Ltail_xts_dec:
2351 vsrab $tmp,$tweak,$seven # next tweak value
2352 vaddubm $tweak1,$tweak,$tweak
2353 vsldoi $tmp,$tmp,$tmp,15
2354 vand $tmp,$tmp,$eighty7
2355 vxor $tweak1,$tweak1,$tmp
2356
2357 subi $inp,$inp,16
2358 add $inp,$inp,$len
2359
2360 vxor $inout,$inout,$tweak # :-(
2361 vxor $inout,$inout,$tweak1 # :-)
2362
2363Loop_xts_dec_short:
2364 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2365 vncipher $inout,$inout,$rndkey1
2366 lvx $rndkey1,$idx,$key1
2367 addi $idx,$idx,16
2368 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2369 vncipher $inout,$inout,$rndkey0
2370 lvx $rndkey0,$idx,$key1
2371 addi $idx,$idx,16
2372 bdnz Loop_xts_dec_short
2373
2374 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2375 vncipher $inout,$inout,$rndkey1
2376 lvx $rndkey1,$idx,$key1
2377 li $idx,16
2378 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2379 vxor $rndkey0,$rndkey0,$tweak1
2380 vncipherlast $output,$inout,$rndkey0
2381
2382 le?vperm $tmp,$output,$output,$leperm
2383 be?nop
2384 le?stvx_u $tmp,0,$out
2385 be?stvx_u $output,0,$out
2386
2387 vmr $inout,$inptail
2388 lvx $inptail,0,$inp
2389 #addi $inp,$inp,16
2390 lvx $rndkey0,0,$key1
2391 lvx $rndkey1,$idx,$key1
2392 addi $idx,$idx,16
2393 vperm $inout,$inout,$inptail,$inpperm
2394 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2395
2396 lvsr $inpperm,0,$len # $inpperm is no longer needed
2397 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2398 vspltisb $tmp,-1
2399 vperm $inptail,$inptail,$tmp,$inpperm
2400 vsel $inout,$inout,$output,$inptail
2401
2402 vxor $rndkey0,$rndkey0,$tweak
2403 vxor $inout,$inout,$rndkey0
2404 lvx $rndkey0,$idx,$key1
2405 addi $idx,$idx,16
2406
2407 subi r11,$out,1
2408 mtctr $len
2409 li $len,16
2410Loop_xts_dec_steal:
2411 lbzu r0,1(r11)
2412 stb r0,16(r11)
2413 bdnz Loop_xts_dec_steal
2414
2415 mtctr $rounds
2416 b Loop_xts_dec # one more time...
2417
2418Lxts_dec_done:
2419 ${UCMP}i $ivp,0
2420 beq Lxts_dec_ret
2421
2422 vsrab $tmp,$tweak,$seven # next tweak value
2423 vaddubm $tweak,$tweak,$tweak
2424 vsldoi $tmp,$tmp,$tmp,15
2425 vand $tmp,$tmp,$eighty7
2426 vxor $tweak,$tweak,$tmp
2427
2428 le?vperm $tweak,$tweak,$tweak,$leperm
2429 stvx_u $tweak,0,$ivp
2430
2431Lxts_dec_ret:
2432 mtspr 256,r12 # restore vrsave
2433 li r3,0
2434 blr
2435 .long 0
2436 .byte 0,12,0x04,0,0x80,6,6,0
2437 .long 0
2438.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2439___
2440#########################################################################
# Register map shared by the optimized 6x-interleaved XTS procedures
# below.  The {{ ... }} pair opens a lexical scope so these aliases are
# local to the optimized routines.
2441{{ # Optimized XTS procedures #
# Reuse the $key2 GPR as the round-key spill/reload pointer: key2 is
# only needed for the tweak setup, which is done before these run.
2442my $key_=$key2;
# GPRs preloaded with byte offsets 0x00..0x70 for indexed vector
# loads/stores (lvx/stvx take a base + index register pair).
2443my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# Spell offset 0 as a literal instead of r0 on osx-style flavours
# (NOTE(review): presumably an assembler-syntax accommodation, since an
# r0 base/index operand reads as zero anyway -- confirm).
2444 $x00=0 if ($flavour =~ /osx/);
# Six input blocks, six output blocks, and one tweak per block.
2445my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2446my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2447my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2448my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2449 # v26-v31 last 6 round keys
# $keyperm deliberately aliases $out0: the key-alignment permute is only
# used while the key schedule is loaded, before $out0 carries data.
2450my ($keyperm)=($out0); # aliases with "caller", redundant assignment
# Tail length (len mod 16) shares a GPR with the 0x70 offset; the
# routines reload the 0x70 constant after using it as scratch.
2451my $taillen=$x70;
2452
2453$code.=<<___;
2454.align 5
2455_aesp8_xts_encrypt6x:
2456 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2457 mflr r11
2458 li r7,`$FRAME+8*16+15`
2459 li r3,`$FRAME+8*16+31`
2460 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2461 stvx v20,r7,$sp # ABI says so
2462 addi r7,r7,32
2463 stvx v21,r3,$sp
2464 addi r3,r3,32
2465 stvx v22,r7,$sp
2466 addi r7,r7,32
2467 stvx v23,r3,$sp
2468 addi r3,r3,32
2469 stvx v24,r7,$sp
2470 addi r7,r7,32
2471 stvx v25,r3,$sp
2472 addi r3,r3,32
2473 stvx v26,r7,$sp
2474 addi r7,r7,32
2475 stvx v27,r3,$sp
2476 addi r3,r3,32
2477 stvx v28,r7,$sp
2478 addi r7,r7,32
2479 stvx v29,r3,$sp
2480 addi r3,r3,32
2481 stvx v30,r7,$sp
2482 stvx v31,r3,$sp
2483 li r0,-1
2484 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2485 li $x10,0x10
2486 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2487 li $x20,0x20
2488 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2489 li $x30,0x30
2490 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2491 li $x40,0x40
2492 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2493 li $x50,0x50
2494 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2495 li $x60,0x60
2496 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2497 li $x70,0x70
2498 mtspr 256,r0
2499
2500 xxlor 2, 32+$eighty7, 32+$eighty7
2501 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
2502 xxlor 1, 32+$eighty7, 32+$eighty7
2503
2504 # Load XOR Lconsts.
2505 mr $x70, r6
2506 bl Lconsts
2507 lxvw4x 0, $x40, r6 # load XOR contents
2508 mr r6, $x70
2509 li $x70,0x70
2510
2511 subi $rounds,$rounds,3 # -4 in total
2512
2513 lvx $rndkey0,$x00,$key1 # load key schedule
2514 lvx v30,$x10,$key1
2515 addi $key1,$key1,0x20
2516 lvx v31,$x00,$key1
2517 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2518 addi $key_,$sp,$FRAME+15
2519 mtctr $rounds
2520
2521Load_xts_enc_key:
2522 ?vperm v24,v30,v31,$keyperm
2523 lvx v30,$x10,$key1
2524 addi $key1,$key1,0x20
2525 stvx v24,$x00,$key_ # off-load round[1]
2526 ?vperm v25,v31,v30,$keyperm
2527 lvx v31,$x00,$key1
2528 stvx v25,$x10,$key_ # off-load round[2]
2529 addi $key_,$key_,0x20
2530 bdnz Load_xts_enc_key
2531
2532 lvx v26,$x10,$key1
2533 ?vperm v24,v30,v31,$keyperm
2534 lvx v27,$x20,$key1
2535 stvx v24,$x00,$key_ # off-load round[3]
2536 ?vperm v25,v31,v26,$keyperm
2537 lvx v28,$x30,$key1
2538 stvx v25,$x10,$key_ # off-load round[4]
2539 addi $key_,$sp,$FRAME+15 # rewind $key_
2540 ?vperm v26,v26,v27,$keyperm
2541 lvx v29,$x40,$key1
2542 ?vperm v27,v27,v28,$keyperm
2543 lvx v30,$x50,$key1
2544 ?vperm v28,v28,v29,$keyperm
2545 lvx v31,$x60,$key1
2546 ?vperm v29,v29,v30,$keyperm
2547 lvx $twk5,$x70,$key1 # borrow $twk5
2548 ?vperm v30,v30,v31,$keyperm
2549 lvx v24,$x00,$key_ # pre-load round[1]
2550 ?vperm v31,v31,$twk5,$keyperm
2551 lvx v25,$x10,$key_ # pre-load round[2]
2552
2553 # Switch to use the following codes with 0x010101..87 to generate tweak.
2554 # eighty7 = 0x010101..87
2555 # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
2556 # vand tmp, tmp, eighty7 # last byte with carry
2557 # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
2558 # xxlor vsx, 0, 0
2559 # vpermxor tweak, tweak, tmp, vsx
2560
2561 vperm $in0,$inout,$inptail,$inpperm
2562 subi $inp,$inp,31 # undo "caller"
2563 vxor $twk0,$tweak,$rndkey0
2564 vsrab $tmp,$tweak,$seven # next tweak value
2565 vaddubm $tweak,$tweak,$tweak
2566 vand $tmp,$tmp,$eighty7
2567 vxor $out0,$in0,$twk0
2568 xxlor 32+$in1, 0, 0
2569 vpermxor $tweak, $tweak, $tmp, $in1
2570
2571 lvx_u $in1,$x10,$inp
2572 vxor $twk1,$tweak,$rndkey0
2573 vsrab $tmp,$tweak,$seven # next tweak value
2574 vaddubm $tweak,$tweak,$tweak
2575 le?vperm $in1,$in1,$in1,$leperm
2576 vand $tmp,$tmp,$eighty7
2577 vxor $out1,$in1,$twk1
2578 xxlor 32+$in2, 0, 0
2579 vpermxor $tweak, $tweak, $tmp, $in2
2580
2581 lvx_u $in2,$x20,$inp
2582 andi. $taillen,$len,15
2583 vxor $twk2,$tweak,$rndkey0
2584 vsrab $tmp,$tweak,$seven # next tweak value
2585 vaddubm $tweak,$tweak,$tweak
2586 le?vperm $in2,$in2,$in2,$leperm
2587 vand $tmp,$tmp,$eighty7
2588 vxor $out2,$in2,$twk2
2589 xxlor 32+$in3, 0, 0
2590 vpermxor $tweak, $tweak, $tmp, $in3
2591
2592 lvx_u $in3,$x30,$inp
2593 sub $len,$len,$taillen
2594 vxor $twk3,$tweak,$rndkey0
2595 vsrab $tmp,$tweak,$seven # next tweak value
2596 vaddubm $tweak,$tweak,$tweak
2597 le?vperm $in3,$in3,$in3,$leperm
2598 vand $tmp,$tmp,$eighty7
2599 vxor $out3,$in3,$twk3
2600 xxlor 32+$in4, 0, 0
2601 vpermxor $tweak, $tweak, $tmp, $in4
2602
2603 lvx_u $in4,$x40,$inp
2604 subi $len,$len,0x60
2605 vxor $twk4,$tweak,$rndkey0
2606 vsrab $tmp,$tweak,$seven # next tweak value
2607 vaddubm $tweak,$tweak,$tweak
2608 le?vperm $in4,$in4,$in4,$leperm
2609 vand $tmp,$tmp,$eighty7
2610 vxor $out4,$in4,$twk4
2611 xxlor 32+$in5, 0, 0
2612 vpermxor $tweak, $tweak, $tmp, $in5
2613
2614 lvx_u $in5,$x50,$inp
2615 addi $inp,$inp,0x60
2616 vxor $twk5,$tweak,$rndkey0
2617 vsrab $tmp,$tweak,$seven # next tweak value
2618 vaddubm $tweak,$tweak,$tweak
2619 le?vperm $in5,$in5,$in5,$leperm
2620 vand $tmp,$tmp,$eighty7
2621 vxor $out5,$in5,$twk5
2622 xxlor 32+$in0, 0, 0
2623 vpermxor $tweak, $tweak, $tmp, $in0
2624
2625 vxor v31,v31,$rndkey0
2626 mtctr $rounds
2627 b Loop_xts_enc6x
2628
2629.align 5
2630Loop_xts_enc6x:
2631 vcipher $out0,$out0,v24
2632 vcipher $out1,$out1,v24
2633 vcipher $out2,$out2,v24
2634 vcipher $out3,$out3,v24
2635 vcipher $out4,$out4,v24
2636 vcipher $out5,$out5,v24
2637 lvx v24,$x20,$key_ # round[3]
2638 addi $key_,$key_,0x20
2639
2640 vcipher $out0,$out0,v25
2641 vcipher $out1,$out1,v25
2642 vcipher $out2,$out2,v25
2643 vcipher $out3,$out3,v25
2644 vcipher $out4,$out4,v25
2645 vcipher $out5,$out5,v25
2646 lvx v25,$x10,$key_ # round[4]
2647 bdnz Loop_xts_enc6x
2648
2649 xxlor 32+$eighty7, 1, 1 # 0x010101..87
2650
2651 subic $len,$len,96 # $len-=96
2652 vxor $in0,$twk0,v31 # xor with last round key
2653 vcipher $out0,$out0,v24
2654 vcipher $out1,$out1,v24
2655 vsrab $tmp,$tweak,$seven # next tweak value
2656 vxor $twk0,$tweak,$rndkey0
2657 vaddubm $tweak,$tweak,$tweak
2658 vcipher $out2,$out2,v24
2659 vcipher $out3,$out3,v24
2660 vcipher $out4,$out4,v24
2661 vcipher $out5,$out5,v24
2662
2663 subfe. r0,r0,r0 # borrow?-1:0
2664 vand $tmp,$tmp,$eighty7
2665 vcipher $out0,$out0,v25
2666 vcipher $out1,$out1,v25
2667 xxlor 32+$in1, 0, 0
2668 vpermxor $tweak, $tweak, $tmp, $in1
2669 vcipher $out2,$out2,v25
2670 vcipher $out3,$out3,v25
2671 vxor $in1,$twk1,v31
2672 vsrab $tmp,$tweak,$seven # next tweak value
2673 vxor $twk1,$tweak,$rndkey0
2674 vcipher $out4,$out4,v25
2675 vcipher $out5,$out5,v25
2676
2677 and r0,r0,$len
2678 vaddubm $tweak,$tweak,$tweak
2679 vcipher $out0,$out0,v26
2680 vcipher $out1,$out1,v26
2681 vand $tmp,$tmp,$eighty7
2682 vcipher $out2,$out2,v26
2683 vcipher $out3,$out3,v26
2684 xxlor 32+$in2, 0, 0
2685 vpermxor $tweak, $tweak, $tmp, $in2
2686 vcipher $out4,$out4,v26
2687 vcipher $out5,$out5,v26
2688
2689 add $inp,$inp,r0 # $inp is adjusted in such
2690 # way that at exit from the
2691 # loop inX-in5 are loaded
2692 # with last "words"
2693 vxor $in2,$twk2,v31
2694 vsrab $tmp,$tweak,$seven # next tweak value
2695 vxor $twk2,$tweak,$rndkey0
2696 vaddubm $tweak,$tweak,$tweak
2697 vcipher $out0,$out0,v27
2698 vcipher $out1,$out1,v27
2699 vcipher $out2,$out2,v27
2700 vcipher $out3,$out3,v27
2701 vand $tmp,$tmp,$eighty7
2702 vcipher $out4,$out4,v27
2703 vcipher $out5,$out5,v27
2704
2705 addi $key_,$sp,$FRAME+15 # rewind $key_
2706 xxlor 32+$in3, 0, 0
2707 vpermxor $tweak, $tweak, $tmp, $in3
2708 vcipher $out0,$out0,v28
2709 vcipher $out1,$out1,v28
2710 vxor $in3,$twk3,v31
2711 vsrab $tmp,$tweak,$seven # next tweak value
2712 vxor $twk3,$tweak,$rndkey0
2713 vcipher $out2,$out2,v28
2714 vcipher $out3,$out3,v28
2715 vaddubm $tweak,$tweak,$tweak
2716 vcipher $out4,$out4,v28
2717 vcipher $out5,$out5,v28
2718 lvx v24,$x00,$key_ # re-pre-load round[1]
2719 vand $tmp,$tmp,$eighty7
2720
2721 vcipher $out0,$out0,v29
2722 vcipher $out1,$out1,v29
2723 xxlor 32+$in4, 0, 0
2724 vpermxor $tweak, $tweak, $tmp, $in4
2725 vcipher $out2,$out2,v29
2726 vcipher $out3,$out3,v29
2727 vxor $in4,$twk4,v31
2728 vsrab $tmp,$tweak,$seven # next tweak value
2729 vxor $twk4,$tweak,$rndkey0
2730 vcipher $out4,$out4,v29
2731 vcipher $out5,$out5,v29
2732 lvx v25,$x10,$key_ # re-pre-load round[2]
2733 vaddubm $tweak,$tweak,$tweak
2734
2735 vcipher $out0,$out0,v30
2736 vcipher $out1,$out1,v30
2737 vand $tmp,$tmp,$eighty7
2738 vcipher $out2,$out2,v30
2739 vcipher $out3,$out3,v30
2740 xxlor 32+$in5, 0, 0
2741 vpermxor $tweak, $tweak, $tmp, $in5
2742 vcipher $out4,$out4,v30
2743 vcipher $out5,$out5,v30
2744 vxor $in5,$twk5,v31
2745 vsrab $tmp,$tweak,$seven # next tweak value
2746 vxor $twk5,$tweak,$rndkey0
2747
2748 vcipherlast $out0,$out0,$in0
2749 lvx_u $in0,$x00,$inp # load next input block
2750 vaddubm $tweak,$tweak,$tweak
2751 vcipherlast $out1,$out1,$in1
2752 lvx_u $in1,$x10,$inp
2753 vcipherlast $out2,$out2,$in2
2754 le?vperm $in0,$in0,$in0,$leperm
2755 lvx_u $in2,$x20,$inp
2756 vand $tmp,$tmp,$eighty7
2757 vcipherlast $out3,$out3,$in3
2758 le?vperm $in1,$in1,$in1,$leperm
2759 lvx_u $in3,$x30,$inp
2760 vcipherlast $out4,$out4,$in4
2761 le?vperm $in2,$in2,$in2,$leperm
2762 lvx_u $in4,$x40,$inp
2763 xxlor 10, 32+$in0, 32+$in0
2764 xxlor 32+$in0, 0, 0
2765 vpermxor $tweak, $tweak, $tmp, $in0
2766 xxlor 32+$in0, 10, 10
2767 vcipherlast $tmp,$out5,$in5 # last block might be needed
2768 # in stealing mode
2769 le?vperm $in3,$in3,$in3,$leperm
2770 lvx_u $in5,$x50,$inp
2771 addi $inp,$inp,0x60
2772 le?vperm $in4,$in4,$in4,$leperm
2773 le?vperm $in5,$in5,$in5,$leperm
2774
2775 le?vperm $out0,$out0,$out0,$leperm
2776 le?vperm $out1,$out1,$out1,$leperm
2777 stvx_u $out0,$x00,$out # store output
2778 vxor $out0,$in0,$twk0
2779 le?vperm $out2,$out2,$out2,$leperm
2780 stvx_u $out1,$x10,$out
2781 vxor $out1,$in1,$twk1
2782 le?vperm $out3,$out3,$out3,$leperm
2783 stvx_u $out2,$x20,$out
2784 vxor $out2,$in2,$twk2
2785 le?vperm $out4,$out4,$out4,$leperm
2786 stvx_u $out3,$x30,$out
2787 vxor $out3,$in3,$twk3
2788 le?vperm $out5,$tmp,$tmp,$leperm
2789 stvx_u $out4,$x40,$out
2790 vxor $out4,$in4,$twk4
2791 le?stvx_u $out5,$x50,$out
2792 be?stvx_u $tmp, $x50,$out
2793 vxor $out5,$in5,$twk5
2794 addi $out,$out,0x60
2795
2796 mtctr $rounds
2797 beq Loop_xts_enc6x # did $len-=96 borrow?
2798
2799 xxlor 32+$eighty7, 2, 2 # 0x010101..87
2800
2801 addic. $len,$len,0x60
2802 beq Lxts_enc6x_zero
2803 cmpwi $len,0x20
2804 blt Lxts_enc6x_one
2805 nop
2806 beq Lxts_enc6x_two
2807 cmpwi $len,0x40
2808 blt Lxts_enc6x_three
2809 nop
2810 beq Lxts_enc6x_four
2811
2812Lxts_enc6x_five:
2813 vxor $out0,$in1,$twk0
2814 vxor $out1,$in2,$twk1
2815 vxor $out2,$in3,$twk2
2816 vxor $out3,$in4,$twk3
2817 vxor $out4,$in5,$twk4
2818
2819 bl _aesp8_xts_enc5x
2820
2821 le?vperm $out0,$out0,$out0,$leperm
2822 vmr $twk0,$twk5 # unused tweak
2823 le?vperm $out1,$out1,$out1,$leperm
2824 stvx_u $out0,$x00,$out # store output
2825 le?vperm $out2,$out2,$out2,$leperm
2826 stvx_u $out1,$x10,$out
2827 le?vperm $out3,$out3,$out3,$leperm
2828 stvx_u $out2,$x20,$out
2829 vxor $tmp,$out4,$twk5 # last block prep for stealing
2830 le?vperm $out4,$out4,$out4,$leperm
2831 stvx_u $out3,$x30,$out
2832 stvx_u $out4,$x40,$out
2833 addi $out,$out,0x50
2834 bne Lxts_enc6x_steal
2835 b Lxts_enc6x_done
2836
2837.align 4
2838Lxts_enc6x_four:
2839 vxor $out0,$in2,$twk0
2840 vxor $out1,$in3,$twk1
2841 vxor $out2,$in4,$twk2
2842 vxor $out3,$in5,$twk3
2843 vxor $out4,$out4,$out4
2844
2845 bl _aesp8_xts_enc5x
2846
2847 le?vperm $out0,$out0,$out0,$leperm
2848 vmr $twk0,$twk4 # unused tweak
2849 le?vperm $out1,$out1,$out1,$leperm
2850 stvx_u $out0,$x00,$out # store output
2851 le?vperm $out2,$out2,$out2,$leperm
2852 stvx_u $out1,$x10,$out
2853 vxor $tmp,$out3,$twk4 # last block prep for stealing
2854 le?vperm $out3,$out3,$out3,$leperm
2855 stvx_u $out2,$x20,$out
2856 stvx_u $out3,$x30,$out
2857 addi $out,$out,0x40
2858 bne Lxts_enc6x_steal
2859 b Lxts_enc6x_done
2860
2861.align 4
2862Lxts_enc6x_three:
2863 vxor $out0,$in3,$twk0
2864 vxor $out1,$in4,$twk1
2865 vxor $out2,$in5,$twk2
2866 vxor $out3,$out3,$out3
2867 vxor $out4,$out4,$out4
2868
2869 bl _aesp8_xts_enc5x
2870
2871 le?vperm $out0,$out0,$out0,$leperm
2872 vmr $twk0,$twk3 # unused tweak
2873 le?vperm $out1,$out1,$out1,$leperm
2874 stvx_u $out0,$x00,$out # store output
2875 vxor $tmp,$out2,$twk3 # last block prep for stealing
2876 le?vperm $out2,$out2,$out2,$leperm
2877 stvx_u $out1,$x10,$out
2878 stvx_u $out2,$x20,$out
2879 addi $out,$out,0x30
2880 bne Lxts_enc6x_steal
2881 b Lxts_enc6x_done
2882
2883.align 4
2884Lxts_enc6x_two:
2885 vxor $out0,$in4,$twk0
2886 vxor $out1,$in5,$twk1
2887 vxor $out2,$out2,$out2
2888 vxor $out3,$out3,$out3
2889 vxor $out4,$out4,$out4
2890
2891 bl _aesp8_xts_enc5x
2892
2893 le?vperm $out0,$out0,$out0,$leperm
2894 vmr $twk0,$twk2 # unused tweak
2895 vxor $tmp,$out1,$twk2 # last block prep for stealing
2896 le?vperm $out1,$out1,$out1,$leperm
2897 stvx_u $out0,$x00,$out # store output
2898 stvx_u $out1,$x10,$out
2899 addi $out,$out,0x20
2900 bne Lxts_enc6x_steal
2901 b Lxts_enc6x_done
2902
2903.align 4
2904Lxts_enc6x_one:
2905 vxor $out0,$in5,$twk0
2906 nop
2907Loop_xts_enc1x:
2908 vcipher $out0,$out0,v24
2909 lvx v24,$x20,$key_ # round[3]
2910 addi $key_,$key_,0x20
2911
2912 vcipher $out0,$out0,v25
2913 lvx v25,$x10,$key_ # round[4]
2914 bdnz Loop_xts_enc1x
2915
2916 add $inp,$inp,$taillen
2917 cmpwi $taillen,0
2918 vcipher $out0,$out0,v24
2919
2920 subi $inp,$inp,16
2921 vcipher $out0,$out0,v25
2922
2923 lvsr $inpperm,0,$taillen
2924 vcipher $out0,$out0,v26
2925
2926 lvx_u $in0,0,$inp
2927 vcipher $out0,$out0,v27
2928
2929 addi $key_,$sp,$FRAME+15 # rewind $key_
2930 vcipher $out0,$out0,v28
2931 lvx v24,$x00,$key_ # re-pre-load round[1]
2932
2933 vcipher $out0,$out0,v29
2934 lvx v25,$x10,$key_ # re-pre-load round[2]
2935 vxor $twk0,$twk0,v31
2936
2937 le?vperm $in0,$in0,$in0,$leperm
2938 vcipher $out0,$out0,v30
2939
2940 vperm $in0,$in0,$in0,$inpperm
2941 vcipherlast $out0,$out0,$twk0
2942
2943 vmr $twk0,$twk1 # unused tweak
2944 vxor $tmp,$out0,$twk1 # last block prep for stealing
2945 le?vperm $out0,$out0,$out0,$leperm
2946 stvx_u $out0,$x00,$out # store output
2947 addi $out,$out,0x10
2948 bne Lxts_enc6x_steal
2949 b Lxts_enc6x_done
2950
2951.align 4
2952Lxts_enc6x_zero:
2953 cmpwi $taillen,0
2954 beq Lxts_enc6x_done
2955
2956 add $inp,$inp,$taillen
2957 subi $inp,$inp,16
2958 lvx_u $in0,0,$inp
2959 lvsr $inpperm,0,$taillen # $in5 is no more
2960 le?vperm $in0,$in0,$in0,$leperm
2961 vperm $in0,$in0,$in0,$inpperm
2962 vxor $tmp,$tmp,$twk0
2963Lxts_enc6x_steal:
2964 vxor $in0,$in0,$twk0
2965 vxor $out0,$out0,$out0
2966 vspltisb $out1,-1
2967 vperm $out0,$out0,$out1,$inpperm
2968 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2969
2970 subi r30,$out,17
2971 subi $out,$out,16
2972 mtctr $taillen
2973Loop_xts_enc6x_steal:
2974 lbzu r0,1(r30)
2975 stb r0,16(r30)
2976 bdnz Loop_xts_enc6x_steal
2977
2978 li $taillen,0
2979 mtctr $rounds
2980 b Loop_xts_enc1x # one more time...
2981
2982.align 4
2983Lxts_enc6x_done:
2984 ${UCMP}i $ivp,0
2985 beq Lxts_enc6x_ret
2986
2987 vxor $tweak,$twk0,$rndkey0
2988 le?vperm $tweak,$tweak,$tweak,$leperm
2989 stvx_u $tweak,0,$ivp
2990
2991Lxts_enc6x_ret:
2992 mtlr r11
2993 li r10,`$FRAME+15`
2994 li r11,`$FRAME+31`
2995 stvx $seven,r10,$sp # wipe copies of round keys
2996 addi r10,r10,32
2997 stvx $seven,r11,$sp
2998 addi r11,r11,32
2999 stvx $seven,r10,$sp
3000 addi r10,r10,32
3001 stvx $seven,r11,$sp
3002 addi r11,r11,32
3003 stvx $seven,r10,$sp
3004 addi r10,r10,32
3005 stvx $seven,r11,$sp
3006 addi r11,r11,32
3007 stvx $seven,r10,$sp
3008 addi r10,r10,32
3009 stvx $seven,r11,$sp
3010 addi r11,r11,32
3011
3012 mtspr 256,$vrsave
3013 lvx v20,r10,$sp # ABI says so
3014 addi r10,r10,32
3015 lvx v21,r11,$sp
3016 addi r11,r11,32
3017 lvx v22,r10,$sp
3018 addi r10,r10,32
3019 lvx v23,r11,$sp
3020 addi r11,r11,32
3021 lvx v24,r10,$sp
3022 addi r10,r10,32
3023 lvx v25,r11,$sp
3024 addi r11,r11,32
3025 lvx v26,r10,$sp
3026 addi r10,r10,32
3027 lvx v27,r11,$sp
3028 addi r11,r11,32
3029 lvx v28,r10,$sp
3030 addi r10,r10,32
3031 lvx v29,r11,$sp
3032 addi r11,r11,32
3033 lvx v30,r10,$sp
3034 lvx v31,r11,$sp
3035 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3036 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3037 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3038 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3039 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3040 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3041 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3042 blr
3043 .long 0
3044 .byte 0,12,0x04,1,0x80,6,6,0
3045 .long 0
3046
3047.align 5
# Shared 5-track AES-encrypt tail for the 1..5 remaining-block XTS cases.
# On entry: out0-out4 hold plaintext already xored with tweak and round[0],
# v24/v25 hold rounds 1-2 from the on-stack key spill, and ctr holds the
# remaining round-pair count (NOTE(review): set by the Lxts_enc6x_*
# callers via mtctr before the bl -- confirm).
# On exit: out0-out4 hold ciphertext; in0 is preloaded with the final
# (possibly partial) input block and inpperm with the tail permute, so
# the caller can fall straight into ciphertext stealing when taillen != 0.
3048_aesp8_xts_enc5x:
3049 vcipher $out0,$out0,v24
3050 vcipher $out1,$out1,v24
3051 vcipher $out2,$out2,v24
3052 vcipher $out3,$out3,v24
3053 vcipher $out4,$out4,v24
3054 lvx v24,$x20,$key_ # round[3]
3055 addi $key_,$key_,0x20
3056
3057 vcipher $out0,$out0,v25
3058 vcipher $out1,$out1,v25
3059 vcipher $out2,$out2,v25
3060 vcipher $out3,$out3,v25
3061 vcipher $out4,$out4,v25
3062 lvx v25,$x10,$key_ # round[4]
# Two rounds per iteration, looping back to the entry label while
# rotating v24/v25 through the spilled key schedule.
3063 bdnz _aesp8_xts_enc5x
3064
# The last four key applications (v26..v30 plus vcipherlast) are
# unrolled; tail-block setup is interleaved to hide vector latency.
3065 add $inp,$inp,$taillen
3066 cmpwi $taillen,0
3067 vcipher $out0,$out0,v24
3068 vcipher $out1,$out1,v24
3069 vcipher $out2,$out2,v24
3070 vcipher $out3,$out3,v24
3071 vcipher $out4,$out4,v24
3072
3073 subi $inp,$inp,16
3074 vcipher $out0,$out0,v25
3075 vcipher $out1,$out1,v25
3076 vcipher $out2,$out2,v25
3077 vcipher $out3,$out3,v25
3078 vcipher $out4,$out4,v25
# Fold the last round key (v31, already xored with round[0] by the
# caller) into each block's tweak; these feed vcipherlast below.
3079 vxor $twk0,$twk0,v31
3080
3081 vcipher $out0,$out0,v26
3082 lvsr $inpperm,r0,$taillen # $in5 is no more
3083 vcipher $out1,$out1,v26
3084 vcipher $out2,$out2,v26
3085 vcipher $out3,$out3,v26
3086 vcipher $out4,$out4,v26
3087 vxor $in1,$twk1,v31
3088
3089 vcipher $out0,$out0,v27
# Load the final input block for the stealing path.
3090 lvx_u $in0,0,$inp
3091 vcipher $out1,$out1,v27
3092 vcipher $out2,$out2,v27
3093 vcipher $out3,$out3,v27
3094 vcipher $out4,$out4,v27
3095 vxor $in2,$twk2,v31
3096
3097 addi $key_,$sp,$FRAME+15 # rewind $key_
3098 vcipher $out0,$out0,v28
3099 vcipher $out1,$out1,v28
3100 vcipher $out2,$out2,v28
3101 vcipher $out3,$out3,v28
3102 vcipher $out4,$out4,v28
3103 lvx v24,$x00,$key_ # re-pre-load round[1]
3104 vxor $in3,$twk3,v31
3105
3106 vcipher $out0,$out0,v29
3107 le?vperm $in0,$in0,$in0,$leperm
3108 vcipher $out1,$out1,v29
3109 vcipher $out2,$out2,v29
3110 vcipher $out3,$out3,v29
3111 vcipher $out4,$out4,v29
3112 lvx v25,$x10,$key_ # re-pre-load round[2]
3113 vxor $in4,$twk4,v31
3114
3115 vcipher $out0,$out0,v30
3116 vperm $in0,$in0,$in0,$inpperm
3117 vcipher $out1,$out1,v30
3118 vcipher $out2,$out2,v30
3119 vcipher $out3,$out3,v30
3120 vcipher $out4,$out4,v30
3121
# Final round keyed with per-block (tweak xor last-round-key) values.
3122 vcipherlast $out0,$out0,$twk0
3123 vcipherlast $out1,$out1,$in1
3124 vcipherlast $out2,$out2,$in2
3125 vcipherlast $out3,$out3,$in3
3126 vcipherlast $out4,$out4,$in4
3127 blr
3128 .long 0
3129 .byte 0,12,0x14,0,0,0,0,0
3130
3131.align 5
3132_aesp8_xts_decrypt6x:
3133 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3134 mflr r11
3135 li r7,`$FRAME+8*16+15`
3136 li r3,`$FRAME+8*16+31`
3137 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3138 stvx v20,r7,$sp # ABI says so
3139 addi r7,r7,32
3140 stvx v21,r3,$sp
3141 addi r3,r3,32
3142 stvx v22,r7,$sp
3143 addi r7,r7,32
3144 stvx v23,r3,$sp
3145 addi r3,r3,32
3146 stvx v24,r7,$sp
3147 addi r7,r7,32
3148 stvx v25,r3,$sp
3149 addi r3,r3,32
3150 stvx v26,r7,$sp
3151 addi r7,r7,32
3152 stvx v27,r3,$sp
3153 addi r3,r3,32
3154 stvx v28,r7,$sp
3155 addi r7,r7,32
3156 stvx v29,r3,$sp
3157 addi r3,r3,32
3158 stvx v30,r7,$sp
3159 stvx v31,r3,$sp
3160 li r0,-1
3161 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3162 li $x10,0x10
3163 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3164 li $x20,0x20
3165 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3166 li $x30,0x30
3167 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3168 li $x40,0x40
3169 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3170 li $x50,0x50
3171 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3172 li $x60,0x60
3173 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3174 li $x70,0x70
3175 mtspr 256,r0
3176
3177 xxlor 2, 32+$eighty7, 32+$eighty7
3178 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
3179 xxlor 1, 32+$eighty7, 32+$eighty7
3180
3181 # Load XOR Lconsts.
3182 mr $x70, r6
3183 bl Lconsts
3184 lxvw4x 0, $x40, r6 # load XOR contents
3185 mr r6, $x70
3186 li $x70,0x70
3187
3188 subi $rounds,$rounds,3 # -4 in total
3189
3190 lvx $rndkey0,$x00,$key1 # load key schedule
3191 lvx v30,$x10,$key1
3192 addi $key1,$key1,0x20
3193 lvx v31,$x00,$key1
3194 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3195 addi $key_,$sp,$FRAME+15
3196 mtctr $rounds
3197
3198Load_xts_dec_key:
3199 ?vperm v24,v30,v31,$keyperm
3200 lvx v30,$x10,$key1
3201 addi $key1,$key1,0x20
3202 stvx v24,$x00,$key_ # off-load round[1]
3203 ?vperm v25,v31,v30,$keyperm
3204 lvx v31,$x00,$key1
3205 stvx v25,$x10,$key_ # off-load round[2]
3206 addi $key_,$key_,0x20
3207 bdnz Load_xts_dec_key
3208
3209 lvx v26,$x10,$key1
3210 ?vperm v24,v30,v31,$keyperm
3211 lvx v27,$x20,$key1
3212 stvx v24,$x00,$key_ # off-load round[3]
3213 ?vperm v25,v31,v26,$keyperm
3214 lvx v28,$x30,$key1
3215 stvx v25,$x10,$key_ # off-load round[4]
3216 addi $key_,$sp,$FRAME+15 # rewind $key_
3217 ?vperm v26,v26,v27,$keyperm
3218 lvx v29,$x40,$key1
3219 ?vperm v27,v27,v28,$keyperm
3220 lvx v30,$x50,$key1
3221 ?vperm v28,v28,v29,$keyperm
3222 lvx v31,$x60,$key1
3223 ?vperm v29,v29,v30,$keyperm
3224 lvx $twk5,$x70,$key1 # borrow $twk5
3225 ?vperm v30,v30,v31,$keyperm
3226 lvx v24,$x00,$key_ # pre-load round[1]
3227 ?vperm v31,v31,$twk5,$keyperm
3228 lvx v25,$x10,$key_ # pre-load round[2]
3229
3230 vperm $in0,$inout,$inptail,$inpperm
3231 subi $inp,$inp,31 # undo "caller"
3232 vxor $twk0,$tweak,$rndkey0
3233 vsrab $tmp,$tweak,$seven # next tweak value
3234 vaddubm $tweak,$tweak,$tweak
3235 vand $tmp,$tmp,$eighty7
3236 vxor $out0,$in0,$twk0
3237 xxlor 32+$in1, 0, 0
3238 vpermxor $tweak, $tweak, $tmp, $in1
3239
3240 lvx_u $in1,$x10,$inp
3241 vxor $twk1,$tweak,$rndkey0
3242 vsrab $tmp,$tweak,$seven # next tweak value
3243 vaddubm $tweak,$tweak,$tweak
3244 le?vperm $in1,$in1,$in1,$leperm
3245 vand $tmp,$tmp,$eighty7
3246 vxor $out1,$in1,$twk1
3247 xxlor 32+$in2, 0, 0
3248 vpermxor $tweak, $tweak, $tmp, $in2
3249
3250 lvx_u $in2,$x20,$inp
3251 andi. $taillen,$len,15
3252 vxor $twk2,$tweak,$rndkey0
3253 vsrab $tmp,$tweak,$seven # next tweak value
3254 vaddubm $tweak,$tweak,$tweak
3255 le?vperm $in2,$in2,$in2,$leperm
3256 vand $tmp,$tmp,$eighty7
3257 vxor $out2,$in2,$twk2
3258 xxlor 32+$in3, 0, 0
3259 vpermxor $tweak, $tweak, $tmp, $in3
3260
3261 lvx_u $in3,$x30,$inp
3262 sub $len,$len,$taillen
3263 vxor $twk3,$tweak,$rndkey0
3264 vsrab $tmp,$tweak,$seven # next tweak value
3265 vaddubm $tweak,$tweak,$tweak
3266 le?vperm $in3,$in3,$in3,$leperm
3267 vand $tmp,$tmp,$eighty7
3268 vxor $out3,$in3,$twk3
3269 xxlor 32+$in4, 0, 0
3270 vpermxor $tweak, $tweak, $tmp, $in4
3271
3272 lvx_u $in4,$x40,$inp
3273 subi $len,$len,0x60
3274 vxor $twk4,$tweak,$rndkey0
3275 vsrab $tmp,$tweak,$seven # next tweak value
3276 vaddubm $tweak,$tweak,$tweak
3277 le?vperm $in4,$in4,$in4,$leperm
3278 vand $tmp,$tmp,$eighty7
3279 vxor $out4,$in4,$twk4
3280 xxlor 32+$in5, 0, 0
3281 vpermxor $tweak, $tweak, $tmp, $in5
3282
3283 lvx_u $in5,$x50,$inp
3284 addi $inp,$inp,0x60
3285 vxor $twk5,$tweak,$rndkey0
3286 vsrab $tmp,$tweak,$seven # next tweak value
3287 vaddubm $tweak,$tweak,$tweak
3288 le?vperm $in5,$in5,$in5,$leperm
3289 vand $tmp,$tmp,$eighty7
3290 vxor $out5,$in5,$twk5
3291 xxlor 32+$in0, 0, 0
3292 vpermxor $tweak, $tweak, $tmp, $in0
3293
3294 vxor v31,v31,$rndkey0
3295 mtctr $rounds
3296 b Loop_xts_dec6x
3297
3298.align 5
3299Loop_xts_dec6x:
3300 vncipher $out0,$out0,v24
3301 vncipher $out1,$out1,v24
3302 vncipher $out2,$out2,v24
3303 vncipher $out3,$out3,v24
3304 vncipher $out4,$out4,v24
3305 vncipher $out5,$out5,v24
3306 lvx v24,$x20,$key_ # round[3]
3307 addi $key_,$key_,0x20
3308
3309 vncipher $out0,$out0,v25
3310 vncipher $out1,$out1,v25
3311 vncipher $out2,$out2,v25
3312 vncipher $out3,$out3,v25
3313 vncipher $out4,$out4,v25
3314 vncipher $out5,$out5,v25
3315 lvx v25,$x10,$key_ # round[4]
3316 bdnz Loop_xts_dec6x
3317
3318 xxlor 32+$eighty7, 1, 1 # 0x010101..87
3319
3320 subic $len,$len,96 # $len-=96
3321 vxor $in0,$twk0,v31 # xor with last round key
3322 vncipher $out0,$out0,v24
3323 vncipher $out1,$out1,v24
3324 vsrab $tmp,$tweak,$seven # next tweak value
3325 vxor $twk0,$tweak,$rndkey0
3326 vaddubm $tweak,$tweak,$tweak
3327 vncipher $out2,$out2,v24
3328 vncipher $out3,$out3,v24
3329 vncipher $out4,$out4,v24
3330 vncipher $out5,$out5,v24
3331
3332 subfe. r0,r0,r0 # borrow?-1:0
3333 vand $tmp,$tmp,$eighty7
3334 vncipher $out0,$out0,v25
3335 vncipher $out1,$out1,v25
3336 xxlor 32+$in1, 0, 0
3337 vpermxor $tweak, $tweak, $tmp, $in1
3338 vncipher $out2,$out2,v25
3339 vncipher $out3,$out3,v25
3340 vxor $in1,$twk1,v31
3341 vsrab $tmp,$tweak,$seven # next tweak value
3342 vxor $twk1,$tweak,$rndkey0
3343 vncipher $out4,$out4,v25
3344 vncipher $out5,$out5,v25
3345
3346 and r0,r0,$len
3347 vaddubm $tweak,$tweak,$tweak
3348 vncipher $out0,$out0,v26
3349 vncipher $out1,$out1,v26
3350 vand $tmp,$tmp,$eighty7
3351 vncipher $out2,$out2,v26
3352 vncipher $out3,$out3,v26
3353 xxlor 32+$in2, 0, 0
3354 vpermxor $tweak, $tweak, $tmp, $in2
3355 vncipher $out4,$out4,v26
3356 vncipher $out5,$out5,v26
3357
3358 add $inp,$inp,r0 # $inp is adjusted in such
3359 # way that at exit from the
3360 # loop inX-in5 are loaded
3361 # with last "words"
3362 vxor $in2,$twk2,v31
3363 vsrab $tmp,$tweak,$seven # next tweak value
3364 vxor $twk2,$tweak,$rndkey0
3365 vaddubm $tweak,$tweak,$tweak
3366 vncipher $out0,$out0,v27
3367 vncipher $out1,$out1,v27
3368 vncipher $out2,$out2,v27
3369 vncipher $out3,$out3,v27
3370 vand $tmp,$tmp,$eighty7
3371 vncipher $out4,$out4,v27
3372 vncipher $out5,$out5,v27
3373
3374 addi $key_,$sp,$FRAME+15 # rewind $key_
3375 xxlor 32+$in3, 0, 0
3376 vpermxor $tweak, $tweak, $tmp, $in3
3377 vncipher $out0,$out0,v28
3378 vncipher $out1,$out1,v28
3379 vxor $in3,$twk3,v31
3380 vsrab $tmp,$tweak,$seven # next tweak value
3381 vxor $twk3,$tweak,$rndkey0
3382 vncipher $out2,$out2,v28
3383 vncipher $out3,$out3,v28
3384 vaddubm $tweak,$tweak,$tweak
3385 vncipher $out4,$out4,v28
3386 vncipher $out5,$out5,v28
3387 lvx v24,$x00,$key_ # re-pre-load round[1]
3388 vand $tmp,$tmp,$eighty7
3389
3390 vncipher $out0,$out0,v29
3391 vncipher $out1,$out1,v29
3392 xxlor 32+$in4, 0, 0
3393 vpermxor $tweak, $tweak, $tmp, $in4
3394 vncipher $out2,$out2,v29
3395 vncipher $out3,$out3,v29
3396 vxor $in4,$twk4,v31
3397 vsrab $tmp,$tweak,$seven # next tweak value
3398 vxor $twk4,$tweak,$rndkey0
3399 vncipher $out4,$out4,v29
3400 vncipher $out5,$out5,v29
3401 lvx v25,$x10,$key_ # re-pre-load round[2]
3402 vaddubm $tweak,$tweak,$tweak
3403
3404 vncipher $out0,$out0,v30
3405 vncipher $out1,$out1,v30
3406 vand $tmp,$tmp,$eighty7
3407 vncipher $out2,$out2,v30
3408 vncipher $out3,$out3,v30
3409 xxlor 32+$in5, 0, 0
3410 vpermxor $tweak, $tweak, $tmp, $in5
3411 vncipher $out4,$out4,v30
3412 vncipher $out5,$out5,v30
3413 vxor $in5,$twk5,v31
3414 vsrab $tmp,$tweak,$seven # next tweak value
3415 vxor $twk5,$tweak,$rndkey0
3416
3417 vncipherlast $out0,$out0,$in0
3418 lvx_u $in0,$x00,$inp # load next input block
3419 vaddubm $tweak,$tweak,$tweak
3420 vncipherlast $out1,$out1,$in1
3421 lvx_u $in1,$x10,$inp
3422 vncipherlast $out2,$out2,$in2
3423 le?vperm $in0,$in0,$in0,$leperm
3424 lvx_u $in2,$x20,$inp
3425 vand $tmp,$tmp,$eighty7
3426 vncipherlast $out3,$out3,$in3
3427 le?vperm $in1,$in1,$in1,$leperm
3428 lvx_u $in3,$x30,$inp
3429 vncipherlast $out4,$out4,$in4
3430 le?vperm $in2,$in2,$in2,$leperm
3431 lvx_u $in4,$x40,$inp
3432 xxlor 10, 32+$in0, 32+$in0
3433 xxlor 32+$in0, 0, 0
3434 vpermxor $tweak, $tweak, $tmp, $in0
3435 xxlor 32+$in0, 10, 10
3436 vncipherlast $out5,$out5,$in5
3437 le?vperm $in3,$in3,$in3,$leperm
3438 lvx_u $in5,$x50,$inp
3439 addi $inp,$inp,0x60
3440 le?vperm $in4,$in4,$in4,$leperm
3441 le?vperm $in5,$in5,$in5,$leperm
3442
3443 le?vperm $out0,$out0,$out0,$leperm
3444 le?vperm $out1,$out1,$out1,$leperm
3445 stvx_u $out0,$x00,$out # store output
3446 vxor $out0,$in0,$twk0
3447 le?vperm $out2,$out2,$out2,$leperm
3448 stvx_u $out1,$x10,$out
3449 vxor $out1,$in1,$twk1
3450 le?vperm $out3,$out3,$out3,$leperm
3451 stvx_u $out2,$x20,$out
3452 vxor $out2,$in2,$twk2
3453 le?vperm $out4,$out4,$out4,$leperm
3454 stvx_u $out3,$x30,$out
3455 vxor $out3,$in3,$twk3
3456 le?vperm $out5,$out5,$out5,$leperm
3457 stvx_u $out4,$x40,$out
3458 vxor $out4,$in4,$twk4
3459 stvx_u $out5,$x50,$out
3460 vxor $out5,$in5,$twk5
3461 addi $out,$out,0x60
3462
3463 mtctr $rounds
3464 beq Loop_xts_dec6x # did $len-=96 borrow?
3465
3466 xxlor 32+$eighty7, 2, 2 # 0x010101..87
3467
3468 addic. $len,$len,0x60
3469 beq Lxts_dec6x_zero
3470 cmpwi $len,0x20
3471 blt Lxts_dec6x_one
3472 nop
3473 beq Lxts_dec6x_two
3474 cmpwi $len,0x40
3475 blt Lxts_dec6x_three
3476 nop
3477 beq Lxts_dec6x_four
3478
3479Lxts_dec6x_five:
3480 vxor $out0,$in1,$twk0
3481 vxor $out1,$in2,$twk1
3482 vxor $out2,$in3,$twk2
3483 vxor $out3,$in4,$twk3
3484 vxor $out4,$in5,$twk4
3485
3486 bl _aesp8_xts_dec5x
3487
3488 le?vperm $out0,$out0,$out0,$leperm
3489 vmr $twk0,$twk5 # unused tweak
3490 vxor $twk1,$tweak,$rndkey0
3491 le?vperm $out1,$out1,$out1,$leperm
3492 stvx_u $out0,$x00,$out # store output
3493 vxor $out0,$in0,$twk1
3494 le?vperm $out2,$out2,$out2,$leperm
3495 stvx_u $out1,$x10,$out
3496 le?vperm $out3,$out3,$out3,$leperm
3497 stvx_u $out2,$x20,$out
3498 le?vperm $out4,$out4,$out4,$leperm
3499 stvx_u $out3,$x30,$out
3500 stvx_u $out4,$x40,$out
3501 addi $out,$out,0x50
3502 bne Lxts_dec6x_steal
3503 b Lxts_dec6x_done
3504
3505.align 4
3506Lxts_dec6x_four:
3507 vxor $out0,$in2,$twk0
3508 vxor $out1,$in3,$twk1
3509 vxor $out2,$in4,$twk2
3510 vxor $out3,$in5,$twk3
3511 vxor $out4,$out4,$out4
3512
3513 bl _aesp8_xts_dec5x
3514
3515 le?vperm $out0,$out0,$out0,$leperm
3516 vmr $twk0,$twk4 # unused tweak
3517 vmr $twk1,$twk5
3518 le?vperm $out1,$out1,$out1,$leperm
3519 stvx_u $out0,$x00,$out # store output
3520 vxor $out0,$in0,$twk5
3521 le?vperm $out2,$out2,$out2,$leperm
3522 stvx_u $out1,$x10,$out
3523 le?vperm $out3,$out3,$out3,$leperm
3524 stvx_u $out2,$x20,$out
3525 stvx_u $out3,$x30,$out
3526 addi $out,$out,0x40
3527 bne Lxts_dec6x_steal
3528 b Lxts_dec6x_done
3529
3530.align 4
3531Lxts_dec6x_three:
3532 vxor $out0,$in3,$twk0
3533 vxor $out1,$in4,$twk1
3534 vxor $out2,$in5,$twk2
3535 vxor $out3,$out3,$out3
3536 vxor $out4,$out4,$out4
3537
3538 bl _aesp8_xts_dec5x
3539
3540 le?vperm $out0,$out0,$out0,$leperm
3541 vmr $twk0,$twk3 # unused tweak
3542 vmr $twk1,$twk4
3543 le?vperm $out1,$out1,$out1,$leperm
3544 stvx_u $out0,$x00,$out # store output
3545 vxor $out0,$in0,$twk4
3546 le?vperm $out2,$out2,$out2,$leperm
3547 stvx_u $out1,$x10,$out
3548 stvx_u $out2,$x20,$out
3549 addi $out,$out,0x30
3550 bne Lxts_dec6x_steal
3551 b Lxts_dec6x_done
3552
3553.align 4
3554Lxts_dec6x_two:
3555 vxor $out0,$in4,$twk0
3556 vxor $out1,$in5,$twk1
3557 vxor $out2,$out2,$out2
3558 vxor $out3,$out3,$out3
3559 vxor $out4,$out4,$out4
3560
3561 bl _aesp8_xts_dec5x
3562
3563 le?vperm $out0,$out0,$out0,$leperm
3564 vmr $twk0,$twk2 # unused tweak
3565 vmr $twk1,$twk3
3566 le?vperm $out1,$out1,$out1,$leperm
3567 stvx_u $out0,$x00,$out # store output
3568 vxor $out0,$in0,$twk3
3569 stvx_u $out1,$x10,$out
3570 addi $out,$out,0x20
3571 bne Lxts_dec6x_steal
3572 b Lxts_dec6x_done
3573
3574.align 4
3575Lxts_dec6x_one:
3576 vxor $out0,$in5,$twk0
3577 nop
3578Loop_xts_dec1x:
3579 vncipher $out0,$out0,v24
3580 lvx v24,$x20,$key_ # round[3]
3581 addi $key_,$key_,0x20
3582
3583 vncipher $out0,$out0,v25
3584 lvx v25,$x10,$key_ # round[4]
3585 bdnz Loop_xts_dec1x
3586
3587 subi r0,$taillen,1
3588 vncipher $out0,$out0,v24
3589
3590 andi. r0,r0,16
3591 cmpwi $taillen,0
3592 vncipher $out0,$out0,v25
3593
3594 sub $inp,$inp,r0
3595 vncipher $out0,$out0,v26
3596
3597 lvx_u $in0,0,$inp
3598 vncipher $out0,$out0,v27
3599
3600 addi $key_,$sp,$FRAME+15 # rewind $key_
3601 vncipher $out0,$out0,v28
3602 lvx v24,$x00,$key_ # re-pre-load round[1]
3603
3604 vncipher $out0,$out0,v29
3605 lvx v25,$x10,$key_ # re-pre-load round[2]
3606 vxor $twk0,$twk0,v31
3607
3608 le?vperm $in0,$in0,$in0,$leperm
3609 vncipher $out0,$out0,v30
3610
3611 mtctr $rounds
3612 vncipherlast $out0,$out0,$twk0
3613
3614 vmr $twk0,$twk1 # unused tweak
3615 vmr $twk1,$twk2
3616 le?vperm $out0,$out0,$out0,$leperm
3617 stvx_u $out0,$x00,$out # store output
3618 addi $out,$out,0x10
3619 vxor $out0,$in0,$twk2
3620 bne Lxts_dec6x_steal
3621 b Lxts_dec6x_done
3622
3623.align 4
3624Lxts_dec6x_zero:
3625 cmpwi $taillen,0
3626 beq Lxts_dec6x_done
3627
3628 lvx_u $in0,0,$inp
3629 le?vperm $in0,$in0,$in0,$leperm
3630 vxor $out0,$in0,$twk1
3631Lxts_dec6x_steal:
3632 vncipher $out0,$out0,v24
3633 lvx v24,$x20,$key_ # round[3]
3634 addi $key_,$key_,0x20
3635
3636 vncipher $out0,$out0,v25
3637 lvx v25,$x10,$key_ # round[4]
3638 bdnz Lxts_dec6x_steal
3639
3640 add $inp,$inp,$taillen
3641 vncipher $out0,$out0,v24
3642
3643 cmpwi $taillen,0
3644 vncipher $out0,$out0,v25
3645
3646 lvx_u $in0,0,$inp
3647 vncipher $out0,$out0,v26
3648
3649 lvsr $inpperm,0,$taillen # $in5 is no more
3650 vncipher $out0,$out0,v27
3651
3652 addi $key_,$sp,$FRAME+15 # rewind $key_
3653 vncipher $out0,$out0,v28
3654 lvx v24,$x00,$key_ # re-pre-load round[1]
3655
3656 vncipher $out0,$out0,v29
3657 lvx v25,$x10,$key_ # re-pre-load round[2]
3658 vxor $twk1,$twk1,v31
3659
3660 le?vperm $in0,$in0,$in0,$leperm
3661 vncipher $out0,$out0,v30
3662
3663 vperm $in0,$in0,$in0,$inpperm
3664 vncipherlast $tmp,$out0,$twk1
3665
3666 le?vperm $out0,$tmp,$tmp,$leperm
3667 le?stvx_u $out0,0,$out
3668 be?stvx_u $tmp,0,$out
3669
3670 vxor $out0,$out0,$out0
3671 vspltisb $out1,-1
3672 vperm $out0,$out0,$out1,$inpperm
3673 vsel $out0,$in0,$tmp,$out0
3674 vxor $out0,$out0,$twk0
3675
3676 subi r30,$out,1
3677 mtctr $taillen
3678Loop_xts_dec6x_steal:
3679 lbzu r0,1(r30)
3680 stb r0,16(r30)
3681 bdnz Loop_xts_dec6x_steal
3682
3683 li $taillen,0
3684 mtctr $rounds
3685 b Loop_xts_dec1x # one more time...
3686
3687.align 4
3688Lxts_dec6x_done:
3689 ${UCMP}i $ivp,0
3690 beq Lxts_dec6x_ret
3691
3692 vxor $tweak,$twk0,$rndkey0
3693 le?vperm $tweak,$tweak,$tweak,$leperm
3694 stvx_u $tweak,0,$ivp
3695
3696Lxts_dec6x_ret:
3697 mtlr r11
3698 li r10,`$FRAME+15`
3699 li r11,`$FRAME+31`
3700 stvx $seven,r10,$sp # wipe copies of round keys
3701 addi r10,r10,32
3702 stvx $seven,r11,$sp
3703 addi r11,r11,32
3704 stvx $seven,r10,$sp
3705 addi r10,r10,32
3706 stvx $seven,r11,$sp
3707 addi r11,r11,32
3708 stvx $seven,r10,$sp
3709 addi r10,r10,32
3710 stvx $seven,r11,$sp
3711 addi r11,r11,32
3712 stvx $seven,r10,$sp
3713 addi r10,r10,32
3714 stvx $seven,r11,$sp
3715 addi r11,r11,32
3716
3717 mtspr 256,$vrsave
3718 lvx v20,r10,$sp # ABI says so
3719 addi r10,r10,32
3720 lvx v21,r11,$sp
3721 addi r11,r11,32
3722 lvx v22,r10,$sp
3723 addi r10,r10,32
3724 lvx v23,r11,$sp
3725 addi r11,r11,32
3726 lvx v24,r10,$sp
3727 addi r10,r10,32
3728 lvx v25,r11,$sp
3729 addi r11,r11,32
3730 lvx v26,r10,$sp
3731 addi r10,r10,32
3732 lvx v27,r11,$sp
3733 addi r11,r11,32
3734 lvx v28,r10,$sp
3735 addi r10,r10,32
3736 lvx v29,r11,$sp
3737 addi r11,r11,32
3738 lvx v30,r10,$sp
3739 lvx v31,r11,$sp
3740 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3741 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3742 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3743 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3744 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3745 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3746 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3747 blr
3748 .long 0
3749 .byte 0,12,0x04,1,0x80,6,6,0
3750 .long 0
3751
3752.align 5
3753_aesp8_xts_dec5x:
3754 vncipher $out0,$out0,v24
3755 vncipher $out1,$out1,v24
3756 vncipher $out2,$out2,v24
3757 vncipher $out3,$out3,v24
3758 vncipher $out4,$out4,v24
3759 lvx v24,$x20,$key_ # round[3]
3760 addi $key_,$key_,0x20
3761
3762 vncipher $out0,$out0,v25
3763 vncipher $out1,$out1,v25
3764 vncipher $out2,$out2,v25
3765 vncipher $out3,$out3,v25
3766 vncipher $out4,$out4,v25
3767 lvx v25,$x10,$key_ # round[4]
3768 bdnz _aesp8_xts_dec5x
3769
3770 subi r0,$taillen,1
3771 vncipher $out0,$out0,v24
3772 vncipher $out1,$out1,v24
3773 vncipher $out2,$out2,v24
3774 vncipher $out3,$out3,v24
3775 vncipher $out4,$out4,v24
3776
3777 andi. r0,r0,16
3778 cmpwi $taillen,0
3779 vncipher $out0,$out0,v25
3780 vncipher $out1,$out1,v25
3781 vncipher $out2,$out2,v25
3782 vncipher $out3,$out3,v25
3783 vncipher $out4,$out4,v25
3784 vxor $twk0,$twk0,v31
3785
3786 sub $inp,$inp,r0
3787 vncipher $out0,$out0,v26
3788 vncipher $out1,$out1,v26
3789 vncipher $out2,$out2,v26
3790 vncipher $out3,$out3,v26
3791 vncipher $out4,$out4,v26
3792 vxor $in1,$twk1,v31
3793
3794 vncipher $out0,$out0,v27
3795 lvx_u $in0,0,$inp
3796 vncipher $out1,$out1,v27
3797 vncipher $out2,$out2,v27
3798 vncipher $out3,$out3,v27
3799 vncipher $out4,$out4,v27
3800 vxor $in2,$twk2,v31
3801
3802 addi $key_,$sp,$FRAME+15 # rewind $key_
3803 vncipher $out0,$out0,v28
3804 vncipher $out1,$out1,v28
3805 vncipher $out2,$out2,v28
3806 vncipher $out3,$out3,v28
3807 vncipher $out4,$out4,v28
3808 lvx v24,$x00,$key_ # re-pre-load round[1]
3809 vxor $in3,$twk3,v31
3810
3811 vncipher $out0,$out0,v29
3812 le?vperm $in0,$in0,$in0,$leperm
3813 vncipher $out1,$out1,v29
3814 vncipher $out2,$out2,v29
3815 vncipher $out3,$out3,v29
3816 vncipher $out4,$out4,v29
3817 lvx v25,$x10,$key_ # re-pre-load round[2]
3818 vxor $in4,$twk4,v31
3819
3820 vncipher $out0,$out0,v30
3821 vncipher $out1,$out1,v30
3822 vncipher $out2,$out2,v30
3823 vncipher $out3,$out3,v30
3824 vncipher $out4,$out4,v30
3825
3826 vncipherlast $out0,$out0,$twk0
3827 vncipherlast $out1,$out1,$in1
3828 vncipherlast $out2,$out2,$in2
3829 vncipherlast $out3,$out3,$in3
3830 vncipherlast $out4,$out4,$in4
3831 mtctr $rounds
3832 blr
3833 .long 0
3834 .byte 0,12,0x14,0,0,0,0,0
3835___
3836}} }}}
3837
# Post-process the generated assembly held in $code and print it to STDOUT.
# Two transformations are applied, line by line:
#   1. While $consts is true (i.e. until the "Lconsts:" label is seen), any
#      .long/.byte constant-table line carrying a trailing "?inv"/"?rev"
#      marker is re-emitted as endian-agnostic .byte data, byte-swapped or
#      nibble-inverted for little-endian flavours.
#   2. Instructions prefixed with "le?", "be?" or "?" are kept, commented
#      out, or rewritten (operand swaps for vperm/vsldoi/vspltw, lvsl<->lvsr
#      swap) according to the target endianness in $flavour.
# NOTE: the order of the s///-or chains below is significant; do not reorder.
my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;    # expand `...` arithmetic (e.g. frame offsets)

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

        print $_,"\n";
}

# Check close() so buffered-write failures on STDOUT (e.g. full disk or a
# broken pipe to the assembler) abort the build instead of producing a
# silently truncated .s file.
close STDOUT or die "error closing STDOUT: $!";