Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
fork
Configure Feed
Select the types of activity you want to include in your feed.
1#! /usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3
4# This code is taken from CRYPTOGAMs[1] and is included here using the option
5# in the license to distribute the code under the GPL. Therefore this program
6# is free software; you can redistribute it and/or modify it under the terms of
7# the GNU General Public License version 2 as published by the Free Software
8# Foundation.
9#
10# [1] https://www.openssl.org/~appro/cryptogams/
11
12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13# All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19# * Redistributions of source code must retain copyright notices,
20# this list of conditions and the following disclaimer.
21#
22# * Redistributions in binary form must reproduce the above
23# copyright notice, this list of conditions and the following
24# disclaimer in the documentation and/or other materials
25# provided with the distribution.
26#
27# * Neither the name of the CRYPTOGAMS nor the names of its
28# copyright holder and contributors may be used to endorse or
29# promote products derived from this software without specific
30# prior written permission.
31#
32# ALTERNATIVELY, provided that this notice is retained in full, this
33# product may be distributed under the terms of the GNU General Public
34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35# those given above.
36#
37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49# ====================================================================
50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51# project. The module is, however, dual licensed under OpenSSL and
52# CRYPTOGAMS licenses depending on where you obtain it. For further
53# details see https://www.openssl.org/~appro/cryptogams/.
54# ====================================================================
55#
56# This module implements support for AES instructions as per PowerISA
57# specification version 2.07, first implemented by POWER8 processor.
58# The module is endian-agnostic in sense that it supports both big-
59# and little-endian cases. Data alignment in parallelizable modes is
60# handled with VSX loads and stores, which implies MSR.VSX flag being
61# set. It should also be noted that ISA specification doesn't prohibit
62# alignment exceptions for these instructions on page boundaries.
63# Initially alignment was handled in pure AltiVec/VMX way [when data
64# is aligned programmatically, which in turn guarantees exception-
65# free execution], but it turned to hamper performance when vcipher
66# instructions are interleaved. It's reckoned that eventual
67# misalignment penalties at page boundaries are in average lower
68# than additional overhead in pure AltiVec approach.
69#
70# May 2016
71#
72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73# systems were measured.
74#
75######################################################################
76# Current large-block performance in cycles per byte processed with
77# 128-bit key (less is better).
78#
79# CBC en-/decrypt CTR XTS
80# POWER8[le] 3.96/0.72 0.74 1.1
81# POWER8[be] 3.75/0.65 0.66 1.0
82
# First command-line argument selects the build "flavour" (e.g. linux64le,
# linux32).  Everything ABI-dependent — word size, load/store/compare
# mnemonics — is derived from it here.
$flavour = shift;

if ($flavour =~ /64/) {
	# 64-bit ABI: 8-byte words, doubleword memory ops and compares.
	($SIZE_T, $STU, $POP, $PUSH, $UCMP, $SHL)
		= (8, "stdu", "ld", "std", "cmpld", "sldi");
	$LRSAVE = 2*$SIZE_T;		# LR save slot offset in caller frame
} elsif ($flavour =~ /32/) {
	# 32-bit ABI: 4-byte words, word memory ops and compares.
	($SIZE_T, $STU, $POP, $PUSH, $UCMP, $SHL)
		= (4, "stwu", "lwz", "stw", "cmplw", "slwi");
	$LRSAVE = $SIZE_T;
} else {
	die "nonsense $flavour";
}

# Non-zero (set to the word size) when generating little-endian code;
# used below to size endian-dependent adjustments.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
104
# Locate the ppc-xlate.pl "assembler translator" either next to this script
# (kernel tree layout) or two directories up (OpenSSL tree layout).
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all generated code through the translator; the remaining argument is
# the output file name.  NOTE: this must be low-precedence "or", not "||".
# With "||" the check binds to the concatenated (always true) command
# string, so a failed open() would go completely unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# minimal stack frame, 8 machine words
$prefix="aes_p8";	# symbol prefix for all exported routines

$sp="r1";		# stack pointer
$vrsave="r12";		# holds saved VRSAVE across the routines
117
118#########################################################################
{{{	# Key setup procedures						#
# GPR allocation: r3..r8 = input key ptr, key length in bits, output
# schedule ptr, constant-pool ptr (r3 doubles as the integer return value
# via "mr r3,$ptr"), loop counter, AES round count.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# VRs v0..v6: all-zero vector, two input-key staging vectors, current
# round-key word (also borrowed as a permute vector), round constant,
# rotate-n-splat mask, sliding temporary.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# VRs v7..v11: staging vector for unaligned stores plus the
# permute/mask/head/tail quartet implementing unaligned output.
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# Emit the constant pool (round constants, masks) and the
# ${prefix}_set_encrypt_key / ${prefix}_set_decrypt_key routines.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
455#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block("en") / gen_block("de") emit two nearly identical routines,
# ${prefix}_encrypt and ${prefix}_decrypt, differing only in the label
# suffix and in the "n" infix that turns vcipher/vcipherlast into
# vncipher/vncipherlast for decryption.
sub gen_block () {
my $dir = shift;	# "en" or "de"
my $n = $dir eq "de" ? "n" : "";	# mnemonic infix: vcipher vs vncipher
# GPR arguments per ABI: input block ptr, output block ptr, key schedule
# ptr, round count (read from offset 240 of the schedule), index scratch.
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
# Instantiate both directions.
&gen_block("en");
&gen_block("de");
}}}
533#########################################################################
{{{	# CBC en- and decrypt procedures				#
# GPR arguments per ABI: input ptr, output ptr, byte length, key schedule
# ptr, IV ptr, encrypt/decrypt flag, plus round count and index scratch.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
# v0..v3: rotating pair of round keys, working block, temporary.
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
# v4..v10: IV, input staging vector and the permute/head/mask vectors
# implementing unaligned input, output and key accesses.
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
	map("v$_",(4..10));
# Emit ${prefix}_cbc_encrypt; lengths >= 128 bytes on the decrypt path
# branch to the 8x-unrolled _aesp8_cbc_decrypt8x emitted further below.
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
699#########################################################################
{{	# Optimized CBC decrypt procedure				#
# Pointer into the on-stack copy of the key schedule (the code below
# off-loads aligned round keys to the stack and rewinds this register).
my $key_="r11";
# Byte offsets 0x00..0x70 kept in GPRs for 8-blocks-at-a-time addressing.
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input ciphertext blocks and eight working/output blocks.
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
709$code.=<<___;
710.align 5
711_aesp8_cbc_decrypt8x:
712 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
713 li r10,`$FRAME+8*16+15`
714 li r11,`$FRAME+8*16+31`
715 stvx v20,r10,$sp # ABI says so
716 addi r10,r10,32
717 stvx v21,r11,$sp
718 addi r11,r11,32
719 stvx v22,r10,$sp
720 addi r10,r10,32
721 stvx v23,r11,$sp
722 addi r11,r11,32
723 stvx v24,r10,$sp
724 addi r10,r10,32
725 stvx v25,r11,$sp
726 addi r11,r11,32
727 stvx v26,r10,$sp
728 addi r10,r10,32
729 stvx v27,r11,$sp
730 addi r11,r11,32
731 stvx v28,r10,$sp
732 addi r10,r10,32
733 stvx v29,r11,$sp
734 addi r11,r11,32
735 stvx v30,r10,$sp
736 stvx v31,r11,$sp
737 li r0,-1
738 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
739 li $x10,0x10
740 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
741 li $x20,0x20
742 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
743 li $x30,0x30
744 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
745 li $x40,0x40
746 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
747 li $x50,0x50
748 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
749 li $x60,0x60
750 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
751 li $x70,0x70
752 mtspr 256,r0
753
754 subi $rounds,$rounds,3 # -4 in total
755 subi $len,$len,128 # bias
756
757 lvx $rndkey0,$x00,$key # load key schedule
758 lvx v30,$x10,$key
759 addi $key,$key,0x20
760 lvx v31,$x00,$key
761 ?vperm $rndkey0,$rndkey0,v30,$keyperm
762 addi $key_,$sp,$FRAME+15
763 mtctr $rounds
764
765Load_cbc_dec_key:
766 ?vperm v24,v30,v31,$keyperm
767 lvx v30,$x10,$key
768 addi $key,$key,0x20
769 stvx v24,$x00,$key_ # off-load round[1]
770 ?vperm v25,v31,v30,$keyperm
771 lvx v31,$x00,$key
772 stvx v25,$x10,$key_ # off-load round[2]
773 addi $key_,$key_,0x20
774 bdnz Load_cbc_dec_key
775
776 lvx v26,$x10,$key
777 ?vperm v24,v30,v31,$keyperm
778 lvx v27,$x20,$key
779 stvx v24,$x00,$key_ # off-load round[3]
780 ?vperm v25,v31,v26,$keyperm
781 lvx v28,$x30,$key
782 stvx v25,$x10,$key_ # off-load round[4]
783 addi $key_,$sp,$FRAME+15 # rewind $key_
784 ?vperm v26,v26,v27,$keyperm
785 lvx v29,$x40,$key
786 ?vperm v27,v27,v28,$keyperm
787 lvx v30,$x50,$key
788 ?vperm v28,v28,v29,$keyperm
789 lvx v31,$x60,$key
790 ?vperm v29,v29,v30,$keyperm
791 lvx $out0,$x70,$key # borrow $out0
792 ?vperm v30,v30,v31,$keyperm
793 lvx v24,$x00,$key_ # pre-load round[1]
794 ?vperm v31,v31,$out0,$keyperm
795 lvx v25,$x10,$key_ # pre-load round[2]
796
797 #lvx $inptail,0,$inp # "caller" already did this
798 #addi $inp,$inp,15 # 15 is not typo
799 subi $inp,$inp,15 # undo "caller"
800
801 le?li $idx,8
802 lvx_u $in0,$x00,$inp # load first 8 "words"
803 le?lvsl $inpperm,0,$idx
804 le?vspltisb $tmp,0x0f
805 lvx_u $in1,$x10,$inp
806 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
807 lvx_u $in2,$x20,$inp
808 le?vperm $in0,$in0,$in0,$inpperm
809 lvx_u $in3,$x30,$inp
810 le?vperm $in1,$in1,$in1,$inpperm
811 lvx_u $in4,$x40,$inp
812 le?vperm $in2,$in2,$in2,$inpperm
813 vxor $out0,$in0,$rndkey0
814 lvx_u $in5,$x50,$inp
815 le?vperm $in3,$in3,$in3,$inpperm
816 vxor $out1,$in1,$rndkey0
817 lvx_u $in6,$x60,$inp
818 le?vperm $in4,$in4,$in4,$inpperm
819 vxor $out2,$in2,$rndkey0
820 lvx_u $in7,$x70,$inp
821 addi $inp,$inp,0x80
822 le?vperm $in5,$in5,$in5,$inpperm
823 vxor $out3,$in3,$rndkey0
824 le?vperm $in6,$in6,$in6,$inpperm
825 vxor $out4,$in4,$rndkey0
826 le?vperm $in7,$in7,$in7,$inpperm
827 vxor $out5,$in5,$rndkey0
828 vxor $out6,$in6,$rndkey0
829 vxor $out7,$in7,$rndkey0
830
831 mtctr $rounds
832 b Loop_cbc_dec8x
833.align 5
834Loop_cbc_dec8x:
835 vncipher $out0,$out0,v24
836 vncipher $out1,$out1,v24
837 vncipher $out2,$out2,v24
838 vncipher $out3,$out3,v24
839 vncipher $out4,$out4,v24
840 vncipher $out5,$out5,v24
841 vncipher $out6,$out6,v24
842 vncipher $out7,$out7,v24
843 lvx v24,$x20,$key_ # round[3]
844 addi $key_,$key_,0x20
845
846 vncipher $out0,$out0,v25
847 vncipher $out1,$out1,v25
848 vncipher $out2,$out2,v25
849 vncipher $out3,$out3,v25
850 vncipher $out4,$out4,v25
851 vncipher $out5,$out5,v25
852 vncipher $out6,$out6,v25
853 vncipher $out7,$out7,v25
854 lvx v25,$x10,$key_ # round[4]
855 bdnz Loop_cbc_dec8x
856
857 subic $len,$len,128 # $len-=128
858 vncipher $out0,$out0,v24
859 vncipher $out1,$out1,v24
860 vncipher $out2,$out2,v24
861 vncipher $out3,$out3,v24
862 vncipher $out4,$out4,v24
863 vncipher $out5,$out5,v24
864 vncipher $out6,$out6,v24
865 vncipher $out7,$out7,v24
866
867 subfe. r0,r0,r0 # borrow?-1:0
868 vncipher $out0,$out0,v25
869 vncipher $out1,$out1,v25
870 vncipher $out2,$out2,v25
871 vncipher $out3,$out3,v25
872 vncipher $out4,$out4,v25
873 vncipher $out5,$out5,v25
874 vncipher $out6,$out6,v25
875 vncipher $out7,$out7,v25
876
877 and r0,r0,$len
878 vncipher $out0,$out0,v26
879 vncipher $out1,$out1,v26
880 vncipher $out2,$out2,v26
881 vncipher $out3,$out3,v26
882 vncipher $out4,$out4,v26
883 vncipher $out5,$out5,v26
884 vncipher $out6,$out6,v26
885 vncipher $out7,$out7,v26
886
887 add $inp,$inp,r0 # $inp is adjusted in such
888 # way that at exit from the
889 # loop inX-in7 are loaded
890 # with last "words"
891 vncipher $out0,$out0,v27
892 vncipher $out1,$out1,v27
893 vncipher $out2,$out2,v27
894 vncipher $out3,$out3,v27
895 vncipher $out4,$out4,v27
896 vncipher $out5,$out5,v27
897 vncipher $out6,$out6,v27
898 vncipher $out7,$out7,v27
899
900 addi $key_,$sp,$FRAME+15 # rewind $key_
901 vncipher $out0,$out0,v28
902 vncipher $out1,$out1,v28
903 vncipher $out2,$out2,v28
904 vncipher $out3,$out3,v28
905 vncipher $out4,$out4,v28
906 vncipher $out5,$out5,v28
907 vncipher $out6,$out6,v28
908 vncipher $out7,$out7,v28
909 lvx v24,$x00,$key_ # re-pre-load round[1]
910
911 vncipher $out0,$out0,v29
912 vncipher $out1,$out1,v29
913 vncipher $out2,$out2,v29
914 vncipher $out3,$out3,v29
915 vncipher $out4,$out4,v29
916 vncipher $out5,$out5,v29
917 vncipher $out6,$out6,v29
918 vncipher $out7,$out7,v29
919 lvx v25,$x10,$key_ # re-pre-load round[2]
920
921 vncipher $out0,$out0,v30
922 vxor $ivec,$ivec,v31 # xor with last round key
923 vncipher $out1,$out1,v30
924 vxor $in0,$in0,v31
925 vncipher $out2,$out2,v30
926 vxor $in1,$in1,v31
927 vncipher $out3,$out3,v30
928 vxor $in2,$in2,v31
929 vncipher $out4,$out4,v30
930 vxor $in3,$in3,v31
931 vncipher $out5,$out5,v30
932 vxor $in4,$in4,v31
933 vncipher $out6,$out6,v30
934 vxor $in5,$in5,v31
935 vncipher $out7,$out7,v30
936 vxor $in6,$in6,v31
937
938 vncipherlast $out0,$out0,$ivec
939 vncipherlast $out1,$out1,$in0
940 lvx_u $in0,$x00,$inp # load next input block
941 vncipherlast $out2,$out2,$in1
942 lvx_u $in1,$x10,$inp
943 vncipherlast $out3,$out3,$in2
944 le?vperm $in0,$in0,$in0,$inpperm
945 lvx_u $in2,$x20,$inp
946 vncipherlast $out4,$out4,$in3
947 le?vperm $in1,$in1,$in1,$inpperm
948 lvx_u $in3,$x30,$inp
949 vncipherlast $out5,$out5,$in4
950 le?vperm $in2,$in2,$in2,$inpperm
951 lvx_u $in4,$x40,$inp
952 vncipherlast $out6,$out6,$in5
953 le?vperm $in3,$in3,$in3,$inpperm
954 lvx_u $in5,$x50,$inp
955 vncipherlast $out7,$out7,$in6
956 le?vperm $in4,$in4,$in4,$inpperm
957 lvx_u $in6,$x60,$inp
958 vmr $ivec,$in7
959 le?vperm $in5,$in5,$in5,$inpperm
960 lvx_u $in7,$x70,$inp
961 addi $inp,$inp,0x80
962
963 le?vperm $out0,$out0,$out0,$inpperm
964 le?vperm $out1,$out1,$out1,$inpperm
965 stvx_u $out0,$x00,$out
966 le?vperm $in6,$in6,$in6,$inpperm
967 vxor $out0,$in0,$rndkey0
968 le?vperm $out2,$out2,$out2,$inpperm
969 stvx_u $out1,$x10,$out
970 le?vperm $in7,$in7,$in7,$inpperm
971 vxor $out1,$in1,$rndkey0
972 le?vperm $out3,$out3,$out3,$inpperm
973 stvx_u $out2,$x20,$out
974 vxor $out2,$in2,$rndkey0
975 le?vperm $out4,$out4,$out4,$inpperm
976 stvx_u $out3,$x30,$out
977 vxor $out3,$in3,$rndkey0
978 le?vperm $out5,$out5,$out5,$inpperm
979 stvx_u $out4,$x40,$out
980 vxor $out4,$in4,$rndkey0
981 le?vperm $out6,$out6,$out6,$inpperm
982 stvx_u $out5,$x50,$out
983 vxor $out5,$in5,$rndkey0
984 le?vperm $out7,$out7,$out7,$inpperm
985 stvx_u $out6,$x60,$out
986 vxor $out6,$in6,$rndkey0
987 stvx_u $out7,$x70,$out
988 addi $out,$out,0x80
989 vxor $out7,$in7,$rndkey0
990
991 mtctr $rounds
992 beq Loop_cbc_dec8x # did $len-=128 borrow?
993
994 addic. $len,$len,128
995 beq Lcbc_dec8x_done
996 nop
997 nop
998
999Loop_cbc_dec8x_tail: # up to 7 "words" tail...
1000 vncipher $out1,$out1,v24
1001 vncipher $out2,$out2,v24
1002 vncipher $out3,$out3,v24
1003 vncipher $out4,$out4,v24
1004 vncipher $out5,$out5,v24
1005 vncipher $out6,$out6,v24
1006 vncipher $out7,$out7,v24
1007 lvx v24,$x20,$key_ # round[3]
1008 addi $key_,$key_,0x20
1009
1010 vncipher $out1,$out1,v25
1011 vncipher $out2,$out2,v25
1012 vncipher $out3,$out3,v25
1013 vncipher $out4,$out4,v25
1014 vncipher $out5,$out5,v25
1015 vncipher $out6,$out6,v25
1016 vncipher $out7,$out7,v25
1017 lvx v25,$x10,$key_ # round[4]
1018 bdnz Loop_cbc_dec8x_tail
1019
1020 vncipher $out1,$out1,v24
1021 vncipher $out2,$out2,v24
1022 vncipher $out3,$out3,v24
1023 vncipher $out4,$out4,v24
1024 vncipher $out5,$out5,v24
1025 vncipher $out6,$out6,v24
1026 vncipher $out7,$out7,v24
1027
1028 vncipher $out1,$out1,v25
1029 vncipher $out2,$out2,v25
1030 vncipher $out3,$out3,v25
1031 vncipher $out4,$out4,v25
1032 vncipher $out5,$out5,v25
1033 vncipher $out6,$out6,v25
1034 vncipher $out7,$out7,v25
1035
1036 vncipher $out1,$out1,v26
1037 vncipher $out2,$out2,v26
1038 vncipher $out3,$out3,v26
1039 vncipher $out4,$out4,v26
1040 vncipher $out5,$out5,v26
1041 vncipher $out6,$out6,v26
1042 vncipher $out7,$out7,v26
1043
1044 vncipher $out1,$out1,v27
1045 vncipher $out2,$out2,v27
1046 vncipher $out3,$out3,v27
1047 vncipher $out4,$out4,v27
1048 vncipher $out5,$out5,v27
1049 vncipher $out6,$out6,v27
1050 vncipher $out7,$out7,v27
1051
1052 vncipher $out1,$out1,v28
1053 vncipher $out2,$out2,v28
1054 vncipher $out3,$out3,v28
1055 vncipher $out4,$out4,v28
1056 vncipher $out5,$out5,v28
1057 vncipher $out6,$out6,v28
1058 vncipher $out7,$out7,v28
1059
1060 vncipher $out1,$out1,v29
1061 vncipher $out2,$out2,v29
1062 vncipher $out3,$out3,v29
1063 vncipher $out4,$out4,v29
1064 vncipher $out5,$out5,v29
1065 vncipher $out6,$out6,v29
1066 vncipher $out7,$out7,v29
1067
1068 vncipher $out1,$out1,v30
1069 vxor $ivec,$ivec,v31 # last round key
1070 vncipher $out2,$out2,v30
1071 vxor $in1,$in1,v31
1072 vncipher $out3,$out3,v30
1073 vxor $in2,$in2,v31
1074 vncipher $out4,$out4,v30
1075 vxor $in3,$in3,v31
1076 vncipher $out5,$out5,v30
1077 vxor $in4,$in4,v31
1078 vncipher $out6,$out6,v30
1079 vxor $in5,$in5,v31
1080 vncipher $out7,$out7,v30
1081 vxor $in6,$in6,v31
1082
1083 cmplwi $len,32 # switch($len)
1084 blt Lcbc_dec8x_one
1085 nop
1086 beq Lcbc_dec8x_two
1087 cmplwi $len,64
1088 blt Lcbc_dec8x_three
1089 nop
1090 beq Lcbc_dec8x_four
1091 cmplwi $len,96
1092 blt Lcbc_dec8x_five
1093 nop
1094 beq Lcbc_dec8x_six
1095
1096Lcbc_dec8x_seven:
1097 vncipherlast $out1,$out1,$ivec
1098 vncipherlast $out2,$out2,$in1
1099 vncipherlast $out3,$out3,$in2
1100 vncipherlast $out4,$out4,$in3
1101 vncipherlast $out5,$out5,$in4
1102 vncipherlast $out6,$out6,$in5
1103 vncipherlast $out7,$out7,$in6
1104 vmr $ivec,$in7
1105
1106 le?vperm $out1,$out1,$out1,$inpperm
1107 le?vperm $out2,$out2,$out2,$inpperm
1108 stvx_u $out1,$x00,$out
1109 le?vperm $out3,$out3,$out3,$inpperm
1110 stvx_u $out2,$x10,$out
1111 le?vperm $out4,$out4,$out4,$inpperm
1112 stvx_u $out3,$x20,$out
1113 le?vperm $out5,$out5,$out5,$inpperm
1114 stvx_u $out4,$x30,$out
1115 le?vperm $out6,$out6,$out6,$inpperm
1116 stvx_u $out5,$x40,$out
1117 le?vperm $out7,$out7,$out7,$inpperm
1118 stvx_u $out6,$x50,$out
1119 stvx_u $out7,$x60,$out
1120 addi $out,$out,0x70
1121 b Lcbc_dec8x_done
1122
1123.align 5
1124Lcbc_dec8x_six:
1125 vncipherlast $out2,$out2,$ivec
1126 vncipherlast $out3,$out3,$in2
1127 vncipherlast $out4,$out4,$in3
1128 vncipherlast $out5,$out5,$in4
1129 vncipherlast $out6,$out6,$in5
1130 vncipherlast $out7,$out7,$in6
1131 vmr $ivec,$in7
1132
1133 le?vperm $out2,$out2,$out2,$inpperm
1134 le?vperm $out3,$out3,$out3,$inpperm
1135 stvx_u $out2,$x00,$out
1136 le?vperm $out4,$out4,$out4,$inpperm
1137 stvx_u $out3,$x10,$out
1138 le?vperm $out5,$out5,$out5,$inpperm
1139 stvx_u $out4,$x20,$out
1140 le?vperm $out6,$out6,$out6,$inpperm
1141 stvx_u $out5,$x30,$out
1142 le?vperm $out7,$out7,$out7,$inpperm
1143 stvx_u $out6,$x40,$out
1144 stvx_u $out7,$x50,$out
1145 addi $out,$out,0x60
1146 b Lcbc_dec8x_done
1147
1148.align 5
1149Lcbc_dec8x_five:
1150 vncipherlast $out3,$out3,$ivec
1151 vncipherlast $out4,$out4,$in3
1152 vncipherlast $out5,$out5,$in4
1153 vncipherlast $out6,$out6,$in5
1154 vncipherlast $out7,$out7,$in6
1155 vmr $ivec,$in7
1156
1157 le?vperm $out3,$out3,$out3,$inpperm
1158 le?vperm $out4,$out4,$out4,$inpperm
1159 stvx_u $out3,$x00,$out
1160 le?vperm $out5,$out5,$out5,$inpperm
1161 stvx_u $out4,$x10,$out
1162 le?vperm $out6,$out6,$out6,$inpperm
1163 stvx_u $out5,$x20,$out
1164 le?vperm $out7,$out7,$out7,$inpperm
1165 stvx_u $out6,$x30,$out
1166 stvx_u $out7,$x40,$out
1167 addi $out,$out,0x50
1168 b Lcbc_dec8x_done
1169
1170.align 5
1171Lcbc_dec8x_four:
1172 vncipherlast $out4,$out4,$ivec
1173 vncipherlast $out5,$out5,$in4
1174 vncipherlast $out6,$out6,$in5
1175 vncipherlast $out7,$out7,$in6
1176 vmr $ivec,$in7
1177
1178 le?vperm $out4,$out4,$out4,$inpperm
1179 le?vperm $out5,$out5,$out5,$inpperm
1180 stvx_u $out4,$x00,$out
1181 le?vperm $out6,$out6,$out6,$inpperm
1182 stvx_u $out5,$x10,$out
1183 le?vperm $out7,$out7,$out7,$inpperm
1184 stvx_u $out6,$x20,$out
1185 stvx_u $out7,$x30,$out
1186 addi $out,$out,0x40
1187 b Lcbc_dec8x_done
1188
1189.align 5
1190Lcbc_dec8x_three:
1191 vncipherlast $out5,$out5,$ivec
1192 vncipherlast $out6,$out6,$in5
1193 vncipherlast $out7,$out7,$in6
1194 vmr $ivec,$in7
1195
1196 le?vperm $out5,$out5,$out5,$inpperm
1197 le?vperm $out6,$out6,$out6,$inpperm
1198 stvx_u $out5,$x00,$out
1199 le?vperm $out7,$out7,$out7,$inpperm
1200 stvx_u $out6,$x10,$out
1201 stvx_u $out7,$x20,$out
1202 addi $out,$out,0x30
1203 b Lcbc_dec8x_done
1204
1205.align 5
1206Lcbc_dec8x_two:
1207 vncipherlast $out6,$out6,$ivec
1208 vncipherlast $out7,$out7,$in6
1209 vmr $ivec,$in7
1210
1211 le?vperm $out6,$out6,$out6,$inpperm
1212 le?vperm $out7,$out7,$out7,$inpperm
1213 stvx_u $out6,$x00,$out
1214 stvx_u $out7,$x10,$out
1215 addi $out,$out,0x20
1216 b Lcbc_dec8x_done
1217
1218.align 5
1219Lcbc_dec8x_one:
1220 vncipherlast $out7,$out7,$ivec
1221 vmr $ivec,$in7
1222
1223 le?vperm $out7,$out7,$out7,$inpperm
1224 stvx_u $out7,0,$out
1225 addi $out,$out,0x10
1226
1227Lcbc_dec8x_done:
1228 le?vperm $ivec,$ivec,$ivec,$inpperm
1229 stvx_u $ivec,0,$ivp # write [unaligned] iv
1230
1231 li r10,`$FRAME+15`
1232 li r11,`$FRAME+31`
1233 stvx $inpperm,r10,$sp # wipe copies of round keys
1234 addi r10,r10,32
1235 stvx $inpperm,r11,$sp
1236 addi r11,r11,32
1237 stvx $inpperm,r10,$sp
1238 addi r10,r10,32
1239 stvx $inpperm,r11,$sp
1240 addi r11,r11,32
1241 stvx $inpperm,r10,$sp
1242 addi r10,r10,32
1243 stvx $inpperm,r11,$sp
1244 addi r11,r11,32
1245 stvx $inpperm,r10,$sp
1246 addi r10,r10,32
1247 stvx $inpperm,r11,$sp
1248 addi r11,r11,32
1249
1250 mtspr 256,$vrsave
1251 lvx v20,r10,$sp # ABI says so
1252 addi r10,r10,32
1253 lvx v21,r11,$sp
1254 addi r11,r11,32
1255 lvx v22,r10,$sp
1256 addi r10,r10,32
1257 lvx v23,r11,$sp
1258 addi r11,r11,32
1259 lvx v24,r10,$sp
1260 addi r10,r10,32
1261 lvx v25,r11,$sp
1262 addi r11,r11,32
1263 lvx v26,r10,$sp
1264 addi r10,r10,32
1265 lvx v27,r11,$sp
1266 addi r11,r11,32
1267 lvx v28,r10,$sp
1268 addi r10,r10,32
1269 lvx v29,r11,$sp
1270 addi r11,r11,32
1271 lvx v30,r10,$sp
1272 lvx v31,r11,$sp
1273 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1274 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1275 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1276 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1277 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1278 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1279 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1280 blr
1281 .long 0
1282 .byte 0,12,0x14,0,0x80,6,6,0
1283 .long 0
1284.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1285___
1286}} }}}
1287
1288#########################################################################
1289{{{ # CTR procedure[s] #
1290
####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
#  1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
#  009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
#  https://github.com/openssl/openssl/pull/8942
#
#########################################################################
# Register map for the single-block CTR path: the eight procedure
# arguments arrive in GPRs r3..r10, working state lives in VRs v0..v11.
my ($inp, $out, $len, $key, $ivp, $x10, $rounds, $idx) = map { "r$_" } 3 .. 10;
my ($rndkey0, $rndkey1, $inout, $tmp) = map { "v$_" } 0 .. 3;
my ($ivec, $inptail, $inpperm, $outhead,
    $outperm, $outmask, $keyperm, $one) = map { "v$_" } 4 .. 11;
my $dat = $tmp;		# scratch alias; $tmp is free once setup is done
1313
$code.=<<___;
#
# CTR encryption entry point: args are inp, out, len (counted in
# 16-byte blocks), expanded key, and the [possibly unaligned] iv.
# Inputs of 8+ blocks branch to the unrolled bulk path; despite the
# historic 'ctr32' name, the kernel increments the counter as a full
# 128-bit quantity.
#
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-				# nothing to do for len<1

	lis	r0,0xfff0
	mfspr	$vrsave,256		# preserve caller's vrsave
	mtspr	256,r0			# enable the vector registers we use

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1	# build 128-bit constant 1 (ctr step)

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)	# round count from AES_KEY

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1	# loop below does two rounds per pass
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x	# bulk path for 8 blocks or more

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0	# counter block xor round[0]
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc
1366
# One-block-at-a-time CTR loop: encrypt the counter, xor with input,
# store; the final round is folded together with the input xor.
.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm	# align the input block
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat	# last round + input xor in one go

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask	# merge with pending output head
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0	# start next counter block
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	# flush the last partial output vector held in outhead
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave		# restore caller's vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
1420#########################################################################
1421{{ # Optimized CTR procedure #
# Register map for the 8x unrolled CTR path.
my $key_ = "r11";				# rewound key-schedule pointer
my ($x00, $x10, $x20, $x30,
    $x40, $x50, $x60, $x70) = map { "r$_" } 0, 8, 26 .. 31;
my ($in0, $in1, $in2, $in3,
    $in4, $in5, $in6, $in7) = map { "v$_" } 0 .. 3, 10, 12 .. 14;
my ($out0, $out1, $out2, $out3,
    $out4, $out5, $out6, $out7) = map { "v$_" } 15 .. 22;
my $rndkey0 = "v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp, $keyperm) = ($in3, $in4);	# aliases with "caller", redundant assignment
my ($two, $three, $four) = ($outhead, $outperm, $outmask);
1430
$code.=<<___;
#
# Bulk CTR path: eight blocks per iteration.  Prologue saves the
# non-volatile vector registers v20-v31 and GPRs r26-r31 per the ABI,
# and preloads the 0x10..0x70 block offsets into scratch GPRs.
#
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0			# all vector registers in use
1475
	# Copy the aligned key schedule onto the stack (middle rounds) and
	# keep the last six round keys resident in v26-v31, then set up the
	# first eight counter values, each pre-xored with round key 0.
	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one		# 128-bit constant 2
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4		# length in bytes from here on

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
# Main 8x loop.  The instruction interleaving (cipher rounds mixed with
# loads, stores and branch-condition setup) is deliberate pipelining;
# do not reorder.
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	# middle rounds done; run the last six rounds (v26-v31 resident)
	# while computing whether a full 8-block batch remains
	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	# full batch: finish the eight blocks and immediately start the
	# next batch of counter values while the results are stored
	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle
1726
# Tail dispatch: len now holds remaining-bytes minus 128 (see the
# subic/addi pair above), so the negative thresholds select how many of
# the eight in-flight blocks are real.  Each case finishes its blocks
# with vcipherlast against the already-keyed input and stores them.
.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	# seven blocks: the real input starts one slot later (in1..in7)
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10
1897
Lctr32_enc8x_done:
	# scrub the on-stack key schedule copies, then restore the
	# non-volatile vector registers and GPRs saved in the prologue
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
1954}} }}}
1955
1956#########################################################################
1957{{{ # XTS procedures #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#			const AES_KEY *key1, const AES_KEY *key2,	#
#			[const] unsigned char iv[16]);			#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# input tweak value is assumed to be encrypted already, and last tweak	#
# value, one suitable for consecutive call on same chunk of data, is	#
# written back to original buffer. In addition, in "tweak chaining"	#
# mode only complete input blocks are processed.			#
1966
# Register map for the XTS paths: the six procedure arguments arrive in
# GPRs r3..r10; the tweak state occupies v8..v12.
my ($inp, $out, $len, $key1, $key2, $ivp, $rounds, $idx) = map { "r$_" } 3 .. 10;
my ($rndkey0, $rndkey1, $inout) = map { "v$_" } 0 .. 2;
my ($output, $inptail, $inpperm, $leperm, $keyperm) = map { "v$_" } 3 .. 7;
my ($tweak, $seven, $eighty7, $tmp, $tweak1) = map { "v$_" } 8 .. 12;
my $taillen = $key2;	# key2's register is reused for the tail length

($inp, $idx) = ($idx, $inp);	# swap so r3 is free to hold the return value
1974
1975$code.=<<___;
1976.globl .${prefix}_xts_encrypt
1977 mr $inp,r3 # reassign
1978 li r3,-1
1979 ${UCMP}i $len,16
1980 bltlr-
1981
1982 lis r0,0xfff0
1983 mfspr r12,256 # save vrsave
1984 li r11,0
1985 mtspr 256,r0
1986
1987 vspltisb $seven,0x07 # 0x070707..07
1988 le?lvsl $leperm,r11,r11
1989 le?vspltisb $tmp,0x0f
1990 le?vxor $leperm,$leperm,$seven
1991
1992 li $idx,15
1993 lvx $tweak,0,$ivp # load [unaligned] iv
1994 lvsl $inpperm,0,$ivp
1995 lvx $inptail,$idx,$ivp
1996 le?vxor $inpperm,$inpperm,$tmp
1997 vperm $tweak,$tweak,$inptail,$inpperm
1998
1999 neg r11,$inp
2000 lvsr $inpperm,0,r11 # prepare for unaligned load
2001 lvx $inout,0,$inp
2002 addi $inp,$inp,15 # 15 is not typo
2003 le?vxor $inpperm,$inpperm,$tmp
2004
2005 ${UCMP}i $key2,0 # key2==NULL?
2006 beq Lxts_enc_no_key2
2007
2008 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2009 lwz $rounds,240($key2)
2010 srwi $rounds,$rounds,1
2011 subi $rounds,$rounds,1
2012 li $idx,16
2013
2014 lvx $rndkey0,0,$key2
2015 lvx $rndkey1,$idx,$key2
2016 addi $idx,$idx,16
2017 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2018 vxor $tweak,$tweak,$rndkey0
2019 lvx $rndkey0,$idx,$key2
2020 addi $idx,$idx,16
2021 mtctr $rounds
2022
2023Ltweak_xts_enc:
2024 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2025 vcipher $tweak,$tweak,$rndkey1
2026 lvx $rndkey1,$idx,$key2
2027 addi $idx,$idx,16
2028 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2029 vcipher $tweak,$tweak,$rndkey0
2030 lvx $rndkey0,$idx,$key2
2031 addi $idx,$idx,16
2032 bdnz Ltweak_xts_enc
2033
2034 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2035 vcipher $tweak,$tweak,$rndkey1
2036 lvx $rndkey1,$idx,$key2
2037 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2038 vcipherlast $tweak,$tweak,$rndkey0
2039
2040 li $ivp,0 # don't chain the tweak
2041 b Lxts_enc
2042
2043Lxts_enc_no_key2:
2044 li $idx,-16
2045 and $len,$len,$idx # in "tweak chaining"
2046 # mode only complete
2047 # blocks are processed
2048Lxts_enc:
2049 lvx $inptail,0,$inp
2050 addi $inp,$inp,16
2051
2052 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2053 lwz $rounds,240($key1)
2054 srwi $rounds,$rounds,1
2055 subi $rounds,$rounds,1
2056 li $idx,16
2057
2058 vslb $eighty7,$seven,$seven # 0x808080..80
2059 vor $eighty7,$eighty7,$seven # 0x878787..87
2060 vspltisb $tmp,1 # 0x010101..01
2061 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2062
2063 ${UCMP}i $len,96
2064 bge _aesp8_xts_encrypt6x
2065
2066 andi. $taillen,$len,15
2067 subic r0,$len,32
2068 subi $taillen,$taillen,16
2069 subfe r0,r0,r0
2070 and r0,r0,$taillen
2071 add $inp,$inp,r0
2072
2073 lvx $rndkey0,0,$key1
2074 lvx $rndkey1,$idx,$key1
2075 addi $idx,$idx,16
2076 vperm $inout,$inout,$inptail,$inpperm
2077 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2078 vxor $inout,$inout,$tweak
2079 vxor $inout,$inout,$rndkey0
2080 lvx $rndkey0,$idx,$key1
2081 addi $idx,$idx,16
2082 mtctr $rounds
2083 b Loop_xts_enc
2084
2085.align 5
2086Loop_xts_enc:
2087 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2088 vcipher $inout,$inout,$rndkey1
2089 lvx $rndkey1,$idx,$key1
2090 addi $idx,$idx,16
2091 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2092 vcipher $inout,$inout,$rndkey0
2093 lvx $rndkey0,$idx,$key1
2094 addi $idx,$idx,16
2095 bdnz Loop_xts_enc
2096
2097 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2098 vcipher $inout,$inout,$rndkey1
2099 lvx $rndkey1,$idx,$key1
2100 li $idx,16
2101 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2102 vxor $rndkey0,$rndkey0,$tweak
2103 vcipherlast $output,$inout,$rndkey0
2104
2105 le?vperm $tmp,$output,$output,$leperm
2106 be?nop
2107 le?stvx_u $tmp,0,$out
2108 be?stvx_u $output,0,$out
2109 addi $out,$out,16
2110
2111 subic. $len,$len,16
2112 beq Lxts_enc_done
2113
2114 vmr $inout,$inptail
2115 lvx $inptail,0,$inp
2116 addi $inp,$inp,16
2117 lvx $rndkey0,0,$key1
2118 lvx $rndkey1,$idx,$key1
2119 addi $idx,$idx,16
2120
2121 subic r0,$len,32
2122 subfe r0,r0,r0
2123 and r0,r0,$taillen
2124 add $inp,$inp,r0
2125
2126 vsrab $tmp,$tweak,$seven # next tweak value
2127 vaddubm $tweak,$tweak,$tweak
2128 vsldoi $tmp,$tmp,$tmp,15
2129 vand $tmp,$tmp,$eighty7
2130 vxor $tweak,$tweak,$tmp
2131
2132 vperm $inout,$inout,$inptail,$inpperm
2133 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2134 vxor $inout,$inout,$tweak
2135 vxor $output,$output,$rndkey0 # just in case $len<16
2136 vxor $inout,$inout,$rndkey0
2137 lvx $rndkey0,$idx,$key1
2138 addi $idx,$idx,16
2139
2140 mtctr $rounds
2141 ${UCMP}i $len,16
2142 bge Loop_xts_enc
2143
2144 vxor $output,$output,$tweak
2145 lvsr $inpperm,0,$len # $inpperm is no longer needed
2146 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2147 vspltisb $tmp,-1
2148 vperm $inptail,$inptail,$tmp,$inpperm
2149 vsel $inout,$inout,$output,$inptail
2150
2151 subi r11,$out,17
2152 subi $out,$out,16
2153 mtctr $len
2154 li $len,16
2155Loop_xts_enc_steal:
2156 lbzu r0,1(r11)
2157 stb r0,16(r11)
2158 bdnz Loop_xts_enc_steal
2159
2160 mtctr $rounds
2161 b Loop_xts_enc # one more time...
2162
2163Lxts_enc_done:
2164 ${UCMP}i $ivp,0
2165 beq Lxts_enc_ret
2166
2167 vsrab $tmp,$tweak,$seven # next tweak value
2168 vaddubm $tweak,$tweak,$tweak
2169 vsldoi $tmp,$tmp,$tmp,15
2170 vand $tmp,$tmp,$eighty7
2171 vxor $tweak,$tweak,$tmp
2172
2173 le?vperm $tweak,$tweak,$tweak,$leperm
2174 stvx_u $tweak,0,$ivp
2175
2176Lxts_enc_ret:
2177 mtspr 256,r12 # restore vrsave
2178 li r3,0
2179 blr
2180 .long 0
2181 .byte 0,12,0x04,0,0x80,6,6,0
2182 .long 0
2183.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2184
2185.globl .${prefix}_xts_decrypt
2186 mr $inp,r3 # reassign
2187 li r3,-1
2188 ${UCMP}i $len,16
2189 bltlr-
2190
2191 lis r0,0xfff8
2192 mfspr r12,256 # save vrsave
2193 li r11,0
2194 mtspr 256,r0
2195
2196 andi. r0,$len,15
2197 neg r0,r0
2198 andi. r0,r0,16
2199 sub $len,$len,r0
2200
2201 vspltisb $seven,0x07 # 0x070707..07
2202 le?lvsl $leperm,r11,r11
2203 le?vspltisb $tmp,0x0f
2204 le?vxor $leperm,$leperm,$seven
2205
2206 li $idx,15
2207 lvx $tweak,0,$ivp # load [unaligned] iv
2208 lvsl $inpperm,0,$ivp
2209 lvx $inptail,$idx,$ivp
2210 le?vxor $inpperm,$inpperm,$tmp
2211 vperm $tweak,$tweak,$inptail,$inpperm
2212
2213 neg r11,$inp
2214 lvsr $inpperm,0,r11 # prepare for unaligned load
2215 lvx $inout,0,$inp
2216 addi $inp,$inp,15 # 15 is not typo
2217 le?vxor $inpperm,$inpperm,$tmp
2218
2219 ${UCMP}i $key2,0 # key2==NULL?
2220 beq Lxts_dec_no_key2
2221
2222 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2223 lwz $rounds,240($key2)
2224 srwi $rounds,$rounds,1
2225 subi $rounds,$rounds,1
2226 li $idx,16
2227
2228 lvx $rndkey0,0,$key2
2229 lvx $rndkey1,$idx,$key2
2230 addi $idx,$idx,16
2231 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2232 vxor $tweak,$tweak,$rndkey0
2233 lvx $rndkey0,$idx,$key2
2234 addi $idx,$idx,16
2235 mtctr $rounds
2236
2237Ltweak_xts_dec:
2238 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2239 vcipher $tweak,$tweak,$rndkey1
2240 lvx $rndkey1,$idx,$key2
2241 addi $idx,$idx,16
2242 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2243 vcipher $tweak,$tweak,$rndkey0
2244 lvx $rndkey0,$idx,$key2
2245 addi $idx,$idx,16
2246 bdnz Ltweak_xts_dec
2247
2248 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2249 vcipher $tweak,$tweak,$rndkey1
2250 lvx $rndkey1,$idx,$key2
2251 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2252 vcipherlast $tweak,$tweak,$rndkey0
2253
2254 li $ivp,0 # don't chain the tweak
2255 b Lxts_dec
2256
2257Lxts_dec_no_key2:
2258 neg $idx,$len
2259 andi. $idx,$idx,15
2260 add $len,$len,$idx # in "tweak chaining"
2261 # mode only complete
2262 # blocks are processed
2263Lxts_dec:
2264 lvx $inptail,0,$inp
2265 addi $inp,$inp,16
2266
2267 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2268 lwz $rounds,240($key1)
2269 srwi $rounds,$rounds,1
2270 subi $rounds,$rounds,1
2271 li $idx,16
2272
2273 vslb $eighty7,$seven,$seven # 0x808080..80
2274 vor $eighty7,$eighty7,$seven # 0x878787..87
2275 vspltisb $tmp,1 # 0x010101..01
2276 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2277
2278 ${UCMP}i $len,96
2279 bge _aesp8_xts_decrypt6x
2280
2281 lvx $rndkey0,0,$key1
2282 lvx $rndkey1,$idx,$key1
2283 addi $idx,$idx,16
2284 vperm $inout,$inout,$inptail,$inpperm
2285 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2286 vxor $inout,$inout,$tweak
2287 vxor $inout,$inout,$rndkey0
2288 lvx $rndkey0,$idx,$key1
2289 addi $idx,$idx,16
2290 mtctr $rounds
2291
2292 ${UCMP}i $len,16
2293 blt Ltail_xts_dec
2294 be?b Loop_xts_dec
2295
2296.align 5
2297Loop_xts_dec:
2298 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2299 vncipher $inout,$inout,$rndkey1
2300 lvx $rndkey1,$idx,$key1
2301 addi $idx,$idx,16
2302 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2303 vncipher $inout,$inout,$rndkey0
2304 lvx $rndkey0,$idx,$key1
2305 addi $idx,$idx,16
2306 bdnz Loop_xts_dec
2307
2308 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2309 vncipher $inout,$inout,$rndkey1
2310 lvx $rndkey1,$idx,$key1
2311 li $idx,16
2312 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2313 vxor $rndkey0,$rndkey0,$tweak
2314 vncipherlast $output,$inout,$rndkey0
2315
2316 le?vperm $tmp,$output,$output,$leperm
2317 be?nop
2318 le?stvx_u $tmp,0,$out
2319 be?stvx_u $output,0,$out
2320 addi $out,$out,16
2321
2322 subic. $len,$len,16
2323 beq Lxts_dec_done
2324
2325 vmr $inout,$inptail
2326 lvx $inptail,0,$inp
2327 addi $inp,$inp,16
2328 lvx $rndkey0,0,$key1
2329 lvx $rndkey1,$idx,$key1
2330 addi $idx,$idx,16
2331
2332 vsrab $tmp,$tweak,$seven # next tweak value
2333 vaddubm $tweak,$tweak,$tweak
2334 vsldoi $tmp,$tmp,$tmp,15
2335 vand $tmp,$tmp,$eighty7
2336 vxor $tweak,$tweak,$tmp
2337
2338 vperm $inout,$inout,$inptail,$inpperm
2339 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2340 vxor $inout,$inout,$tweak
2341 vxor $inout,$inout,$rndkey0
2342 lvx $rndkey0,$idx,$key1
2343 addi $idx,$idx,16
2344
2345 mtctr $rounds
2346 ${UCMP}i $len,16
2347 bge Loop_xts_dec
2348
2349Ltail_xts_dec:
2350 vsrab $tmp,$tweak,$seven # next tweak value
2351 vaddubm $tweak1,$tweak,$tweak
2352 vsldoi $tmp,$tmp,$tmp,15
2353 vand $tmp,$tmp,$eighty7
2354 vxor $tweak1,$tweak1,$tmp
2355
2356 subi $inp,$inp,16
2357 add $inp,$inp,$len
2358
2359 vxor $inout,$inout,$tweak # :-(
2360 vxor $inout,$inout,$tweak1 # :-)
2361
2362Loop_xts_dec_short:
2363 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2364 vncipher $inout,$inout,$rndkey1
2365 lvx $rndkey1,$idx,$key1
2366 addi $idx,$idx,16
2367 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2368 vncipher $inout,$inout,$rndkey0
2369 lvx $rndkey0,$idx,$key1
2370 addi $idx,$idx,16
2371 bdnz Loop_xts_dec_short
2372
2373 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2374 vncipher $inout,$inout,$rndkey1
2375 lvx $rndkey1,$idx,$key1
2376 li $idx,16
2377 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2378 vxor $rndkey0,$rndkey0,$tweak1
2379 vncipherlast $output,$inout,$rndkey0
2380
2381 le?vperm $tmp,$output,$output,$leperm
2382 be?nop
2383 le?stvx_u $tmp,0,$out
2384 be?stvx_u $output,0,$out
2385
2386 vmr $inout,$inptail
2387 lvx $inptail,0,$inp
2388 #addi $inp,$inp,16
2389 lvx $rndkey0,0,$key1
2390 lvx $rndkey1,$idx,$key1
2391 addi $idx,$idx,16
2392 vperm $inout,$inout,$inptail,$inpperm
2393 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2394
2395 lvsr $inpperm,0,$len # $inpperm is no longer needed
2396 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2397 vspltisb $tmp,-1
2398 vperm $inptail,$inptail,$tmp,$inpperm
2399 vsel $inout,$inout,$output,$inptail
2400
2401 vxor $rndkey0,$rndkey0,$tweak
2402 vxor $inout,$inout,$rndkey0
2403 lvx $rndkey0,$idx,$key1
2404 addi $idx,$idx,16
2405
2406 subi r11,$out,1
2407 mtctr $len
2408 li $len,16
2409Loop_xts_dec_steal:
2410 lbzu r0,1(r11)
2411 stb r0,16(r11)
2412 bdnz Loop_xts_dec_steal
2413
2414 mtctr $rounds
2415 b Loop_xts_dec # one more time...
2416
2417Lxts_dec_done:
2418 ${UCMP}i $ivp,0
2419 beq Lxts_dec_ret
2420
2421 vsrab $tmp,$tweak,$seven # next tweak value
2422 vaddubm $tweak,$tweak,$tweak
2423 vsldoi $tmp,$tmp,$tmp,15
2424 vand $tmp,$tmp,$eighty7
2425 vxor $tweak,$tweak,$tmp
2426
2427 le?vperm $tweak,$tweak,$tweak,$leperm
2428 stvx_u $tweak,0,$ivp
2429
2430Lxts_dec_ret:
2431 mtspr 256,r12 # restore vrsave
2432 li r3,0
2433 blr
2434 .long 0
2435 .byte 0,12,0x04,0,0x80,6,6,0
2436 .long 0
2437.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2438___
2439#########################################################################
2440{{ # Optimized XTS procedures #
2441my $key_=$key2;
2442my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2443 $x00=0 if ($flavour =~ /osx/);
2444my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2445my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2446my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2447my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2448 # v26-v31 last 6 round keys
2449my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2450my $taillen=$x70;
2451
2452$code.=<<___;
2453.align 5
2454_aesp8_xts_encrypt6x:
2455 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2456 mflr r11
2457 li r7,`$FRAME+8*16+15`
2458 li r3,`$FRAME+8*16+31`
2459 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2460 stvx v20,r7,$sp # ABI says so
2461 addi r7,r7,32
2462 stvx v21,r3,$sp
2463 addi r3,r3,32
2464 stvx v22,r7,$sp
2465 addi r7,r7,32
2466 stvx v23,r3,$sp
2467 addi r3,r3,32
2468 stvx v24,r7,$sp
2469 addi r7,r7,32
2470 stvx v25,r3,$sp
2471 addi r3,r3,32
2472 stvx v26,r7,$sp
2473 addi r7,r7,32
2474 stvx v27,r3,$sp
2475 addi r3,r3,32
2476 stvx v28,r7,$sp
2477 addi r7,r7,32
2478 stvx v29,r3,$sp
2479 addi r3,r3,32
2480 stvx v30,r7,$sp
2481 stvx v31,r3,$sp
2482 li r0,-1
2483 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2484 li $x10,0x10
2485 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2486 li $x20,0x20
2487 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2488 li $x30,0x30
2489 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2490 li $x40,0x40
2491 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2492 li $x50,0x50
2493 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2494 li $x60,0x60
2495 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2496 li $x70,0x70
2497 mtspr 256,r0
2498
2499 xxlor 2, 32+$eighty7, 32+$eighty7
2500 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
2501 xxlor 1, 32+$eighty7, 32+$eighty7
2502
2503 # Load XOR Lconsts.
2504 mr $x70, r6
2505 bl Lconsts
2506 lxvw4x 0, $x40, r6 # load XOR contents
2507 mr r6, $x70
2508 li $x70,0x70
2509
2510 subi $rounds,$rounds,3 # -4 in total
2511
2512 lvx $rndkey0,$x00,$key1 # load key schedule
2513 lvx v30,$x10,$key1
2514 addi $key1,$key1,0x20
2515 lvx v31,$x00,$key1
2516 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2517 addi $key_,$sp,$FRAME+15
2518 mtctr $rounds
2519
2520Load_xts_enc_key:
2521 ?vperm v24,v30,v31,$keyperm
2522 lvx v30,$x10,$key1
2523 addi $key1,$key1,0x20
2524 stvx v24,$x00,$key_ # off-load round[1]
2525 ?vperm v25,v31,v30,$keyperm
2526 lvx v31,$x00,$key1
2527 stvx v25,$x10,$key_ # off-load round[2]
2528 addi $key_,$key_,0x20
2529 bdnz Load_xts_enc_key
2530
2531 lvx v26,$x10,$key1
2532 ?vperm v24,v30,v31,$keyperm
2533 lvx v27,$x20,$key1
2534 stvx v24,$x00,$key_ # off-load round[3]
2535 ?vperm v25,v31,v26,$keyperm
2536 lvx v28,$x30,$key1
2537 stvx v25,$x10,$key_ # off-load round[4]
2538 addi $key_,$sp,$FRAME+15 # rewind $key_
2539 ?vperm v26,v26,v27,$keyperm
2540 lvx v29,$x40,$key1
2541 ?vperm v27,v27,v28,$keyperm
2542 lvx v30,$x50,$key1
2543 ?vperm v28,v28,v29,$keyperm
2544 lvx v31,$x60,$key1
2545 ?vperm v29,v29,v30,$keyperm
2546 lvx $twk5,$x70,$key1 # borrow $twk5
2547 ?vperm v30,v30,v31,$keyperm
2548 lvx v24,$x00,$key_ # pre-load round[1]
2549 ?vperm v31,v31,$twk5,$keyperm
2550 lvx v25,$x10,$key_ # pre-load round[2]
2551
2552 # Switch to use the following codes with 0x010101..87 to generate tweak.
2553 # eighty7 = 0x010101..87
2554 # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
2555 # vand tmp, tmp, eighty7 # last byte with carry
2556 # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
2557 # xxlor vsx, 0, 0
2558 # vpermxor tweak, tweak, tmp, vsx
2559
2560 vperm $in0,$inout,$inptail,$inpperm
2561 subi $inp,$inp,31 # undo "caller"
2562 vxor $twk0,$tweak,$rndkey0
2563 vsrab $tmp,$tweak,$seven # next tweak value
2564 vaddubm $tweak,$tweak,$tweak
2565 vand $tmp,$tmp,$eighty7
2566 vxor $out0,$in0,$twk0
2567 xxlor 32+$in1, 0, 0
2568 vpermxor $tweak, $tweak, $tmp, $in1
2569
2570 lvx_u $in1,$x10,$inp
2571 vxor $twk1,$tweak,$rndkey0
2572 vsrab $tmp,$tweak,$seven # next tweak value
2573 vaddubm $tweak,$tweak,$tweak
2574 le?vperm $in1,$in1,$in1,$leperm
2575 vand $tmp,$tmp,$eighty7
2576 vxor $out1,$in1,$twk1
2577 xxlor 32+$in2, 0, 0
2578 vpermxor $tweak, $tweak, $tmp, $in2
2579
2580 lvx_u $in2,$x20,$inp
2581 andi. $taillen,$len,15
2582 vxor $twk2,$tweak,$rndkey0
2583 vsrab $tmp,$tweak,$seven # next tweak value
2584 vaddubm $tweak,$tweak,$tweak
2585 le?vperm $in2,$in2,$in2,$leperm
2586 vand $tmp,$tmp,$eighty7
2587 vxor $out2,$in2,$twk2
2588 xxlor 32+$in3, 0, 0
2589 vpermxor $tweak, $tweak, $tmp, $in3
2590
2591 lvx_u $in3,$x30,$inp
2592 sub $len,$len,$taillen
2593 vxor $twk3,$tweak,$rndkey0
2594 vsrab $tmp,$tweak,$seven # next tweak value
2595 vaddubm $tweak,$tweak,$tweak
2596 le?vperm $in3,$in3,$in3,$leperm
2597 vand $tmp,$tmp,$eighty7
2598 vxor $out3,$in3,$twk3
2599 xxlor 32+$in4, 0, 0
2600 vpermxor $tweak, $tweak, $tmp, $in4
2601
2602 lvx_u $in4,$x40,$inp
2603 subi $len,$len,0x60
2604 vxor $twk4,$tweak,$rndkey0
2605 vsrab $tmp,$tweak,$seven # next tweak value
2606 vaddubm $tweak,$tweak,$tweak
2607 le?vperm $in4,$in4,$in4,$leperm
2608 vand $tmp,$tmp,$eighty7
2609 vxor $out4,$in4,$twk4
2610 xxlor 32+$in5, 0, 0
2611 vpermxor $tweak, $tweak, $tmp, $in5
2612
2613 lvx_u $in5,$x50,$inp
2614 addi $inp,$inp,0x60
2615 vxor $twk5,$tweak,$rndkey0
2616 vsrab $tmp,$tweak,$seven # next tweak value
2617 vaddubm $tweak,$tweak,$tweak
2618 le?vperm $in5,$in5,$in5,$leperm
2619 vand $tmp,$tmp,$eighty7
2620 vxor $out5,$in5,$twk5
2621 xxlor 32+$in0, 0, 0
2622 vpermxor $tweak, $tweak, $tmp, $in0
2623
2624 vxor v31,v31,$rndkey0
2625 mtctr $rounds
2626 b Loop_xts_enc6x
2627
2628.align 5
2629Loop_xts_enc6x:
2630 vcipher $out0,$out0,v24
2631 vcipher $out1,$out1,v24
2632 vcipher $out2,$out2,v24
2633 vcipher $out3,$out3,v24
2634 vcipher $out4,$out4,v24
2635 vcipher $out5,$out5,v24
2636 lvx v24,$x20,$key_ # round[3]
2637 addi $key_,$key_,0x20
2638
2639 vcipher $out0,$out0,v25
2640 vcipher $out1,$out1,v25
2641 vcipher $out2,$out2,v25
2642 vcipher $out3,$out3,v25
2643 vcipher $out4,$out4,v25
2644 vcipher $out5,$out5,v25
2645 lvx v25,$x10,$key_ # round[4]
2646 bdnz Loop_xts_enc6x
2647
2648 xxlor 32+$eighty7, 1, 1 # 0x010101..87
2649
2650 subic $len,$len,96 # $len-=96
2651 vxor $in0,$twk0,v31 # xor with last round key
2652 vcipher $out0,$out0,v24
2653 vcipher $out1,$out1,v24
2654 vsrab $tmp,$tweak,$seven # next tweak value
2655 vxor $twk0,$tweak,$rndkey0
2656 vaddubm $tweak,$tweak,$tweak
2657 vcipher $out2,$out2,v24
2658 vcipher $out3,$out3,v24
2659 vcipher $out4,$out4,v24
2660 vcipher $out5,$out5,v24
2661
2662 subfe. r0,r0,r0 # borrow?-1:0
2663 vand $tmp,$tmp,$eighty7
2664 vcipher $out0,$out0,v25
2665 vcipher $out1,$out1,v25
2666 xxlor 32+$in1, 0, 0
2667 vpermxor $tweak, $tweak, $tmp, $in1
2668 vcipher $out2,$out2,v25
2669 vcipher $out3,$out3,v25
2670 vxor $in1,$twk1,v31
2671 vsrab $tmp,$tweak,$seven # next tweak value
2672 vxor $twk1,$tweak,$rndkey0
2673 vcipher $out4,$out4,v25
2674 vcipher $out5,$out5,v25
2675
2676 and r0,r0,$len
2677 vaddubm $tweak,$tweak,$tweak
2678 vcipher $out0,$out0,v26
2679 vcipher $out1,$out1,v26
2680 vand $tmp,$tmp,$eighty7
2681 vcipher $out2,$out2,v26
2682 vcipher $out3,$out3,v26
2683 xxlor 32+$in2, 0, 0
2684 vpermxor $tweak, $tweak, $tmp, $in2
2685 vcipher $out4,$out4,v26
2686 vcipher $out5,$out5,v26
2687
2688 add $inp,$inp,r0 # $inp is adjusted in such
2689 # way that at exit from the
2690 # loop inX-in5 are loaded
2691 # with last "words"
2692 vxor $in2,$twk2,v31
2693 vsrab $tmp,$tweak,$seven # next tweak value
2694 vxor $twk2,$tweak,$rndkey0
2695 vaddubm $tweak,$tweak,$tweak
2696 vcipher $out0,$out0,v27
2697 vcipher $out1,$out1,v27
2698 vcipher $out2,$out2,v27
2699 vcipher $out3,$out3,v27
2700 vand $tmp,$tmp,$eighty7
2701 vcipher $out4,$out4,v27
2702 vcipher $out5,$out5,v27
2703
2704 addi $key_,$sp,$FRAME+15 # rewind $key_
2705 xxlor 32+$in3, 0, 0
2706 vpermxor $tweak, $tweak, $tmp, $in3
2707 vcipher $out0,$out0,v28
2708 vcipher $out1,$out1,v28
2709 vxor $in3,$twk3,v31
2710 vsrab $tmp,$tweak,$seven # next tweak value
2711 vxor $twk3,$tweak,$rndkey0
2712 vcipher $out2,$out2,v28
2713 vcipher $out3,$out3,v28
2714 vaddubm $tweak,$tweak,$tweak
2715 vcipher $out4,$out4,v28
2716 vcipher $out5,$out5,v28
2717 lvx v24,$x00,$key_ # re-pre-load round[1]
2718 vand $tmp,$tmp,$eighty7
2719
2720 vcipher $out0,$out0,v29
2721 vcipher $out1,$out1,v29
2722 xxlor 32+$in4, 0, 0
2723 vpermxor $tweak, $tweak, $tmp, $in4
2724 vcipher $out2,$out2,v29
2725 vcipher $out3,$out3,v29
2726 vxor $in4,$twk4,v31
2727 vsrab $tmp,$tweak,$seven # next tweak value
2728 vxor $twk4,$tweak,$rndkey0
2729 vcipher $out4,$out4,v29
2730 vcipher $out5,$out5,v29
2731 lvx v25,$x10,$key_ # re-pre-load round[2]
2732 vaddubm $tweak,$tweak,$tweak
2733
2734 vcipher $out0,$out0,v30
2735 vcipher $out1,$out1,v30
2736 vand $tmp,$tmp,$eighty7
2737 vcipher $out2,$out2,v30
2738 vcipher $out3,$out3,v30
2739 xxlor 32+$in5, 0, 0
2740 vpermxor $tweak, $tweak, $tmp, $in5
2741 vcipher $out4,$out4,v30
2742 vcipher $out5,$out5,v30
2743 vxor $in5,$twk5,v31
2744 vsrab $tmp,$tweak,$seven # next tweak value
2745 vxor $twk5,$tweak,$rndkey0
2746
2747 vcipherlast $out0,$out0,$in0
2748 lvx_u $in0,$x00,$inp # load next input block
2749 vaddubm $tweak,$tweak,$tweak
2750 vcipherlast $out1,$out1,$in1
2751 lvx_u $in1,$x10,$inp
2752 vcipherlast $out2,$out2,$in2
2753 le?vperm $in0,$in0,$in0,$leperm
2754 lvx_u $in2,$x20,$inp
2755 vand $tmp,$tmp,$eighty7
2756 vcipherlast $out3,$out3,$in3
2757 le?vperm $in1,$in1,$in1,$leperm
2758 lvx_u $in3,$x30,$inp
2759 vcipherlast $out4,$out4,$in4
2760 le?vperm $in2,$in2,$in2,$leperm
2761 lvx_u $in4,$x40,$inp
2762 xxlor 10, 32+$in0, 32+$in0
2763 xxlor 32+$in0, 0, 0
2764 vpermxor $tweak, $tweak, $tmp, $in0
2765 xxlor 32+$in0, 10, 10
2766 vcipherlast $tmp,$out5,$in5 # last block might be needed
2767 # in stealing mode
2768 le?vperm $in3,$in3,$in3,$leperm
2769 lvx_u $in5,$x50,$inp
2770 addi $inp,$inp,0x60
2771 le?vperm $in4,$in4,$in4,$leperm
2772 le?vperm $in5,$in5,$in5,$leperm
2773
2774 le?vperm $out0,$out0,$out0,$leperm
2775 le?vperm $out1,$out1,$out1,$leperm
2776 stvx_u $out0,$x00,$out # store output
2777 vxor $out0,$in0,$twk0
2778 le?vperm $out2,$out2,$out2,$leperm
2779 stvx_u $out1,$x10,$out
2780 vxor $out1,$in1,$twk1
2781 le?vperm $out3,$out3,$out3,$leperm
2782 stvx_u $out2,$x20,$out
2783 vxor $out2,$in2,$twk2
2784 le?vperm $out4,$out4,$out4,$leperm
2785 stvx_u $out3,$x30,$out
2786 vxor $out3,$in3,$twk3
2787 le?vperm $out5,$tmp,$tmp,$leperm
2788 stvx_u $out4,$x40,$out
2789 vxor $out4,$in4,$twk4
2790 le?stvx_u $out5,$x50,$out
2791 be?stvx_u $tmp, $x50,$out
2792 vxor $out5,$in5,$twk5
2793 addi $out,$out,0x60
2794
2795 mtctr $rounds
2796 beq Loop_xts_enc6x # did $len-=96 borrow?
2797
2798 xxlor 32+$eighty7, 2, 2 # 0x010101..87
2799
2800 addic. $len,$len,0x60
2801 beq Lxts_enc6x_zero
2802 cmpwi $len,0x20
2803 blt Lxts_enc6x_one
2804 nop
2805 beq Lxts_enc6x_two
2806 cmpwi $len,0x40
2807 blt Lxts_enc6x_three
2808 nop
2809 beq Lxts_enc6x_four
2810
2811Lxts_enc6x_five:
2812 vxor $out0,$in1,$twk0
2813 vxor $out1,$in2,$twk1
2814 vxor $out2,$in3,$twk2
2815 vxor $out3,$in4,$twk3
2816 vxor $out4,$in5,$twk4
2817
2818 bl _aesp8_xts_enc5x
2819
2820 le?vperm $out0,$out0,$out0,$leperm
2821 vmr $twk0,$twk5 # unused tweak
2822 le?vperm $out1,$out1,$out1,$leperm
2823 stvx_u $out0,$x00,$out # store output
2824 le?vperm $out2,$out2,$out2,$leperm
2825 stvx_u $out1,$x10,$out
2826 le?vperm $out3,$out3,$out3,$leperm
2827 stvx_u $out2,$x20,$out
2828 vxor $tmp,$out4,$twk5 # last block prep for stealing
2829 le?vperm $out4,$out4,$out4,$leperm
2830 stvx_u $out3,$x30,$out
2831 stvx_u $out4,$x40,$out
2832 addi $out,$out,0x50
2833 bne Lxts_enc6x_steal
2834 b Lxts_enc6x_done
2835
2836.align 4
2837Lxts_enc6x_four:
2838 vxor $out0,$in2,$twk0
2839 vxor $out1,$in3,$twk1
2840 vxor $out2,$in4,$twk2
2841 vxor $out3,$in5,$twk3
2842 vxor $out4,$out4,$out4
2843
2844 bl _aesp8_xts_enc5x
2845
2846 le?vperm $out0,$out0,$out0,$leperm
2847 vmr $twk0,$twk4 # unused tweak
2848 le?vperm $out1,$out1,$out1,$leperm
2849 stvx_u $out0,$x00,$out # store output
2850 le?vperm $out2,$out2,$out2,$leperm
2851 stvx_u $out1,$x10,$out
2852 vxor $tmp,$out3,$twk4 # last block prep for stealing
2853 le?vperm $out3,$out3,$out3,$leperm
2854 stvx_u $out2,$x20,$out
2855 stvx_u $out3,$x30,$out
2856 addi $out,$out,0x40
2857 bne Lxts_enc6x_steal
2858 b Lxts_enc6x_done
2859
2860.align 4
2861Lxts_enc6x_three:
2862 vxor $out0,$in3,$twk0
2863 vxor $out1,$in4,$twk1
2864 vxor $out2,$in5,$twk2
2865 vxor $out3,$out3,$out3
2866 vxor $out4,$out4,$out4
2867
2868 bl _aesp8_xts_enc5x
2869
2870 le?vperm $out0,$out0,$out0,$leperm
2871 vmr $twk0,$twk3 # unused tweak
2872 le?vperm $out1,$out1,$out1,$leperm
2873 stvx_u $out0,$x00,$out # store output
2874 vxor $tmp,$out2,$twk3 # last block prep for stealing
2875 le?vperm $out2,$out2,$out2,$leperm
2876 stvx_u $out1,$x10,$out
2877 stvx_u $out2,$x20,$out
2878 addi $out,$out,0x30
2879 bne Lxts_enc6x_steal
2880 b Lxts_enc6x_done
2881
2882.align 4
2883Lxts_enc6x_two:
2884 vxor $out0,$in4,$twk0
2885 vxor $out1,$in5,$twk1
2886 vxor $out2,$out2,$out2
2887 vxor $out3,$out3,$out3
2888 vxor $out4,$out4,$out4
2889
2890 bl _aesp8_xts_enc5x
2891
2892 le?vperm $out0,$out0,$out0,$leperm
2893 vmr $twk0,$twk2 # unused tweak
2894 vxor $tmp,$out1,$twk2 # last block prep for stealing
2895 le?vperm $out1,$out1,$out1,$leperm
2896 stvx_u $out0,$x00,$out # store output
2897 stvx_u $out1,$x10,$out
2898 addi $out,$out,0x20
2899 bne Lxts_enc6x_steal
2900 b Lxts_enc6x_done
2901
2902.align 4
2903Lxts_enc6x_one:
2904 vxor $out0,$in5,$twk0
2905 nop
2906Loop_xts_enc1x:
2907 vcipher $out0,$out0,v24
2908 lvx v24,$x20,$key_ # round[3]
2909 addi $key_,$key_,0x20
2910
2911 vcipher $out0,$out0,v25
2912 lvx v25,$x10,$key_ # round[4]
2913 bdnz Loop_xts_enc1x
2914
2915 add $inp,$inp,$taillen
2916 cmpwi $taillen,0
2917 vcipher $out0,$out0,v24
2918
2919 subi $inp,$inp,16
2920 vcipher $out0,$out0,v25
2921
2922 lvsr $inpperm,0,$taillen
2923 vcipher $out0,$out0,v26
2924
2925 lvx_u $in0,0,$inp
2926 vcipher $out0,$out0,v27
2927
2928 addi $key_,$sp,$FRAME+15 # rewind $key_
2929 vcipher $out0,$out0,v28
2930 lvx v24,$x00,$key_ # re-pre-load round[1]
2931
2932 vcipher $out0,$out0,v29
2933 lvx v25,$x10,$key_ # re-pre-load round[2]
2934 vxor $twk0,$twk0,v31
2935
2936 le?vperm $in0,$in0,$in0,$leperm
2937 vcipher $out0,$out0,v30
2938
2939 vperm $in0,$in0,$in0,$inpperm
2940 vcipherlast $out0,$out0,$twk0
2941
2942 vmr $twk0,$twk1 # unused tweak
2943 vxor $tmp,$out0,$twk1 # last block prep for stealing
2944 le?vperm $out0,$out0,$out0,$leperm
2945 stvx_u $out0,$x00,$out # store output
2946 addi $out,$out,0x10
2947 bne Lxts_enc6x_steal
2948 b Lxts_enc6x_done
2949
2950.align 4
2951Lxts_enc6x_zero:
2952 cmpwi $taillen,0
2953 beq Lxts_enc6x_done
2954
2955 add $inp,$inp,$taillen
2956 subi $inp,$inp,16
2957 lvx_u $in0,0,$inp
2958 lvsr $inpperm,0,$taillen # $in5 is no more
2959 le?vperm $in0,$in0,$in0,$leperm
2960 vperm $in0,$in0,$in0,$inpperm
2961 vxor $tmp,$tmp,$twk0
2962Lxts_enc6x_steal:
2963 vxor $in0,$in0,$twk0
2964 vxor $out0,$out0,$out0
2965 vspltisb $out1,-1
2966 vperm $out0,$out0,$out1,$inpperm
2967 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2968
2969 subi r30,$out,17
2970 subi $out,$out,16
2971 mtctr $taillen
2972Loop_xts_enc6x_steal:
2973 lbzu r0,1(r30)
2974 stb r0,16(r30)
2975 bdnz Loop_xts_enc6x_steal
2976
2977 li $taillen,0
2978 mtctr $rounds
2979 b Loop_xts_enc1x # one more time...
2980
2981.align 4
2982Lxts_enc6x_done:
2983 ${UCMP}i $ivp,0
2984 beq Lxts_enc6x_ret
2985
2986 vxor $tweak,$twk0,$rndkey0
2987 le?vperm $tweak,$tweak,$tweak,$leperm
2988 stvx_u $tweak,0,$ivp
2989
2990Lxts_enc6x_ret:
2991 mtlr r11
2992 li r10,`$FRAME+15`
2993 li r11,`$FRAME+31`
2994 stvx $seven,r10,$sp # wipe copies of round keys
2995 addi r10,r10,32
2996 stvx $seven,r11,$sp
2997 addi r11,r11,32
2998 stvx $seven,r10,$sp
2999 addi r10,r10,32
3000 stvx $seven,r11,$sp
3001 addi r11,r11,32
3002 stvx $seven,r10,$sp
3003 addi r10,r10,32
3004 stvx $seven,r11,$sp
3005 addi r11,r11,32
3006 stvx $seven,r10,$sp
3007 addi r10,r10,32
3008 stvx $seven,r11,$sp
3009 addi r11,r11,32
3010
3011 mtspr 256,$vrsave
3012 lvx v20,r10,$sp # ABI says so
3013 addi r10,r10,32
3014 lvx v21,r11,$sp
3015 addi r11,r11,32
3016 lvx v22,r10,$sp
3017 addi r10,r10,32
3018 lvx v23,r11,$sp
3019 addi r11,r11,32
3020 lvx v24,r10,$sp
3021 addi r10,r10,32
3022 lvx v25,r11,$sp
3023 addi r11,r11,32
3024 lvx v26,r10,$sp
3025 addi r10,r10,32
3026 lvx v27,r11,$sp
3027 addi r11,r11,32
3028 lvx v28,r10,$sp
3029 addi r10,r10,32
3030 lvx v29,r11,$sp
3031 addi r11,r11,32
3032 lvx v30,r10,$sp
3033 lvx v31,r11,$sp
3034 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3035 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3036 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3037 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3038 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3039 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3040 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3041 blr
3042 .long 0
3043 .byte 0,12,0x04,1,0x80,6,6,0
3044 .long 0
3045
3046.align 5
3047_aesp8_xts_enc5x:
3048 vcipher $out0,$out0,v24
3049 vcipher $out1,$out1,v24
3050 vcipher $out2,$out2,v24
3051 vcipher $out3,$out3,v24
3052 vcipher $out4,$out4,v24
3053 lvx v24,$x20,$key_ # round[3]
3054 addi $key_,$key_,0x20
3055
3056 vcipher $out0,$out0,v25
3057 vcipher $out1,$out1,v25
3058 vcipher $out2,$out2,v25
3059 vcipher $out3,$out3,v25
3060 vcipher $out4,$out4,v25
3061 lvx v25,$x10,$key_ # round[4]
3062 bdnz _aesp8_xts_enc5x
3063
3064 add $inp,$inp,$taillen
3065 cmpwi $taillen,0
3066 vcipher $out0,$out0,v24
3067 vcipher $out1,$out1,v24
3068 vcipher $out2,$out2,v24
3069 vcipher $out3,$out3,v24
3070 vcipher $out4,$out4,v24
3071
3072 subi $inp,$inp,16
3073 vcipher $out0,$out0,v25
3074 vcipher $out1,$out1,v25
3075 vcipher $out2,$out2,v25
3076 vcipher $out3,$out3,v25
3077 vcipher $out4,$out4,v25
3078 vxor $twk0,$twk0,v31
3079
3080 vcipher $out0,$out0,v26
3081 lvsr $inpperm,r0,$taillen # $in5 is no more
3082 vcipher $out1,$out1,v26
3083 vcipher $out2,$out2,v26
3084 vcipher $out3,$out3,v26
3085 vcipher $out4,$out4,v26
3086 vxor $in1,$twk1,v31
3087
3088 vcipher $out0,$out0,v27
3089 lvx_u $in0,0,$inp
3090 vcipher $out1,$out1,v27
3091 vcipher $out2,$out2,v27
3092 vcipher $out3,$out3,v27
3093 vcipher $out4,$out4,v27
3094 vxor $in2,$twk2,v31
3095
3096 addi $key_,$sp,$FRAME+15 # rewind $key_
3097 vcipher $out0,$out0,v28
3098 vcipher $out1,$out1,v28
3099 vcipher $out2,$out2,v28
3100 vcipher $out3,$out3,v28
3101 vcipher $out4,$out4,v28
3102 lvx v24,$x00,$key_ # re-pre-load round[1]
3103 vxor $in3,$twk3,v31
3104
3105 vcipher $out0,$out0,v29
3106 le?vperm $in0,$in0,$in0,$leperm
3107 vcipher $out1,$out1,v29
3108 vcipher $out2,$out2,v29
3109 vcipher $out3,$out3,v29
3110 vcipher $out4,$out4,v29
3111 lvx v25,$x10,$key_ # re-pre-load round[2]
3112 vxor $in4,$twk4,v31
3113
3114 vcipher $out0,$out0,v30
3115 vperm $in0,$in0,$in0,$inpperm
3116 vcipher $out1,$out1,v30
3117 vcipher $out2,$out2,v30
3118 vcipher $out3,$out3,v30
3119 vcipher $out4,$out4,v30
3120
3121 vcipherlast $out0,$out0,$twk0
3122 vcipherlast $out1,$out1,$in1
3123 vcipherlast $out2,$out2,$in2
3124 vcipherlast $out3,$out3,$in3
3125 vcipherlast $out4,$out4,$in4
3126 blr
3127 .long 0
3128 .byte 0,12,0x14,0,0,0,0,0
3129
3130.align 5
3131_aesp8_xts_decrypt6x:
3132 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3133 mflr r11
3134 li r7,`$FRAME+8*16+15`
3135 li r3,`$FRAME+8*16+31`
3136 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3137 stvx v20,r7,$sp # ABI says so
3138 addi r7,r7,32
3139 stvx v21,r3,$sp
3140 addi r3,r3,32
3141 stvx v22,r7,$sp
3142 addi r7,r7,32
3143 stvx v23,r3,$sp
3144 addi r3,r3,32
3145 stvx v24,r7,$sp
3146 addi r7,r7,32
3147 stvx v25,r3,$sp
3148 addi r3,r3,32
3149 stvx v26,r7,$sp
3150 addi r7,r7,32
3151 stvx v27,r3,$sp
3152 addi r3,r3,32
3153 stvx v28,r7,$sp
3154 addi r7,r7,32
3155 stvx v29,r3,$sp
3156 addi r3,r3,32
3157 stvx v30,r7,$sp
3158 stvx v31,r3,$sp
3159 li r0,-1
3160 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3161 li $x10,0x10
3162 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3163 li $x20,0x20
3164 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3165 li $x30,0x30
3166 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3167 li $x40,0x40
3168 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3169 li $x50,0x50
3170 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3171 li $x60,0x60
3172 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3173 li $x70,0x70
3174 mtspr 256,r0
3175
3176 xxlor 2, 32+$eighty7, 32+$eighty7
3177 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
3178 xxlor 1, 32+$eighty7, 32+$eighty7
3179
3180 # Load XOR Lconsts.
3181 mr $x70, r6
3182 bl Lconsts
3183 lxvw4x 0, $x40, r6 # load XOR contents
3184 mr r6, $x70
3185 li $x70,0x70
3186
3187 subi $rounds,$rounds,3 # -4 in total
3188
3189 lvx $rndkey0,$x00,$key1 # load key schedule
3190 lvx v30,$x10,$key1
3191 addi $key1,$key1,0x20
3192 lvx v31,$x00,$key1
3193 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3194 addi $key_,$sp,$FRAME+15
3195 mtctr $rounds
3196
3197Load_xts_dec_key:
3198 ?vperm v24,v30,v31,$keyperm
3199 lvx v30,$x10,$key1
3200 addi $key1,$key1,0x20
3201 stvx v24,$x00,$key_ # off-load round[1]
3202 ?vperm v25,v31,v30,$keyperm
3203 lvx v31,$x00,$key1
3204 stvx v25,$x10,$key_ # off-load round[2]
3205 addi $key_,$key_,0x20
3206 bdnz Load_xts_dec_key
3207
3208 lvx v26,$x10,$key1
3209 ?vperm v24,v30,v31,$keyperm
3210 lvx v27,$x20,$key1
3211 stvx v24,$x00,$key_ # off-load round[3]
3212 ?vperm v25,v31,v26,$keyperm
3213 lvx v28,$x30,$key1
3214 stvx v25,$x10,$key_ # off-load round[4]
3215 addi $key_,$sp,$FRAME+15 # rewind $key_
3216 ?vperm v26,v26,v27,$keyperm
3217 lvx v29,$x40,$key1
3218 ?vperm v27,v27,v28,$keyperm
3219 lvx v30,$x50,$key1
3220 ?vperm v28,v28,v29,$keyperm
3221 lvx v31,$x60,$key1
3222 ?vperm v29,v29,v30,$keyperm
3223 lvx $twk5,$x70,$key1 # borrow $twk5
3224 ?vperm v30,v30,v31,$keyperm
3225 lvx v24,$x00,$key_ # pre-load round[1]
3226 ?vperm v31,v31,$twk5,$keyperm
3227 lvx v25,$x10,$key_ # pre-load round[2]
3228
3229 vperm $in0,$inout,$inptail,$inpperm
3230 subi $inp,$inp,31 # undo "caller"
3231 vxor $twk0,$tweak,$rndkey0
3232 vsrab $tmp,$tweak,$seven # next tweak value
3233 vaddubm $tweak,$tweak,$tweak
3234 vand $tmp,$tmp,$eighty7
3235 vxor $out0,$in0,$twk0
3236 xxlor 32+$in1, 0, 0
3237 vpermxor $tweak, $tweak, $tmp, $in1
3238
3239 lvx_u $in1,$x10,$inp
3240 vxor $twk1,$tweak,$rndkey0
3241 vsrab $tmp,$tweak,$seven # next tweak value
3242 vaddubm $tweak,$tweak,$tweak
3243 le?vperm $in1,$in1,$in1,$leperm
3244 vand $tmp,$tmp,$eighty7
3245 vxor $out1,$in1,$twk1
3246 xxlor 32+$in2, 0, 0
3247 vpermxor $tweak, $tweak, $tmp, $in2
3248
3249 lvx_u $in2,$x20,$inp
3250 andi. $taillen,$len,15
3251 vxor $twk2,$tweak,$rndkey0
3252 vsrab $tmp,$tweak,$seven # next tweak value
3253 vaddubm $tweak,$tweak,$tweak
3254 le?vperm $in2,$in2,$in2,$leperm
3255 vand $tmp,$tmp,$eighty7
3256 vxor $out2,$in2,$twk2
3257 xxlor 32+$in3, 0, 0
3258 vpermxor $tweak, $tweak, $tmp, $in3
3259
3260 lvx_u $in3,$x30,$inp
3261 sub $len,$len,$taillen
3262 vxor $twk3,$tweak,$rndkey0
3263 vsrab $tmp,$tweak,$seven # next tweak value
3264 vaddubm $tweak,$tweak,$tweak
3265 le?vperm $in3,$in3,$in3,$leperm
3266 vand $tmp,$tmp,$eighty7
3267 vxor $out3,$in3,$twk3
3268 xxlor 32+$in4, 0, 0
3269 vpermxor $tweak, $tweak, $tmp, $in4
3270
3271 lvx_u $in4,$x40,$inp
3272 subi $len,$len,0x60
3273 vxor $twk4,$tweak,$rndkey0
3274 vsrab $tmp,$tweak,$seven # next tweak value
3275 vaddubm $tweak,$tweak,$tweak
3276 le?vperm $in4,$in4,$in4,$leperm
3277 vand $tmp,$tmp,$eighty7
3278 vxor $out4,$in4,$twk4
3279 xxlor 32+$in5, 0, 0
3280 vpermxor $tweak, $tweak, $tmp, $in5
3281
3282 lvx_u $in5,$x50,$inp
3283 addi $inp,$inp,0x60
3284 vxor $twk5,$tweak,$rndkey0
3285 vsrab $tmp,$tweak,$seven # next tweak value
3286 vaddubm $tweak,$tweak,$tweak
3287 le?vperm $in5,$in5,$in5,$leperm
3288 vand $tmp,$tmp,$eighty7
3289 vxor $out5,$in5,$twk5
3290 xxlor 32+$in0, 0, 0
3291 vpermxor $tweak, $tweak, $tmp, $in0
3292
3293 vxor v31,v31,$rndkey0
3294 mtctr $rounds
3295 b Loop_xts_dec6x
3296
3297.align 5
3298Loop_xts_dec6x:
3299 vncipher $out0,$out0,v24
3300 vncipher $out1,$out1,v24
3301 vncipher $out2,$out2,v24
3302 vncipher $out3,$out3,v24
3303 vncipher $out4,$out4,v24
3304 vncipher $out5,$out5,v24
3305 lvx v24,$x20,$key_ # round[3]
3306 addi $key_,$key_,0x20
3307
3308 vncipher $out0,$out0,v25
3309 vncipher $out1,$out1,v25
3310 vncipher $out2,$out2,v25
3311 vncipher $out3,$out3,v25
3312 vncipher $out4,$out4,v25
3313 vncipher $out5,$out5,v25
3314 lvx v25,$x10,$key_ # round[4]
3315 bdnz Loop_xts_dec6x
3316
3317 xxlor 32+$eighty7, 1, 1 # 0x010101..87
3318
3319 subic $len,$len,96 # $len-=96
3320 vxor $in0,$twk0,v31 # xor with last round key
3321 vncipher $out0,$out0,v24
3322 vncipher $out1,$out1,v24
3323 vsrab $tmp,$tweak,$seven # next tweak value
3324 vxor $twk0,$tweak,$rndkey0
3325 vaddubm $tweak,$tweak,$tweak
3326 vncipher $out2,$out2,v24
3327 vncipher $out3,$out3,v24
3328 vncipher $out4,$out4,v24
3329 vncipher $out5,$out5,v24
3330
3331 subfe. r0,r0,r0 # borrow?-1:0
3332 vand $tmp,$tmp,$eighty7
3333 vncipher $out0,$out0,v25
3334 vncipher $out1,$out1,v25
3335 xxlor 32+$in1, 0, 0
3336 vpermxor $tweak, $tweak, $tmp, $in1
3337 vncipher $out2,$out2,v25
3338 vncipher $out3,$out3,v25
3339 vxor $in1,$twk1,v31
3340 vsrab $tmp,$tweak,$seven # next tweak value
3341 vxor $twk1,$tweak,$rndkey0
3342 vncipher $out4,$out4,v25
3343 vncipher $out5,$out5,v25
3344
3345 and r0,r0,$len
3346 vaddubm $tweak,$tweak,$tweak
3347 vncipher $out0,$out0,v26
3348 vncipher $out1,$out1,v26
3349 vand $tmp,$tmp,$eighty7
3350 vncipher $out2,$out2,v26
3351 vncipher $out3,$out3,v26
3352 xxlor 32+$in2, 0, 0
3353 vpermxor $tweak, $tweak, $tmp, $in2
3354 vncipher $out4,$out4,v26
3355 vncipher $out5,$out5,v26
3356
3357 add $inp,$inp,r0 # $inp is adjusted in such
3358 # way that at exit from the
3359 # loop inX-in5 are loaded
3360 # with last "words"
3361 vxor $in2,$twk2,v31
3362 vsrab $tmp,$tweak,$seven # next tweak value
3363 vxor $twk2,$tweak,$rndkey0
3364 vaddubm $tweak,$tweak,$tweak
3365 vncipher $out0,$out0,v27
3366 vncipher $out1,$out1,v27
3367 vncipher $out2,$out2,v27
3368 vncipher $out3,$out3,v27
3369 vand $tmp,$tmp,$eighty7
3370 vncipher $out4,$out4,v27
3371 vncipher $out5,$out5,v27
3372
3373 addi $key_,$sp,$FRAME+15 # rewind $key_
3374 xxlor 32+$in3, 0, 0
3375 vpermxor $tweak, $tweak, $tmp, $in3
3376 vncipher $out0,$out0,v28
3377 vncipher $out1,$out1,v28
3378 vxor $in3,$twk3,v31
3379 vsrab $tmp,$tweak,$seven # next tweak value
3380 vxor $twk3,$tweak,$rndkey0
3381 vncipher $out2,$out2,v28
3382 vncipher $out3,$out3,v28
3383 vaddubm $tweak,$tweak,$tweak
3384 vncipher $out4,$out4,v28
3385 vncipher $out5,$out5,v28
3386 lvx v24,$x00,$key_ # re-pre-load round[1]
3387 vand $tmp,$tmp,$eighty7
3388
3389 vncipher $out0,$out0,v29
3390 vncipher $out1,$out1,v29
3391 xxlor 32+$in4, 0, 0
3392 vpermxor $tweak, $tweak, $tmp, $in4
3393 vncipher $out2,$out2,v29
3394 vncipher $out3,$out3,v29
3395 vxor $in4,$twk4,v31
3396 vsrab $tmp,$tweak,$seven # next tweak value
3397 vxor $twk4,$tweak,$rndkey0
3398 vncipher $out4,$out4,v29
3399 vncipher $out5,$out5,v29
3400 lvx v25,$x10,$key_ # re-pre-load round[2]
3401 vaddubm $tweak,$tweak,$tweak
3402
3403 vncipher $out0,$out0,v30
3404 vncipher $out1,$out1,v30
3405 vand $tmp,$tmp,$eighty7
3406 vncipher $out2,$out2,v30
3407 vncipher $out3,$out3,v30
3408 xxlor 32+$in5, 0, 0
3409 vpermxor $tweak, $tweak, $tmp, $in5
3410 vncipher $out4,$out4,v30
3411 vncipher $out5,$out5,v30
3412 vxor $in5,$twk5,v31
3413 vsrab $tmp,$tweak,$seven # next tweak value
3414 vxor $twk5,$tweak,$rndkey0
3415
3416 vncipherlast $out0,$out0,$in0
3417 lvx_u $in0,$x00,$inp # load next input block
3418 vaddubm $tweak,$tweak,$tweak
3419 vncipherlast $out1,$out1,$in1
3420 lvx_u $in1,$x10,$inp
3421 vncipherlast $out2,$out2,$in2
3422 le?vperm $in0,$in0,$in0,$leperm
3423 lvx_u $in2,$x20,$inp
3424 vand $tmp,$tmp,$eighty7
3425 vncipherlast $out3,$out3,$in3
3426 le?vperm $in1,$in1,$in1,$leperm
3427 lvx_u $in3,$x30,$inp
3428 vncipherlast $out4,$out4,$in4
3429 le?vperm $in2,$in2,$in2,$leperm
3430 lvx_u $in4,$x40,$inp
3431 xxlor 10, 32+$in0, 32+$in0
3432 xxlor 32+$in0, 0, 0
3433 vpermxor $tweak, $tweak, $tmp, $in0
3434 xxlor 32+$in0, 10, 10
3435 vncipherlast $out5,$out5,$in5
3436 le?vperm $in3,$in3,$in3,$leperm
3437 lvx_u $in5,$x50,$inp
3438 addi $inp,$inp,0x60
3439 le?vperm $in4,$in4,$in4,$leperm
3440 le?vperm $in5,$in5,$in5,$leperm
3441
3442 le?vperm $out0,$out0,$out0,$leperm
3443 le?vperm $out1,$out1,$out1,$leperm
3444 stvx_u $out0,$x00,$out # store output
3445 vxor $out0,$in0,$twk0
3446 le?vperm $out2,$out2,$out2,$leperm
3447 stvx_u $out1,$x10,$out
3448 vxor $out1,$in1,$twk1
3449 le?vperm $out3,$out3,$out3,$leperm
3450 stvx_u $out2,$x20,$out
3451 vxor $out2,$in2,$twk2
3452 le?vperm $out4,$out4,$out4,$leperm
3453 stvx_u $out3,$x30,$out
3454 vxor $out3,$in3,$twk3
3455 le?vperm $out5,$out5,$out5,$leperm
3456 stvx_u $out4,$x40,$out
3457 vxor $out4,$in4,$twk4
3458 stvx_u $out5,$x50,$out
3459 vxor $out5,$in5,$twk5
3460 addi $out,$out,0x60
3461
3462 mtctr $rounds
3463 beq Loop_xts_dec6x # did $len-=96 borrow?
3464
3465 xxlor 32+$eighty7, 2, 2 # 0x010101..87
3466
3467 addic. $len,$len,0x60
3468 beq Lxts_dec6x_zero
3469 cmpwi $len,0x20
3470 blt Lxts_dec6x_one
3471 nop
3472 beq Lxts_dec6x_two
3473 cmpwi $len,0x40
3474 blt Lxts_dec6x_three
3475 nop
3476 beq Lxts_dec6x_four
3477
3478Lxts_dec6x_five:
3479 vxor $out0,$in1,$twk0
3480 vxor $out1,$in2,$twk1
3481 vxor $out2,$in3,$twk2
3482 vxor $out3,$in4,$twk3
3483 vxor $out4,$in5,$twk4
3484
3485 bl _aesp8_xts_dec5x
3486
3487 le?vperm $out0,$out0,$out0,$leperm
3488 vmr $twk0,$twk5 # unused tweak
3489 vxor $twk1,$tweak,$rndkey0
3490 le?vperm $out1,$out1,$out1,$leperm
3491 stvx_u $out0,$x00,$out # store output
3492 vxor $out0,$in0,$twk1
3493 le?vperm $out2,$out2,$out2,$leperm
3494 stvx_u $out1,$x10,$out
3495 le?vperm $out3,$out3,$out3,$leperm
3496 stvx_u $out2,$x20,$out
3497 le?vperm $out4,$out4,$out4,$leperm
3498 stvx_u $out3,$x30,$out
3499 stvx_u $out4,$x40,$out
3500 addi $out,$out,0x50
3501 bne Lxts_dec6x_steal
3502 b Lxts_dec6x_done
3503
3504.align 4
3505Lxts_dec6x_four:
3506 vxor $out0,$in2,$twk0
3507 vxor $out1,$in3,$twk1
3508 vxor $out2,$in4,$twk2
3509 vxor $out3,$in5,$twk3
3510 vxor $out4,$out4,$out4
3511
3512 bl _aesp8_xts_dec5x
3513
3514 le?vperm $out0,$out0,$out0,$leperm
3515 vmr $twk0,$twk4 # unused tweak
3516 vmr $twk1,$twk5
3517 le?vperm $out1,$out1,$out1,$leperm
3518 stvx_u $out0,$x00,$out # store output
3519 vxor $out0,$in0,$twk5
3520 le?vperm $out2,$out2,$out2,$leperm
3521 stvx_u $out1,$x10,$out
3522 le?vperm $out3,$out3,$out3,$leperm
3523 stvx_u $out2,$x20,$out
3524 stvx_u $out3,$x30,$out
3525 addi $out,$out,0x40
3526 bne Lxts_dec6x_steal
3527 b Lxts_dec6x_done
3528
3529.align 4
3530Lxts_dec6x_three:
3531 vxor $out0,$in3,$twk0
3532 vxor $out1,$in4,$twk1
3533 vxor $out2,$in5,$twk2
3534 vxor $out3,$out3,$out3
3535 vxor $out4,$out4,$out4
3536
3537 bl _aesp8_xts_dec5x
3538
3539 le?vperm $out0,$out0,$out0,$leperm
3540 vmr $twk0,$twk3 # unused tweak
3541 vmr $twk1,$twk4
3542 le?vperm $out1,$out1,$out1,$leperm
3543 stvx_u $out0,$x00,$out # store output
3544 vxor $out0,$in0,$twk4
3545 le?vperm $out2,$out2,$out2,$leperm
3546 stvx_u $out1,$x10,$out
3547 stvx_u $out2,$x20,$out
3548 addi $out,$out,0x30
3549 bne Lxts_dec6x_steal
3550 b Lxts_dec6x_done
3551
3552.align 4
3553Lxts_dec6x_two:
3554 vxor $out0,$in4,$twk0
3555 vxor $out1,$in5,$twk1
3556 vxor $out2,$out2,$out2
3557 vxor $out3,$out3,$out3
3558 vxor $out4,$out4,$out4
3559
3560 bl _aesp8_xts_dec5x
3561
3562 le?vperm $out0,$out0,$out0,$leperm
3563 vmr $twk0,$twk2 # unused tweak
3564 vmr $twk1,$twk3
3565 le?vperm $out1,$out1,$out1,$leperm
3566 stvx_u $out0,$x00,$out # store output
3567 vxor $out0,$in0,$twk3
3568 stvx_u $out1,$x10,$out
3569 addi $out,$out,0x20
3570 bne Lxts_dec6x_steal
3571 b Lxts_dec6x_done
3572
3573.align 4
3574Lxts_dec6x_one:
3575 vxor $out0,$in5,$twk0
3576 nop
3577Loop_xts_dec1x:
3578 vncipher $out0,$out0,v24
3579 lvx v24,$x20,$key_ # round[3]
3580 addi $key_,$key_,0x20
3581
3582 vncipher $out0,$out0,v25
3583 lvx v25,$x10,$key_ # round[4]
3584 bdnz Loop_xts_dec1x
3585
3586 subi r0,$taillen,1
3587 vncipher $out0,$out0,v24
3588
3589 andi. r0,r0,16
3590 cmpwi $taillen,0
3591 vncipher $out0,$out0,v25
3592
3593 sub $inp,$inp,r0
3594 vncipher $out0,$out0,v26
3595
3596 lvx_u $in0,0,$inp
3597 vncipher $out0,$out0,v27
3598
3599 addi $key_,$sp,$FRAME+15 # rewind $key_
3600 vncipher $out0,$out0,v28
3601 lvx v24,$x00,$key_ # re-pre-load round[1]
3602
3603 vncipher $out0,$out0,v29
3604 lvx v25,$x10,$key_ # re-pre-load round[2]
3605 vxor $twk0,$twk0,v31
3606
3607 le?vperm $in0,$in0,$in0,$leperm
3608 vncipher $out0,$out0,v30
3609
3610 mtctr $rounds
3611 vncipherlast $out0,$out0,$twk0
3612
3613 vmr $twk0,$twk1 # unused tweak
3614 vmr $twk1,$twk2
3615 le?vperm $out0,$out0,$out0,$leperm
3616 stvx_u $out0,$x00,$out # store output
3617 addi $out,$out,0x10
3618 vxor $out0,$in0,$twk2
3619 bne Lxts_dec6x_steal
3620 b Lxts_dec6x_done
3621
3622.align 4
3623Lxts_dec6x_zero:
3624 cmpwi $taillen,0
3625 beq Lxts_dec6x_done
3626
3627 lvx_u $in0,0,$inp
3628 le?vperm $in0,$in0,$in0,$leperm
3629 vxor $out0,$in0,$twk1
3630Lxts_dec6x_steal:
3631 vncipher $out0,$out0,v24
3632 lvx v24,$x20,$key_ # round[3]
3633 addi $key_,$key_,0x20
3634
3635 vncipher $out0,$out0,v25
3636 lvx v25,$x10,$key_ # round[4]
3637 bdnz Lxts_dec6x_steal
3638
3639 add $inp,$inp,$taillen
3640 vncipher $out0,$out0,v24
3641
3642 cmpwi $taillen,0
3643 vncipher $out0,$out0,v25
3644
3645 lvx_u $in0,0,$inp
3646 vncipher $out0,$out0,v26
3647
3648 lvsr $inpperm,0,$taillen # $in5 is no more
3649 vncipher $out0,$out0,v27
3650
3651 addi $key_,$sp,$FRAME+15 # rewind $key_
3652 vncipher $out0,$out0,v28
3653 lvx v24,$x00,$key_ # re-pre-load round[1]
3654
3655 vncipher $out0,$out0,v29
3656 lvx v25,$x10,$key_ # re-pre-load round[2]
3657 vxor $twk1,$twk1,v31
3658
3659 le?vperm $in0,$in0,$in0,$leperm
3660 vncipher $out0,$out0,v30
3661
3662 vperm $in0,$in0,$in0,$inpperm
3663 vncipherlast $tmp,$out0,$twk1
3664
3665 le?vperm $out0,$tmp,$tmp,$leperm
3666 le?stvx_u $out0,0,$out
3667 be?stvx_u $tmp,0,$out
3668
3669 vxor $out0,$out0,$out0
3670 vspltisb $out1,-1
3671 vperm $out0,$out0,$out1,$inpperm
3672 vsel $out0,$in0,$tmp,$out0
3673 vxor $out0,$out0,$twk0
3674
3675 subi r30,$out,1
3676 mtctr $taillen
3677Loop_xts_dec6x_steal:
3678 lbzu r0,1(r30)
3679 stb r0,16(r30)
3680 bdnz Loop_xts_dec6x_steal
3681
3682 li $taillen,0
3683 mtctr $rounds
3684 b Loop_xts_dec1x # one more time...
3685
3686.align 4
3687Lxts_dec6x_done:
3688 ${UCMP}i $ivp,0
3689 beq Lxts_dec6x_ret
3690
3691 vxor $tweak,$twk0,$rndkey0
3692 le?vperm $tweak,$tweak,$tweak,$leperm
3693 stvx_u $tweak,0,$ivp
3694
3695Lxts_dec6x_ret:
3696 mtlr r11
3697 li r10,`$FRAME+15`
3698 li r11,`$FRAME+31`
3699 stvx $seven,r10,$sp # wipe copies of round keys
3700 addi r10,r10,32
3701 stvx $seven,r11,$sp
3702 addi r11,r11,32
3703 stvx $seven,r10,$sp
3704 addi r10,r10,32
3705 stvx $seven,r11,$sp
3706 addi r11,r11,32
3707 stvx $seven,r10,$sp
3708 addi r10,r10,32
3709 stvx $seven,r11,$sp
3710 addi r11,r11,32
3711 stvx $seven,r10,$sp
3712 addi r10,r10,32
3713 stvx $seven,r11,$sp
3714 addi r11,r11,32
3715
3716 mtspr 256,$vrsave
3717 lvx v20,r10,$sp # ABI says so
3718 addi r10,r10,32
3719 lvx v21,r11,$sp
3720 addi r11,r11,32
3721 lvx v22,r10,$sp
3722 addi r10,r10,32
3723 lvx v23,r11,$sp
3724 addi r11,r11,32
3725 lvx v24,r10,$sp
3726 addi r10,r10,32
3727 lvx v25,r11,$sp
3728 addi r11,r11,32
3729 lvx v26,r10,$sp
3730 addi r10,r10,32
3731 lvx v27,r11,$sp
3732 addi r11,r11,32
3733 lvx v28,r10,$sp
3734 addi r10,r10,32
3735 lvx v29,r11,$sp
3736 addi r11,r11,32
3737 lvx v30,r10,$sp
3738 lvx v31,r11,$sp
3739 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3740 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3741 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3742 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3743 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3744 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3745 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3746 blr
3747 .long 0
3748 .byte 0,12,0x04,1,0x80,6,6,0
3749 .long 0
3750
3751.align 5
3752_aesp8_xts_dec5x:
3753 vncipher $out0,$out0,v24
3754 vncipher $out1,$out1,v24
3755 vncipher $out2,$out2,v24
3756 vncipher $out3,$out3,v24
3757 vncipher $out4,$out4,v24
3758 lvx v24,$x20,$key_ # round[3]
3759 addi $key_,$key_,0x20
3760
3761 vncipher $out0,$out0,v25
3762 vncipher $out1,$out1,v25
3763 vncipher $out2,$out2,v25
3764 vncipher $out3,$out3,v25
3765 vncipher $out4,$out4,v25
3766 lvx v25,$x10,$key_ # round[4]
3767 bdnz _aesp8_xts_dec5x
3768
3769 subi r0,$taillen,1
3770 vncipher $out0,$out0,v24
3771 vncipher $out1,$out1,v24
3772 vncipher $out2,$out2,v24
3773 vncipher $out3,$out3,v24
3774 vncipher $out4,$out4,v24
3775
3776 andi. r0,r0,16
3777 cmpwi $taillen,0
3778 vncipher $out0,$out0,v25
3779 vncipher $out1,$out1,v25
3780 vncipher $out2,$out2,v25
3781 vncipher $out3,$out3,v25
3782 vncipher $out4,$out4,v25
3783 vxor $twk0,$twk0,v31
3784
3785 sub $inp,$inp,r0
3786 vncipher $out0,$out0,v26
3787 vncipher $out1,$out1,v26
3788 vncipher $out2,$out2,v26
3789 vncipher $out3,$out3,v26
3790 vncipher $out4,$out4,v26
3791 vxor $in1,$twk1,v31
3792
3793 vncipher $out0,$out0,v27
3794 lvx_u $in0,0,$inp
3795 vncipher $out1,$out1,v27
3796 vncipher $out2,$out2,v27
3797 vncipher $out3,$out3,v27
3798 vncipher $out4,$out4,v27
3799 vxor $in2,$twk2,v31
3800
3801 addi $key_,$sp,$FRAME+15 # rewind $key_
3802 vncipher $out0,$out0,v28
3803 vncipher $out1,$out1,v28
3804 vncipher $out2,$out2,v28
3805 vncipher $out3,$out3,v28
3806 vncipher $out4,$out4,v28
3807 lvx v24,$x00,$key_ # re-pre-load round[1]
3808 vxor $in3,$twk3,v31
3809
3810 vncipher $out0,$out0,v29
3811 le?vperm $in0,$in0,$in0,$leperm
3812 vncipher $out1,$out1,v29
3813 vncipher $out2,$out2,v29
3814 vncipher $out3,$out3,v29
3815 vncipher $out4,$out4,v29
3816 lvx v25,$x10,$key_ # re-pre-load round[2]
3817 vxor $in4,$twk4,v31
3818
3819 vncipher $out0,$out0,v30
3820 vncipher $out1,$out1,v30
3821 vncipher $out2,$out2,v30
3822 vncipher $out3,$out3,v30
3823 vncipher $out4,$out4,v30
3824
3825 vncipherlast $out0,$out0,$twk0
3826 vncipherlast $out1,$out1,$in1
3827 vncipherlast $out2,$out2,$in2
3828 vncipherlast $out3,$out3,$in3
3829 vncipherlast $out4,$out4,$in4
3830 mtctr $rounds
3831 blr
3832 .long 0
3833 .byte 0,12,0x14,0,0,0,0,0
3834___
3835}} }}}
3836
# Post-process the generated assembly in $code line by line and print it.
# $consts is true while we are still inside the constants table at the top
# of the output; once past the "Lconsts:" label, only instruction fixups
# are applied.
my $consts=1;
foreach(split("\n",$code)) {
	# Evaluate compile-time arithmetic between backticks (e.g.
	# `$FRAME+15`) so the emitted assembly contains plain numbers.
	s/\`([^\`]*)\`/eval($1)/geo;

	# Constants table endian-specific conversion: a ".long"/".byte"
	# directive tagged with a trailing "?..." marker is re-emitted as an
	# explicit ".byte" list so the same table works on either endianness.
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # Convert to endian-agnostic format: parse each operand
	    # (oct() if it starts with 0 — covers hex/octal — else int)
	    # and, for ".long", expand it into four bytes MSB first.
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
	      @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # Little-endian conversion of the byte list, selected by the
	    # tag: "?inv" XORs each byte with 0xf (inverts the low nibble),
	    # "?rev" reverses the whole list. Other tags leave it as is.
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes); last; };
		}
	    }

	    # Emit the table entry as raw bytes and skip instruction fixups.
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# Instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly... The s/// alternatives below are
	# order-dependent: the first substitution that matches wins.
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or	# keep LE-only instruction
	    s/be\?/#be#/o	or	# comment out BE-only instruction
	    s/\?lvsr/lvsl/o	or	# swap lvsr <-> lvsl
	    s/\?lvsl/lvsr/o	or
	    # swap the two source registers of a tagged vperm
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    # swap vsldoi sources and mirror the shift amount (16 - n)
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    # mirror the vspltw element index (3 - n)
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or	# comment out LE-only instruction
	    s/be\?//o		or	# keep BE-only instruction
	    s/\?([a-z]+)/$1/o;		# drop '?' tag, use instruction as-is
	}

	print $_,"\n";
}
3888
# The entire generated assembly goes through STDOUT; buffered write errors
# (full disk, broken pipe) are only reported at close time, so an unchecked
# close could silently truncate the output file. Fail loudly instead.
close STDOUT or die "error closing STDOUT: $!";