
crypto: p10-aes-gcm - Supporting functions for AES

This code is taken from CRYPTOGAMS [1]. The following functions are used:
aes_p8_set_encrypt_key is used to generate the AES round keys, and
aes_p8_encrypt is used to encrypt a single block.

[1] https://www.openssl.org/~appro/cryptogams/
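For context, this is how the two routines named above are driven from C. The prototypes and struct layout below follow the companion header used by the existing VMX (POWER8) driver (aesp8-ppc.h); treat the exact declarations as an assumption for illustration, not as part of this patch:

/* Sketch of a kernel caller; prototypes assumed to match the
 * aesp8-ppc.h companion header, not defined by this patch itself. */
#include <linux/errno.h>
#include <linux/types.h>
#include <crypto/aes.h>			/* AES_MAX_KEYLENGTH, AES_BLOCK_SIZE */

struct aes_key {
	u8 key[AES_MAX_KEYLENGTH];	/* expanded round keys */
	int rounds;			/* 10/12/14 for 128/192/256-bit keys */
};

int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
			   struct aes_key *key);
void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);

static int encrypt_one_block(const u8 *user_key, unsigned int key_bits,
			     const u8 in[AES_BLOCK_SIZE],
			     u8 out[AES_BLOCK_SIZE])
{
	struct aes_key enc_key;

	/* The asm returns 0 on success, negative on NULL arguments or an
	 * unsupported key size (see Lenc_key_abort in the listing below). */
	if (aes_p8_set_encrypt_key(user_key, key_bits, &enc_key) != 0)
		return -EINVAL;
	/* In-kernel callers must also hold vector context (e.g. via
	 * enable_kernel_vsx()) around these calls; omitted for brevity. */
	aes_p8_encrypt(in, out, &enc_key);
	return 0;
}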

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Danny Tsen, committed by Herbert Xu
3b47ecca ca68a96c

arch/powerpc/crypto/aesp8-ppc.pl | 3846 +
1 file changed, 3846 insertions(+)

arch/powerpc/crypto/aesp8-ppc.pl (new file)
#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds
Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10
Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
#   1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
#   009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
#   https://github.com/openssl/openssl/pull/8942
#
#########################################################################
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key
	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
r11,r11,32 1912 + stvx $inpperm,r10,$sp 1913 + addi r10,r10,32 1914 + stvx $inpperm,r11,$sp 1915 + addi r11,r11,32 1916 + 1917 + mtspr 256,$vrsave 1918 + lvx v20,r10,$sp # ABI says so 1919 + addi r10,r10,32 1920 + lvx v21,r11,$sp 1921 + addi r11,r11,32 1922 + lvx v22,r10,$sp 1923 + addi r10,r10,32 1924 + lvx v23,r11,$sp 1925 + addi r11,r11,32 1926 + lvx v24,r10,$sp 1927 + addi r10,r10,32 1928 + lvx v25,r11,$sp 1929 + addi r11,r11,32 1930 + lvx v26,r10,$sp 1931 + addi r10,r10,32 1932 + lvx v27,r11,$sp 1933 + addi r11,r11,32 1934 + lvx v28,r10,$sp 1935 + addi r10,r10,32 1936 + lvx v29,r11,$sp 1937 + addi r11,r11,32 1938 + lvx v30,r10,$sp 1939 + lvx v31,r11,$sp 1940 + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1941 + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1942 + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1943 + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1944 + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1945 + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1946 + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1947 + blr 1948 + .long 0 1949 + .byte 0,12,0x14,0,0x80,6,6,0 1950 + .long 0 1951 + .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1952 + ___ 1953 + }} }}} 1954 + 1955 + ######################################################################### 1956 + {{{ # XTS procedures # 1957 + # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1958 + # const AES_KEY *key1, const AES_KEY *key2, # 1959 + # [const] unsigned char iv[16]); # 1960 + # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1961 + # input tweak value is assumed to be encrypted already, and last tweak # 1962 + # value, one suitable for consecutive call on same chunk of data, is # 1963 + # written back to original buffer. In addition, in "tweak chaining" # 1964 + # mode only complete input blocks are processed. # 1965 + 1966 + my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1967 + my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1968 + my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1969 + my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1970 + my $taillen = $key2; 1971 + 1972 + ($inp,$idx) = ($idx,$inp); # reassign 1973 + 1974 + $code.=<<___; 1975 + .globl .${prefix}_xts_encrypt 1976 + mr $inp,r3 # reassign 1977 + li r3,-1 1978 + ${UCMP}i $len,16 1979 + bltlr- 1980 + 1981 + lis r0,0xfff0 1982 + mfspr r12,256 # save vrsave 1983 + li r11,0 1984 + mtspr 256,r0 1985 + 1986 + vspltisb $seven,0x07 # 0x070707..07 1987 + le?lvsl $leperm,r11,r11 1988 + le?vspltisb $tmp,0x0f 1989 + le?vxor $leperm,$leperm,$seven 1990 + 1991 + li $idx,15 1992 + lvx $tweak,0,$ivp # load [unaligned] iv 1993 + lvsl $inpperm,0,$ivp 1994 + lvx $inptail,$idx,$ivp 1995 + le?vxor $inpperm,$inpperm,$tmp 1996 + vperm $tweak,$tweak,$inptail,$inpperm 1997 + 1998 + neg r11,$inp 1999 + lvsr $inpperm,0,r11 # prepare for unaligned load 2000 + lvx $inout,0,$inp 2001 + addi $inp,$inp,15 # 15 is not typo 2002 + le?vxor $inpperm,$inpperm,$tmp 2003 + 2004 + ${UCMP}i $key2,0 # key2==NULL? 
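The compare above implements the dual calling convention spelled out in the block comment before this routine: with a non-NULL key2 the loaded IV is encrypted under key2 (the Ltweak_xts_enc loop that follows) to form the initial tweak, and ivp is zeroed so the tweak is never written back; with key2==NULL the IV is treated as an already-encrypted tweak, the length is masked down to whole blocks, and the final tweak value is stored back through ivp for the next call on the same stream. A minimal Perl model of that dispatch, with a hypothetical aes_encrypt_block helper standing in for the Ltweak_xts_enc loop:

    sub xts_initial_tweak {
        my ($iv, $key2) = @_;
        # standard XTS: tweak = AES-enc(key2, iv); with no key2 the
        # caller has pre-encrypted the tweak ("tweak chaining")
        return defined($key2) ? aes_encrypt_block($key2, $iv) : $iv;
    }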
2005 + beq Lxts_enc_no_key2 2006 + 2007 + ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2008 + lwz $rounds,240($key2) 2009 + srwi $rounds,$rounds,1 2010 + subi $rounds,$rounds,1 2011 + li $idx,16 2012 + 2013 + lvx $rndkey0,0,$key2 2014 + lvx $rndkey1,$idx,$key2 2015 + addi $idx,$idx,16 2016 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2017 + vxor $tweak,$tweak,$rndkey0 2018 + lvx $rndkey0,$idx,$key2 2019 + addi $idx,$idx,16 2020 + mtctr $rounds 2021 + 2022 + Ltweak_xts_enc: 2023 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2024 + vcipher $tweak,$tweak,$rndkey1 2025 + lvx $rndkey1,$idx,$key2 2026 + addi $idx,$idx,16 2027 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2028 + vcipher $tweak,$tweak,$rndkey0 2029 + lvx $rndkey0,$idx,$key2 2030 + addi $idx,$idx,16 2031 + bdnz Ltweak_xts_enc 2032 + 2033 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2034 + vcipher $tweak,$tweak,$rndkey1 2035 + lvx $rndkey1,$idx,$key2 2036 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2037 + vcipherlast $tweak,$tweak,$rndkey0 2038 + 2039 + li $ivp,0 # don't chain the tweak 2040 + b Lxts_enc 2041 + 2042 + Lxts_enc_no_key2: 2043 + li $idx,-16 2044 + and $len,$len,$idx # in "tweak chaining" 2045 + # mode only complete 2046 + # blocks are processed 2047 + Lxts_enc: 2048 + lvx $inptail,0,$inp 2049 + addi $inp,$inp,16 2050 + 2051 + ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2052 + lwz $rounds,240($key1) 2053 + srwi $rounds,$rounds,1 2054 + subi $rounds,$rounds,1 2055 + li $idx,16 2056 + 2057 + vslb $eighty7,$seven,$seven # 0x808080..80 2058 + vor $eighty7,$eighty7,$seven # 0x878787..87 2059 + vspltisb $tmp,1 # 0x010101..01 2060 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2061 + 2062 + ${UCMP}i $len,96 2063 + bge _aesp8_xts_encrypt6x 2064 + 2065 + andi. $taillen,$len,15 2066 + subic r0,$len,32 2067 + subi $taillen,$taillen,16 2068 + subfe r0,r0,r0 2069 + and r0,r0,$taillen 2070 + add $inp,$inp,r0 2071 + 2072 + lvx $rndkey0,0,$key1 2073 + lvx $rndkey1,$idx,$key1 2074 + addi $idx,$idx,16 2075 + vperm $inout,$inout,$inptail,$inpperm 2076 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2077 + vxor $inout,$inout,$tweak 2078 + vxor $inout,$inout,$rndkey0 2079 + lvx $rndkey0,$idx,$key1 2080 + addi $idx,$idx,16 2081 + mtctr $rounds 2082 + b Loop_xts_enc 2083 + 2084 + .align 5 2085 + Loop_xts_enc: 2086 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2087 + vcipher $inout,$inout,$rndkey1 2088 + lvx $rndkey1,$idx,$key1 2089 + addi $idx,$idx,16 2090 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2091 + vcipher $inout,$inout,$rndkey0 2092 + lvx $rndkey0,$idx,$key1 2093 + addi $idx,$idx,16 2094 + bdnz Loop_xts_enc 2095 + 2096 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2097 + vcipher $inout,$inout,$rndkey1 2098 + lvx $rndkey1,$idx,$key1 2099 + li $idx,16 2100 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2101 + vxor $rndkey0,$rndkey0,$tweak 2102 + vcipherlast $output,$inout,$rndkey0 2103 + 2104 + le?vperm $tmp,$output,$output,$leperm 2105 + be?nop 2106 + le?stvx_u $tmp,0,$out 2107 + be?stvx_u $output,0,$out 2108 + addi $out,$out,16 2109 + 2110 + subic. 
$len,$len,16 2111 + beq Lxts_enc_done 2112 + 2113 + vmr $inout,$inptail 2114 + lvx $inptail,0,$inp 2115 + addi $inp,$inp,16 2116 + lvx $rndkey0,0,$key1 2117 + lvx $rndkey1,$idx,$key1 2118 + addi $idx,$idx,16 2119 + 2120 + subic r0,$len,32 2121 + subfe r0,r0,r0 2122 + and r0,r0,$taillen 2123 + add $inp,$inp,r0 2124 + 2125 + vsrab $tmp,$tweak,$seven # next tweak value 2126 + vaddubm $tweak,$tweak,$tweak 2127 + vsldoi $tmp,$tmp,$tmp,15 2128 + vand $tmp,$tmp,$eighty7 2129 + vxor $tweak,$tweak,$tmp 2130 + 2131 + vperm $inout,$inout,$inptail,$inpperm 2132 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2133 + vxor $inout,$inout,$tweak 2134 + vxor $output,$output,$rndkey0 # just in case $len<16 2135 + vxor $inout,$inout,$rndkey0 2136 + lvx $rndkey0,$idx,$key1 2137 + addi $idx,$idx,16 2138 + 2139 + mtctr $rounds 2140 + ${UCMP}i $len,16 2141 + bge Loop_xts_enc 2142 + 2143 + vxor $output,$output,$tweak 2144 + lvsr $inpperm,0,$len # $inpperm is no longer needed 2145 + vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2146 + vspltisb $tmp,-1 2147 + vperm $inptail,$inptail,$tmp,$inpperm 2148 + vsel $inout,$inout,$output,$inptail 2149 + 2150 + subi r11,$out,17 2151 + subi $out,$out,16 2152 + mtctr $len 2153 + li $len,16 2154 + Loop_xts_enc_steal: 2155 + lbzu r0,1(r11) 2156 + stb r0,16(r11) 2157 + bdnz Loop_xts_enc_steal 2158 + 2159 + mtctr $rounds 2160 + b Loop_xts_enc # one more time... 2161 + 2162 + Lxts_enc_done: 2163 + ${UCMP}i $ivp,0 2164 + beq Lxts_enc_ret 2165 + 2166 + vsrab $tmp,$tweak,$seven # next tweak value 2167 + vaddubm $tweak,$tweak,$tweak 2168 + vsldoi $tmp,$tmp,$tmp,15 2169 + vand $tmp,$tmp,$eighty7 2170 + vxor $tweak,$tweak,$tmp 2171 + 2172 + le?vperm $tweak,$tweak,$tweak,$leperm 2173 + stvx_u $tweak,0,$ivp 2174 + 2175 + Lxts_enc_ret: 2176 + mtspr 256,r12 # restore vrsave 2177 + li r3,0 2178 + blr 2179 + .long 0 2180 + .byte 0,12,0x04,0,0x80,6,6,0 2181 + .long 0 2182 + .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2183 + 2184 + .globl .${prefix}_xts_decrypt 2185 + mr $inp,r3 # reassign 2186 + li r3,-1 2187 + ${UCMP}i $len,16 2188 + bltlr- 2189 + 2190 + lis r0,0xfff8 2191 + mfspr r12,256 # save vrsave 2192 + li r11,0 2193 + mtspr 256,r0 2194 + 2195 + andi. r0,$len,15 2196 + neg r0,r0 2197 + andi. r0,r0,16 2198 + sub $len,$len,r0 2199 + 2200 + vspltisb $seven,0x07 # 0x070707..07 2201 + le?lvsl $leperm,r11,r11 2202 + le?vspltisb $tmp,0x0f 2203 + le?vxor $leperm,$leperm,$seven 2204 + 2205 + li $idx,15 2206 + lvx $tweak,0,$ivp # load [unaligned] iv 2207 + lvsl $inpperm,0,$ivp 2208 + lvx $inptail,$idx,$ivp 2209 + le?vxor $inpperm,$inpperm,$tmp 2210 + vperm $tweak,$tweak,$inptail,$inpperm 2211 + 2212 + neg r11,$inp 2213 + lvsr $inpperm,0,r11 # prepare for unaligned load 2214 + lvx $inout,0,$inp 2215 + addi $inp,$inp,15 # 15 is not typo 2216 + le?vxor $inpperm,$inpperm,$tmp 2217 + 2218 + ${UCMP}i $key2,0 # key2==NULL? 
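Both the encrypt and decrypt paths advance the tweak with the same five-instruction pattern seen above (vsrab, vaddubm, vsldoi, vand, vxor): vaddubm doubles every byte, vsrab plus the one-byte vsldoi rotation turns each byte's discarded top bit into a carry for the following byte, and the 0x870101..01 mask folds the carry out of the last byte back in as 0x87. That is multiplication by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1 with byte 0 least significant, i.e. the IEEE P1619 tweak update. A byte-wise Perl reference for the same step (an illustrative sketch, not part of the module):

    sub xts_next_tweak {
        my @b = unpack("C16", shift);        # byte 0 is least significant
        my $carry = 0;
        for my $i (0 .. 15) {
            my $msb = ($b[$i] >> 7) & 1;     # bit shifted out of this byte
            $b[$i] = (($b[$i] << 1) & 0xff) | $carry;
            $carry = $msb;
        }
        $b[0] ^= 0x87 if $carry;             # reduce x^128 to x^7+x^2+x+1
        return pack("C16", @b);
    }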
2219 + beq Lxts_dec_no_key2 2220 + 2221 + ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2222 + lwz $rounds,240($key2) 2223 + srwi $rounds,$rounds,1 2224 + subi $rounds,$rounds,1 2225 + li $idx,16 2226 + 2227 + lvx $rndkey0,0,$key2 2228 + lvx $rndkey1,$idx,$key2 2229 + addi $idx,$idx,16 2230 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2231 + vxor $tweak,$tweak,$rndkey0 2232 + lvx $rndkey0,$idx,$key2 2233 + addi $idx,$idx,16 2234 + mtctr $rounds 2235 + 2236 + Ltweak_xts_dec: 2237 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2238 + vcipher $tweak,$tweak,$rndkey1 2239 + lvx $rndkey1,$idx,$key2 2240 + addi $idx,$idx,16 2241 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2242 + vcipher $tweak,$tweak,$rndkey0 2243 + lvx $rndkey0,$idx,$key2 2244 + addi $idx,$idx,16 2245 + bdnz Ltweak_xts_dec 2246 + 2247 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2248 + vcipher $tweak,$tweak,$rndkey1 2249 + lvx $rndkey1,$idx,$key2 2250 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2251 + vcipherlast $tweak,$tweak,$rndkey0 2252 + 2253 + li $ivp,0 # don't chain the tweak 2254 + b Lxts_dec 2255 + 2256 + Lxts_dec_no_key2: 2257 + neg $idx,$len 2258 + andi. $idx,$idx,15 2259 + add $len,$len,$idx # in "tweak chaining" 2260 + # mode only complete 2261 + # blocks are processed 2262 + Lxts_dec: 2263 + lvx $inptail,0,$inp 2264 + addi $inp,$inp,16 2265 + 2266 + ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2267 + lwz $rounds,240($key1) 2268 + srwi $rounds,$rounds,1 2269 + subi $rounds,$rounds,1 2270 + li $idx,16 2271 + 2272 + vslb $eighty7,$seven,$seven # 0x808080..80 2273 + vor $eighty7,$eighty7,$seven # 0x878787..87 2274 + vspltisb $tmp,1 # 0x010101..01 2275 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2276 + 2277 + ${UCMP}i $len,96 2278 + bge _aesp8_xts_decrypt6x 2279 + 2280 + lvx $rndkey0,0,$key1 2281 + lvx $rndkey1,$idx,$key1 2282 + addi $idx,$idx,16 2283 + vperm $inout,$inout,$inptail,$inpperm 2284 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2285 + vxor $inout,$inout,$tweak 2286 + vxor $inout,$inout,$rndkey0 2287 + lvx $rndkey0,$idx,$key1 2288 + addi $idx,$idx,16 2289 + mtctr $rounds 2290 + 2291 + ${UCMP}i $len,16 2292 + blt Ltail_xts_dec 2293 + be?b Loop_xts_dec 2294 + 2295 + .align 5 2296 + Loop_xts_dec: 2297 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2298 + vncipher $inout,$inout,$rndkey1 2299 + lvx $rndkey1,$idx,$key1 2300 + addi $idx,$idx,16 2301 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2302 + vncipher $inout,$inout,$rndkey0 2303 + lvx $rndkey0,$idx,$key1 2304 + addi $idx,$idx,16 2305 + bdnz Loop_xts_dec 2306 + 2307 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2308 + vncipher $inout,$inout,$rndkey1 2309 + lvx $rndkey1,$idx,$key1 2310 + li $idx,16 2311 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2312 + vxor $rndkey0,$rndkey0,$tweak 2313 + vncipherlast $output,$inout,$rndkey0 2314 + 2315 + le?vperm $tmp,$output,$output,$leperm 2316 + be?nop 2317 + le?stvx_u $tmp,0,$out 2318 + be?stvx_u $output,0,$out 2319 + addi $out,$out,16 2320 + 2321 + subic. 
$len,$len,16 2322 + beq Lxts_dec_done 2323 + 2324 + vmr $inout,$inptail 2325 + lvx $inptail,0,$inp 2326 + addi $inp,$inp,16 2327 + lvx $rndkey0,0,$key1 2328 + lvx $rndkey1,$idx,$key1 2329 + addi $idx,$idx,16 2330 + 2331 + vsrab $tmp,$tweak,$seven # next tweak value 2332 + vaddubm $tweak,$tweak,$tweak 2333 + vsldoi $tmp,$tmp,$tmp,15 2334 + vand $tmp,$tmp,$eighty7 2335 + vxor $tweak,$tweak,$tmp 2336 + 2337 + vperm $inout,$inout,$inptail,$inpperm 2338 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2339 + vxor $inout,$inout,$tweak 2340 + vxor $inout,$inout,$rndkey0 2341 + lvx $rndkey0,$idx,$key1 2342 + addi $idx,$idx,16 2343 + 2344 + mtctr $rounds 2345 + ${UCMP}i $len,16 2346 + bge Loop_xts_dec 2347 + 2348 + Ltail_xts_dec: 2349 + vsrab $tmp,$tweak,$seven # next tweak value 2350 + vaddubm $tweak1,$tweak,$tweak 2351 + vsldoi $tmp,$tmp,$tmp,15 2352 + vand $tmp,$tmp,$eighty7 2353 + vxor $tweak1,$tweak1,$tmp 2354 + 2355 + subi $inp,$inp,16 2356 + add $inp,$inp,$len 2357 + 2358 + vxor $inout,$inout,$tweak # :-( 2359 + vxor $inout,$inout,$tweak1 # :-) 2360 + 2361 + Loop_xts_dec_short: 2362 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2363 + vncipher $inout,$inout,$rndkey1 2364 + lvx $rndkey1,$idx,$key1 2365 + addi $idx,$idx,16 2366 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2367 + vncipher $inout,$inout,$rndkey0 2368 + lvx $rndkey0,$idx,$key1 2369 + addi $idx,$idx,16 2370 + bdnz Loop_xts_dec_short 2371 + 2372 + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2373 + vncipher $inout,$inout,$rndkey1 2374 + lvx $rndkey1,$idx,$key1 2375 + li $idx,16 2376 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2377 + vxor $rndkey0,$rndkey0,$tweak1 2378 + vncipherlast $output,$inout,$rndkey0 2379 + 2380 + le?vperm $tmp,$output,$output,$leperm 2381 + be?nop 2382 + le?stvx_u $tmp,0,$out 2383 + be?stvx_u $output,0,$out 2384 + 2385 + vmr $inout,$inptail 2386 + lvx $inptail,0,$inp 2387 + #addi $inp,$inp,16 2388 + lvx $rndkey0,0,$key1 2389 + lvx $rndkey1,$idx,$key1 2390 + addi $idx,$idx,16 2391 + vperm $inout,$inout,$inptail,$inpperm 2392 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2393 + 2394 + lvsr $inpperm,0,$len # $inpperm is no longer needed 2395 + vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2396 + vspltisb $tmp,-1 2397 + vperm $inptail,$inptail,$tmp,$inpperm 2398 + vsel $inout,$inout,$output,$inptail 2399 + 2400 + vxor $rndkey0,$rndkey0,$tweak 2401 + vxor $inout,$inout,$rndkey0 2402 + lvx $rndkey0,$idx,$key1 2403 + addi $idx,$idx,16 2404 + 2405 + subi r11,$out,1 2406 + mtctr $len 2407 + li $len,16 2408 + Loop_xts_dec_steal: 2409 + lbzu r0,1(r11) 2410 + stb r0,16(r11) 2411 + bdnz Loop_xts_dec_steal 2412 + 2413 + mtctr $rounds 2414 + b Loop_xts_dec # one more time... 
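The tail code above is XTS ciphertext stealing in the decrypt direction, where the last two tweaks have to be consumed out of order: the final complete ciphertext block is decrypted with the later tweak (tweak1, derived first thing in Ltail_xts_dec), while the recombined short block uses the earlier one. The vxor pair annotated ':-(' and ':-)' swaps the already-applied tweak for tweak1 without touching memory, and Loop_xts_dec_steal shifts the freshly decrypted bytes up by one block so the partial tail lands last. Schematically, for m blocks with a taillen-byte tail:

    # PP     = AES-dec(key1, C[m-1] xor tweak1) xor tweak1   (later tweak)
    # P[m]   = first taillen bytes of PP
    # CC     = C[m] followed by the remaining bytes of PP
    # P[m-1] = AES-dec(key1, CC xor tweak) xor tweak         (earlier tweak)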
2415 + 2416 + Lxts_dec_done: 2417 + ${UCMP}i $ivp,0 2418 + beq Lxts_dec_ret 2419 + 2420 + vsrab $tmp,$tweak,$seven # next tweak value 2421 + vaddubm $tweak,$tweak,$tweak 2422 + vsldoi $tmp,$tmp,$tmp,15 2423 + vand $tmp,$tmp,$eighty7 2424 + vxor $tweak,$tweak,$tmp 2425 + 2426 + le?vperm $tweak,$tweak,$tweak,$leperm 2427 + stvx_u $tweak,0,$ivp 2428 + 2429 + Lxts_dec_ret: 2430 + mtspr 256,r12 # restore vrsave 2431 + li r3,0 2432 + blr 2433 + .long 0 2434 + .byte 0,12,0x04,0,0x80,6,6,0 2435 + .long 0 2436 + .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2437 + ___ 2438 + ######################################################################### 2439 + {{ # Optimized XTS procedures # 2440 + my $key_=$key2; 2441 + my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2442 + $x00=0 if ($flavour =~ /osx/); 2443 + my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2444 + my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2445 + my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2446 + my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2447 + # v26-v31 last 6 round keys 2448 + my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2449 + my $taillen=$x70; 2450 + 2451 + $code.=<<___; 2452 + .align 5 2453 + _aesp8_xts_encrypt6x: 2454 + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2455 + mflr r11 2456 + li r7,`$FRAME+8*16+15` 2457 + li r3,`$FRAME+8*16+31` 2458 + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2459 + stvx v20,r7,$sp # ABI says so 2460 + addi r7,r7,32 2461 + stvx v21,r3,$sp 2462 + addi r3,r3,32 2463 + stvx v22,r7,$sp 2464 + addi r7,r7,32 2465 + stvx v23,r3,$sp 2466 + addi r3,r3,32 2467 + stvx v24,r7,$sp 2468 + addi r7,r7,32 2469 + stvx v25,r3,$sp 2470 + addi r3,r3,32 2471 + stvx v26,r7,$sp 2472 + addi r7,r7,32 2473 + stvx v27,r3,$sp 2474 + addi r3,r3,32 2475 + stvx v28,r7,$sp 2476 + addi r7,r7,32 2477 + stvx v29,r3,$sp 2478 + addi r3,r3,32 2479 + stvx v30,r7,$sp 2480 + stvx v31,r3,$sp 2481 + li r0,-1 2482 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2483 + li $x10,0x10 2484 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2485 + li $x20,0x20 2486 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2487 + li $x30,0x30 2488 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2489 + li $x40,0x40 2490 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2491 + li $x50,0x50 2492 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2493 + li $x60,0x60 2494 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2495 + li $x70,0x70 2496 + mtspr 256,r0 2497 + 2498 + subi $rounds,$rounds,3 # -4 in total 2499 + 2500 + lvx $rndkey0,$x00,$key1 # load key schedule 2501 + lvx v30,$x10,$key1 2502 + addi $key1,$key1,0x20 2503 + lvx v31,$x00,$key1 2504 + ?vperm $rndkey0,$rndkey0,v30,$keyperm 2505 + addi $key_,$sp,$FRAME+15 2506 + mtctr $rounds 2507 + 2508 + Load_xts_enc_key: 2509 + ?vperm v24,v30,v31,$keyperm 2510 + lvx v30,$x10,$key1 2511 + addi $key1,$key1,0x20 2512 + stvx v24,$x00,$key_ # off-load round[1] 2513 + ?vperm v25,v31,v30,$keyperm 2514 + lvx v31,$x00,$key1 2515 + stvx v25,$x10,$key_ # off-load round[2] 2516 + addi $key_,$key_,0x20 2517 + bdnz Load_xts_enc_key 2518 + 2519 + lvx v26,$x10,$key1 2520 + ?vperm v24,v30,v31,$keyperm 2521 + lvx v27,$x20,$key1 2522 + stvx v24,$x00,$key_ # off-load round[3] 2523 + ?vperm v25,v31,v26,$keyperm 2524 + lvx v28,$x30,$key1 2525 + stvx v25,$x10,$key_ # off-load round[4] 2526 + addi $key_,$sp,$FRAME+15 # rewind $key_ 2527 + ?vperm v26,v26,v27,$keyperm 2528 + lvx v29,$x40,$key1 2529 + ?vperm v27,v27,v28,$keyperm 2530 + lvx 
v30,$x50,$key1 2531 + ?vperm v28,v28,v29,$keyperm 2532 + lvx v31,$x60,$key1 2533 + ?vperm v29,v29,v30,$keyperm 2534 + lvx $twk5,$x70,$key1 # borrow $twk5 2535 + ?vperm v30,v30,v31,$keyperm 2536 + lvx v24,$x00,$key_ # pre-load round[1] 2537 + ?vperm v31,v31,$twk5,$keyperm 2538 + lvx v25,$x10,$key_ # pre-load round[2] 2539 + 2540 + vperm $in0,$inout,$inptail,$inpperm 2541 + subi $inp,$inp,31 # undo "caller" 2542 + vxor $twk0,$tweak,$rndkey0 2543 + vsrab $tmp,$tweak,$seven # next tweak value 2544 + vaddubm $tweak,$tweak,$tweak 2545 + vsldoi $tmp,$tmp,$tmp,15 2546 + vand $tmp,$tmp,$eighty7 2547 + vxor $out0,$in0,$twk0 2548 + vxor $tweak,$tweak,$tmp 2549 + 2550 + lvx_u $in1,$x10,$inp 2551 + vxor $twk1,$tweak,$rndkey0 2552 + vsrab $tmp,$tweak,$seven # next tweak value 2553 + vaddubm $tweak,$tweak,$tweak 2554 + vsldoi $tmp,$tmp,$tmp,15 2555 + le?vperm $in1,$in1,$in1,$leperm 2556 + vand $tmp,$tmp,$eighty7 2557 + vxor $out1,$in1,$twk1 2558 + vxor $tweak,$tweak,$tmp 2559 + 2560 + lvx_u $in2,$x20,$inp 2561 + andi. $taillen,$len,15 2562 + vxor $twk2,$tweak,$rndkey0 2563 + vsrab $tmp,$tweak,$seven # next tweak value 2564 + vaddubm $tweak,$tweak,$tweak 2565 + vsldoi $tmp,$tmp,$tmp,15 2566 + le?vperm $in2,$in2,$in2,$leperm 2567 + vand $tmp,$tmp,$eighty7 2568 + vxor $out2,$in2,$twk2 2569 + vxor $tweak,$tweak,$tmp 2570 + 2571 + lvx_u $in3,$x30,$inp 2572 + sub $len,$len,$taillen 2573 + vxor $twk3,$tweak,$rndkey0 2574 + vsrab $tmp,$tweak,$seven # next tweak value 2575 + vaddubm $tweak,$tweak,$tweak 2576 + vsldoi $tmp,$tmp,$tmp,15 2577 + le?vperm $in3,$in3,$in3,$leperm 2578 + vand $tmp,$tmp,$eighty7 2579 + vxor $out3,$in3,$twk3 2580 + vxor $tweak,$tweak,$tmp 2581 + 2582 + lvx_u $in4,$x40,$inp 2583 + subi $len,$len,0x60 2584 + vxor $twk4,$tweak,$rndkey0 2585 + vsrab $tmp,$tweak,$seven # next tweak value 2586 + vaddubm $tweak,$tweak,$tweak 2587 + vsldoi $tmp,$tmp,$tmp,15 2588 + le?vperm $in4,$in4,$in4,$leperm 2589 + vand $tmp,$tmp,$eighty7 2590 + vxor $out4,$in4,$twk4 2591 + vxor $tweak,$tweak,$tmp 2592 + 2593 + lvx_u $in5,$x50,$inp 2594 + addi $inp,$inp,0x60 2595 + vxor $twk5,$tweak,$rndkey0 2596 + vsrab $tmp,$tweak,$seven # next tweak value 2597 + vaddubm $tweak,$tweak,$tweak 2598 + vsldoi $tmp,$tmp,$tmp,15 2599 + le?vperm $in5,$in5,$in5,$leperm 2600 + vand $tmp,$tmp,$eighty7 2601 + vxor $out5,$in5,$twk5 2602 + vxor $tweak,$tweak,$tmp 2603 + 2604 + vxor v31,v31,$rndkey0 2605 + mtctr $rounds 2606 + b Loop_xts_enc6x 2607 + 2608 + .align 5 2609 + Loop_xts_enc6x: 2610 + vcipher $out0,$out0,v24 2611 + vcipher $out1,$out1,v24 2612 + vcipher $out2,$out2,v24 2613 + vcipher $out3,$out3,v24 2614 + vcipher $out4,$out4,v24 2615 + vcipher $out5,$out5,v24 2616 + lvx v24,$x20,$key_ # round[3] 2617 + addi $key_,$key_,0x20 2618 + 2619 + vcipher $out0,$out0,v25 2620 + vcipher $out1,$out1,v25 2621 + vcipher $out2,$out2,v25 2622 + vcipher $out3,$out3,v25 2623 + vcipher $out4,$out4,v25 2624 + vcipher $out5,$out5,v25 2625 + lvx v25,$x10,$key_ # round[4] 2626 + bdnz Loop_xts_enc6x 2627 + 2628 + subic $len,$len,96 # $len-=96 2629 + vxor $in0,$twk0,v31 # xor with last round key 2630 + vcipher $out0,$out0,v24 2631 + vcipher $out1,$out1,v24 2632 + vsrab $tmp,$tweak,$seven # next tweak value 2633 + vxor $twk0,$tweak,$rndkey0 2634 + vaddubm $tweak,$tweak,$tweak 2635 + vcipher $out2,$out2,v24 2636 + vcipher $out3,$out3,v24 2637 + vsldoi $tmp,$tmp,$tmp,15 2638 + vcipher $out4,$out4,v24 2639 + vcipher $out5,$out5,v24 2640 + 2641 + subfe. 
r0,r0,r0 # borrow?-1:0 2642 + vand $tmp,$tmp,$eighty7 2643 + vcipher $out0,$out0,v25 2644 + vcipher $out1,$out1,v25 2645 + vxor $tweak,$tweak,$tmp 2646 + vcipher $out2,$out2,v25 2647 + vcipher $out3,$out3,v25 2648 + vxor $in1,$twk1,v31 2649 + vsrab $tmp,$tweak,$seven # next tweak value 2650 + vxor $twk1,$tweak,$rndkey0 2651 + vcipher $out4,$out4,v25 2652 + vcipher $out5,$out5,v25 2653 + 2654 + and r0,r0,$len 2655 + vaddubm $tweak,$tweak,$tweak 2656 + vsldoi $tmp,$tmp,$tmp,15 2657 + vcipher $out0,$out0,v26 2658 + vcipher $out1,$out1,v26 2659 + vand $tmp,$tmp,$eighty7 2660 + vcipher $out2,$out2,v26 2661 + vcipher $out3,$out3,v26 2662 + vxor $tweak,$tweak,$tmp 2663 + vcipher $out4,$out4,v26 2664 + vcipher $out5,$out5,v26 2665 + 2666 + add $inp,$inp,r0 # $inp is adjusted in such 2667 + # way that at exit from the 2668 + # loop inX-in5 are loaded 2669 + # with last "words" 2670 + vxor $in2,$twk2,v31 2671 + vsrab $tmp,$tweak,$seven # next tweak value 2672 + vxor $twk2,$tweak,$rndkey0 2673 + vaddubm $tweak,$tweak,$tweak 2674 + vcipher $out0,$out0,v27 2675 + vcipher $out1,$out1,v27 2676 + vsldoi $tmp,$tmp,$tmp,15 2677 + vcipher $out2,$out2,v27 2678 + vcipher $out3,$out3,v27 2679 + vand $tmp,$tmp,$eighty7 2680 + vcipher $out4,$out4,v27 2681 + vcipher $out5,$out5,v27 2682 + 2683 + addi $key_,$sp,$FRAME+15 # rewind $key_ 2684 + vxor $tweak,$tweak,$tmp 2685 + vcipher $out0,$out0,v28 2686 + vcipher $out1,$out1,v28 2687 + vxor $in3,$twk3,v31 2688 + vsrab $tmp,$tweak,$seven # next tweak value 2689 + vxor $twk3,$tweak,$rndkey0 2690 + vcipher $out2,$out2,v28 2691 + vcipher $out3,$out3,v28 2692 + vaddubm $tweak,$tweak,$tweak 2693 + vsldoi $tmp,$tmp,$tmp,15 2694 + vcipher $out4,$out4,v28 2695 + vcipher $out5,$out5,v28 2696 + lvx v24,$x00,$key_ # re-pre-load round[1] 2697 + vand $tmp,$tmp,$eighty7 2698 + 2699 + vcipher $out0,$out0,v29 2700 + vcipher $out1,$out1,v29 2701 + vxor $tweak,$tweak,$tmp 2702 + vcipher $out2,$out2,v29 2703 + vcipher $out3,$out3,v29 2704 + vxor $in4,$twk4,v31 2705 + vsrab $tmp,$tweak,$seven # next tweak value 2706 + vxor $twk4,$tweak,$rndkey0 2707 + vcipher $out4,$out4,v29 2708 + vcipher $out5,$out5,v29 2709 + lvx v25,$x10,$key_ # re-pre-load round[2] 2710 + vaddubm $tweak,$tweak,$tweak 2711 + vsldoi $tmp,$tmp,$tmp,15 2712 + 2713 + vcipher $out0,$out0,v30 2714 + vcipher $out1,$out1,v30 2715 + vand $tmp,$tmp,$eighty7 2716 + vcipher $out2,$out2,v30 2717 + vcipher $out3,$out3,v30 2718 + vxor $tweak,$tweak,$tmp 2719 + vcipher $out4,$out4,v30 2720 + vcipher $out5,$out5,v30 2721 + vxor $in5,$twk5,v31 2722 + vsrab $tmp,$tweak,$seven # next tweak value 2723 + vxor $twk5,$tweak,$rndkey0 2724 + 2725 + vcipherlast $out0,$out0,$in0 2726 + lvx_u $in0,$x00,$inp # load next input block 2727 + vaddubm $tweak,$tweak,$tweak 2728 + vsldoi $tmp,$tmp,$tmp,15 2729 + vcipherlast $out1,$out1,$in1 2730 + lvx_u $in1,$x10,$inp 2731 + vcipherlast $out2,$out2,$in2 2732 + le?vperm $in0,$in0,$in0,$leperm 2733 + lvx_u $in2,$x20,$inp 2734 + vand $tmp,$tmp,$eighty7 2735 + vcipherlast $out3,$out3,$in3 2736 + le?vperm $in1,$in1,$in1,$leperm 2737 + lvx_u $in3,$x30,$inp 2738 + vcipherlast $out4,$out4,$in4 2739 + le?vperm $in2,$in2,$in2,$leperm 2740 + lvx_u $in4,$x40,$inp 2741 + vxor $tweak,$tweak,$tmp 2742 + vcipherlast $tmp,$out5,$in5 # last block might be needed 2743 + # in stealing mode 2744 + le?vperm $in3,$in3,$in3,$leperm 2745 + lvx_u $in5,$x50,$inp 2746 + addi $inp,$inp,0x60 2747 + le?vperm $in4,$in4,$in4,$leperm 2748 + le?vperm $in5,$in5,$in5,$leperm 2749 + 2750 + le?vperm $out0,$out0,$out0,$leperm 2751 + le?vperm 
$out1,$out1,$out1,$leperm 2752 + stvx_u $out0,$x00,$out # store output 2753 + vxor $out0,$in0,$twk0 2754 + le?vperm $out2,$out2,$out2,$leperm 2755 + stvx_u $out1,$x10,$out 2756 + vxor $out1,$in1,$twk1 2757 + le?vperm $out3,$out3,$out3,$leperm 2758 + stvx_u $out2,$x20,$out 2759 + vxor $out2,$in2,$twk2 2760 + le?vperm $out4,$out4,$out4,$leperm 2761 + stvx_u $out3,$x30,$out 2762 + vxor $out3,$in3,$twk3 2763 + le?vperm $out5,$tmp,$tmp,$leperm 2764 + stvx_u $out4,$x40,$out 2765 + vxor $out4,$in4,$twk4 2766 + le?stvx_u $out5,$x50,$out 2767 + be?stvx_u $tmp, $x50,$out 2768 + vxor $out5,$in5,$twk5 2769 + addi $out,$out,0x60 2770 + 2771 + mtctr $rounds 2772 + beq Loop_xts_enc6x # did $len-=96 borrow? 2773 + 2774 + addic. $len,$len,0x60 2775 + beq Lxts_enc6x_zero 2776 + cmpwi $len,0x20 2777 + blt Lxts_enc6x_one 2778 + nop 2779 + beq Lxts_enc6x_two 2780 + cmpwi $len,0x40 2781 + blt Lxts_enc6x_three 2782 + nop 2783 + beq Lxts_enc6x_four 2784 + 2785 + Lxts_enc6x_five: 2786 + vxor $out0,$in1,$twk0 2787 + vxor $out1,$in2,$twk1 2788 + vxor $out2,$in3,$twk2 2789 + vxor $out3,$in4,$twk3 2790 + vxor $out4,$in5,$twk4 2791 + 2792 + bl _aesp8_xts_enc5x 2793 + 2794 + le?vperm $out0,$out0,$out0,$leperm 2795 + vmr $twk0,$twk5 # unused tweak 2796 + le?vperm $out1,$out1,$out1,$leperm 2797 + stvx_u $out0,$x00,$out # store output 2798 + le?vperm $out2,$out2,$out2,$leperm 2799 + stvx_u $out1,$x10,$out 2800 + le?vperm $out3,$out3,$out3,$leperm 2801 + stvx_u $out2,$x20,$out 2802 + vxor $tmp,$out4,$twk5 # last block prep for stealing 2803 + le?vperm $out4,$out4,$out4,$leperm 2804 + stvx_u $out3,$x30,$out 2805 + stvx_u $out4,$x40,$out 2806 + addi $out,$out,0x50 2807 + bne Lxts_enc6x_steal 2808 + b Lxts_enc6x_done 2809 + 2810 + .align 4 2811 + Lxts_enc6x_four: 2812 + vxor $out0,$in2,$twk0 2813 + vxor $out1,$in3,$twk1 2814 + vxor $out2,$in4,$twk2 2815 + vxor $out3,$in5,$twk3 2816 + vxor $out4,$out4,$out4 2817 + 2818 + bl _aesp8_xts_enc5x 2819 + 2820 + le?vperm $out0,$out0,$out0,$leperm 2821 + vmr $twk0,$twk4 # unused tweak 2822 + le?vperm $out1,$out1,$out1,$leperm 2823 + stvx_u $out0,$x00,$out # store output 2824 + le?vperm $out2,$out2,$out2,$leperm 2825 + stvx_u $out1,$x10,$out 2826 + vxor $tmp,$out3,$twk4 # last block prep for stealing 2827 + le?vperm $out3,$out3,$out3,$leperm 2828 + stvx_u $out2,$x20,$out 2829 + stvx_u $out3,$x30,$out 2830 + addi $out,$out,0x40 2831 + bne Lxts_enc6x_steal 2832 + b Lxts_enc6x_done 2833 + 2834 + .align 4 2835 + Lxts_enc6x_three: 2836 + vxor $out0,$in3,$twk0 2837 + vxor $out1,$in4,$twk1 2838 + vxor $out2,$in5,$twk2 2839 + vxor $out3,$out3,$out3 2840 + vxor $out4,$out4,$out4 2841 + 2842 + bl _aesp8_xts_enc5x 2843 + 2844 + le?vperm $out0,$out0,$out0,$leperm 2845 + vmr $twk0,$twk3 # unused tweak 2846 + le?vperm $out1,$out1,$out1,$leperm 2847 + stvx_u $out0,$x00,$out # store output 2848 + vxor $tmp,$out2,$twk3 # last block prep for stealing 2849 + le?vperm $out2,$out2,$out2,$leperm 2850 + stvx_u $out1,$x10,$out 2851 + stvx_u $out2,$x20,$out 2852 + addi $out,$out,0x30 2853 + bne Lxts_enc6x_steal 2854 + b Lxts_enc6x_done 2855 + 2856 + .align 4 2857 + Lxts_enc6x_two: 2858 + vxor $out0,$in4,$twk0 2859 + vxor $out1,$in5,$twk1 2860 + vxor $out2,$out2,$out2 2861 + vxor $out3,$out3,$out3 2862 + vxor $out4,$out4,$out4 2863 + 2864 + bl _aesp8_xts_enc5x 2865 + 2866 + le?vperm $out0,$out0,$out0,$leperm 2867 + vmr $twk0,$twk2 # unused tweak 2868 + vxor $tmp,$out1,$twk2 # last block prep for stealing 2869 + le?vperm $out1,$out1,$out1,$leperm 2870 + stvx_u $out0,$x00,$out # store output 2871 + stvx_u 
$out1,$x10,$out 2872 + addi $out,$out,0x20 2873 + bne Lxts_enc6x_steal 2874 + b Lxts_enc6x_done 2875 + 2876 + .align 4 2877 + Lxts_enc6x_one: 2878 + vxor $out0,$in5,$twk0 2879 + nop 2880 + Loop_xts_enc1x: 2881 + vcipher $out0,$out0,v24 2882 + lvx v24,$x20,$key_ # round[3] 2883 + addi $key_,$key_,0x20 2884 + 2885 + vcipher $out0,$out0,v25 2886 + lvx v25,$x10,$key_ # round[4] 2887 + bdnz Loop_xts_enc1x 2888 + 2889 + add $inp,$inp,$taillen 2890 + cmpwi $taillen,0 2891 + vcipher $out0,$out0,v24 2892 + 2893 + subi $inp,$inp,16 2894 + vcipher $out0,$out0,v25 2895 + 2896 + lvsr $inpperm,0,$taillen 2897 + vcipher $out0,$out0,v26 2898 + 2899 + lvx_u $in0,0,$inp 2900 + vcipher $out0,$out0,v27 2901 + 2902 + addi $key_,$sp,$FRAME+15 # rewind $key_ 2903 + vcipher $out0,$out0,v28 2904 + lvx v24,$x00,$key_ # re-pre-load round[1] 2905 + 2906 + vcipher $out0,$out0,v29 2907 + lvx v25,$x10,$key_ # re-pre-load round[2] 2908 + vxor $twk0,$twk0,v31 2909 + 2910 + le?vperm $in0,$in0,$in0,$leperm 2911 + vcipher $out0,$out0,v30 2912 + 2913 + vperm $in0,$in0,$in0,$inpperm 2914 + vcipherlast $out0,$out0,$twk0 2915 + 2916 + vmr $twk0,$twk1 # unused tweak 2917 + vxor $tmp,$out0,$twk1 # last block prep for stealing 2918 + le?vperm $out0,$out0,$out0,$leperm 2919 + stvx_u $out0,$x00,$out # store output 2920 + addi $out,$out,0x10 2921 + bne Lxts_enc6x_steal 2922 + b Lxts_enc6x_done 2923 + 2924 + .align 4 2925 + Lxts_enc6x_zero: 2926 + cmpwi $taillen,0 2927 + beq Lxts_enc6x_done 2928 + 2929 + add $inp,$inp,$taillen 2930 + subi $inp,$inp,16 2931 + lvx_u $in0,0,$inp 2932 + lvsr $inpperm,0,$taillen # $in5 is no more 2933 + le?vperm $in0,$in0,$in0,$leperm 2934 + vperm $in0,$in0,$in0,$inpperm 2935 + vxor $tmp,$tmp,$twk0 2936 + Lxts_enc6x_steal: 2937 + vxor $in0,$in0,$twk0 2938 + vxor $out0,$out0,$out0 2939 + vspltisb $out1,-1 2940 + vperm $out0,$out0,$out1,$inpperm 2941 + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2942 + 2943 + subi r30,$out,17 2944 + subi $out,$out,16 2945 + mtctr $taillen 2946 + Loop_xts_enc6x_steal: 2947 + lbzu r0,1(r30) 2948 + stb r0,16(r30) 2949 + bdnz Loop_xts_enc6x_steal 2950 + 2951 + li $taillen,0 2952 + mtctr $rounds 2953 + b Loop_xts_enc1x # one more time... 
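Lxts_enc6x_steal above is the encrypt-side counterpart: tmp still holds the last complete ciphertext block ("last block prep for stealing"), the lvsr-derived mask rotates the partial input block into place, and vsel splices the two so the merged block can be sent back through Loop_xts_enc1x under the next tweak. The byte-copy loop then slides the previous ciphertext block forward so the stolen bytes end up as the short final block. In the usual ciphertext-stealing notation:

    # CC     = AES-enc(key1, P[m-1] xor tweak) xor tweak
    # C[m]   = first taillen bytes of CC
    # C[m-1] = AES-enc(key1, (P[m] . rest of CC) xor tweak') xor tweak'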
2954 + 2955 + .align 4 2956 + Lxts_enc6x_done: 2957 + ${UCMP}i $ivp,0 2958 + beq Lxts_enc6x_ret 2959 + 2960 + vxor $tweak,$twk0,$rndkey0 2961 + le?vperm $tweak,$tweak,$tweak,$leperm 2962 + stvx_u $tweak,0,$ivp 2963 + 2964 + Lxts_enc6x_ret: 2965 + mtlr r11 2966 + li r10,`$FRAME+15` 2967 + li r11,`$FRAME+31` 2968 + stvx $seven,r10,$sp # wipe copies of round keys 2969 + addi r10,r10,32 2970 + stvx $seven,r11,$sp 2971 + addi r11,r11,32 2972 + stvx $seven,r10,$sp 2973 + addi r10,r10,32 2974 + stvx $seven,r11,$sp 2975 + addi r11,r11,32 2976 + stvx $seven,r10,$sp 2977 + addi r10,r10,32 2978 + stvx $seven,r11,$sp 2979 + addi r11,r11,32 2980 + stvx $seven,r10,$sp 2981 + addi r10,r10,32 2982 + stvx $seven,r11,$sp 2983 + addi r11,r11,32 2984 + 2985 + mtspr 256,$vrsave 2986 + lvx v20,r10,$sp # ABI says so 2987 + addi r10,r10,32 2988 + lvx v21,r11,$sp 2989 + addi r11,r11,32 2990 + lvx v22,r10,$sp 2991 + addi r10,r10,32 2992 + lvx v23,r11,$sp 2993 + addi r11,r11,32 2994 + lvx v24,r10,$sp 2995 + addi r10,r10,32 2996 + lvx v25,r11,$sp 2997 + addi r11,r11,32 2998 + lvx v26,r10,$sp 2999 + addi r10,r10,32 3000 + lvx v27,r11,$sp 3001 + addi r11,r11,32 3002 + lvx v28,r10,$sp 3003 + addi r10,r10,32 3004 + lvx v29,r11,$sp 3005 + addi r11,r11,32 3006 + lvx v30,r10,$sp 3007 + lvx v31,r11,$sp 3008 + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3009 + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3010 + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3011 + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3012 + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3013 + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3014 + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3015 + blr 3016 + .long 0 3017 + .byte 0,12,0x04,1,0x80,6,6,0 3018 + .long 0 3019 + 3020 + .align 5 3021 + _aesp8_xts_enc5x: 3022 + vcipher $out0,$out0,v24 3023 + vcipher $out1,$out1,v24 3024 + vcipher $out2,$out2,v24 3025 + vcipher $out3,$out3,v24 3026 + vcipher $out4,$out4,v24 3027 + lvx v24,$x20,$key_ # round[3] 3028 + addi $key_,$key_,0x20 3029 + 3030 + vcipher $out0,$out0,v25 3031 + vcipher $out1,$out1,v25 3032 + vcipher $out2,$out2,v25 3033 + vcipher $out3,$out3,v25 3034 + vcipher $out4,$out4,v25 3035 + lvx v25,$x10,$key_ # round[4] 3036 + bdnz _aesp8_xts_enc5x 3037 + 3038 + add $inp,$inp,$taillen 3039 + cmpwi $taillen,0 3040 + vcipher $out0,$out0,v24 3041 + vcipher $out1,$out1,v24 3042 + vcipher $out2,$out2,v24 3043 + vcipher $out3,$out3,v24 3044 + vcipher $out4,$out4,v24 3045 + 3046 + subi $inp,$inp,16 3047 + vcipher $out0,$out0,v25 3048 + vcipher $out1,$out1,v25 3049 + vcipher $out2,$out2,v25 3050 + vcipher $out3,$out3,v25 3051 + vcipher $out4,$out4,v25 3052 + vxor $twk0,$twk0,v31 3053 + 3054 + vcipher $out0,$out0,v26 3055 + lvsr $inpperm,r0,$taillen # $in5 is no more 3056 + vcipher $out1,$out1,v26 3057 + vcipher $out2,$out2,v26 3058 + vcipher $out3,$out3,v26 3059 + vcipher $out4,$out4,v26 3060 + vxor $in1,$twk1,v31 3061 + 3062 + vcipher $out0,$out0,v27 3063 + lvx_u $in0,0,$inp 3064 + vcipher $out1,$out1,v27 3065 + vcipher $out2,$out2,v27 3066 + vcipher $out3,$out3,v27 3067 + vcipher $out4,$out4,v27 3068 + vxor $in2,$twk2,v31 3069 + 3070 + addi $key_,$sp,$FRAME+15 # rewind $key_ 3071 + vcipher $out0,$out0,v28 3072 + vcipher $out1,$out1,v28 3073 + vcipher $out2,$out2,v28 3074 + vcipher $out3,$out3,v28 3075 + vcipher $out4,$out4,v28 3076 + lvx v24,$x00,$key_ # re-pre-load round[1] 3077 + vxor $in3,$twk3,v31 3078 + 3079 + vcipher $out0,$out0,v29 3080 + le?vperm $in0,$in0,$in0,$leperm 3081 + vcipher $out1,$out1,v29 3082 + vcipher $out2,$out2,v29 3083 + vcipher $out3,$out3,v29 3084 + vcipher $out4,$out4,v29 
3085 + lvx v25,$x10,$key_ # re-pre-load round[2] 3086 + vxor $in4,$twk4,v31 3087 + 3088 + vcipher $out0,$out0,v30 3089 + vperm $in0,$in0,$in0,$inpperm 3090 + vcipher $out1,$out1,v30 3091 + vcipher $out2,$out2,v30 3092 + vcipher $out3,$out3,v30 3093 + vcipher $out4,$out4,v30 3094 + 3095 + vcipherlast $out0,$out0,$twk0 3096 + vcipherlast $out1,$out1,$in1 3097 + vcipherlast $out2,$out2,$in2 3098 + vcipherlast $out3,$out3,$in3 3099 + vcipherlast $out4,$out4,$in4 3100 + blr 3101 + .long 0 3102 + .byte 0,12,0x14,0,0,0,0,0 3103 + 3104 + .align 5 3105 + _aesp8_xts_decrypt6x: 3106 + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3107 + mflr r11 3108 + li r7,`$FRAME+8*16+15` 3109 + li r3,`$FRAME+8*16+31` 3110 + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3111 + stvx v20,r7,$sp # ABI says so 3112 + addi r7,r7,32 3113 + stvx v21,r3,$sp 3114 + addi r3,r3,32 3115 + stvx v22,r7,$sp 3116 + addi r7,r7,32 3117 + stvx v23,r3,$sp 3118 + addi r3,r3,32 3119 + stvx v24,r7,$sp 3120 + addi r7,r7,32 3121 + stvx v25,r3,$sp 3122 + addi r3,r3,32 3123 + stvx v26,r7,$sp 3124 + addi r7,r7,32 3125 + stvx v27,r3,$sp 3126 + addi r3,r3,32 3127 + stvx v28,r7,$sp 3128 + addi r7,r7,32 3129 + stvx v29,r3,$sp 3130 + addi r3,r3,32 3131 + stvx v30,r7,$sp 3132 + stvx v31,r3,$sp 3133 + li r0,-1 3134 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3135 + li $x10,0x10 3136 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3137 + li $x20,0x20 3138 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3139 + li $x30,0x30 3140 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3141 + li $x40,0x40 3142 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3143 + li $x50,0x50 3144 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3145 + li $x60,0x60 3146 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3147 + li $x70,0x70 3148 + mtspr 256,r0 3149 + 3150 + subi $rounds,$rounds,3 # -4 in total 3151 + 3152 + lvx $rndkey0,$x00,$key1 # load key schedule 3153 + lvx v30,$x10,$key1 3154 + addi $key1,$key1,0x20 3155 + lvx v31,$x00,$key1 3156 + ?vperm $rndkey0,$rndkey0,v30,$keyperm 3157 + addi $key_,$sp,$FRAME+15 3158 + mtctr $rounds 3159 + 3160 + Load_xts_dec_key: 3161 + ?vperm v24,v30,v31,$keyperm 3162 + lvx v30,$x10,$key1 3163 + addi $key1,$key1,0x20 3164 + stvx v24,$x00,$key_ # off-load round[1] 3165 + ?vperm v25,v31,v30,$keyperm 3166 + lvx v31,$x00,$key1 3167 + stvx v25,$x10,$key_ # off-load round[2] 3168 + addi $key_,$key_,0x20 3169 + bdnz Load_xts_dec_key 3170 + 3171 + lvx v26,$x10,$key1 3172 + ?vperm v24,v30,v31,$keyperm 3173 + lvx v27,$x20,$key1 3174 + stvx v24,$x00,$key_ # off-load round[3] 3175 + ?vperm v25,v31,v26,$keyperm 3176 + lvx v28,$x30,$key1 3177 + stvx v25,$x10,$key_ # off-load round[4] 3178 + addi $key_,$sp,$FRAME+15 # rewind $key_ 3179 + ?vperm v26,v26,v27,$keyperm 3180 + lvx v29,$x40,$key1 3181 + ?vperm v27,v27,v28,$keyperm 3182 + lvx v30,$x50,$key1 3183 + ?vperm v28,v28,v29,$keyperm 3184 + lvx v31,$x60,$key1 3185 + ?vperm v29,v29,v30,$keyperm 3186 + lvx $twk5,$x70,$key1 # borrow $twk5 3187 + ?vperm v30,v30,v31,$keyperm 3188 + lvx v24,$x00,$key_ # pre-load round[1] 3189 + ?vperm v31,v31,$twk5,$keyperm 3190 + lvx v25,$x10,$key_ # pre-load round[2] 3191 + 3192 + vperm $in0,$inout,$inptail,$inpperm 3193 + subi $inp,$inp,31 # undo "caller" 3194 + vxor $twk0,$tweak,$rndkey0 3195 + vsrab $tmp,$tweak,$seven # next tweak value 3196 + vaddubm $tweak,$tweak,$tweak 3197 + vsldoi $tmp,$tmp,$tmp,15 3198 + vand $tmp,$tmp,$eighty7 3199 + vxor $out0,$in0,$twk0 3200 + vxor $tweak,$tweak,$tmp 3201 + 3202 + lvx_u $in1,$x10,$inp 3203 + vxor $twk1,$tweak,$rndkey0 3204 + vsrab $tmp,$tweak,$seven # next 
tweak value 3205 + vaddubm $tweak,$tweak,$tweak 3206 + vsldoi $tmp,$tmp,$tmp,15 3207 + le?vperm $in1,$in1,$in1,$leperm 3208 + vand $tmp,$tmp,$eighty7 3209 + vxor $out1,$in1,$twk1 3210 + vxor $tweak,$tweak,$tmp 3211 + 3212 + lvx_u $in2,$x20,$inp 3213 + andi. $taillen,$len,15 3214 + vxor $twk2,$tweak,$rndkey0 3215 + vsrab $tmp,$tweak,$seven # next tweak value 3216 + vaddubm $tweak,$tweak,$tweak 3217 + vsldoi $tmp,$tmp,$tmp,15 3218 + le?vperm $in2,$in2,$in2,$leperm 3219 + vand $tmp,$tmp,$eighty7 3220 + vxor $out2,$in2,$twk2 3221 + vxor $tweak,$tweak,$tmp 3222 + 3223 + lvx_u $in3,$x30,$inp 3224 + sub $len,$len,$taillen 3225 + vxor $twk3,$tweak,$rndkey0 3226 + vsrab $tmp,$tweak,$seven # next tweak value 3227 + vaddubm $tweak,$tweak,$tweak 3228 + vsldoi $tmp,$tmp,$tmp,15 3229 + le?vperm $in3,$in3,$in3,$leperm 3230 + vand $tmp,$tmp,$eighty7 3231 + vxor $out3,$in3,$twk3 3232 + vxor $tweak,$tweak,$tmp 3233 + 3234 + lvx_u $in4,$x40,$inp 3235 + subi $len,$len,0x60 3236 + vxor $twk4,$tweak,$rndkey0 3237 + vsrab $tmp,$tweak,$seven # next tweak value 3238 + vaddubm $tweak,$tweak,$tweak 3239 + vsldoi $tmp,$tmp,$tmp,15 3240 + le?vperm $in4,$in4,$in4,$leperm 3241 + vand $tmp,$tmp,$eighty7 3242 + vxor $out4,$in4,$twk4 3243 + vxor $tweak,$tweak,$tmp 3244 + 3245 + lvx_u $in5,$x50,$inp 3246 + addi $inp,$inp,0x60 3247 + vxor $twk5,$tweak,$rndkey0 3248 + vsrab $tmp,$tweak,$seven # next tweak value 3249 + vaddubm $tweak,$tweak,$tweak 3250 + vsldoi $tmp,$tmp,$tmp,15 3251 + le?vperm $in5,$in5,$in5,$leperm 3252 + vand $tmp,$tmp,$eighty7 3253 + vxor $out5,$in5,$twk5 3254 + vxor $tweak,$tweak,$tmp 3255 + 3256 + vxor v31,v31,$rndkey0 3257 + mtctr $rounds 3258 + b Loop_xts_dec6x 3259 + 3260 + .align 5 3261 + Loop_xts_dec6x: 3262 + vncipher $out0,$out0,v24 3263 + vncipher $out1,$out1,v24 3264 + vncipher $out2,$out2,v24 3265 + vncipher $out3,$out3,v24 3266 + vncipher $out4,$out4,v24 3267 + vncipher $out5,$out5,v24 3268 + lvx v24,$x20,$key_ # round[3] 3269 + addi $key_,$key_,0x20 3270 + 3271 + vncipher $out0,$out0,v25 3272 + vncipher $out1,$out1,v25 3273 + vncipher $out2,$out2,v25 3274 + vncipher $out3,$out3,v25 3275 + vncipher $out4,$out4,v25 3276 + vncipher $out5,$out5,v25 3277 + lvx v25,$x10,$key_ # round[4] 3278 + bdnz Loop_xts_dec6x 3279 + 3280 + subic $len,$len,96 # $len-=96 3281 + vxor $in0,$twk0,v31 # xor with last round key 3282 + vncipher $out0,$out0,v24 3283 + vncipher $out1,$out1,v24 3284 + vsrab $tmp,$tweak,$seven # next tweak value 3285 + vxor $twk0,$tweak,$rndkey0 3286 + vaddubm $tweak,$tweak,$tweak 3287 + vncipher $out2,$out2,v24 3288 + vncipher $out3,$out3,v24 3289 + vsldoi $tmp,$tmp,$tmp,15 3290 + vncipher $out4,$out4,v24 3291 + vncipher $out5,$out5,v24 3292 + 3293 + subfe. 
r0,r0,r0 # borrow?-1:0 3294 + vand $tmp,$tmp,$eighty7 3295 + vncipher $out0,$out0,v25 3296 + vncipher $out1,$out1,v25 3297 + vxor $tweak,$tweak,$tmp 3298 + vncipher $out2,$out2,v25 3299 + vncipher $out3,$out3,v25 3300 + vxor $in1,$twk1,v31 3301 + vsrab $tmp,$tweak,$seven # next tweak value 3302 + vxor $twk1,$tweak,$rndkey0 3303 + vncipher $out4,$out4,v25 3304 + vncipher $out5,$out5,v25 3305 + 3306 + and r0,r0,$len 3307 + vaddubm $tweak,$tweak,$tweak 3308 + vsldoi $tmp,$tmp,$tmp,15 3309 + vncipher $out0,$out0,v26 3310 + vncipher $out1,$out1,v26 3311 + vand $tmp,$tmp,$eighty7 3312 + vncipher $out2,$out2,v26 3313 + vncipher $out3,$out3,v26 3314 + vxor $tweak,$tweak,$tmp 3315 + vncipher $out4,$out4,v26 3316 + vncipher $out5,$out5,v26 3317 + 3318 + add $inp,$inp,r0 # $inp is adjusted in such 3319 + # way that at exit from the 3320 + # loop inX-in5 are loaded 3321 + # with last "words" 3322 + vxor $in2,$twk2,v31 3323 + vsrab $tmp,$tweak,$seven # next tweak value 3324 + vxor $twk2,$tweak,$rndkey0 3325 + vaddubm $tweak,$tweak,$tweak 3326 + vncipher $out0,$out0,v27 3327 + vncipher $out1,$out1,v27 3328 + vsldoi $tmp,$tmp,$tmp,15 3329 + vncipher $out2,$out2,v27 3330 + vncipher $out3,$out3,v27 3331 + vand $tmp,$tmp,$eighty7 3332 + vncipher $out4,$out4,v27 3333 + vncipher $out5,$out5,v27 3334 + 3335 + addi $key_,$sp,$FRAME+15 # rewind $key_ 3336 + vxor $tweak,$tweak,$tmp 3337 + vncipher $out0,$out0,v28 3338 + vncipher $out1,$out1,v28 3339 + vxor $in3,$twk3,v31 3340 + vsrab $tmp,$tweak,$seven # next tweak value 3341 + vxor $twk3,$tweak,$rndkey0 3342 + vncipher $out2,$out2,v28 3343 + vncipher $out3,$out3,v28 3344 + vaddubm $tweak,$tweak,$tweak 3345 + vsldoi $tmp,$tmp,$tmp,15 3346 + vncipher $out4,$out4,v28 3347 + vncipher $out5,$out5,v28 3348 + lvx v24,$x00,$key_ # re-pre-load round[1] 3349 + vand $tmp,$tmp,$eighty7 3350 + 3351 + vncipher $out0,$out0,v29 3352 + vncipher $out1,$out1,v29 3353 + vxor $tweak,$tweak,$tmp 3354 + vncipher $out2,$out2,v29 3355 + vncipher $out3,$out3,v29 3356 + vxor $in4,$twk4,v31 3357 + vsrab $tmp,$tweak,$seven # next tweak value 3358 + vxor $twk4,$tweak,$rndkey0 3359 + vncipher $out4,$out4,v29 3360 + vncipher $out5,$out5,v29 3361 + lvx v25,$x10,$key_ # re-pre-load round[2] 3362 + vaddubm $tweak,$tweak,$tweak 3363 + vsldoi $tmp,$tmp,$tmp,15 3364 + 3365 + vncipher $out0,$out0,v30 3366 + vncipher $out1,$out1,v30 3367 + vand $tmp,$tmp,$eighty7 3368 + vncipher $out2,$out2,v30 3369 + vncipher $out3,$out3,v30 3370 + vxor $tweak,$tweak,$tmp 3371 + vncipher $out4,$out4,v30 3372 + vncipher $out5,$out5,v30 3373 + vxor $in5,$twk5,v31 3374 + vsrab $tmp,$tweak,$seven # next tweak value 3375 + vxor $twk5,$tweak,$rndkey0 3376 + 3377 + vncipherlast $out0,$out0,$in0 3378 + lvx_u $in0,$x00,$inp # load next input block 3379 + vaddubm $tweak,$tweak,$tweak 3380 + vsldoi $tmp,$tmp,$tmp,15 3381 + vncipherlast $out1,$out1,$in1 3382 + lvx_u $in1,$x10,$inp 3383 + vncipherlast $out2,$out2,$in2 3384 + le?vperm $in0,$in0,$in0,$leperm 3385 + lvx_u $in2,$x20,$inp 3386 + vand $tmp,$tmp,$eighty7 3387 + vncipherlast $out3,$out3,$in3 3388 + le?vperm $in1,$in1,$in1,$leperm 3389 + lvx_u $in3,$x30,$inp 3390 + vncipherlast $out4,$out4,$in4 3391 + le?vperm $in2,$in2,$in2,$leperm 3392 + lvx_u $in4,$x40,$inp 3393 + vxor $tweak,$tweak,$tmp 3394 + vncipherlast $out5,$out5,$in5 3395 + le?vperm $in3,$in3,$in3,$leperm 3396 + lvx_u $in5,$x50,$inp 3397 + addi $inp,$inp,0x60 3398 + le?vperm $in4,$in4,$in4,$leperm 3399 + le?vperm $in5,$in5,$in5,$leperm 3400 + 3401 + le?vperm $out0,$out0,$out0,$leperm 3402 + le?vperm 
$out1,$out1,$out1,$leperm 3403 + stvx_u $out0,$x00,$out # store output 3404 + vxor $out0,$in0,$twk0 3405 + le?vperm $out2,$out2,$out2,$leperm 3406 + stvx_u $out1,$x10,$out 3407 + vxor $out1,$in1,$twk1 3408 + le?vperm $out3,$out3,$out3,$leperm 3409 + stvx_u $out2,$x20,$out 3410 + vxor $out2,$in2,$twk2 3411 + le?vperm $out4,$out4,$out4,$leperm 3412 + stvx_u $out3,$x30,$out 3413 + vxor $out3,$in3,$twk3 3414 + le?vperm $out5,$out5,$out5,$leperm 3415 + stvx_u $out4,$x40,$out 3416 + vxor $out4,$in4,$twk4 3417 + stvx_u $out5,$x50,$out 3418 + vxor $out5,$in5,$twk5 3419 + addi $out,$out,0x60 3420 + 3421 + mtctr $rounds 3422 + beq Loop_xts_dec6x # did $len-=96 borrow? 3423 + 3424 + addic. $len,$len,0x60 3425 + beq Lxts_dec6x_zero 3426 + cmpwi $len,0x20 3427 + blt Lxts_dec6x_one 3428 + nop 3429 + beq Lxts_dec6x_two 3430 + cmpwi $len,0x40 3431 + blt Lxts_dec6x_three 3432 + nop 3433 + beq Lxts_dec6x_four 3434 + 3435 + Lxts_dec6x_five: 3436 + vxor $out0,$in1,$twk0 3437 + vxor $out1,$in2,$twk1 3438 + vxor $out2,$in3,$twk2 3439 + vxor $out3,$in4,$twk3 3440 + vxor $out4,$in5,$twk4 3441 + 3442 + bl _aesp8_xts_dec5x 3443 + 3444 + le?vperm $out0,$out0,$out0,$leperm 3445 + vmr $twk0,$twk5 # unused tweak 3446 + vxor $twk1,$tweak,$rndkey0 3447 + le?vperm $out1,$out1,$out1,$leperm 3448 + stvx_u $out0,$x00,$out # store output 3449 + vxor $out0,$in0,$twk1 3450 + le?vperm $out2,$out2,$out2,$leperm 3451 + stvx_u $out1,$x10,$out 3452 + le?vperm $out3,$out3,$out3,$leperm 3453 + stvx_u $out2,$x20,$out 3454 + le?vperm $out4,$out4,$out4,$leperm 3455 + stvx_u $out3,$x30,$out 3456 + stvx_u $out4,$x40,$out 3457 + addi $out,$out,0x50 3458 + bne Lxts_dec6x_steal 3459 + b Lxts_dec6x_done 3460 + 3461 + .align 4 3462 + Lxts_dec6x_four: 3463 + vxor $out0,$in2,$twk0 3464 + vxor $out1,$in3,$twk1 3465 + vxor $out2,$in4,$twk2 3466 + vxor $out3,$in5,$twk3 3467 + vxor $out4,$out4,$out4 3468 + 3469 + bl _aesp8_xts_dec5x 3470 + 3471 + le?vperm $out0,$out0,$out0,$leperm 3472 + vmr $twk0,$twk4 # unused tweak 3473 + vmr $twk1,$twk5 3474 + le?vperm $out1,$out1,$out1,$leperm 3475 + stvx_u $out0,$x00,$out # store output 3476 + vxor $out0,$in0,$twk5 3477 + le?vperm $out2,$out2,$out2,$leperm 3478 + stvx_u $out1,$x10,$out 3479 + le?vperm $out3,$out3,$out3,$leperm 3480 + stvx_u $out2,$x20,$out 3481 + stvx_u $out3,$x30,$out 3482 + addi $out,$out,0x40 3483 + bne Lxts_dec6x_steal 3484 + b Lxts_dec6x_done 3485 + 3486 + .align 4 3487 + Lxts_dec6x_three: 3488 + vxor $out0,$in3,$twk0 3489 + vxor $out1,$in4,$twk1 3490 + vxor $out2,$in5,$twk2 3491 + vxor $out3,$out3,$out3 3492 + vxor $out4,$out4,$out4 3493 + 3494 + bl _aesp8_xts_dec5x 3495 + 3496 + le?vperm $out0,$out0,$out0,$leperm 3497 + vmr $twk0,$twk3 # unused tweak 3498 + vmr $twk1,$twk4 3499 + le?vperm $out1,$out1,$out1,$leperm 3500 + stvx_u $out0,$x00,$out # store output 3501 + vxor $out0,$in0,$twk4 3502 + le?vperm $out2,$out2,$out2,$leperm 3503 + stvx_u $out1,$x10,$out 3504 + stvx_u $out2,$x20,$out 3505 + addi $out,$out,0x30 3506 + bne Lxts_dec6x_steal 3507 + b Lxts_dec6x_done 3508 + 3509 + .align 4 3510 + Lxts_dec6x_two: 3511 + vxor $out0,$in4,$twk0 3512 + vxor $out1,$in5,$twk1 3513 + vxor $out2,$out2,$out2 3514 + vxor $out3,$out3,$out3 3515 + vxor $out4,$out4,$out4 3516 + 3517 + bl _aesp8_xts_dec5x 3518 + 3519 + le?vperm $out0,$out0,$out0,$leperm 3520 + vmr $twk0,$twk2 # unused tweak 3521 + vmr $twk1,$twk3 3522 + le?vperm $out1,$out1,$out1,$leperm 3523 + stvx_u $out0,$x00,$out # store output 3524 + vxor $out0,$in0,$twk3 3525 + stvx_u $out1,$x10,$out 3526 + addi $out,$out,0x20 3527 + bne 
Lxts_dec6x_steal 3528 + b Lxts_dec6x_done 3529 + 3530 + .align 4 3531 + Lxts_dec6x_one: 3532 + vxor $out0,$in5,$twk0 3533 + nop 3534 + Loop_xts_dec1x: 3535 + vncipher $out0,$out0,v24 3536 + lvx v24,$x20,$key_ # round[3] 3537 + addi $key_,$key_,0x20 3538 + 3539 + vncipher $out0,$out0,v25 3540 + lvx v25,$x10,$key_ # round[4] 3541 + bdnz Loop_xts_dec1x 3542 + 3543 + subi r0,$taillen,1 3544 + vncipher $out0,$out0,v24 3545 + 3546 + andi. r0,r0,16 3547 + cmpwi $taillen,0 3548 + vncipher $out0,$out0,v25 3549 + 3550 + sub $inp,$inp,r0 3551 + vncipher $out0,$out0,v26 3552 + 3553 + lvx_u $in0,0,$inp 3554 + vncipher $out0,$out0,v27 3555 + 3556 + addi $key_,$sp,$FRAME+15 # rewind $key_ 3557 + vncipher $out0,$out0,v28 3558 + lvx v24,$x00,$key_ # re-pre-load round[1] 3559 + 3560 + vncipher $out0,$out0,v29 3561 + lvx v25,$x10,$key_ # re-pre-load round[2] 3562 + vxor $twk0,$twk0,v31 3563 + 3564 + le?vperm $in0,$in0,$in0,$leperm 3565 + vncipher $out0,$out0,v30 3566 + 3567 + mtctr $rounds 3568 + vncipherlast $out0,$out0,$twk0 3569 + 3570 + vmr $twk0,$twk1 # unused tweak 3571 + vmr $twk1,$twk2 3572 + le?vperm $out0,$out0,$out0,$leperm 3573 + stvx_u $out0,$x00,$out # store output 3574 + addi $out,$out,0x10 3575 + vxor $out0,$in0,$twk2 3576 + bne Lxts_dec6x_steal 3577 + b Lxts_dec6x_done 3578 + 3579 + .align 4 3580 + Lxts_dec6x_zero: 3581 + cmpwi $taillen,0 3582 + beq Lxts_dec6x_done 3583 + 3584 + lvx_u $in0,0,$inp 3585 + le?vperm $in0,$in0,$in0,$leperm 3586 + vxor $out0,$in0,$twk1 3587 + Lxts_dec6x_steal: 3588 + vncipher $out0,$out0,v24 3589 + lvx v24,$x20,$key_ # round[3] 3590 + addi $key_,$key_,0x20 3591 + 3592 + vncipher $out0,$out0,v25 3593 + lvx v25,$x10,$key_ # round[4] 3594 + bdnz Lxts_dec6x_steal 3595 + 3596 + add $inp,$inp,$taillen 3597 + vncipher $out0,$out0,v24 3598 + 3599 + cmpwi $taillen,0 3600 + vncipher $out0,$out0,v25 3601 + 3602 + lvx_u $in0,0,$inp 3603 + vncipher $out0,$out0,v26 3604 + 3605 + lvsr $inpperm,0,$taillen # $in5 is no more 3606 + vncipher $out0,$out0,v27 3607 + 3608 + addi $key_,$sp,$FRAME+15 # rewind $key_ 3609 + vncipher $out0,$out0,v28 3610 + lvx v24,$x00,$key_ # re-pre-load round[1] 3611 + 3612 + vncipher $out0,$out0,v29 3613 + lvx v25,$x10,$key_ # re-pre-load round[2] 3614 + vxor $twk1,$twk1,v31 3615 + 3616 + le?vperm $in0,$in0,$in0,$leperm 3617 + vncipher $out0,$out0,v30 3618 + 3619 + vperm $in0,$in0,$in0,$inpperm 3620 + vncipherlast $tmp,$out0,$twk1 3621 + 3622 + le?vperm $out0,$tmp,$tmp,$leperm 3623 + le?stvx_u $out0,0,$out 3624 + be?stvx_u $tmp,0,$out 3625 + 3626 + vxor $out0,$out0,$out0 3627 + vspltisb $out1,-1 3628 + vperm $out0,$out0,$out1,$inpperm 3629 + vsel $out0,$in0,$tmp,$out0 3630 + vxor $out0,$out0,$twk0 3631 + 3632 + subi r30,$out,1 3633 + mtctr $taillen 3634 + Loop_xts_dec6x_steal: 3635 + lbzu r0,1(r30) 3636 + stb r0,16(r30) 3637 + bdnz Loop_xts_dec6x_steal 3638 + 3639 + li $taillen,0 3640 + mtctr $rounds 3641 + b Loop_xts_dec1x # one more time... 
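The decrypt flavour of the same stealing dance: Lxts_dec6x_steal runs the last complete ciphertext block through the rounds under twk1 (the later tweak), releases its leading bytes as the final partial plaintext, splices the ciphertext tail in with vsel, and re-enters Loop_xts_dec1x so the merged block is decrypted under twk0. The Loop_xts_dec6x_steal byte loop is just an in-place forward copy; in Perl terms, roughly (outbuf and pos are illustrative names):

    # move the first taillen bytes of the block at pos up by 16
    substr($outbuf, $pos + 16, $taillen,
           substr($outbuf, $pos, $taillen));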
3642 + 3643 + .align 4 3644 + Lxts_dec6x_done: 3645 + ${UCMP}i $ivp,0 3646 + beq Lxts_dec6x_ret 3647 + 3648 + vxor $tweak,$twk0,$rndkey0 3649 + le?vperm $tweak,$tweak,$tweak,$leperm 3650 + stvx_u $tweak,0,$ivp 3651 + 3652 + Lxts_dec6x_ret: 3653 + mtlr r11 3654 + li r10,`$FRAME+15` 3655 + li r11,`$FRAME+31` 3656 + stvx $seven,r10,$sp # wipe copies of round keys 3657 + addi r10,r10,32 3658 + stvx $seven,r11,$sp 3659 + addi r11,r11,32 3660 + stvx $seven,r10,$sp 3661 + addi r10,r10,32 3662 + stvx $seven,r11,$sp 3663 + addi r11,r11,32 3664 + stvx $seven,r10,$sp 3665 + addi r10,r10,32 3666 + stvx $seven,r11,$sp 3667 + addi r11,r11,32 3668 + stvx $seven,r10,$sp 3669 + addi r10,r10,32 3670 + stvx $seven,r11,$sp 3671 + addi r11,r11,32 3672 + 3673 + mtspr 256,$vrsave 3674 + lvx v20,r10,$sp # ABI says so 3675 + addi r10,r10,32 3676 + lvx v21,r11,$sp 3677 + addi r11,r11,32 3678 + lvx v22,r10,$sp 3679 + addi r10,r10,32 3680 + lvx v23,r11,$sp 3681 + addi r11,r11,32 3682 + lvx v24,r10,$sp 3683 + addi r10,r10,32 3684 + lvx v25,r11,$sp 3685 + addi r11,r11,32 3686 + lvx v26,r10,$sp 3687 + addi r10,r10,32 3688 + lvx v27,r11,$sp 3689 + addi r11,r11,32 3690 + lvx v28,r10,$sp 3691 + addi r10,r10,32 3692 + lvx v29,r11,$sp 3693 + addi r11,r11,32 3694 + lvx v30,r10,$sp 3695 + lvx v31,r11,$sp 3696 + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3697 + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3698 + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3699 + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3700 + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3701 + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3702 + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3703 + blr 3704 + .long 0 3705 + .byte 0,12,0x04,1,0x80,6,6,0 3706 + .long 0 3707 + 3708 + .align 5 3709 + _aesp8_xts_dec5x: 3710 + vncipher $out0,$out0,v24 3711 + vncipher $out1,$out1,v24 3712 + vncipher $out2,$out2,v24 3713 + vncipher $out3,$out3,v24 3714 + vncipher $out4,$out4,v24 3715 + lvx v24,$x20,$key_ # round[3] 3716 + addi $key_,$key_,0x20 3717 + 3718 + vncipher $out0,$out0,v25 3719 + vncipher $out1,$out1,v25 3720 + vncipher $out2,$out2,v25 3721 + vncipher $out3,$out3,v25 3722 + vncipher $out4,$out4,v25 3723 + lvx v25,$x10,$key_ # round[4] 3724 + bdnz _aesp8_xts_dec5x 3725 + 3726 + subi r0,$taillen,1 3727 + vncipher $out0,$out0,v24 3728 + vncipher $out1,$out1,v24 3729 + vncipher $out2,$out2,v24 3730 + vncipher $out3,$out3,v24 3731 + vncipher $out4,$out4,v24 3732 + 3733 + andi. 
r0,r0,16
3734 +     cmpwi    $taillen,0
3735 +     vncipher $out0,$out0,v25
3736 +     vncipher $out1,$out1,v25
3737 +     vncipher $out2,$out2,v25
3738 +     vncipher $out3,$out3,v25
3739 +     vncipher $out4,$out4,v25
3740 +     vxor     $twk0,$twk0,v31
3741 +
3742 +     sub      $inp,$inp,r0
3743 +     vncipher $out0,$out0,v26
3744 +     vncipher $out1,$out1,v26
3745 +     vncipher $out2,$out2,v26
3746 +     vncipher $out3,$out3,v26
3747 +     vncipher $out4,$out4,v26
3748 +     vxor     $in1,$twk1,v31
3749 +
3750 +     vncipher $out0,$out0,v27
3751 +     lvx_u    $in0,0,$inp
3752 +     vncipher $out1,$out1,v27
3753 +     vncipher $out2,$out2,v27
3754 +     vncipher $out3,$out3,v27
3755 +     vncipher $out4,$out4,v27
3756 +     vxor     $in2,$twk2,v31
3757 +
3758 +     addi     $key_,$sp,$FRAME+15    # rewind $key_
3759 +     vncipher $out0,$out0,v28
3760 +     vncipher $out1,$out1,v28
3761 +     vncipher $out2,$out2,v28
3762 +     vncipher $out3,$out3,v28
3763 +     vncipher $out4,$out4,v28
3764 +     lvx      v24,$x00,$key_         # re-pre-load round[1]
3765 +     vxor     $in3,$twk3,v31
3766 +
3767 +     vncipher $out0,$out0,v29
3768 +     le?vperm $in0,$in0,$in0,$leperm
3769 +     vncipher $out1,$out1,v29
3770 +     vncipher $out2,$out2,v29
3771 +     vncipher $out3,$out3,v29
3772 +     vncipher $out4,$out4,v29
3773 +     lvx      v25,$x10,$key_         # re-pre-load round[2]
3774 +     vxor     $in4,$twk4,v31
3775 +
3776 +     vncipher $out0,$out0,v30
3777 +     vncipher $out1,$out1,v30
3778 +     vncipher $out2,$out2,v30
3779 +     vncipher $out3,$out3,v30
3780 +     vncipher $out4,$out4,v30
3781 +
3782 +     vncipherlast $out0,$out0,$twk0
3783 +     vncipherlast $out1,$out1,$in1
3784 +     vncipherlast $out2,$out2,$in2
3785 +     vncipherlast $out3,$out3,$in3
3786 +     vncipherlast $out4,$out4,$in4
3787 +     mtctr    $rounds
3788 +     blr
3789 +     .long    0
3790 +     .byte    0,12,0x14,0,0,0,0,0
3791 + ___
3792 + }} }}}
3793 +
3794 + my $consts=1;
3795 + foreach(split("\n",$code)) {
3796 +     s/\`([^\`]*)\`/eval($1)/geo;
3797 +
3798 +     # constants table endian-specific conversion
3799 +     if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3800 +         my $conv=$3;
3801 +         my @bytes=();
3802 +
3803 +         # convert to endian-agnostic format
3804 +         if ($1 eq "long") {
3805 +             foreach (split(/,\s*/,$2)) {
3806 +                 my $l = /^0/?oct:int;
3807 +                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3808 +             }
3809 +         } else {
3810 +             @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3811 +         }
3812 +
3813 +         # little-endian conversion
3814 +         if ($flavour =~ /le$/o) {
3815 +             SWITCH: for($conv) {
3816 +                 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3817 +                 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3818 +             }
3819 +         }
3820 +
3821 +         # emit
3822 +         print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3823 +         next;
3824 +     }
3825 +     $consts=0 if (m/Lconsts:/o);    # end of table
3826 +
3827 +     # instructions prefixed with '?' are endian-specific and need
3828 +     # to be adjusted accordingly...
3829 +     if ($flavour =~ /le$/o) {       # little-endian
3830 +         s/le\?//o        or
3831 +         s/be\?/#be#/o    or
3832 +         s/\?lvsr/lvsl/o  or
3833 +         s/\?lvsl/lvsr/o  or
3834 +         s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3835 +         s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3836 +         s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3837 +     } else {                        # big-endian
3838 +         s/le\?/#le#/o    or
3839 +         s/be\?//o        or
3840 +         s/\?([a-z]+)/$1/o;
3841 +     }
3842 +
3843 +     print $_,"\n";
3844 + }
3845 +
3846 + close STDOUT;
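The foreach driver above is what makes the '?'-tagged instructions in $code endian-neutral: constants marked ?rev or ?inv in the table are byte-reversed or nibble-inverted for little-endian flavours, le?/be? lines are kept or commented out to match the target, ?lvsl and ?lvsr trade places, and a tagged vperm has its two middle operands exchanged. For example, a line such as

    ?vperm  v24,v30,v31,v7

is emitted as "vperm v24,v31,v30,v7" for a little-endian flavour and as "vperm v24,v30,v31,v7" (tag stripped) for a big-endian one, so a single source serves both byte orders.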