Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: powerpc/aes - ECB/CBC/CTR/XTS modes

The assembler block cipher module that controls the core
AES functions.

Signed-off-by: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Markus Stockhausen and committed by
Herbert Xu
f2e2ad2e f98992af

+630
+630
arch/powerpc/crypto/aes-spe-modes.S
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Endian dependent access helpers.
 *
 * LOAD_DATA/SAVE_DATA move one 32 bit word between a register and the
 * source (rSP) / destination (rDP) buffer, LOAD_IV/SAVE_IV do the same
 * for the IV buffer (rIP). The register aliases come from
 * aes-spe-regs.h.
 *
 * Big endian builds address the word by its offset and leave the
 * pointers alone, so NEXT_BLOCK has to step rSP/rDP by one block.
 *
 * Little endian builds use the byte reversing indexed load/store, which
 * has no displacement form. The "off" argument is therefore ignored and
 * every access moves the pointer on by 4 instead. Four accesses advance a
 * pointer by a whole block, which is why NEXT_BLOCK is empty, START_IV
 * has to rewind rIP by 16 and CBC_DEC/CTR_DEC are 16 larger than their
 * big endian counterparts.
 */
#ifndef __BIG_ENDIAN__		/* little endian variants */

#define LOAD_DATA(reg, off)						\
	lwbrx		reg,0,rSP;	/* byte reversed load	*/	\
	addi		rSP,rSP,4;	/* step to next word	*/

#define SAVE_DATA(reg, off)						\
	stwbrx		reg,0,rDP;	/* byte reversed store	*/	\
	addi		rDP,rDP,4;	/* step to next word	*/

#define NEXT_BLOCK		/* pointers already advanced	*/

#define LOAD_IV(reg, off)						\
	lwbrx		reg,0,rIP;	/* byte reversed load	*/	\
	addi		rIP,rIP,4;	/* step to next word	*/

#define SAVE_IV(reg, off)						\
	stwbrx		reg,0,rIP;	/* byte reversed store	*/	\
	addi		rIP,rIP,4;	/* step to next word	*/

#define START_IV							\
	addi		rIP,rIP,-16;	/* rewind to IV start	*/

#define CBC_DEC	32		/* one block back plus the 16 already added */
#define CTR_DEC	17		/* one byte back plus the 16 already added  */

#else				/* big endian variants */

#define LOAD_DATA(reg, off)						\
	lwz		reg,off(rSP);	/* plain load at offset	*/

#define SAVE_DATA(reg, off)						\
	stw		reg,off(rDP);	/* plain store at offset */

#define NEXT_BLOCK							\
	addi		rSP,rSP,16;	/* one block forward	*/	\
	addi		rDP,rDP,16;

#define LOAD_IV(reg, off)						\
	lwz		reg,off(rIP);	/* plain load at offset	*/

#define SAVE_IV(reg, off)						\
	stw		reg,off(rIP);	/* plain store at offset */

#define START_IV		/* rIP never moved, nothing to do */

#define CBC_DEC	16		/* step back exactly one block	*/
#define CTR_DEC	1		/* step back exactly one byte	*/

#endif

/* Variant without extra 32 bit registers: nothing to save. */
#define SAVE_0_REGS
#define LOAD_0_REGS

/*
 * Optional save/restore of rI0-rI3 (and additionally rG0-rG3) as plain
 * 32 bit values in the upper part of the 160 byte stack frame that
 * INITIALIZE_CRYPT creates. The variant is selected by the
 * "nr32bitregs" argument (0, 4 or 8) of INITIALIZE_CRYPT/FINALIZE_CRYPT,
 * which must match within one function.
 */
#define SAVE_4_REGS \
	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
	stw		rI1,100(r1); \
	stw		rI2,104(r1); \
	stw		rI3,108(r1);

#define LOAD_4_REGS \
	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
	lwz		rI1,100(r1); \
	lwz		rI2,104(r1); \
	lwz		rI3,108(r1);

#define SAVE_8_REGS \
	SAVE_4_REGS \
	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
	stw		rG1,116(r1); \
	stw		rG2,120(r1); \
	stw		rG3,124(r1);

#define LOAD_8_REGS \
	LOAD_4_REGS \
	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
	lwz		rG1,116(r1); \
	lwz		rG2,120(r1); \
	lwz		rG3,124(r1);

/*
 * Common function prologue.
 *
 * tab:         symbol of the lookup table whose address is put in rT0
 * nr32bitregs: 0, 4 or 8 - selects SAVE_<n>_REGS
 *
 * Creates a 160 byte stack frame, stores the link register at 8(r1),
 * stores r14-r23 with evstdw in 8 byte slots at 16..88(r1) and keeps a
 * copy of the key pointer rKP in rKS so that loops can reload it.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
	mflr		r0; \
	stwu		r1,-160(r1);	/* create stack frame		*/ \
	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
	stw		r0,8(r1);	/* save link register		*/ \
	ori		rT0,rT0,tab@l; \
	evstdw		r14,16(r1); \
	mr		rKS,rKP; \
	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
	evstdw		r18,48(r1); \
	evstdw		r19,56(r1); \
	evstdw		r20,64(r1); \
	evstdw		r21,72(r1); \
	evstdw		r22,80(r1); \
	evstdw		r23,88(r1); \
	SAVE_##nr32bitregs##_REGS

/*
 * Common function epilogue, counterpart of INITIALIZE_CRYPT.
 *
 * Reloads r14-r23 and the optional 32 bit registers, restores the link
 * register and then overwrites the first word of each of the ten 8 byte
 * save slots with zero before the stack frame is released.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
	lwz		r0,8(r1); \
	evldw		r14,16(r1);	/* restore SPE registers	*/ \
	evldw		r15,24(r1); \
	evldw		r16,32(r1); \
	evldw		r17,40(r1); \
	evldw		r18,48(r1); \
	evldw		r19,56(r1); \
	evldw		r20,64(r1); \
	evldw		r21,72(r1); \
	evldw		r22,80(r1); \
	evldw		r23,88(r1); \
	LOAD_##nr32bitregs##_REGS \
	mtlr		r0;		/* restore link register	*/ \
	xor		r0,r0,r0; \
	stw		r0,16(r1);	/* delete sensitive data	*/ \
	stw		r0,24(r1);	/* that we might have pushed	*/ \
	stw		r0,32(r1);	/* from other context that runs	*/ \
	stw		r0,40(r1);	/* the same code		*/ \
	stw		r0,48(r1); \
	stw		r0,56(r1); \
	stw		r0,64(r1); \
	stw		r0,72(r1); \
	stw		r0,80(r1); \
	stw		r0,88(r1); \
	addi		r1,r1,160;	/* cleanup stack frame		*/

/*
 * Byte swap two 32 bit registers: t0 = bswap(s0), t1 = bswap(s1).
 * The sources are left unchanged; targets must differ from the sources.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
	rotrwi		t1,s1,8; \
	rlwimi		t0,s0,8,8,15; \
	rlwimi		t1,s1,8,8,15; \
	rlwimi		t0,s0,8,24,31; \
	rlwimi		t1,s1,8,24,31;

/*
 * Multiply the 128 bit value d3:d2:d1:d0 (d3 most significant word) by
 * two in GF(2^128): shift the whole value left by one bit and, if the
 * top bit of d3 was set before the shift, xor 0x87 into d0.
 * t0 is a scratch register. cr0 is overwritten by the compare.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
	li		t0,0x87;	/* multiplication in GF128	*/ \
	cmpwi		d3,-1; \
	iselgt		t0,0,t0;	/* top bit clear: no reduction	*/ \
	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
	rotlwi		d3,d3,1; \
	rlwimi		d2,d1,0,0,0; \
	rotlwi		d2,d2,1; \
	rlwimi		d1,d0,0,0,0; \
	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
	rotlwi		d1,d1,1; \
	xor		d0,d0,t0;

/*
 * Start of a block operation: load the first four key words from rKP
 * into rW0-rW3, put the round count rRR into the count register and
 * set rD0-rD3 = (d0-d3) xor (rW0-rW3). The result always lands in
 * rD0-rD3, whatever registers are passed as d0-d3.
 */
#define START_KEY(d0, d1, d2, d3) \
	lwz		rW0,0(rKP); \
	mtctr		rRR; \
	lwz		rW1,4(rKP); \
	lwz		rW2,8(rKP); \
	lwz		rW3,12(rKP); \
	xor		rD0,d0,rW0; \
	xor		rD1,d1,rW1; \
	xor		rD2,d2,rW2; \
	xor		rD3,d3,rW3;

/*
 * Notes that apply to all functions below:
 *
 * - ppc_encrypt_block/ppc_decrypt_block are defined outside this file.
 *   After they return the code xors rD0-rD3 with rW0-rW3 to obtain the
 *   result block (presumably the final round key left in rW0-rW3 by the
 *   block routine - confirm against its implementation).
 * - The loops issue their "cmpwi rLN,..." before the block call and
 *   branch on it afterwards, so they rely on the block routines leaving
 *   cr0 untouched.
 * - Decryption sets rT1 = rT0 + 4096 before calling ppc_decrypt_block.
 */

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
	LOAD_DATA(rD0, 0)
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* reload key pointer per block	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	addi		rT1,rT0,4096
ppc_decrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* reload key pointer per block	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_decrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * The chaining value is kept in rI0-rI3. The last ciphertext block is
 * written back to the IV buffer on exit.
 *
 */
_GLOBAL(ppc_encrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* plaintext xor chaining value	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* ciphertext = next chain value */
	SAVE_DATA(rI0, 0)
	xor		rI1,rD1,rW1
	SAVE_DATA(rI1, 4)
	xor		rI2,rD2,rW2
	SAVE_DATA(rI2, 8)
	xor		rI3,rD3,rW3
	SAVE_DATA(rI3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_cbc_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * bytes is rounded down to a multiple of 16. The blocks are processed
 * from the last one to the first one: the last ciphertext block is
 * stored into the IV buffer up front (it is the IV for a follow-up
 * call), every block except the first is xored with its preceding
 * ciphertext block, and the first block is xored with the IV that was
 * passed in (held in rI0-rI3).
 *
 */
_GLOBAL(ppc_decrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
	li		rT1,15
	LOAD_IV(rI0, 0)
	andc		rLN,rLN,rT1	/* drop a trailing partial block */
	LOAD_IV(rI1, 4)
	subi		rLN,rLN,16	/* rLN = offset of last block	*/
	LOAD_IV(rI2, 8)
	add		rSP,rSP,rLN	/* reverse processing		*/
	LOAD_IV(rI3, 12)
	add		rDP,rDP,rLN
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_IV
	SAVE_IV(rD0, 0)			/* last ciphertext -> IV buffer	*/
	SAVE_IV(rD1, 4)
	SAVE_IV(rD2, 8)
	cmpwi		rLN,16
	SAVE_IV(rD3, 12)
	bt		lt,ppc_decrypt_cbc_end	/* only one block	*/
ppc_decrypt_cbc_loop:
	mr		rKP,rKS
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	subi		rLN,rLN,16
	subi		rSP,rSP,CBC_DEC	/* step to preceding block	*/
	xor		rW0,rD0,rW0
	LOAD_DATA(rD0, 0)
	xor		rW1,rD1,rW1
	LOAD_DATA(rD1, 4)
	xor		rW2,rD2,rW2
	LOAD_DATA(rD2, 8)
	xor		rW3,rD3,rW3
	LOAD_DATA(rD3, 12)
	xor		rW0,rW0,rD0	/* xor with preceding ciphertext */
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rD1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rD2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rD3
	SAVE_DATA(rW3, 12)
	cmpwi		rLN,15
	subi		rDP,rDP,CBC_DEC
	bt		gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
	mr		rKP,rKS
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rW0,rW0,rD0
	xor		rW1,rW1,rD1
	xor		rW2,rW2,rD2
	xor		rW3,rW3,rD3
	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rI1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rI2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rI3
	SAVE_DATA(rW3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The counter lives in rI0-rI3 (rI3 least significant word) and is
 * incremented by one per processed block, including a trailing partial
 * block. For a partial block the keystream is parked in the IV buffer,
 * xored bytewise with the input, and afterwards overwritten with the
 * updated counter.
 *
 */
_GLOBAL(ppc_crypt_ctr)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rLN,16
	LOAD_IV(rI3, 12)
	START_IV
	bt		lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
	mr		rKP,rKS
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0	/* rW0-rW3 = keystream block	*/
	xor		rW1,rD1,rW1
	xor		rW2,rD2,rW2
	xor		rW3,rD3,rW3
	LOAD_DATA(rD0, 0)
	subi		rLN,rLN,16
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	addic		rI3,rI3,1	/* increase counter		*/
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
	NEXT_BLOCK
	cmpwi		rLN,15
	bt		gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
	cmpwi		rLN,0
	bt		eq,ppc_crypt_ctr_end
	mr		rKP,rKS
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0
	SAVE_IV(rW0, 0)			/* park keystream in IV buffer	*/
	xor		rW1,rD1,rW1
	SAVE_IV(rW1, 4)
	xor		rW2,rD2,rW2
	SAVE_IV(rW2, 8)
	xor		rW3,rD3,rW3
	SAVE_IV(rW3, 12)
	mtctr		rLN		/* remaining byte count		*/
	subi		rIP,rIP,CTR_DEC	/* one byte before each buffer	*/
	subi		rSP,rSP,1	/* for the update form accesses	*/
	subi		rDP,rDP,1
ppc_crypt_ctr_xorbyte:
	lbzu		rW4,1(rIP)	/* bytewise xor for partial block */
	lbzu		rW5,1(rSP)
	xor		rW4,rW4,rW5
	stbu		rW4,1(rDP)
	bdnz		ppc_crypt_ctr_xorbyte
	subf		rIP,rLN,rIP	/* rewind rIP to IV start	*/
	addi		rIP,rIP,1
	addic		rI3,rI3,1
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
ppc_crypt_ctr_end:
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * Only whole blocks are processed, at least one. The tweak is held
 * twice: rI0-rI3 for the xor with the data and byte swapped in rG0-rG3
 * for the GF(2^128) multiplication. The tweak for the next block is
 * written back to the IV buffer on exit.
 *
 */
_GLOBAL(ppc_encrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_encrypt_xts_notweak
	mr		rKP,rKT
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* rI0-rI3 = encrypted IV	*/
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
ppc_encrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* pre-whitening with tweak	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* post-whitening with tweak	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	/*
	 * Loop only while a whole block remains, exactly like the ECB and
	 * CBC loops. Comparing against 0 would process one more full
	 * block for a byte count that is not a multiple of 16 and run
	 * past the end of both buffers.
	 */
	cmpwi		rLN,15
	NEXT_BLOCK
	bt		gt,ppc_encrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * Only whole blocks are processed, at least one. The IV is always
 * encrypted, never decrypted: for that step rT0 is temporarily moved
 * 4096 bytes down from PPC_AES_4K_DECTAB (presumably onto the
 * encryption table - confirm against the table layout) and restored
 * afterwards.
 *
 */
_GLOBAL(ppc_decrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
	LOAD_IV(rI0, 0)
	addi		rT1,rT0,4096
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_decrypt_xts_notweak
	subi		rT0,rT0,4096	/* table for the IV encryption	*/
	mr		rKP,rKT
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* rI0-rI3 = encrypted IV	*/
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
	addi		rT0,rT0,4096	/* back to the decryption table	*/
ppc_decrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* pre-whitening with tweak	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* post-whitening with tweak	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	/*
	 * Loop only while a whole block remains, exactly like the ECB and
	 * CBC loops. Comparing against 0 would process one more full
	 * block for a byte count that is not a multiple of 16 and run
	 * past the end of both buffers.
	 */
	cmpwi		rLN,15
	NEXT_BLOCK
	bt		gt,ppc_decrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr