crypto: padlock-aes - work around Nano CPU errata in CBC mode

Extend the previous workarounds for the prefetch bug to cover CBC mode,
and clean up the code a bit.
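
The workaround itself is a page-boundary check plus a bounce buffer: on the
affected Nano steppings the xcrypt instructions may prefetch more input than
the blocks they are asked to process, so an input buffer that ends close to a
page boundary is first copied into stack storage that is guaranteed to be
mapped. The userspace sketch below only illustrates that idea; the DEMO_*
constants, demo_crypt()/demo_xcrypt() and the conventional offset mask
(PAGE_SIZE - 1) are invented for the example and are not the driver's code.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define DEMO_PAGE_SIZE    4096UL
    #define DEMO_BLOCK_SIZE   16                 /* AES block size */
    #define DEMO_FETCH_BLOCKS 4                  /* blocks the engine may prefetch */
    #define DEMO_FETCH_BYTES  (DEMO_FETCH_BLOCKS * DEMO_BLOCK_SIZE)

    /* Stand-in for the hardware instruction; it only copies data here. */
    static void demo_xcrypt(const uint8_t *in, uint8_t *out, int count)
    {
            memcpy(out, in, (size_t)count * DEMO_BLOCK_SIZE);
    }

    /*
     * If the prefetch window would run past the end of the page holding 'in',
     * bounce the input through a local buffer that is fully mapped.  Callers
     * on this path pass count < DEMO_FETCH_BLOCKS, as the driver does.
     */
    static void demo_crypt(const uint8_t *in, uint8_t *out, int count)
    {
            uint8_t bounce[DEMO_FETCH_BYTES];

            if (((uintptr_t)in & (DEMO_PAGE_SIZE - 1)) + DEMO_FETCH_BYTES > DEMO_PAGE_SIZE) {
                    memcpy(bounce, in, (size_t)count * DEMO_BLOCK_SIZE);
                    demo_xcrypt(bounce, out, count);
                    return;
            }
            demo_xcrypt(in, out, count);
    }

    int main(void)
    {
            uint8_t in[DEMO_BLOCK_SIZE] = { 0 }, out[DEMO_BLOCK_SIZE];

            demo_crypt(in, out, 1);
            printf("processed 1 block\n");
            return 0;
    }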

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Chuck Ebbert and committed by Herbert Xu · 8d8409f7 a76c1c23

+65 -18
drivers/crypto/padlock-aes.c
···
 #include <asm/i387.h>
 #include "padlock.h"
 
-/* number of data blocks actually fetched for each xcrypt insn */
+/*
+ * Number of data blocks actually fetched for each xcrypt insn.
+ * Processors with prefetch errata will fetch extra blocks.
+ */
 static unsigned int ecb_fetch_blocks = 2;
-static unsigned int cbc_fetch_blocks = 1;
-
+#define MAX_ECB_FETCH_BLOCKS (8)
 #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
+
+static unsigned int cbc_fetch_blocks = 1;
+#define MAX_CBC_FETCH_BLOCKS (4)
 #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
 
 /* Control word. */
···
  * should be used only inside the irq_ts_save/restore() context
  */
 
-static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
+static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
				  struct cword *control_word, int count)
 {
	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
···
		      : "d"(control_word), "b"(key), "c"(count));
 }
 
-static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key,
+static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
+				 u8 *iv, struct cword *control_word, int count)
+{
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+		      : "+S" (input), "+D" (output), "+a" (iv)
+		      : "d" (control_word), "b" (key), "c" (count));
+	return iv;
+}
+
+static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
			   struct cword *cword, int count)
 {
	/*
	 * Padlock prefetches extra data so we must provide mapped input buffers.
	 * Assume there are at least 16 bytes of stack already in use.
	 */
-	u8 buf[AES_BLOCK_SIZE * 7 + PADLOCK_ALIGNMENT - 1];
+	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 
	memcpy(tmp, in, count * AES_BLOCK_SIZE);
-	padlock_xcrypt(tmp, out, key, cword, count);
+	rep_xcrypt_ecb(tmp, out, key, cword, count);
 }
 
-static inline void aes_crypt(const u8 *in, u8 *out, u32 *key,
+static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
+			  u8 *iv, struct cword *cword, int count)
+{
+	/*
+	 * Padlock prefetches extra data so we must provide mapped input buffers.
+	 * Assume there are at least 16 bytes of stack already in use.
+	 */
+	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
+	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+	memcpy(tmp, in, count * AES_BLOCK_SIZE);
+	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
+}
+
+static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
			     struct cword *cword, int count)
 {
	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
	 * We could avoid some copying here but it's probably not worth it.
	 */
	if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
-		aes_crypt_copy(in, out, key, cword, count);
+		ecb_crypt_copy(in, out, key, cword, count);
		return;
	}
 
-	padlock_xcrypt(in, out, key, cword, count);
+	rep_xcrypt_ecb(in, out, key, cword, count);
+}
+
+static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
+			    u8 *iv, struct cword *cword, int count)
+{
+	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
+	if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
+		return cbc_crypt_copy(in, out, key, iv, cword, count);
+
+	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
 }
 
 static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
···
	u32 initial = count & (ecb_fetch_blocks - 1);
 
	if (count < ecb_fetch_blocks) {
-		aes_crypt(input, output, key, control_word, count);
+		ecb_crypt(input, output, key, control_word, count);
		return;
	}
 
···
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
				     u8 *iv, void *control_word, u32 count)
 {
-	/* rep xcryptcbc */
-	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"
+	u32 initial = count & (cbc_fetch_blocks - 1);
+
+	if (count < cbc_fetch_blocks)
+		return cbc_crypt(input, output, key, iv, control_word, count);
+
+	if (initial)
+		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+			      : "+S" (input), "+D" (output), "+a" (iv)
+			      : "d" (control_word), "b" (key), "c" (initial));
+
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count));
+		      : "d" (control_word), "b" (key), "c" (count-initial));
	return iv;
 }
 
···
 
	padlock_reset_key(&ctx->cword.encrypt);
	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
+	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
	irq_ts_restore(ts_state);
	padlock_store_cword(&ctx->cword.encrypt);
 }
···
 
	padlock_reset_key(&ctx->cword.encrypt);
	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
+	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
	irq_ts_restore(ts_state);
	padlock_store_cword(&ctx->cword.encrypt);
 }
···
	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
-		ecb_fetch_blocks = 8;
-		cbc_fetch_blocks = 4; /* NOTE: notused */
+		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
+		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
	}
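
For readers following the bulk paths above: once the workaround raises the
fetch size, padlock_xcrypt_ecb()/padlock_xcrypt_cbc() split a request into a
remainder of count & (fetch_blocks - 1) blocks issued first, followed by the
rest, which is then a multiple of the fetch size. A small sketch of that
arithmetic with an invented name (split_request is not in the driver):

    #include <assert.h>
    #include <stdio.h>

    /* fetch_blocks is a power of two, as in the driver (2 or 8 for ECB, 1 or 4 for CBC). */
    static void split_request(unsigned int count, unsigned int fetch_blocks)
    {
            unsigned int initial = count & (fetch_blocks - 1);

            assert(fetch_blocks && (fetch_blocks & (fetch_blocks - 1)) == 0);

            if (count < fetch_blocks) {
                    printf("small request: %u block(s) via the copy-aware helper\n", count);
                    return;
            }
            if (initial)
                    printf("first rep xcrypt:  %u block(s)\n", initial);
            printf("second rep xcrypt: %u block(s), a multiple of %u\n",
                   count - initial, fetch_blocks);
    }

    int main(void)
    {
            split_request(3, 4);    /* below the fetch size: single call     */
            split_request(13, 4);   /* 1 block first, then 12 (13 = 1 + 3*4) */
            return 0;
    }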