crypto: padlock-aes - work around Nano CPU errata in CBC mode

Extend the previous workarounds for the prefetch bug to cover CBC mode,
and clean up the code a bit.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
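
[Editorial note] For readers unfamiliar with the earlier ECB fix referenced above: the PadLock unit can prefetch a few blocks beyond the buffer it was asked to process, so an input that ends near a page boundary could touch an unmapped page. The workaround bounces such inputs through a stack buffer that is guaranteed to be fully mapped. Below is a minimal userspace sketch of that idea only; the names (crypt_safe, xcrypt_hw), the hard-coded 4 KiB page, and the memcpy standing in for the instruction are all hypothetical, not the driver code itself.

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SIZE        4096UL
#define AES_BLOCK_SIZE   16
#define MAX_FETCH_BLOCKS 8	/* worst-case blocks the hardware may prefetch */

/* Hypothetical stand-in for the rep xcrypt instruction wrapper. */
static void xcrypt_hw(const uint8_t *in, uint8_t *out, int count)
{
	memcpy(out, in, (size_t)count * AES_BLOCK_SIZE);
}

/*
 * Process "count" blocks (count <= MAX_FETCH_BLOCKS, as on the driver's
 * copy path), bouncing through a local buffer whenever the prefetch
 * window would run past the end of the input's page.
 */
static void crypt_safe(const uint8_t *in, uint8_t *out, int count)
{
	unsigned long fetch_bytes = MAX_FETCH_BLOCKS * AES_BLOCK_SIZE;
	unsigned long offset = (unsigned long)in & (PAGE_SIZE - 1);

	assert(count > 0 && count <= MAX_FETCH_BLOCKS);

	if (offset + fetch_bytes > PAGE_SIZE) {
		/* Input ends near a page boundary: copy it somewhere safe first. */
		uint8_t buf[MAX_FETCH_BLOCKS * AES_BLOCK_SIZE];

		memcpy(buf, in, (size_t)count * AES_BLOCK_SIZE);
		xcrypt_hw(buf, out, count);
		return;
	}

	xcrypt_hw(in, out, count);
}

int main(void)
{
	uint8_t in[2 * AES_BLOCK_SIZE] = { 0 }, out[2 * AES_BLOCK_SIZE];

	crypt_safe(in, out, 2);
	return 0;
}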

authored by Chuck Ebbert and committed by Herbert Xu 8d8409f7 a76c1c23

+65 -18
drivers/crypto/padlock-aes.c
···
 #include <asm/i387.h>
 #include "padlock.h"
 
-/* number of data blocks actually fetched for each xcrypt insn */
+/*
+ * Number of data blocks actually fetched for each xcrypt insn.
+ * Processors with prefetch errata will fetch extra blocks.
+ */
 static unsigned int ecb_fetch_blocks = 2;
-static unsigned int cbc_fetch_blocks = 1;
-
+#define MAX_ECB_FETCH_BLOCKS (8)
 #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
+
+static unsigned int cbc_fetch_blocks = 1;
+#define MAX_CBC_FETCH_BLOCKS (4)
 #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
 
 /* Control word. */
···
  * should be used only inside the irq_ts_save/restore() context
  */
 
-static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
+static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 				  struct cword *control_word, int count)
 {
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
···
 		      : "d"(control_word), "b"(key), "c"(count));
 }
 
-static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key,
+static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
+				 u8 *iv, struct cword *control_word, int count)
+{
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+		      : "+S" (input), "+D" (output), "+a" (iv)
+		      : "d" (control_word), "b" (key), "c" (count));
+	return iv;
+}
+
+static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
 			   struct cword *cword, int count)
 {
 	/*
 	 * Padlock prefetches extra data so we must provide mapped input buffers.
 	 * Assume there are at least 16 bytes of stack already in use.
 	 */
-	u8 buf[AES_BLOCK_SIZE * 7 + PADLOCK_ALIGNMENT - 1];
+	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
 	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 
 	memcpy(tmp, in, count * AES_BLOCK_SIZE);
-	padlock_xcrypt(tmp, out, key, cword, count);
+	rep_xcrypt_ecb(tmp, out, key, cword, count);
 }
 
-static inline void aes_crypt(const u8 *in, u8 *out, u32 *key,
+static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
+			  u8 *iv, struct cword *cword, int count)
+{
+	/*
+	 * Padlock prefetches extra data so we must provide mapped input buffers.
+	 * Assume there are at least 16 bytes of stack already in use.
+	 */
+	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
+	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+	memcpy(tmp, in, count * AES_BLOCK_SIZE);
+	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
+}
+
+static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
 			     struct cword *cword, int count)
 {
 	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
 	 * We could avoid some copying here but it's probably not worth it.
 	 */
 	if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
-		aes_crypt_copy(in, out, key, cword, count);
+		ecb_crypt_copy(in, out, key, cword, count);
 		return;
 	}
 
-	padlock_xcrypt(in, out, key, cword, count);
+	rep_xcrypt_ecb(in, out, key, cword, count);
+}
+
+static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
+			    u8 *iv, struct cword *cword, int count)
+{
+	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
+	if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
+		return cbc_crypt_copy(in, out, key, iv, cword, count);
+
+	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
 }
 
 static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
···
 	u32 initial = count & (ecb_fetch_blocks - 1);
 
 	if (count < ecb_fetch_blocks) {
-		aes_crypt(input, output, key, control_word, count);
+		ecb_crypt(input, output, key, control_word, count);
 		return;
 	}
 
···
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 				     u8 *iv, void *control_word, u32 count)
 {
-	/* rep xcryptcbc */
-	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"
+	u32 initial = count & (cbc_fetch_blocks - 1);
+
+	if (count < cbc_fetch_blocks)
+		return cbc_crypt(input, output, key, iv, control_word, count);
+
+	if (initial)
+		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+			      : "+S" (input), "+D" (output), "+a" (iv)
+			      : "d" (control_word), "b" (key), "c" (initial));
+
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count));
+		      : "d" (control_word), "b" (key), "c" (count-initial));
 	return iv;
 }
···
 
 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
+	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
···
 
 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
+	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
···
 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
 	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
-		ecb_fetch_blocks = 8;
-		cbc_fetch_blocks = 4; /* NOTE: notused */
+		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
+		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
 		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
 	}
 
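
[Editorial note] The CBC path now mirrors the ECB workaround's splitting strategy: for requests of at least cbc_fetch_blocks blocks, any odd remainder is handled by a first rep xcryptcbc and the rest, a whole multiple of the fetch quantum, by a second one. A toy illustration of that arithmetic follows; the values are hypothetical and this is not driver code.

#include <stdio.h>

int main(void)
{
	unsigned int cbc_fetch_blocks = 4;	/* fetch quantum on affected Nano steppings */
	unsigned int count = 11;		/* blocks requested by the caller */

	/* Remainder that does not fill a whole fetch quantum (11 % 4 = 3). */
	unsigned int initial = count & (cbc_fetch_blocks - 1);

	printf("first pass:  %u blocks\n", initial);		/* 3 */
	printf("second pass: %u blocks\n", count - initial);	/* 8, a multiple of 4 */
	return 0;
}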