crypto: padlock-aes - work around Nano CPU errata in ECB mode

The VIA Nano processor has a bug that makes it prefetch extra data
during encryption operations, causing spurious page faults when the
input lies near the end of a mapped page. Extend the existing ECB-mode
workaround to copy such data into a temporary buffer sized for the
CPU's actual fetch width, avoiding the problem.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Chuck Ebbert and committed by Herbert Xu (a76c1c23, 1d89b30c)

+46 -35
drivers/crypto/padlock-aes.c
···
 #include <linux/percpu.h>
 #include <linux/smp.h>
 #include <asm/byteorder.h>
+#include <asm/processor.h>
 #include <asm/i387.h>
 #include "padlock.h"
+
+/* number of data blocks actually fetched for each xcrypt insn */
+static unsigned int ecb_fetch_blocks = 2;
+static unsigned int cbc_fetch_blocks = 1;
+
+#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
+#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)

 /* Control word. */
 struct cword {
···
  */

 static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
-				  struct cword *control_word)
+				  struct cword *control_word, int count)
 {
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-		      : "d"(control_word), "b"(key), "c"(1));
+		      : "d"(control_word), "b"(key), "c"(count));
 }

-static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key, struct cword *cword)
+static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key,
+			   struct cword *cword, int count)
 {
-	u8 buf[AES_BLOCK_SIZE * 2 + PADLOCK_ALIGNMENT - 1];
+	/*
+	 * Padlock prefetches extra data so we must provide mapped input buffers.
+	 * Assume there are at least 16 bytes of stack already in use.
+	 */
+	u8 buf[AES_BLOCK_SIZE * 7 + PADLOCK_ALIGNMENT - 1];
 	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

-	memcpy(tmp, in, AES_BLOCK_SIZE);
-	padlock_xcrypt(tmp, out, key, cword);
+	memcpy(tmp, in, count * AES_BLOCK_SIZE);
+	padlock_xcrypt(tmp, out, key, cword, count);
 }

 static inline void aes_crypt(const u8 *in, u8 *out, u32 *key,
-			     struct cword *cword)
+			     struct cword *cword, int count)
 {
-	/* padlock_xcrypt requires at least two blocks of data. */
-	if (unlikely(!(((unsigned long)in ^ (PAGE_SIZE - AES_BLOCK_SIZE)) &
-		       (PAGE_SIZE - 1)))) {
-		aes_crypt_copy(in, out, key, cword);
+	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
+	 * We could avoid some copying here but it's probably not worth it.
+	 */
+	if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
+		aes_crypt_copy(in, out, key, cword, count);
 		return;
 	}

-	padlock_xcrypt(in, out, key, cword);
+	padlock_xcrypt(in, out, key, cword, count);
 }

 static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 				      void *control_word, u32 count)
 {
-	if (count == 1) {
-		aes_crypt(input, output, key, control_word);
+	u32 initial = count & (ecb_fetch_blocks - 1);
+
+	if (count < ecb_fetch_blocks) {
+		aes_crypt(input, output, key, control_word, count);
 		return;
 	}

-	asm volatile ("test $1, %%cl;"
-		      "je 1f;"
-#ifndef CONFIG_X86_64
-		      "lea -1(%%ecx), %%eax;"
-		      "mov $1, %%ecx;"
-#else
-		      "lea -1(%%rcx), %%rax;"
-		      "mov $1, %%rcx;"
-#endif
-		      ".byte 0xf3,0x0f,0xa7,0xc8;"	/* rep xcryptecb */
-#ifndef CONFIG_X86_64
-		      "mov %%eax, %%ecx;"
-#else
-		      "mov %%rax, %%rcx;"
-#endif
-		      "1:"
-		      ".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
+	if (initial)
+		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
+			      : "+S"(input), "+D"(output)
+			      : "d"(control_word), "b"(key), "c"(initial));
+
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-		      : "d"(control_word), "b"(key), "c"(count)
-		      : "ax");
+		      : "d"(control_word), "b"(key), "c"(count - initial));
 }

 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
···

 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt);
+	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
···

 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt);
+	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
···
 static int __init padlock_init(void)
 {
 	int ret;
+	struct cpuinfo_x86 *c = &cpu_data(0);

 	if (!cpu_has_xcrypt) {
 		printk(KERN_NOTICE PFX "VIA PadLock not detected.\n");
···
 		goto cbc_aes_err;

 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
+
+	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
+		ecb_fetch_blocks = 8;
+		cbc_fetch_blocks = 4;	/* NOTE: notused */
+		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
+	}

 out:
 	return ret;
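To illustrate the condition the patched aes_crypt() guards against, here is a minimal user-space sketch (not part of the patch): it assumes a 4 KiB page and the worst-case eight-block fetch of Nano stepping-2 parts, and shows when an input block close to the end of a page would have to be bounced through an aligned temporary buffer. The helper name needs_bounce_buffer() and the standalone setup are illustrative only, and the page offset is computed with the usual mask rather than reproducing the patch's exact expression.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE      4096
	#define AES_BLOCK_SIZE 16

	/* Worst-case blocks fetched per "rep xcryptecb" on Nano stepping 2. */
	static unsigned int ecb_fetch_blocks = 8;
	#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)

	/*
	 * Return true if a fetch of ecb_fetch_bytes starting at "in" would run
	 * past the end of the page containing "in", i.e. the hardware prefetch
	 * could touch the following (possibly unmapped) page and fault.
	 */
	static bool needs_bounce_buffer(const void *in)
	{
		unsigned long offset = (uintptr_t)in & (PAGE_SIZE - 1);

		return offset + ecb_fetch_bytes > PAGE_SIZE;
	}

	int main(void)
	{
		/* A page-aligned buffer standing in for caller-supplied plaintext. */
		static uint8_t page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));

		const uint8_t *first = page;                             /* offset 0    */
		const uint8_t *last = page + PAGE_SIZE - AES_BLOCK_SIZE; /* offset 4080 */

		printf("first block: copy needed = %d\n", needs_bounce_buffer(first));
		printf("last block:  copy needed = %d\n", needs_bounce_buffer(last));
		return 0;
	}

In the patch itself, aes_crypt() falls back to aes_crypt_copy() for such inputs; the bounce buffer there holds seven blocks, and the comment about at least 16 bytes of stack already being in use accounts for the eighth block the hardware may prefetch.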