crypto: caam - Fix edesc/iv ordering mixup

The attempt to add DMA alignment padding by moving IV to the front
of edesc was completely broken as it didn't change the places where
edesc was freed.

It's also wrong as the IV may still share a cache-line with the
edesc.

Fix this by restoring the original layout and simply reserving
enough memory so that the IV is on a DMA cache-line by itself.

Reported-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Fixes: 199354d7fb6e ("crypto: caam - Remove GFP_DMA and add DMA alignment padding")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
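
For orientation only, and not part of the patch itself: a minimal userspace sketch of the layout the fix establishes, assuming a 64-byte DMA cache line and example sizes for the descriptor data. The driver-owned edesc, hardware descriptor and link table come first; the IV region then starts on the next DMA cache-line boundary, so it never shares a cache line with fields the CPU keeps writing.

/* Illustrative sketch only; all sizes and the 64-byte cache line are assumptions. */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	size_t cacheline = 64;		/* stand-in for dma_get_cache_alignment() */
	size_t edesc_size = 88, desc_bytes = 48, sec4_sg_bytes = 64, ivsize = 16;

	size_t used = edesc_size + desc_bytes + sec4_sg_bytes;	/* driver-owned data */
	size_t iv_off = ALIGN(used, cacheline);			/* IV on its own line */

	printf("IV at offset %zu, allocation %zu bytes\n",
	       iv_off, iv_off + ALIGN(ivsize, cacheline));
	return 0;
}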

Changed files (+53 -23)

drivers/crypto/caam/caamalg.c (+19 -7)
···
 #include <crypto/xts.h>
 #include <asm/unaligned.h>
 #include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 /*
  * crypto alg
···
 	crypto_finalize_aead_request(jrp->engine, req, ecode);
 }
 
+static inline u8 *skcipher_edesc_iv(struct skcipher_edesc *edesc)
+{
+
+	return PTR_ALIGN((u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
+			 dma_get_cache_alignment());
+}
+
 static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err,
 				void *context)
 {
···
 	 * This is used e.g. by the CTS mode.
 	 */
 	if (ivsize && !ecode) {
-		memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
-		       ivsize);
+		memcpy(req->iv, skcipher_edesc_iv(edesc), ivsize);
 
 		print_hex_dump_debug("dstiv @" __stringify(__LINE__)": ",
 				     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
···
 	/*
 	 * allocate space for base edesc and hw desc commands, link tables, IV
 	 */
-	aligned_size = ALIGN(ivsize, __alignof__(*edesc));
-	aligned_size += sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
+	aligned_size = sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
 	aligned_size = ALIGN(aligned_size, dma_get_cache_alignment());
-	iv = kzalloc(aligned_size, flags);
-	if (!iv) {
+	aligned_size += ~(ARCH_KMALLOC_MINALIGN - 1) &
+			(dma_get_cache_alignment() - 1);
+	aligned_size += ALIGN(ivsize, dma_get_cache_alignment());
+	edesc = kzalloc(aligned_size, flags);
+	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
 			   0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->mapped_src_nents = mapped_src_nents;
···
 
 	/* Make sure IV is located in a DMAable area */
 	if (ivsize) {
+		iv = skcipher_edesc_iv(edesc);
 		memcpy(iv, req->iv, ivsize);
 
 		iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL);
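
One non-obvious line above is the ~(ARCH_KMALLOC_MINALIGN - 1) & (dma_get_cache_alignment() - 1) term. kzalloc() only guarantees ARCH_KMALLOC_MINALIGN alignment, so the buffer may start up to dma_get_cache_alignment() - ARCH_KMALLOC_MINALIGN bytes short of a DMA cache-line boundary; that worst-case slack must be reserved so the PTR_ALIGN() in skcipher_edesc_iv() cannot step past the end of the allocation. A standalone arithmetic check of that expression follows; the 8 and 64 are example values, not taken from any particular architecture.

/* Illustration only: worst-case padding between kmalloc()'s minimum
 * alignment and the DMA cache line, mirroring the expression in the patch.
 */
#include <stdio.h>

int main(void)
{
	unsigned long minalign = 8;	/* example ARCH_KMALLOC_MINALIGN */
	unsigned long dma_align = 64;	/* example dma_get_cache_alignment() */

	/* A pointer that is only minalign-aligned can sit at most this many
	 * bytes below the next dma_align boundary.
	 */
	unsigned long slack = ~(minalign - 1) & (dma_align - 1);

	printf("worst-case slack: %lu bytes\n", slack);	/* prints 56 */
	return 0;
}
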
drivers/crypto/caam/caamalg_qi.c (+26 -14)
···
 #include "caamalg_desc.h"
 #include <crypto/xts.h>
 #include <asm/unaligned.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
+#include <linux/string.h>
 
 /*
  * crypto alg
···
 			   false);
 }
 
+static inline u8 *skcipher_edesc_iv(struct skcipher_edesc *edesc)
+{
+	return PTR_ALIGN((u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
+			 dma_get_cache_alignment());
+}
+
 static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
 	struct skcipher_edesc *edesc;
···
 	 * This is used e.g. by the CTS mode.
 	 */
 	if (!ecode)
-		memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
-		       ivsize);
+		memcpy(req->iv, skcipher_edesc_iv(edesc), ivsize);
 
 	qi_cache_free(edesc);
 	skcipher_request_complete(req, ecode);
···
 	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
 	struct qm_sg_entry *sg_table, *fd_sgt;
 	struct caam_drv_ctx *drv_ctx;
+	unsigned int len;
 
 	drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT);
 	if (IS_ERR(drv_ctx))
···
 	qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);
 
 	qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
-	if (unlikely(ALIGN(ivsize, __alignof__(*edesc)) +
-		     offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes >
-		     CAAM_QI_MEMCACHE_SIZE)) {
+
+	len = offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes;
+	len = ALIGN(len, dma_get_cache_alignment());
+	len += ivsize;
+
+	if (unlikely(len > CAAM_QI_MEMCACHE_SIZE)) {
 		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
 			qm_sg_ents, ivsize);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
···
 	}
 
 	/* allocate space for base edesc, link tables and IV */
-	iv = qi_cache_alloc(flags);
-	if (unlikely(!iv)) {
+	edesc = qi_cache_alloc(flags);
+	if (unlikely(!edesc)) {
 		dev_err(qidev, "could not allocate extended descriptor\n");
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
 			   0, DMA_NONE, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
+	edesc->src_nents = src_nents;
+	edesc->dst_nents = dst_nents;
+	edesc->qm_sg_bytes = qm_sg_bytes;
+	edesc->drv_req.app_ctx = req;
+	edesc->drv_req.cbk = skcipher_done;
+	edesc->drv_req.drv_ctx = drv_ctx;
 
 	/* Make sure IV is located in a DMAable area */
 	sg_table = &edesc->sgt[0];
+	iv = skcipher_edesc_iv(edesc);
 	memcpy(iv, req->iv, ivsize);
 
 	iv_dma = dma_map_single(qidev, iv, ivsize, DMA_BIDIRECTIONAL);
···
 		return ERR_PTR(-ENOMEM);
 	}
 
-	edesc->src_nents = src_nents;
-	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
-	edesc->qm_sg_bytes = qm_sg_bytes;
-	edesc->drv_req.app_ctx = req;
-	edesc->drv_req.cbk = skcipher_done;
-	edesc->drv_req.drv_ctx = drv_ctx;
 
 	dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
 	sg_to_qm_sg(req->src, req->cryptlen, sg_table + 1, 0);
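
The QI path does not call kzalloc(); requests come out of the fixed-size caamqicache objects, so the check above only has to verify that the S/G table plus a cache-line-aligned IV still fits in CAAM_QI_MEMCACHE_SIZE. A small sketch of that bound follows; the 768-byte object size and the field sizes are assumptions, not values from the headers.

/* Illustration only: the fits-in-one-cache-object check used on the QI path. */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long memcache = 768;	/* example CAAM_QI_MEMCACHE_SIZE */
	unsigned long sgt_off = 128;	/* example offsetof(skcipher_edesc, sgt) */
	unsigned long qm_sg_bytes = 256, ivsize = 16, dma_align = 64;

	/* S/G table first, then the IV rounded up to the next cache line. */
	unsigned long len = ALIGN(sgt_off + qm_sg_bytes, dma_align) + ivsize;

	printf("%lu bytes needed: %s\n", len,
	       len > memcache ? "reject request" : "fits");
	return 0;
}
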
drivers/crypto/caam/qi.c (+8 -2)
···
  */
 
 #include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include <soc/fsl/qman.h>
 
 #include "debugfs.h"
···
 		napi_enable(irqtask);
 	}
 
-	qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
-				     0, NULL);
+	qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE,
+				     dma_get_cache_alignment(), 0, NULL);
 	if (!qi_cache) {
 		dev_err(qidev, "Can't allocate CAAM cache\n");
 		free_rsp_fqs();
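
The qi.c change is what makes the PTR_ALIGN() in the QI-path skcipher_edesc_iv() safe: creating caamqicache with dma_get_cache_alignment() as its alignment means every object kmem_cache_alloc() hands back already starts on a DMA cache line. A minimal kernel-module-style sketch of that pattern follows; the cache name, the 768-byte object size and the init/exit wrappers are hypothetical and purely for illustration.

/* Illustrative sketch only, not part of the patch. */
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>

static struct kmem_cache *demo_cache;	/* hypothetical cache for the sketch */

static int __init demo_init(void)
{
	/* With a non-zero align argument, every object returned by
	 * kmem_cache_alloc() starts on that boundary, so offsets rounded
	 * up with PTR_ALIGN() inside the object cannot overrun it.
	 */
	demo_cache = kmem_cache_create("demo_cache", 768,
				       dma_get_cache_alignment(), 0, NULL);
	return demo_cache ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	kmem_cache_destroy(demo_cache);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");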