Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: scatterwalk - don't split at page boundaries when !HIGHMEM

When !HIGHMEM, the kmap_local_page() in the scatterlist walker does not
actually map anything, and the address it returns is just the address
from the kernel's direct map, where each sg entry's data is virtually
contiguous. To improve performance, stop unnecessarily clamping data
segments to page boundaries in this case.

For now, still limit segments to PAGE_SIZE. This is needed to prevent
preemption from being disabled for too long when SIMD is used, and to
support the alignmask case which still uses a page-sized bounce buffer.

Even so, this change still helps a lot in cases where messages cross a
page boundary. For example, testing IPsec with AES-GCM on x86_64, the
messages are 1424 bytes which is less than PAGE_SIZE, but on the Rx side
over a third cross a page boundary. These ended up being processed in
three parts, with the middle part going through skcipher_next_slow which
uses a 16-byte bounce buffer. That was causing a significant amount of
overhead which unnecessarily reduced the performance benefit of the new
x86_64 AES-GCM assembly code. This change solves the problem; all these
messages now get passed to the assembly code in one part.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Eric Biggers; committed by Herbert Xu.
641938d3 fa94e454

+59 -24
+2 -2
crypto/skcipher.c
@@ -206,8 +206,8 @@
 
 	diff = offset_in_page(walk->in.offset) -
 	       offset_in_page(walk->out.offset);
-	diff |= (u8 *)scatterwalk_page(&walk->in) -
-	       (u8 *)scatterwalk_page(&walk->out);
+	diff |= (u8 *)(sg_page(walk->in.sg) + (walk->in.offset >> PAGE_SHIFT)) -
+	       (u8 *)(sg_page(walk->out.sg) + (walk->out.offset >> PAGE_SHIFT));
 
 	skcipher_map_src(walk);
 	walk->dst.virt.addr = walk->src.virt.addr;
+57 -22
include/crypto/scatterwalk.h
@@ -49,24 +49,35 @@
 	walk->offset = sg->offset + pos;
 }
 
-static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk)
-{
-	unsigned int len = walk->sg->offset + walk->sg->length - walk->offset;
-	unsigned int len_this_page = offset_in_page(~walk->offset) + 1;
-	return len_this_page > len ? len : len_this_page;
-}
-
 static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk,
 					     unsigned int nbytes)
 {
+	unsigned int len_this_sg;
+	unsigned int limit;
+
 	if (walk->offset >= walk->sg->offset + walk->sg->length)
 		scatterwalk_start(walk, sg_next(walk->sg));
-	return min(nbytes, scatterwalk_pagelen(walk));
-}
+	len_this_sg = walk->sg->offset + walk->sg->length - walk->offset;
 
-static inline struct page *scatterwalk_page(struct scatter_walk *walk)
-{
-	return sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT);
+	/*
+	 * HIGHMEM case: the page may have to be mapped into memory. To avoid
+	 * the complexity of having to map multiple pages at once per sg entry,
+	 * clamp the returned length to not cross a page boundary.
+	 *
+	 * !HIGHMEM case: no mapping is needed; all pages of the sg entry are
+	 * already mapped contiguously in the kernel's direct map. For improved
+	 * performance, allow the walker to return data segments that cross a
+	 * page boundary. Do still cap the length to PAGE_SIZE, since some
+	 * users rely on that to avoid disabling preemption for too long when
+	 * using SIMD. It's also needed for when skcipher_walk uses a bounce
+	 * page due to the data not being aligned to the algorithm's alignmask.
+	 */
+	if (IS_ENABLED(CONFIG_HIGHMEM))
+		limit = PAGE_SIZE - offset_in_page(walk->offset);
+	else
+		limit = PAGE_SIZE;
+
+	return min3(nbytes, len_this_sg, limit);
 }
 
 /*
@@ -97,15 +86,23 @@
 	scatterwalk_crypto_chain(sg_out, sg_next(walk->sg), 2);
 }
 
-static inline void scatterwalk_unmap(void *vaddr)
-{
-	kunmap_local(vaddr);
-}
-
 static inline void *scatterwalk_map(struct scatter_walk *walk)
 {
-	return kmap_local_page(scatterwalk_page(walk)) +
-	       offset_in_page(walk->offset);
+	struct page *base_page = sg_page(walk->sg);
+
+	if (IS_ENABLED(CONFIG_HIGHMEM))
+		return kmap_local_page(base_page + (walk->offset >> PAGE_SHIFT)) +
+		       offset_in_page(walk->offset);
+	/*
+	 * When !HIGHMEM we allow the walker to return segments that span a page
+	 * boundary; see scatterwalk_clamp(). To make it clear that in this
+	 * case we're working in the linear buffer of the whole sg entry in the
+	 * kernel's direct map rather than within the mapped buffer of a single
+	 * page, compute the address as an offset from the page_address() of the
+	 * first page of the sg entry. Either way the result is the address in
+	 * the direct map, but this makes it clearer what is really going on.
+	 */
+	return page_address(base_page) + walk->offset;
 }
 
 /**
@@ -134,6 +115,12 @@
 	return scatterwalk_map(walk);
 }
 
+static inline void scatterwalk_unmap(const void *vaddr)
+{
+	if (IS_ENABLED(CONFIG_HIGHMEM))
+		kunmap_local(vaddr);
+}
+
 static inline void scatterwalk_advance(struct scatter_walk *walk,
 				       unsigned int nbytes)
 {
@@ -158,6 +133,6 @@
 static inline void scatterwalk_done_src(struct scatter_walk *walk,
 					const void *vaddr, unsigned int nbytes)
 {
-	scatterwalk_unmap((void *)vaddr);
+	scatterwalk_unmap(vaddr);
 	scatterwalk_advance(walk, nbytes);
 }
 
@@ -179,9 +154,19 @@
 	 * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
 	 * relying on flush_dcache_page() being a no-op when not implemented,
	 * since otherwise the BUG_ON in sg_page() does not get optimized out.
+	 * This also avoids having to consider whether the loop would get
+	 * reliably optimized out or not.
 	 */
-	if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
-		flush_dcache_page(scatterwalk_page(walk));
+	if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
+		struct page *base_page, *start_page, *end_page, *page;
+
+		base_page = sg_page(walk->sg);
+		start_page = base_page + (walk->offset >> PAGE_SHIFT);
+		end_page = base_page + ((walk->offset + nbytes +
+					 PAGE_SIZE - 1) >> PAGE_SHIFT);
+		for (page = start_page; page < end_page; page++)
+			flush_dcache_page(page);
+	}
 	scatterwalk_advance(walk, nbytes);
 }