crypto: scatterwalk - Fix memcpy_sglist() to always succeed

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

The original implementation of memcpy_sglist() was broken because it
didn't handle scatterlists that describe exactly the same memory, which
is a case that many callers rely on. The current implementation is
broken too because it calls the skcipher_walk functions, which can fail,
and it ignores any errors from those functions.

Fix it by replacing it with a new implementation written from scratch.
It always succeeds. It's also a bit faster, since it avoids the
overhead of skcipher_walk. skcipher_walk includes a lot of
functionality (such as alignmask handling) that's irrelevant here.

Reported-by: Colin Ian King <coking@nvidia.com>
Closes: https://lore.kernel.org/r/20251114122620.111623-1-coking@nvidia.com
Fixes: 131bdceca1f0 ("crypto: scatterwalk - Add memcpy_sglist")
Fixes: 0f8d42bf128d ("crypto: scatterwalk - Move skcipher walk and use it for memcpy_sglist")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Eric Biggers and committed by Herbert Xu 4 months ago (4dffc9bb 5727a844)

+114 -33

2 changed files

expand all

crypto

scatterwalk.c

include

crypto

scatterwalk.h

+83 -12

crypto/scatterwalk.c

··· 101 101 } 102 102 EXPORT_SYMBOL_GPL(memcpy_to_sglist); 103 103 104 + /** 105 + * memcpy_sglist() - Copy data from one scatterlist to another 106 + * @dst: The destination scatterlist. Can be NULL if @nbytes == 0. 107 + * @src: The source scatterlist. Can be NULL if @nbytes == 0. 108 + * @nbytes: Number of bytes to copy 109 + * 110 + * The scatterlists can describe exactly the same memory, in which case this 111 + * function is a no-op. No other overlaps are supported. 112 + * 113 + * Context: Any context 114 + */ 104 115 void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src, 105 116 unsigned int nbytes) 106 117 { 107 - struct skcipher_walk walk = {}; 118 + unsigned int src_offset, dst_offset; 108 119 109 - if (unlikely(nbytes == 0)) /* in case sg == NULL */ 120 + if (unlikely(nbytes == 0)) /* in case src and/or dst is NULL */ 110 121 return; 111 122 112 - walk.total = nbytes; 123 + src_offset = src->offset; 124 + dst_offset = dst->offset; 125 + for (;;) { 126 + /* Compute the length to copy this step. */ 127 + unsigned int len = min3(src->offset + src->length - src_offset, 128 + dst->offset + dst->length - dst_offset, 129 + nbytes); 130 + struct page *src_page = sg_page(src); 131 + struct page *dst_page = sg_page(dst); 132 + const void *src_virt; 133 + void *dst_virt; 113 134 114 - scatterwalk_start(&walk.in, src); 115 - scatterwalk_start(&walk.out, dst); 135 + if (IS_ENABLED(CONFIG_HIGHMEM)) { 136 + /* HIGHMEM: we may have to actually map the pages. */ 137 + const unsigned int src_oip = offset_in_page(src_offset); 138 + const unsigned int dst_oip = offset_in_page(dst_offset); 139 + const unsigned int limit = PAGE_SIZE; 116 140 117 - skcipher_walk_first(&walk, true); 118 - do { 119 - if (walk.src.virt.addr != walk.dst.virt.addr) 120 - memcpy(walk.dst.virt.addr, walk.src.virt.addr, 121 - walk.nbytes); 122 - skcipher_walk_done(&walk, 0); 123 - } while (walk.nbytes); 141 + /* Further limit len to not cross a page boundary. 
*/ 142 + len = min3(len, limit - src_oip, limit - dst_oip); 143 + 144 + /* Compute the source and destination pages. */ 145 + src_page += src_offset / PAGE_SIZE; 146 + dst_page += dst_offset / PAGE_SIZE; 147 + 148 + if (src_page != dst_page) { 149 + /* Copy between different pages. */ 150 + memcpy_page(dst_page, dst_oip, 151 + src_page, src_oip, len); 152 + flush_dcache_page(dst_page); 153 + } else if (src_oip != dst_oip) { 154 + /* Copy between different parts of same page. */ 155 + dst_virt = kmap_local_page(dst_page); 156 + memcpy(dst_virt + dst_oip, dst_virt + src_oip, 157 + len); 158 + kunmap_local(dst_virt); 159 + flush_dcache_page(dst_page); 160 + } /* Else, it's the same memory. No action needed. */ 161 + } else { 162 + /* 163 + * !HIGHMEM: no mapping needed. Just work in the linear 164 + * buffer of each sg entry. Note that we can cross page 165 + * boundaries, as they are not significant in this case. 166 + */ 167 + src_virt = page_address(src_page) + src_offset; 168 + dst_virt = page_address(dst_page) + dst_offset; 169 + if (src_virt != dst_virt) { 170 + memcpy(dst_virt, src_virt, len); 171 + if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) 172 + __scatterwalk_flush_dcache_pages( 173 + dst_page, dst_offset, len); 174 + } /* Else, it's the same memory. No action needed. */ 175 + } 176 + nbytes -= len; 177 + if (nbytes == 0) /* No more to copy? */ 178 + break; 179 + 180 + /* 181 + * There's more to copy. Advance the offsets by the length 182 + * copied this step, and advance the sg entries as needed. 183 + */ 184 + src_offset += len; 185 + if (src_offset >= src->offset + src->length) { 186 + src = sg_next(src); 187 + src_offset = src->offset; 188 + } 189 + dst_offset += len; 190 + if (dst_offset >= dst->offset + dst->length) { 191 + dst = sg_next(dst); 192 + dst_offset = dst->offset; 193 + } 194 + } 124 195 } 125 196 EXPORT_SYMBOL_GPL(memcpy_sglist); 126 197

+31 -21

include/crypto/scatterwalk.h

··· 227 227 scatterwalk_advance(walk, nbytes); 228 228 } 229 229 230 + /* 231 + * Flush the dcache of any pages that overlap the region 232 + * [offset, offset + nbytes) relative to base_page. 233 + * 234 + * This should be called only when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, to ensure 235 + * that all relevant code (including the call to sg_page() in the caller, if 236 + * applicable) gets fully optimized out when !ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE. 237 + */ 238 + static inline void __scatterwalk_flush_dcache_pages(struct page *base_page, 239 + unsigned int offset, 240 + unsigned int nbytes) 241 + { 242 + unsigned int num_pages; 243 + 244 + base_page += offset / PAGE_SIZE; 245 + offset %= PAGE_SIZE; 246 + 247 + /* 248 + * This is an overflow-safe version of 249 + * num_pages = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE). 250 + */ 251 + num_pages = nbytes / PAGE_SIZE; 252 + num_pages += DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE); 253 + 254 + for (unsigned int i = 0; i < num_pages; i++) 255 + flush_dcache_page(base_page + i); 256 + } 257 + 230 258 /** 231 259 * scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist 232 260 * @walk: the scatter_walk ··· 268 240 unsigned int nbytes) 269 241 { 270 242 scatterwalk_unmap(walk); 271 - /* 272 - * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just 273 - * relying on flush_dcache_page() being a no-op when not implemented, 274 - * since otherwise the BUG_ON in sg_page() does not get optimized out. 275 - * This also avoids having to consider whether the loop would get 276 - * reliably optimized out or not. 
277 - */ 278 - if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) { 279 - struct page *base_page; 280 - unsigned int offset; 281 - int start, end, i; 282 - 283 - base_page = sg_page(walk->sg); 284 - offset = walk->offset; 285 - start = offset >> PAGE_SHIFT; 286 - end = start + (nbytes >> PAGE_SHIFT); 287 - end += (offset_in_page(offset) + offset_in_page(nbytes) + 288 - PAGE_SIZE - 1) >> PAGE_SHIFT; 289 - for (i = start; i < end; i++) 290 - flush_dcache_page(base_page + i); 291 - } 243 + if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) 244 + __scatterwalk_flush_dcache_pages(sg_page(walk->sg), 245 + walk->offset, nbytes); 292 246 scatterwalk_advance(walk, nbytes); 293 247 } 294 248