Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
"This fixes a potential scheduling latency problem for the algorithms
used by WireGuard"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
crypto: arch/nhpoly1305 - process in explicit 4k chunks
crypto: arch/lib - limit simd usage to 4k chunks

+11 -3
arch/arm/crypto/chacha-glue.c
@@ -91,9 +91,17 @@
 		return;
 	}
 
-	kernel_neon_begin();
-	chacha_doneon(state, dst, src, bytes, nrounds);
-	kernel_neon_end();
+	do {
+		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+		kernel_neon_begin();
+		chacha_doneon(state, dst, src, todo, nrounds);
+		kernel_neon_end();
+
+		bytes -= todo;
+		src += todo;
+		dst += todo;
+	} while (bytes);
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
 
+1 -1
arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -30,7 +30,7 @@
 		return crypto_nhpoly1305_update(desc, src, srclen);
 
 	do {
-		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_neon_begin();
 		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+11 -4
arch/arm/crypto/poly1305-glue.c
@@ -160,13 +160,20 @@
 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
 
 		if (static_branch_likely(&have_neon) && do_neon) {
-			kernel_neon_begin();
-			poly1305_blocks_neon(&dctx->h, src, len, 1);
-			kernel_neon_end();
+			do {
+				unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+				kernel_neon_begin();
+				poly1305_blocks_neon(&dctx->h, src, todo, 1);
+				kernel_neon_end();
+
+				len -= todo;
+				src += todo;
+			} while (len);
 		} else {
 			poly1305_blocks_arm(&dctx->h, src, len, 1);
+			src += len;
 		}
-		src += len;
 		nbytes %= POLY1305_BLOCK_SIZE;
 	}
 
+11 -3
arch/arm64/crypto/chacha-neon-glue.c
@@ -87,9 +87,17 @@
 	    !crypto_simd_usable())
 		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
 
-	kernel_neon_begin();
-	chacha_doneon(state, dst, src, bytes, nrounds);
-	kernel_neon_end();
+	do {
+		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+		kernel_neon_begin();
+		chacha_doneon(state, dst, src, todo, nrounds);
+		kernel_neon_end();
+
+		bytes -= todo;
+		src += todo;
+		dst += todo;
+	} while (bytes);
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
 
+1 -1
arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -30,7 +30,7 @@
 		return crypto_nhpoly1305_update(desc, src, srclen);
 
 	do {
-		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_neon_begin();
 		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+11 -4
arch/arm64/crypto/poly1305-glue.c
@@ -143,13 +143,20 @@
 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
 
 		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
-			kernel_neon_begin();
-			poly1305_blocks_neon(&dctx->h, src, len, 1);
-			kernel_neon_end();
+			do {
+				unsigned int todo = min_t(unsigned int, len, SZ_4K);
+
+				kernel_neon_begin();
+				poly1305_blocks_neon(&dctx->h, src, todo, 1);
+				kernel_neon_end();
+
+				len -= todo;
+				src += todo;
+			} while (len);
 		} else {
 			poly1305_blocks(&dctx->h, src, len, 1);
+			src += len;
 		}
-		src += len;
 		nbytes %= POLY1305_BLOCK_SIZE;
 	}
 
+4 -6
arch/x86/crypto/blake2s-glue.c
@@ -32,16 +32,16 @@
 			    const u32 inc)
 {
 	/* SIMD disables preemption, so relax after processing each page. */
-	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+	BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
 
 	if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
 		blake2s_compress_generic(state, block, nblocks, inc);
 		return;
 	}
 
-	for (;;) {
+	do {
 		const size_t blocks = min_t(size_t, nblocks,
-					    PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+					    SZ_4K / BLAKE2S_BLOCK_SIZE);
 
 		kernel_fpu_begin();
 		if (IS_ENABLED(CONFIG_AS_AVX512) &&
@@ -52,10 +52,8 @@
 		kernel_fpu_end();
 
 		nblocks -= blocks;
-		if (!nblocks)
-			break;
 		block += blocks * BLAKE2S_BLOCK_SIZE;
-	}
+	} while (nblocks);
 }
 EXPORT_SYMBOL(blake2s_compress_arch);
 
+11 -3
arch/x86/crypto/chacha_glue.c
@@ -153,9 +153,17 @@
 	    bytes <= CHACHA_BLOCK_SIZE)
 		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
 
-	kernel_fpu_begin();
-	chacha_dosimd(state, dst, src, bytes, nrounds);
-	kernel_fpu_end();
+	do {
+		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+		kernel_fpu_begin();
+		chacha_dosimd(state, dst, src, todo, nrounds);
+		kernel_fpu_end();
+
+		bytes -= todo;
+		src += todo;
+		dst += todo;
+	} while (bytes);
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
 
+1 -1
arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -29,7 +29,7 @@
 		return crypto_nhpoly1305_update(desc, src, srclen);
 
 	do {
-		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_fpu_begin();
 		crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
+1 -1
arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -29,7 +29,7 @@
 		return crypto_nhpoly1305_update(desc, src, srclen);
 
 	do {
-		unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_fpu_begin();
 		crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
+6 -7
arch/x86/crypto/poly1305_glue.c
@@ -91,8 +91,8 @@
 	struct poly1305_arch_internal *state = ctx;
 
 	/* SIMD disables preemption, so relax after processing each page. */
-	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
-		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
+	BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
+		     SZ_4K % POLY1305_BLOCK_SIZE);
 
 	if (!static_branch_likely(&poly1305_use_avx) ||
 	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@@ -102,8 +102,8 @@
 		return;
 	}
 
-	for (;;) {
-		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+	do {
+		const size_t bytes = min_t(size_t, len, SZ_4K);
 
 		kernel_fpu_begin();
 		if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@@ -113,11 +113,10 @@
 		else
 			poly1305_blocks_avx(ctx, inp, bytes, padbit);
 		kernel_fpu_end();
+
 		len -= bytes;
-		if (!len)
-			break;
 		inp += bytes;
-	}
+	} while (len);
 }
 
 static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],