Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm64/sha2-ce - simplify NEON yield

Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.

This reverts commit d82f37ab5e2426287013eba38b1212e8b71e5be3.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Ard Biesheuvel and committed by Herbert Xu.
Commit: b2eadbf4 (parent: 5a69e1b7)

+25 -35 (commit total)
+13 -25 in the following file:
arch/arm64/crypto/sha2-ce-core.S
··· 76 76 */ 77 77 .text 78 78 SYM_FUNC_START(sha2_ce_transform) 79 - frame_push 3 80 - 81 - mov x19, x0 82 - mov x20, x1 83 - mov x21, x2 84 - 85 79 /* load round constants */ 86 - 0: adr_l x8, .Lsha2_rcon 80 + adr_l x8, .Lsha2_rcon 87 81 ld1 { v0.4s- v3.4s}, [x8], #64 88 82 ld1 { v4.4s- v7.4s}, [x8], #64 89 83 ld1 { v8.4s-v11.4s}, [x8], #64 90 84 ld1 {v12.4s-v15.4s}, [x8] 91 85 92 86 /* load state */ 93 - ld1 {dgav.4s, dgbv.4s}, [x19] 87 + ld1 {dgav.4s, dgbv.4s}, [x0] 94 88 95 89 /* load sha256_ce_state::finalize */ 96 90 ldr_l w4, sha256_ce_offsetof_finalize, x4 97 - ldr w4, [x19, x4] 91 + ldr w4, [x0, x4] 98 92 99 93 /* load input */ 100 - 1: ld1 {v16.4s-v19.4s}, [x20], #64 101 - sub w21, w21, #1 94 + 0: ld1 {v16.4s-v19.4s}, [x1], #64 95 + sub w2, w2, #1 102 96 103 97 CPU_LE( rev32 v16.16b, v16.16b ) 104 98 CPU_LE( rev32 v17.16b, v17.16b ) 105 99 CPU_LE( rev32 v18.16b, v18.16b ) 106 100 CPU_LE( rev32 v19.16b, v19.16b ) 107 101 108 - 2: add t0.4s, v16.4s, v0.4s 102 + 1: add t0.4s, v16.4s, v0.4s 109 103 mov dg0v.16b, dgav.16b 110 104 mov dg1v.16b, dgbv.16b 111 105 ··· 128 134 add dgbv.4s, dgbv.4s, dg1v.4s 129 135 130 136 /* handled all input blocks? */ 131 - cbz w21, 3f 132 - 133 - if_will_cond_yield_neon 134 - st1 {dgav.4s, dgbv.4s}, [x19] 135 - do_cond_yield_neon 137 + cbz w2, 2f 138 + cond_yield 3f, x5 136 139 b 0b 137 - endif_yield_neon 138 - 139 - b 1b 140 140 141 141 /* 142 142 * Final block: add padding and total bit count. 143 143 * Skip if the input size was not a round multiple of the block size, 144 144 * the padding is handled by the C code in that case. 
145 145 */ 146 - 3: cbz x4, 4f 146 + 2: cbz x4, 3f 147 147 ldr_l w4, sha256_ce_offsetof_count, x4 148 - ldr x4, [x19, x4] 148 + ldr x4, [x0, x4] 149 149 movi v17.2d, #0 150 150 mov x8, #0x80000000 151 151 movi v18.2d, #0 ··· 148 160 mov x4, #0 149 161 mov v19.d[0], xzr 150 162 mov v19.d[1], x7 151 - b 2b 163 + b 1b 152 164 153 165 /* store new state */ 154 - 4: st1 {dgav.4s, dgbv.4s}, [x19] 155 - frame_pop 166 + 3: st1 {dgav.4s, dgbv.4s}, [x0] 167 + mov w0, w2 156 168 ret 157 169 SYM_FUNC_END(sha2_ce_transform)
+12 -10
arch/arm64/crypto/sha2-ce-glue.c
··· 30 30 extern const u32 sha256_ce_offsetof_count; 31 31 extern const u32 sha256_ce_offsetof_finalize; 32 32 33 - asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, 34 - int blocks); 33 + asmlinkage int sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, 34 + int blocks); 35 35 36 36 static void __sha2_ce_transform(struct sha256_state *sst, u8 const *src, 37 37 int blocks) 38 38 { 39 - sha2_ce_transform(container_of(sst, struct sha256_ce_state, sst), src, 40 - blocks); 39 + while (blocks) { 40 + int rem; 41 + 42 + kernel_neon_begin(); 43 + rem = sha2_ce_transform(container_of(sst, struct sha256_ce_state, 44 + sst), src, blocks); 45 + kernel_neon_end(); 46 + src += (blocks - rem) * SHA256_BLOCK_SIZE; 47 + blocks = rem; 48 + } 41 49 } 42 50 43 51 const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, ··· 71 63 __sha256_block_data_order); 72 64 73 65 sctx->finalize = 0; 74 - kernel_neon_begin(); 75 66 sha256_base_do_update(desc, data, len, __sha2_ce_transform); 76 - kernel_neon_end(); 77 67 78 68 return 0; 79 69 } ··· 96 90 */ 97 91 sctx->finalize = finalize; 98 92 99 - kernel_neon_begin(); 100 93 sha256_base_do_update(desc, data, len, __sha2_ce_transform); 101 94 if (!finalize) 102 95 sha256_base_do_finalize(desc, __sha2_ce_transform); 103 - kernel_neon_end(); 104 96 return sha256_base_finish(desc, out); 105 97 } 106 98 ··· 112 108 } 113 109 114 110 sctx->finalize = 0; 115 - kernel_neon_begin(); 116 111 sha256_base_do_finalize(desc, __sha2_ce_transform); 117 - kernel_neon_end(); 118 112 return sha256_base_finish(desc, out); 119 113 } 120 114