Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer

This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Ard Biesheuvel; committed by Herbert Xu.
Commit 07eb54d3 (parent 9205b949).

+64 -130
+15 -18
arch/arm64/crypto/sha1-ce-core.S
··· 66 66 .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 67 67 68 68 /* 69 - * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 70 - * u8 *head, long bytes) 69 + * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, 70 + * int blocks) 71 71 */ 72 72 ENTRY(sha1_ce_transform) 73 73 /* load round constants */ ··· 78 78 ld1r {k3.4s}, [x6] 79 79 80 80 /* load state */ 81 - ldr dga, [x2] 82 - ldr dgb, [x2, #16] 81 + ldr dga, [x0] 82 + ldr dgb, [x0, #16] 83 83 84 - /* load partial state (if supplied) */ 85 - cbz x3, 0f 86 - ld1 {v8.4s-v11.4s}, [x3] 87 - b 1f 84 + /* load sha1_ce_state::finalize */ 85 + ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] 88 86 89 87 /* load input */ 90 88 0: ld1 {v8.4s-v11.4s}, [x1], #64 91 - sub w0, w0, #1 89 + sub w2, w2, #1 92 90 93 - 1: 94 91 CPU_LE( rev32 v8.16b, v8.16b ) 95 92 CPU_LE( rev32 v9.16b, v9.16b ) 96 93 CPU_LE( rev32 v10.16b, v10.16b ) 97 94 CPU_LE( rev32 v11.16b, v11.16b ) 98 95 99 - 2: add t0.4s, v8.4s, k0.4s 96 + 1: add t0.4s, v8.4s, k0.4s 100 97 mov dg0v.16b, dgav.16b 101 98 102 99 add_update c, ev, k0, 8, 9, 10, 11, dgb ··· 124 127 add dgbv.2s, dgbv.2s, dg1v.2s 125 128 add dgav.4s, dgav.4s, dg0v.4s 126 129 127 - cbnz w0, 0b 130 + cbnz w2, 0b 128 131 129 132 /* 130 133 * Final block: add padding and total bit count. 131 - * Skip if we have no total byte count in x4. In that case, the input 132 - * size was not a round multiple of the block size, and the padding is 133 - * handled by the C code. 134 + * Skip if the input size was not a round multiple of the block size, 135 + * the padding is handled by the C code in that case. 
134 136 */ 135 137 cbz x4, 3f 138 + ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] 136 139 movi v9.2d, #0 137 140 mov x8, #0x80000000 138 141 movi v10.2d, #0 ··· 141 144 mov x4, #0 142 145 mov v11.d[0], xzr 143 146 mov v11.d[1], x7 144 - b 2b 147 + b 1b 145 148 146 149 /* store new state */ 147 - 3: str dga, [x2] 148 - str dgb, [x2, #16] 150 + 3: str dga, [x0] 151 + str dgb, [x0, #16] 149 152 ret 150 153 ENDPROC(sha1_ce_transform)
+49 -112
arch/arm64/crypto/sha1-ce-glue.c
··· 12 12 #include <asm/unaligned.h> 13 13 #include <crypto/internal/hash.h> 14 14 #include <crypto/sha.h> 15 + #include <crypto/sha1_base.h> 15 16 #include <linux/cpufeature.h> 16 17 #include <linux/crypto.h> 17 18 #include <linux/module.h> 19 + 20 + #define ASM_EXPORT(sym, val) \ 21 + asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); 18 22 19 23 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); 20 24 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 21 25 MODULE_LICENSE("GPL v2"); 22 26 23 - asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 24 - u8 *head, long bytes); 27 + struct sha1_ce_state { 28 + struct sha1_state sst; 29 + u32 finalize; 30 + }; 25 31 26 - static int sha1_init(struct shash_desc *desc) 32 + asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, 33 + int blocks); 34 + 35 + static int sha1_ce_update(struct shash_desc *desc, const u8 *data, 36 + unsigned int len) 27 37 { 28 - struct sha1_state *sctx = shash_desc_ctx(desc); 38 + struct sha1_ce_state *sctx = shash_desc_ctx(desc); 29 39 30 - *sctx = (struct sha1_state){ 31 - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, 32 - }; 33 - return 0; 34 - } 35 - 36 - static int sha1_update(struct shash_desc *desc, const u8 *data, 37 - unsigned int len) 38 - { 39 - struct sha1_state *sctx = shash_desc_ctx(desc); 40 - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; 41 - 42 - sctx->count += len; 43 - 44 - if ((partial + len) >= SHA1_BLOCK_SIZE) { 45 - int blocks; 46 - 47 - if (partial) { 48 - int p = SHA1_BLOCK_SIZE - partial; 49 - 50 - memcpy(sctx->buffer + partial, data, p); 51 - data += p; 52 - len -= p; 53 - } 54 - 55 - blocks = len / SHA1_BLOCK_SIZE; 56 - len %= SHA1_BLOCK_SIZE; 57 - 58 - kernel_neon_begin_partial(16); 59 - sha1_ce_transform(blocks, data, sctx->state, 60 - partial ? 
sctx->buffer : NULL, 0); 61 - kernel_neon_end(); 62 - 63 - data += blocks * SHA1_BLOCK_SIZE; 64 - partial = 0; 65 - } 66 - if (len) 67 - memcpy(sctx->buffer + partial, data, len); 68 - return 0; 69 - } 70 - 71 - static int sha1_final(struct shash_desc *desc, u8 *out) 72 - { 73 - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; 74 - 75 - struct sha1_state *sctx = shash_desc_ctx(desc); 76 - __be64 bits = cpu_to_be64(sctx->count << 3); 77 - __be32 *dst = (__be32 *)out; 78 - int i; 79 - 80 - u32 padlen = SHA1_BLOCK_SIZE 81 - - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); 82 - 83 - sha1_update(desc, padding, padlen); 84 - sha1_update(desc, (const u8 *)&bits, sizeof(bits)); 85 - 86 - for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) 87 - put_unaligned_be32(sctx->state[i], dst++); 88 - 89 - *sctx = (struct sha1_state){}; 90 - return 0; 91 - } 92 - 93 - static int sha1_finup(struct shash_desc *desc, const u8 *data, 94 - unsigned int len, u8 *out) 95 - { 96 - struct sha1_state *sctx = shash_desc_ctx(desc); 97 - __be32 *dst = (__be32 *)out; 98 - int blocks; 99 - int i; 100 - 101 - if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { 102 - sha1_update(desc, data, len); 103 - return sha1_final(desc, out); 104 - } 105 - 106 - /* 107 - * Use a fast path if the input is a multiple of 64 bytes. 
In 108 - * this case, there is no need to copy data around, and we can 109 - * perform the entire digest calculation in a single invocation 110 - * of sha1_ce_transform() 111 - */ 112 - blocks = len / SHA1_BLOCK_SIZE; 113 - 40 + sctx->finalize = 0; 114 41 kernel_neon_begin_partial(16); 115 - sha1_ce_transform(blocks, data, sctx->state, NULL, len); 42 + sha1_base_do_update(desc, data, len, 43 + (sha1_block_fn *)sha1_ce_transform); 116 44 kernel_neon_end(); 117 45 118 - for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) 119 - put_unaligned_be32(sctx->state[i], dst++); 120 - 121 - *sctx = (struct sha1_state){}; 122 46 return 0; 123 47 } 124 48 125 - static int sha1_export(struct shash_desc *desc, void *out) 49 + static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, 50 + unsigned int len, u8 *out) 126 51 { 127 - struct sha1_state *sctx = shash_desc_ctx(desc); 128 - struct sha1_state *dst = out; 52 + struct sha1_ce_state *sctx = shash_desc_ctx(desc); 53 + bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); 129 54 130 - *dst = *sctx; 131 - return 0; 55 + ASM_EXPORT(sha1_ce_offsetof_count, 56 + offsetof(struct sha1_ce_state, sst.count)); 57 + ASM_EXPORT(sha1_ce_offsetof_finalize, 58 + offsetof(struct sha1_ce_state, finalize)); 59 + 60 + /* 61 + * Allow the asm code to perform the finalization if there is no 62 + * partial data and the input is a round multiple of the block size. 
63 + */ 64 + sctx->finalize = finalize; 65 + 66 + kernel_neon_begin_partial(16); 67 + sha1_base_do_update(desc, data, len, 68 + (sha1_block_fn *)sha1_ce_transform); 69 + if (!finalize) 70 + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); 71 + kernel_neon_end(); 72 + return sha1_base_finish(desc, out); 132 73 } 133 74 134 - static int sha1_import(struct shash_desc *desc, const void *in) 75 + static int sha1_ce_final(struct shash_desc *desc, u8 *out) 135 76 { 136 - struct sha1_state *sctx = shash_desc_ctx(desc); 137 - struct sha1_state const *src = in; 138 - 139 - *sctx = *src; 140 - return 0; 77 + kernel_neon_begin_partial(16); 78 + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); 79 + kernel_neon_end(); 80 + return sha1_base_finish(desc, out); 141 81 } 142 82 143 83 static struct shash_alg alg = { 144 - .init = sha1_init, 145 - .update = sha1_update, 146 - .final = sha1_final, 147 - .finup = sha1_finup, 148 - .export = sha1_export, 149 - .import = sha1_import, 150 - .descsize = sizeof(struct sha1_state), 84 + .init = sha1_base_init, 85 + .update = sha1_ce_update, 86 + .final = sha1_ce_final, 87 + .finup = sha1_ce_finup, 88 + .descsize = sizeof(struct sha1_ce_state), 151 89 .digestsize = SHA1_DIGEST_SIZE, 152 - .statesize = sizeof(struct sha1_state), 153 90 .base = { 154 91 .cra_name = "sha1", 155 92 .cra_driver_name = "sha1-ce",