Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm/nhpoly1305 - add NEON-accelerated NHPoly1305

Add an ARM NEON implementation of NHPoly1305, an ε-almost-∆-universal
hash function used in the Adiantum encryption mode. For now, only the
NH portion is actually NEON-accelerated; the Poly1305 part is less
performance-critical so is just implemented in C.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Eric Biggers and committed by
Herbert Xu
16aae359 26609a21

+200
+5
arch/arm/crypto/Kconfig
··· 131 131 select CRYPTO_BLKCIPHER 132 132 select CRYPTO_CHACHA20 133 133 134 + config CRYPTO_NHPOLY1305_NEON 135 + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" 136 + depends on KERNEL_MODE_NEON 137 + select CRYPTO_NHPOLY1305 138 + 134 139 endif
+2
arch/arm/crypto/Makefile
··· 10 10 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o 11 11 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o 12 12 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o 13 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o 13 14 14 15 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o 15 16 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o ··· 54 53 crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o 55 54 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o 56 55 chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o 56 + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o 57 57 58 58 ifdef REGENERATE_ARM_CRYPTO 59 59 quiet_cmd_perl = PERL $@
+116
arch/arm/crypto/nh-neon-core.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	.text
	.fpu		neon

	// Function arguments, in the order of the nh_neon() prototype below
	KEY		.req	r0
	MESSAGE		.req	r1
	MESSAGE_LEN	.req	r2
	HASH		.req	r3

	// One accumulator register per pass, each holding two 64-bit partial
	// sums (the _A and _B halves) that are only combined at the very end
	PASS0_SUMS	.req	q0
	PASS0_SUM_A	.req	d0
	PASS0_SUM_B	.req	d1
	PASS1_SUMS	.req	q1
	PASS1_SUM_A	.req	d2
	PASS1_SUM_B	.req	d3
	PASS2_SUMS	.req	q2
	PASS2_SUM_A	.req	d4
	PASS2_SUM_B	.req	d5
	PASS3_SUMS	.req	q3
	PASS3_SUM_A	.req	d6
	PASS3_SUM_B	.req	d7

	// Four 16-byte key strides; their roles rotate from stride to stride
	K0		.req	q4
	K1		.req	q5
	K2		.req	q6
	K3		.req	q7

	// Scratch registers (message + key, one per pass) and their halves
	T0		.req	q8
	T0_L		.req	d16
	T0_H		.req	d17
	T1		.req	q9
	T1_L		.req	d18
	T1_H		.req	d19
	T2		.req	q10
	T2_L		.req	d20
	T2_H		.req	d21
	T3		.req	q11
	T3_L		.req	d22
	T3_H		.req	d23

/*
 * Process one 16-byte message stride for all four passes.
 *
 * \k0, \k1 and \k2 name registers that already hold key strides; \k3 names
 * the register that receives the next 16 bytes of key.  Both MESSAGE and KEY
 * are post-incremented past the data that was loaded.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Fetch 16 message bytes; T3 doubles as the message register
	vld1.8		{T3}, [MESSAGE]!

	// Fetch the next 16 key bytes into \k3
	vld1.32		{\k3}, [KEY]!

	// Per-pass sums of message and key in 32-bit lanes.  T3 is written
	// last because it is the message operand of the other three adds.
	vadd.u32	T0, T3, \k0
	vadd.u32	T1, T3, \k1
	vadd.u32	T2, T3, \k2
	vadd.u32	T3, T3, \k3

	// For each pass, multiply the low half by the high half (32x32 => 64)
	// and add the products into that pass's accumulator
	vmlal.u32	PASS0_SUMS, T0_L, T0_H
	vmlal.u32	PASS1_SUMS, T1_L, T1_H
	vmlal.u32	PASS2_SUMS, T2_L, T2_H
	vmlal.u32	PASS3_SUMS, T3_L, T3_H
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * The caller guarantees that message_len % 16 == 0.
 */
ENTRY(nh_neon)

	// Preload the first three key strides and clear the accumulators
	vld1.32		{K0,K1}, [KEY]!
	  vmov.u64	PASS0_SUMS, #0
	  vmov.u64	PASS1_SUMS, #0
	vld1.32		{K2}, [KEY]!
	  vmov.u64	PASS2_SUMS, #0
	  vmov.u64	PASS3_SUMS, #0

	// Main loop: four strides (64 bytes) per iteration, so the key
	// register rotation is back at K0..K3 when an iteration finishes
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Ltail
.Lstride4_loop:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lstride4_loop

.Ltail:
	// MESSAGE_LEN is negative here; its low six bits are the number of
	// bytes still unprocessed (0, 16, 32 or 48)
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Lfinish
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Lfinish
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Lfinish
	_nh_stride	K2, K3, K0, K1

.Lfinish:
	// Fold the two partial sums of each pass into a single 64-bit value,
	// then write the four results to HASH
	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
	vst1.8		{T0-T1}, [HASH]
	bx		lr
ENDPROC(nh_neon)
+77
arch/arm/crypto/nhpoly1305-neon-glue.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum 4 + * (NEON accelerated version) 5 + * 6 + * Copyright 2018 Google LLC 7 + */ 8 + 9 + #include <asm/neon.h> 10 + #include <asm/simd.h> 11 + #include <crypto/internal/hash.h> 12 + #include <crypto/nhpoly1305.h> 13 + #include <linux/module.h> 14 + 15 + asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, 16 + u8 hash[NH_HASH_BYTES]); 17 + 18 + /* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ 19 + static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, 20 + __le64 hash[NH_NUM_PASSES]) 21 + { 22 + nh_neon(key, message, message_len, (u8 *)hash); 23 + } 24 + 25 + static int nhpoly1305_neon_update(struct shash_desc *desc, 26 + const u8 *src, unsigned int srclen) 27 + { 28 + if (srclen < 64 || !may_use_simd()) 29 + return crypto_nhpoly1305_update(desc, src, srclen); 30 + 31 + do { 32 + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); 33 + 34 + kernel_neon_begin(); 35 + crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); 36 + kernel_neon_end(); 37 + src += n; 38 + srclen -= n; 39 + } while (srclen); 40 + return 0; 41 + } 42 + 43 + static struct shash_alg nhpoly1305_alg = { 44 + .base.cra_name = "nhpoly1305", 45 + .base.cra_driver_name = "nhpoly1305-neon", 46 + .base.cra_priority = 200, 47 + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), 48 + .base.cra_module = THIS_MODULE, 49 + .digestsize = POLY1305_DIGEST_SIZE, 50 + .init = crypto_nhpoly1305_init, 51 + .update = nhpoly1305_neon_update, 52 + .final = crypto_nhpoly1305_final, 53 + .setkey = crypto_nhpoly1305_setkey, 54 + .descsize = sizeof(struct nhpoly1305_state), 55 + }; 56 + 57 + static int __init nhpoly1305_mod_init(void) 58 + { 59 + if (!(elf_hwcap & HWCAP_NEON)) 60 + return -ENODEV; 61 + 62 + return crypto_register_shash(&nhpoly1305_alg); 63 + } 64 + 65 + static void __exit nhpoly1305_mod_exit(void) 66 + { 67 + 
crypto_unregister_shash(&nhpoly1305_alg); 68 + } 69 + 70 + module_init(nhpoly1305_mod_init); 71 + module_exit(nhpoly1305_mod_exit); 72 + 73 + MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); 74 + MODULE_LICENSE("GPL v2"); 75 + MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); 76 + MODULE_ALIAS_CRYPTO("nhpoly1305"); 77 + MODULE_ALIAS_CRYPTO("nhpoly1305-neon");