Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * ChaCha and HChaCha functions (ARM optimized)
4 *
5 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
6 * Copyright (C) 2015 Martin Willi
7 */
8
9#include <crypto/internal/simd.h>
10#include <linux/jump_label.h>
11#include <linux/kernel.h>
12
13#include <asm/cputype.h>
14#include <asm/hwcap.h>
15#include <asm/simd.h>
16
17asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
18 u8 *dst, const u8 *src, int nrounds);
19asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
20 u8 *dst, const u8 *src,
21 int nrounds, unsigned int nbytes);
22asmlinkage void hchacha_block_arm(const struct chacha_state *state,
23 u32 out[HCHACHA_OUT_WORDS], int nrounds);
24asmlinkage void hchacha_block_neon(const struct chacha_state *state,
25 u32 out[HCHACHA_OUT_WORDS], int nrounds);
26
27asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
28 const struct chacha_state *state, int nrounds);
29
30static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
31
32static inline bool neon_usable(void)
33{
34 return static_branch_likely(&use_neon) && crypto_simd_usable();
35}
36
/*
 * Encrypt/decrypt @bytes of @src into @dst with the NEON routines,
 * advancing the block counter in state->x[12] as blocks are consumed.
 * Caller must hold kernel-mode NEON (see scoped_ksimd() at call sites).
 */
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	/*
	 * Bulk path: hand up to 4 blocks at a time to the 4-way NEON
	 * routine.  Note the strict '>' — a final chunk of exactly one
	 * block (or less) is left for the single-block path below.
	 */
	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		/* One counter increment per (possibly partial) block used. */
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		/*
		 * The single-block asm routine processes a whole block;
		 * for a partial tail, bounce through a stack buffer so it
		 * never reads or writes past the caller's @bytes.
		 */
		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}
63
64static void hchacha_block_arch(const struct chacha_state *state,
65 u32 out[HCHACHA_OUT_WORDS], int nrounds)
66{
67 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
68 hchacha_block_arm(state, out, nrounds);
69 } else {
70 scoped_ksimd()
71 hchacha_block_neon(state, out, nrounds);
72 }
73}
74
75static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
76 const u8 *src, unsigned int bytes, int nrounds)
77{
78 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
79 bytes <= CHACHA_BLOCK_SIZE) {
80 chacha_doarm(dst, src, bytes, state, nrounds);
81 state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
82 return;
83 }
84
85 do {
86 unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
87
88 scoped_ksimd()
89 chacha_doneon(state, dst, src, todo, nrounds);
90
91 bytes -= todo;
92 src += todo;
93 dst += todo;
94 } while (bytes);
95}
96
97#define chacha_mod_init_arch chacha_mod_init_arch
98static void chacha_mod_init_arch(void)
99{
100 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
101 switch (read_cpuid_part()) {
102 case ARM_CPU_PART_CORTEX_A7:
103 case ARM_CPU_PART_CORTEX_A5:
104 /*
105 * The Cortex-A7 and Cortex-A5 do not perform well with
106 * the NEON implementation but do incredibly with the
107 * scalar one and use less power.
108 */
109 break;
110 default:
111 static_branch_enable(&use_neon);
112 }
113 }
114}