Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 71 lines 2.6 kB view raw
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are 4 * instantiated by crc-pclmul-template.S 5 * 6 * Copyright 2025 Google LLC 7 * 8 * Author: Eric Biggers <ebiggers@google.com> 9 */ 10#ifndef _CRC_PCLMUL_TEMPLATE_H 11#define _CRC_PCLMUL_TEMPLATE_H 12 13#include <asm/cpufeatures.h> 14#include <asm/simd.h> 15#include <linux/static_call.h> 16#include "crc-pclmul-consts.h" 17 18#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \ 19crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \ 20 const void *consts_ptr); \ 21crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \ 22 const void *consts_ptr); \ 23crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \ 24 const void *consts_ptr); \ 25DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse) 26 27static inline bool have_vpclmul(void) 28{ 29 return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && 30 boot_cpu_has(X86_FEATURE_AVX2) && 31 cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL); 32} 33 34static inline bool have_avx512(void) 35{ 36 return boot_cpu_has(X86_FEATURE_AVX512BW) && 37 boot_cpu_has(X86_FEATURE_AVX512VL) && 38 !boot_cpu_has(X86_FEATURE_PREFER_YMM) && 39 cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL); 40} 41 42/* 43 * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16 44 * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD. 45 * 46 * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions. 47 * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(), 48 * varying by CPU and factors such as which parts of the "FPU" state userspace 49 * has touched, which could result in a larger cutoff being better. Indeed, a 50 * larger cutoff is usually better for a *single* message. However, the 51 * overhead of the FPU section gets amortized if multiple FPU sections get 52 * executed before returning to userspace, since the XSAVE and XRSTOR occur only 53 * once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on 54 * the dcache than the table-based code is, a 16-byte cutoff seems to work well. 55 */ 56#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \ 57do { \ 58 if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \ 59 likely(irq_fpu_usable())) { \ 60 const void *consts_ptr; \ 61 \ 62 consts_ptr = (consts).fold_across_128_bits_consts; \ 63 kernel_fpu_begin(); \ 64 crc = static_call(prefix##_pclmul)((crc), (p), (len), \ 65 consts_ptr); \ 66 kernel_fpu_end(); \ 67 return crc; \ 68 } \ 69} while (0) 70 71#endif /* _CRC_PCLMUL_TEMPLATE_H */