Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull CRC updates from Eric Biggers:
"Another set of improvements to the kernel's CRC (cyclic redundancy
check) code:

- Rework the CRC64 library functions to be directly optimized, like
what I did last cycle for the CRC32 and CRC-T10DIF library
functions

- Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
support and acceleration for crc64_be and crc64_nvme

- Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
crc_t10dif, crc64_be, and crc64_nvme

- Remove crc_t10dif and crc64_rocksoft from the crypto API, since
they are no longer needed there

- Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect

- Add kunit test cases for crc64_nvme and crc7

- Eliminate redundant functions for calculating the Castagnoli CRC32,
settling on just crc32c()

- Remove unnecessary prompts from some of the CRC kconfig options

- Further optimize the x86 crc32c code"

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits)
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
lib/crc: remove unnecessary prompt for CONFIG_CRC64
lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C
lib/crc: remove unnecessary prompt for CONFIG_CRC8
lib/crc: remove unnecessary prompt for CONFIG_CRC7
lib/crc: remove unnecessary prompt for CONFIG_CRC4
lib/crc7: unexport crc7_be_syndrome_table
lib/crc_kunit.c: update comment in crc_benchmark()
lib/crc_kunit.c: add test and benchmark for crc7_be()
x86/crc32: optimize tail handling for crc32c short inputs
riscv/crc64: add Zbc optimized CRC64 functions
riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function
riscv/crc32: reimplement the CRC32 functions using new template
riscv/crc: add "template" for Zbc optimized CRC functions
x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings
x86/crc32: improve crc32c_arch() code generation with clang
x86/crc64: implement crc64_be and crc64_nvme using new template
x86/crc-t10dif: implement crc_t10dif using new template
x86/crc32: implement crc32_le using new template
x86/crc: add "template" for [V]PCLMULQDQ based CRC functions
...

+2121 -1969
+1
MAINTAINERS
··· 6147 6147 F: arch/*/lib/crc* 6148 6148 F: include/linux/crc* 6149 6149 F: lib/crc* 6150 + F: scripts/gen-crc-consts.py 6150 6151 6151 6152 CREATIVE SB0540 6152 6153 M: Bastien Nocera <hadess@hadess.net>
-1
arch/arm/configs/dove_defconfig
··· 129 129 # CONFIG_CRYPTO_ANSI_CPRNG is not set 130 130 CONFIG_CRYPTO_DEV_MARVELL_CESA=y 131 131 CONFIG_CRC_CCITT=y 132 - CONFIG_LIBCRC32C=y 133 132 CONFIG_PRINTK_TIME=y 134 133 # CONFIG_DEBUG_BUGVERBOSE is not set 135 134 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
-1
arch/arm/configs/ep93xx_defconfig
··· 113 113 CONFIG_ROOT_NFS=y 114 114 CONFIG_NLS_CODEPAGE_437=y 115 115 CONFIG_NLS_ISO8859_1=y 116 - CONFIG_LIBCRC32C=y 117 116 CONFIG_MAGIC_SYSRQ=y 118 117 CONFIG_DEBUG_SLAB=y 119 118 CONFIG_DEBUG_SPINLOCK=y
-2
arch/arm/configs/imx_v6_v7_defconfig
··· 483 483 CONFIG_CRYPTO_DEV_MXS_DCP=y 484 484 CONFIG_CRC_CCITT=m 485 485 CONFIG_CRC_T10DIF=y 486 - CONFIG_CRC7=m 487 - CONFIG_LIBCRC32C=m 488 486 CONFIG_CMA_SIZE_MBYTES=64 489 487 CONFIG_FONTS=y 490 488 CONFIG_FONT_8x8=y
-1
arch/arm/configs/lpc18xx_defconfig
··· 148 148 CONFIG_JFFS2_FS=y 149 149 # CONFIG_NETWORK_FILESYSTEMS is not set 150 150 CONFIG_CRC_ITU_T=y 151 - CONFIG_CRC7=y 152 151 CONFIG_PRINTK_TIME=y 153 152 # CONFIG_ENABLE_MUST_CHECK is not set 154 153 # CONFIG_DEBUG_BUGVERBOSE is not set
-1
arch/arm/configs/moxart_defconfig
··· 118 118 CONFIG_CONFIGFS_FS=y 119 119 CONFIG_JFFS2_FS=y 120 120 CONFIG_KEYS=y 121 - CONFIG_CRC32_BIT=y 122 121 CONFIG_DMA_API_DEBUG=y 123 122 CONFIG_PRINTK_TIME=y 124 123 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
-1
arch/arm/configs/multi_v5_defconfig
··· 290 290 CONFIG_CRYPTO_PCBC=m 291 291 CONFIG_CRYPTO_DEV_MARVELL_CESA=y 292 292 CONFIG_CRC_CCITT=y 293 - CONFIG_LIBCRC32C=y 294 293 CONFIG_DEBUG_KERNEL=y 295 294 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 296 295 CONFIG_MAGIC_SYSRQ=y
-1
arch/arm/configs/mvebu_v5_defconfig
··· 188 188 CONFIG_CRYPTO_PCBC=m 189 189 CONFIG_CRYPTO_DEV_MARVELL_CESA=y 190 190 CONFIG_CRC_CCITT=y 191 - CONFIG_LIBCRC32C=y 192 191 CONFIG_DEBUG_KERNEL=y 193 192 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 194 193 CONFIG_MAGIC_SYSRQ=y
-1
arch/arm/configs/mxs_defconfig
··· 161 161 CONFIG_NLS_ISO8859_15=y 162 162 CONFIG_CRYPTO_DEV_MXS_DCP=y 163 163 CONFIG_CRC_ITU_T=m 164 - CONFIG_CRC7=m 165 164 CONFIG_FONTS=y 166 165 CONFIG_PRINTK_TIME=y 167 166 CONFIG_DEBUG_KERNEL=y
-1
arch/arm/configs/omap1_defconfig
··· 221 221 CONFIG_CRYPTO_DEFLATE=y 222 222 CONFIG_CRYPTO_LZO=y 223 223 # CONFIG_CRYPTO_ANSI_CPRNG is not set 224 - CONFIG_LIBCRC32C=y 225 224 CONFIG_FONTS=y 226 225 CONFIG_FONT_8x8=y 227 226 CONFIG_FONT_8x16=y
-2
arch/arm/configs/omap2plus_defconfig
··· 710 710 CONFIG_CRC_CCITT=y 711 711 CONFIG_CRC_T10DIF=y 712 712 CONFIG_CRC_ITU_T=y 713 - CONFIG_CRC7=y 714 - CONFIG_LIBCRC32C=y 715 713 CONFIG_DMA_CMA=y 716 714 CONFIG_FONTS=y 717 715 CONFIG_FONT_8x8=y
-1
arch/arm/configs/spitz_defconfig
··· 235 235 CONFIG_CRYPTO_SHA512=m 236 236 CONFIG_CRYPTO_WP512=m 237 237 CONFIG_CRC_CCITT=y 238 - CONFIG_LIBCRC32C=m 239 238 CONFIG_FONTS=y 240 239 CONFIG_FONT_8x8=y 241 240 CONFIG_FONT_8x16=y
-1
arch/arm/configs/stm32_defconfig
··· 75 75 # CONFIG_INOTIFY_USER is not set 76 76 CONFIG_NLS=y 77 77 CONFIG_CRC_ITU_T=y 78 - CONFIG_CRC7=y 79 78 CONFIG_PRINTK_TIME=y 80 79 # CONFIG_DEBUG_BUGVERBOSE is not set 81 80 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
-1
arch/arm/configs/wpcm450_defconfig
··· 193 193 CONFIG_SYSTEM_TRUSTED_KEYRING=y 194 194 CONFIG_CRC_CCITT=y 195 195 CONFIG_CRC_ITU_T=m 196 - CONFIG_LIBCRC32C=y 197 196 CONFIG_PRINTK_TIME=y 198 197 CONFIG_DEBUG_KERNEL=y 199 198 CONFIG_MAGIC_SYSRQ=y
-6
arch/arm/lib/crc-t10dif-glue.c
··· 69 69 } 70 70 module_exit(crc_t10dif_arm_exit); 71 71 72 - bool crc_t10dif_is_optimized(void) 73 - { 74 - return static_key_enabled(&have_neon); 75 - } 76 - EXPORT_SYMBOL(crc_t10dif_is_optimized); 77 - 78 72 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 79 73 MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions"); 80 74 MODULE_LICENSE("GPL v2");
+6 -6
arch/arm/lib/crc32-glue.c
··· 59 59 } 60 60 EXPORT_SYMBOL(crc32_le_arch); 61 61 62 - static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len) 62 + static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len) 63 63 { 64 64 if (static_branch_likely(&have_crc32)) 65 65 return crc32c_armv8_le(crc, p, len); 66 - return crc32c_le_base(crc, p, len); 66 + return crc32c_base(crc, p, len); 67 67 } 68 68 69 - u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) 69 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 70 70 { 71 71 if (len >= PMULL_MIN_LEN + 15 && 72 72 static_branch_likely(&have_pmull) && crypto_simd_usable()) { ··· 74 74 75 75 /* align p to 16-byte boundary */ 76 76 if (n) { 77 - crc = crc32c_le_scalar(crc, p, n); 77 + crc = crc32c_scalar(crc, p, n); 78 78 p += n; 79 79 len -= n; 80 80 } ··· 85 85 p += n; 86 86 len -= n; 87 87 } 88 - return crc32c_le_scalar(crc, p, len); 88 + return crc32c_scalar(crc, p, len); 89 89 } 90 - EXPORT_SYMBOL(crc32c_le_arch); 90 + EXPORT_SYMBOL(crc32c_arch); 91 91 92 92 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 93 93 {
-6
arch/arm64/lib/crc-t10dif-glue.c
··· 70 70 } 71 71 module_exit(crc_t10dif_arm64_exit); 72 72 73 - bool crc_t10dif_is_optimized(void) 74 - { 75 - return static_key_enabled(&have_asimd); 76 - } 77 - EXPORT_SYMBOL(crc_t10dif_is_optimized); 78 - 79 73 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 80 74 MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions"); 81 75 MODULE_LICENSE("GPL v2");
+5 -5
arch/arm64/lib/crc32-glue.c
··· 22 22 asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); 23 23 asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); 24 24 25 - u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) 25 + u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) 26 26 { 27 27 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 28 28 return crc32_le_base(crc, p, len); ··· 43 43 } 44 44 EXPORT_SYMBOL(crc32_le_arch); 45 45 46 - u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) 46 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 47 47 { 48 48 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 49 - return crc32c_le_base(crc, p, len); 49 + return crc32c_base(crc, p, len); 50 50 51 51 if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { 52 52 kernel_neon_begin(); ··· 62 62 63 63 return crc32c_le_arm64(crc, p, len); 64 64 } 65 - EXPORT_SYMBOL(crc32c_le_arch); 65 + EXPORT_SYMBOL(crc32c_arch); 66 66 67 - u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) 67 + u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 68 68 { 69 69 if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) 70 70 return crc32_be_base(crc, p, len);
-1
arch/hexagon/configs/comet_defconfig
··· 75 75 CONFIG_CRC_CCITT=y 76 76 CONFIG_CRC16=y 77 77 CONFIG_CRC_T10DIF=y 78 - CONFIG_LIBCRC32C=y 79 78 CONFIG_FRAME_WARN=0 80 79 CONFIG_MAGIC_SYSRQ=y 81 80 CONFIG_DEBUG_FS=y
+3 -3
arch/loongarch/lib/crc32-loongarch.c
··· 65 65 } 66 66 EXPORT_SYMBOL(crc32_le_arch); 67 67 68 - u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) 68 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 69 69 { 70 70 if (!static_branch_likely(&have_crc32)) 71 - return crc32c_le_base(crc, p, len); 71 + return crc32c_base(crc, p, len); 72 72 73 73 while (len >= sizeof(u64)) { 74 74 u64 value = get_unaligned_le64(p); ··· 100 100 101 101 return crc; 102 102 } 103 - EXPORT_SYMBOL(crc32c_le_arch); 103 + EXPORT_SYMBOL(crc32c_arch); 104 104 105 105 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 106 106 {
-1
arch/mips/configs/bcm47xx_defconfig
··· 69 69 CONFIG_USB_HCD_SSB=y 70 70 CONFIG_LEDS_TRIGGER_TIMER=y 71 71 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y 72 - CONFIG_CRC32_SARWATE=y 73 72 CONFIG_PRINTK_TIME=y 74 73 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 75 74 CONFIG_DEBUG_INFO_REDUCED=y
-1
arch/mips/configs/bigsur_defconfig
··· 239 239 CONFIG_CRYPTO_TWOFISH=m 240 240 CONFIG_CRYPTO_LZO=m 241 241 CONFIG_CRC_T10DIF=m 242 - CONFIG_CRC7=m 243 242 CONFIG_MAGIC_SYSRQ=y 244 243 CONFIG_DEBUG_MEMORY_INIT=y 245 244 CONFIG_DETECT_HUNG_TASK=y
-1
arch/mips/configs/cobalt_defconfig
··· 70 70 CONFIG_NFS_V3_ACL=y 71 71 CONFIG_NFSD=y 72 72 CONFIG_NFSD_V3_ACL=y 73 - CONFIG_LIBCRC32C=y
-1
arch/mips/configs/db1xxx_defconfig
··· 216 216 CONFIG_CRYPTO_CRYPTD=y 217 217 CONFIG_CRYPTO_USER_API_HASH=y 218 218 CONFIG_CRYPTO_USER_API_SKCIPHER=y 219 - CONFIG_CRC32_SLICEBY4=y 220 219 CONFIG_FONTS=y 221 220 CONFIG_FONT_8x8=y 222 221 CONFIG_MAGIC_SYSRQ=y
-1
arch/mips/configs/decstation_64_defconfig
··· 180 180 CONFIG_CRYPTO_CMAC=m 181 181 CONFIG_CRYPTO_XCBC=m 182 182 CONFIG_CRYPTO_CRC32=m 183 - CONFIG_CRYPTO_CRCT10DIF=m 184 183 CONFIG_CRYPTO_MD4=m 185 184 CONFIG_CRYPTO_MICHAEL_MIC=m 186 185 CONFIG_CRYPTO_RMD160=m
-1
arch/mips/configs/decstation_defconfig
··· 175 175 CONFIG_CRYPTO_CMAC=m 176 176 CONFIG_CRYPTO_XCBC=m 177 177 CONFIG_CRYPTO_CRC32=m 178 - CONFIG_CRYPTO_CRCT10DIF=m 179 178 CONFIG_CRYPTO_MD4=m 180 179 CONFIG_CRYPTO_MICHAEL_MIC=m 181 180 CONFIG_CRYPTO_RMD160=m
-1
arch/mips/configs/decstation_r4k_defconfig
··· 175 175 CONFIG_CRYPTO_CMAC=m 176 176 CONFIG_CRYPTO_XCBC=m 177 177 CONFIG_CRYPTO_CRC32=m 178 - CONFIG_CRYPTO_CRCT10DIF=m 179 178 CONFIG_CRYPTO_MD4=m 180 179 CONFIG_CRYPTO_MICHAEL_MIC=m 181 180 CONFIG_CRYPTO_RMD160=m
-1
arch/mips/configs/fuloong2e_defconfig
··· 219 219 CONFIG_CRYPTO_LZO=m 220 220 # CONFIG_CRYPTO_HW is not set 221 221 CONFIG_CRC_CCITT=y 222 - CONFIG_CRC7=m
-1
arch/mips/configs/ip32_defconfig
··· 178 178 CONFIG_CRYPTO_TWOFISH=y 179 179 CONFIG_CRYPTO_DEFLATE=y 180 180 CONFIG_CRC_T10DIF=y 181 - CONFIG_LIBCRC32C=y 182 181 CONFIG_FONTS=y 183 182 CONFIG_FONT_8x8=y 184 183 CONFIG_FONT_8x16=y
-1
arch/mips/configs/rt305x_defconfig
··· 129 129 CONFIG_SQUASHFS_XZ=y 130 130 CONFIG_CRYPTO_ARC4=m 131 131 CONFIG_CRC_ITU_T=m 132 - CONFIG_CRC32_SARWATE=y 133 132 # CONFIG_XZ_DEC_X86 is not set 134 133 # CONFIG_XZ_DEC_POWERPC is not set 135 134 # CONFIG_XZ_DEC_IA64 is not set
-1
arch/mips/configs/xway_defconfig
··· 141 141 CONFIG_SQUASHFS_XZ=y 142 142 CONFIG_CRYPTO_ARC4=m 143 143 CONFIG_CRC_ITU_T=m 144 - CONFIG_CRC32_SARWATE=y 145 144 CONFIG_PRINTK_TIME=y 146 145 CONFIG_STRIP_ASM_SYMS=y 147 146 CONFIG_DEBUG_FS=y
+3 -12
arch/mips/lib/crc32-mips.c
··· 16 16 #include <asm/mipsregs.h> 17 17 #include <linux/unaligned.h> 18 18 19 - enum crc_op_size { 20 - b, h, w, d, 21 - }; 22 - 23 - enum crc_type { 24 - crc32, 25 - crc32c, 26 - }; 27 - 28 19 #ifndef TOOLCHAIN_SUPPORTS_CRC 29 20 #define _ASM_SET_CRC(OP, SZ, TYPE) \ 30 21 _ASM_MACRO_3R(OP, rt, rs, rt2, \ ··· 108 117 } 109 118 EXPORT_SYMBOL(crc32_le_arch); 110 119 111 - u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) 120 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 112 121 { 113 122 if (!static_branch_likely(&have_crc32)) 114 - return crc32c_le_base(crc, p, len); 123 + return crc32c_base(crc, p, len); 115 124 116 125 if (IS_ENABLED(CONFIG_64BIT)) { 117 126 for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) { ··· 149 158 } 150 159 return crc; 151 160 } 152 - EXPORT_SYMBOL(crc32c_le_arch); 161 + EXPORT_SYMBOL(crc32c_arch); 153 162 154 163 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 155 164 {
-1
arch/parisc/configs/generic-64bit_defconfig
··· 293 293 CONFIG_CRYPTO_DEFLATE=m 294 294 # CONFIG_CRYPTO_HW is not set 295 295 CONFIG_CRC_CCITT=m 296 - CONFIG_LIBCRC32C=y 297 296 CONFIG_PRINTK_TIME=y 298 297 CONFIG_DEBUG_KERNEL=y 299 298 CONFIG_STRIP_ASM_SYMS=y
-1
arch/powerpc/configs/85xx/ge_imp3a_defconfig
··· 223 223 CONFIG_NLS_UTF8=y 224 224 CONFIG_CRC_CCITT=y 225 225 CONFIG_CRC_T10DIF=y 226 - CONFIG_LIBCRC32C=y 227 226 CONFIG_MAGIC_SYSRQ=y 228 227 CONFIG_CRYPTO_CBC=y 229 228 CONFIG_CRYPTO_MD5=y
-1
arch/powerpc/configs/adder875_defconfig
··· 44 44 CONFIG_CRAMFS=y 45 45 CONFIG_NFS_FS=y 46 46 CONFIG_ROOT_NFS=y 47 - CONFIG_CRC32_SLICEBY4=y 48 47 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 49 48 CONFIG_DEBUG_FS=y 50 49 CONFIG_MAGIC_SYSRQ=y
-1
arch/powerpc/configs/ep88xc_defconfig
··· 47 47 CONFIG_CRAMFS=y 48 48 CONFIG_NFS_FS=y 49 49 CONFIG_ROOT_NFS=y 50 - CONFIG_CRC32_SLICEBY4=y 51 50 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 52 51 CONFIG_MAGIC_SYSRQ=y 53 52 CONFIG_DETECT_HUNG_TASK=y
-1
arch/powerpc/configs/mpc866_ads_defconfig
··· 39 39 CONFIG_NFS_FS=y 40 40 CONFIG_ROOT_NFS=y 41 41 CONFIG_CRC_CCITT=y 42 - CONFIG_CRC32_SLICEBY4=y
-1
arch/powerpc/configs/mpc885_ads_defconfig
··· 70 70 CONFIG_ROOT_NFS=y 71 71 CONFIG_CRYPTO=y 72 72 CONFIG_CRYPTO_DEV_TALITOS=y 73 - CONFIG_CRC32_SLICEBY4=y 74 73 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 75 74 CONFIG_MAGIC_SYSRQ=y 76 75 CONFIG_DEBUG_FS=y
-1
arch/powerpc/configs/skiroot_defconfig
··· 281 281 # CONFIG_CRYPTO_HW is not set 282 282 CONFIG_CRC16=y 283 283 CONFIG_CRC_ITU_T=y 284 - CONFIG_LIBCRC32C=y 285 284 # CONFIG_XZ_DEC_X86 is not set 286 285 # CONFIG_XZ_DEC_IA64 is not set 287 286 # CONFIG_XZ_DEC_ARM is not set
-1
arch/powerpc/configs/tqm8xx_defconfig
··· 54 54 CONFIG_CRAMFS=y 55 55 CONFIG_NFS_FS=y 56 56 CONFIG_ROOT_NFS=y 57 - CONFIG_CRC32_SLICEBY4=y 58 57 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y 59 58 CONFIG_MAGIC_SYSRQ=y 60 59 CONFIG_DETECT_HUNG_TASK=y
-6
arch/powerpc/lib/crc-t10dif-glue.c
··· 78 78 } 79 79 module_exit(crc_t10dif_powerpc_exit); 80 80 81 - bool crc_t10dif_is_optimized(void) 82 - { 83 - return static_key_enabled(&have_vec_crypto); 84 - } 85 - EXPORT_SYMBOL(crc_t10dif_is_optimized); 86 - 87 81 MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>"); 88 82 MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); 89 83 MODULE_LICENSE("GPL");
+5 -5
arch/powerpc/lib/crc32-glue.c
··· 23 23 } 24 24 EXPORT_SYMBOL(crc32_le_arch); 25 25 26 - u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) 26 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 27 27 { 28 28 unsigned int prealign; 29 29 unsigned int tail; 30 30 31 31 if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || 32 32 !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) 33 - return crc32c_le_base(crc, p, len); 33 + return crc32c_base(crc, p, len); 34 34 35 35 if ((unsigned long)p & VMX_ALIGN_MASK) { 36 36 prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); 37 - crc = crc32c_le_base(crc, p, prealign); 37 + crc = crc32c_base(crc, p, prealign); 38 38 len -= prealign; 39 39 p += prealign; 40 40 } ··· 52 52 tail = len & VMX_ALIGN_MASK; 53 53 if (tail) { 54 54 p += len & ~VMX_ALIGN_MASK; 55 - crc = crc32c_le_base(crc, p, tail); 55 + crc = crc32c_base(crc, p, tail); 56 56 } 57 57 58 58 return crc; 59 59 } 60 - EXPORT_SYMBOL(crc32c_le_arch); 60 + EXPORT_SYMBOL(crc32c_arch); 61 61 62 62 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 63 63 {
+2
arch/riscv/Kconfig
··· 25 25 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE 26 26 select ARCH_HAS_BINFMT_FLAT 27 27 select ARCH_HAS_CRC32 if RISCV_ISA_ZBC 28 + select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC 29 + select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC 28 30 select ARCH_HAS_CURRENT_STACK_POINTER 29 31 select ARCH_HAS_DEBUG_VIRTUAL if MMU 30 32 select ARCH_HAS_DEBUG_VM_PGTABLE
+5
arch/riscv/lib/Makefile
··· 16 16 lib-$(CONFIG_64BIT) += tishift.o 17 17 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o 18 18 obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o 19 + crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o 20 + obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o 21 + crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o 22 + obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o 23 + crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o 19 24 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 20 25 lib-$(CONFIG_RISCV_ISA_V) += xor.o 21 26 lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
+122
arch/riscv/lib/crc-clmul-consts.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * CRC constants generated by: 4 + * 5 + * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 6 + * 7 + * Do not edit manually. 8 + */ 9 + 10 + struct crc_clmul_consts { 11 + unsigned long fold_across_2_longs_const_hi; 12 + unsigned long fold_across_2_longs_const_lo; 13 + unsigned long barrett_reduction_const_1; 14 + unsigned long barrett_reduction_const_2; 15 + }; 16 + 17 + /* 18 + * Constants generated for most-significant-bit-first CRC-16 using 19 + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 20 + */ 21 + static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = { 22 + #ifdef CONFIG_64BIT 23 + .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */ 24 + .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */ 25 + .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */ 26 + .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */ 27 + #else 28 + .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */ 29 + .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */ 30 + .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */ 31 + .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */ 32 + #endif 33 + }; 34 + 35 + /* 36 + * Constants generated for most-significant-bit-first CRC-32 using 37 + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + 38 + * x^5 + x^4 + x^2 + x^1 + x^0 39 + */ 40 + static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = { 41 + #ifdef CONFIG_64BIT 42 + .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */ 43 + .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */ 44 + .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */ 45 + .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */ 46 + #else 47 + .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */ 48 + .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */ 49 + .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */ 50 + .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */ 51 + #endif 52 + }; 53 + 54 + /* 55 + * Constants generated for least-significant-bit-first CRC-32 using 56 + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + 57 + * x^5 + x^4 + x^2 + x^1 + x^0 58 + */ 59 + static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = { 60 + #ifdef CONFIG_64BIT 61 + .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */ 62 + .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */ 63 + .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */ 64 + .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */ 65 + #else 66 + .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */ 67 + .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */ 68 + .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */ 69 + .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */ 70 + #endif 71 + }; 72 + 73 + /* 74 + * Constants generated for least-significant-bit-first CRC-32 using 75 + * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + 76 + * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 77 + */ 78 + static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = { 79 + #ifdef CONFIG_64BIT 80 + .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */ 81 + .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */ 82 + .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */ 83 + .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */ 84 + #else 85 + .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */ 86 + .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */ 87 + .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */ 88 + .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */ 89 + #endif 90 + }; 91 + 92 + /* 93 + * Constants generated for most-significant-bit-first CRC-64 using 94 + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + 95 + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + 96 + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + 97 + * x^7 + x^4 + x^1 + x^0 98 + */ 99 + #ifdef CONFIG_64BIT 100 + static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = { 101 + .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */ 102 + .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */ 103 + .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */ 104 + .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */ 105 + }; 106 + #endif 107 + 108 + /* 109 + * Constants generated for least-significant-bit-first CRC-64 using 110 + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + 111 + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + 112 + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + 113 + * x^4 + x^3 + x^0 114 + */ 115 + #ifdef CONFIG_64BIT 116 + static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = { 117 + .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */ 118 + .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */ 119 + .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */ 120 + .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */ 121 + }; 122 + #endif
+265
arch/riscv/lib/crc-clmul-template.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* Copyright 2025 Google LLC */ 3 + 4 + /* 5 + * This file is a "template" that generates a CRC function optimized using the 6 + * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this 7 + * file must define the following parameters to specify the type of CRC: 8 + * 9 + * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC 10 + * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural 11 + * mapping between bits and polynomial coefficients 12 + * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected 13 + * mapping between bits and polynomial coefficients 14 + */ 15 + 16 + #include <asm/byteorder.h> 17 + #include <linux/minmax.h> 18 + 19 + #define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */ 20 + 21 + static inline unsigned long clmul(unsigned long a, unsigned long b) 22 + { 23 + unsigned long res; 24 + 25 + asm(".option push\n" 26 + ".option arch,+zbc\n" 27 + "clmul %0, %1, %2\n" 28 + ".option pop\n" 29 + : "=r" (res) : "r" (a), "r" (b)); 30 + return res; 31 + } 32 + 33 + static inline unsigned long clmulh(unsigned long a, unsigned long b) 34 + { 35 + unsigned long res; 36 + 37 + asm(".option push\n" 38 + ".option arch,+zbc\n" 39 + "clmulh %0, %1, %2\n" 40 + ".option pop\n" 41 + : "=r" (res) : "r" (a), "r" (b)); 42 + return res; 43 + } 44 + 45 + static inline unsigned long clmulr(unsigned long a, unsigned long b) 46 + { 47 + unsigned long res; 48 + 49 + asm(".option push\n" 50 + ".option arch,+zbc\n" 51 + "clmulr %0, %1, %2\n" 52 + ".option pop\n" 53 + : "=r" (res) : "r" (a), "r" (b)); 54 + return res; 55 + } 56 + 57 + /* 58 + * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a 59 + * polynomial whose bit order matches the CRC's bit order. 60 + */ 61 + #ifdef CONFIG_64BIT 62 + # if LSB_CRC 63 + # define crc_load_long(x) le64_to_cpup(x) 64 + # else 65 + # define crc_load_long(x) be64_to_cpup(x) 66 + # endif 67 + #else 68 + # if LSB_CRC 69 + # define crc_load_long(x) le32_to_cpup(x) 70 + # else 71 + # define crc_load_long(x) be32_to_cpup(x) 72 + # endif 73 + #endif 74 + 75 + /* XOR @crc into the end of @msgpoly that represents the high-order terms. */ 76 + static inline unsigned long 77 + crc_clmul_prep(crc_t crc, unsigned long msgpoly) 78 + { 79 + #if LSB_CRC 80 + return msgpoly ^ crc; 81 + #else 82 + return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS)); 83 + #endif 84 + } 85 + 86 + /* 87 + * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it 88 + * modulo the generator polynomial G. This gives the CRC of @msgpoly. 89 + */ 90 + static inline crc_t 91 + crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts) 92 + { 93 + unsigned long tmp; 94 + 95 + /* 96 + * First step of Barrett reduction with integrated multiplication by 97 + * x^n: calculate floor((msgpoly * x^n) / G). This is the value by 98 + * which G needs to be multiplied to cancel out the x^n and higher terms 99 + * of msgpoly * x^n. Do it using the following formula: 100 + * 101 + * msb-first: 102 + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1)) 103 + * lsb-first: 104 + * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG) 105 + * 106 + * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G), 107 + * which fits a long exactly. Using any lower power of x there would 108 + * not carry enough precision through the calculation, while using any 109 + * higher power of x would require extra instructions to handle a wider 110 + * multiplication. In the msb-first case, using this power of x results 111 + * in needing a floored division by x^(BITS_PER_LONG-1), which matches 112 + * what clmulr produces. In the lsb-first case, a factor of x gets 113 + * implicitly introduced by each carryless multiplication (shown as 114 + * '* x' above), and the floored division instead needs to be by 115 + * x^BITS_PER_LONG which matches what clmul produces. 116 + */ 117 + #if LSB_CRC 118 + tmp = clmul(msgpoly, consts->barrett_reduction_const_1); 119 + #else 120 + tmp = clmulr(msgpoly, consts->barrett_reduction_const_1); 121 + #endif 122 + 123 + /* 124 + * Second step of Barrett reduction: 125 + * 126 + * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G)) 127 + * 128 + * This reduces (msgpoly * x^n) modulo G by adding the appropriate 129 + * multiple of G to it. The result uses only the x^0..x^(n-1) terms. 130 + * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those 131 + * terms in the first place, it is more efficient to do the equivalent: 132 + * 133 + * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n 134 + * 135 + * In the lsb-first case further modify it to the following which avoids 136 + * a shift, as the crc ends up in the physically low n bits from clmulr: 137 + * 138 + * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x 139 + * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n 140 + * 141 + * barrett_reduction_const_2 contains the constant multiplier (G - x^n) 142 + * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The 143 + * cast of the result to crc_t is essential, as it applies the mod x^n! 144 + */ 145 + #if LSB_CRC 146 + return clmulr(tmp, consts->barrett_reduction_const_2); 147 + #else 148 + return clmul(tmp, consts->barrett_reduction_const_2); 149 + #endif 150 + } 151 + 152 + /* Update @crc with the data from @msgpoly. */ 153 + static inline crc_t 154 + crc_clmul_update_long(crc_t crc, unsigned long msgpoly, 155 + const struct crc_clmul_consts *consts) 156 + { 157 + return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts); 158 + } 159 + 160 + /* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */ 161 + static inline crc_t 162 + crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len, 163 + const struct crc_clmul_consts *consts) 164 + { 165 + unsigned long msgpoly; 166 + size_t i; 167 + 168 + #if LSB_CRC 169 + msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8); 170 + for (i = 1; i < len; i++) 171 + msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8)); 172 + #else 173 + msgpoly = p[0]; 174 + for (i = 1; i < len; i++) 175 + msgpoly = (msgpoly << 8) ^ p[i]; 176 + #endif 177 + 178 + if (len >= sizeof(crc_t)) { 179 + #if LSB_CRC 180 + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); 181 + #else 182 + msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS); 183 + #endif 184 + return crc_clmul_long(msgpoly, consts); 185 + } 186 + #if LSB_CRC 187 + msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len); 188 + return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len)); 189 + #else 190 + msgpoly ^= crc >> (CRC_BITS - 8*len); 191 + return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len)); 192 + #endif 193 + } 194 + 195 + static inline crc_t 196 + crc_clmul(crc_t crc, const void *p, size_t len, 197 + const struct crc_clmul_consts *consts) 198 + { 199 + size_t align; 200 + 201 + /* This implementation assumes that the CRC fits in an unsigned long. */ 202 + BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long)); 203 + 204 + /* If the buffer is not long-aligned, align it. */ 205 + align = (unsigned long)p % sizeof(unsigned long); 206 + if (align && len) { 207 + align = min(sizeof(unsigned long) - align, len); 208 + crc = crc_clmul_update_partial(crc, p, align, consts); 209 + p += align; 210 + len -= align; 211 + } 212 + 213 + if (len >= 4 * sizeof(unsigned long)) { 214 + unsigned long m0, m1; 215 + 216 + m0 = crc_clmul_prep(crc, crc_load_long(p)); 217 + m1 = crc_load_long(p + sizeof(unsigned long)); 218 + p += 2 * sizeof(unsigned long); 219 + len -= 2 * sizeof(unsigned long); 220 + /* 221 + * Main loop. Each iteration starts with a message polynomial 222 + * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two 223 + * more longs of data to form x^(3*BITS_PER_LONG)*m0 + 224 + * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then 225 + * "folds" that back into a congruent (modulo G) value that uses 226 + * just m0 and m1 again. This is done by multiplying m0 by the 227 + * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by 228 + * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then 229 + * adding the results to m2 and m3 as appropriate. Each such 230 + * multiplication produces a result twice the length of a long, 231 + * which in RISC-V is two instructions clmul and clmulh. 232 + * 233 + * This could be changed to fold across more than 2 longs at a 234 + * time if there is a CPU that can take advantage of it. 235 + */ 236 + do { 237 + unsigned long p0, p1, p2, p3; 238 + 239 + p0 = clmulh(m0, consts->fold_across_2_longs_const_hi); 240 + p1 = clmul(m0, consts->fold_across_2_longs_const_hi); 241 + p2 = clmulh(m1, consts->fold_across_2_longs_const_lo); 242 + p3 = clmul(m1, consts->fold_across_2_longs_const_lo); 243 + m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p); 244 + m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^ 245 + crc_load_long(p + sizeof(unsigned long)); 246 + 247 + p += 2 * sizeof(unsigned long); 248 + len -= 2 * sizeof(unsigned long); 249 + } while (len >= 2 * sizeof(unsigned long)); 250 + 251 + crc = crc_clmul_long(m0, consts); 252 + crc = crc_clmul_update_long(crc, m1, consts); 253 + } 254 + 255 + while (len >= sizeof(unsigned long)) { 256 + crc = crc_clmul_update_long(crc, crc_load_long(p), consts); 257 + p += sizeof(unsigned long); 258 + len -= sizeof(unsigned long); 259 + } 260 + 261 + if (len) 262 + crc = crc_clmul_update_partial(crc, p, len, consts); 263 + 264 + return crc; 265 + }
+23
arch/riscv/lib/crc-clmul.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* Copyright 2025 Google LLC */ 3 + 4 + #ifndef _RISCV_CRC_CLMUL_H 5 + #define _RISCV_CRC_CLMUL_H 6 + 7 + #include <linux/types.h> 8 + #include "crc-clmul-consts.h" 9 + 10 + u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, 11 + const struct crc_clmul_consts *consts); 12 + u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, 13 + const struct crc_clmul_consts *consts); 14 + u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts); 16 + #ifdef CONFIG_64BIT 17 + u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, 18 + const struct crc_clmul_consts *consts); 19 + u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, 20 + const struct crc_clmul_consts *consts); 21 + #endif 22 + 23 + #endif /* _RISCV_CRC_CLMUL_H */
+24
arch/riscv/lib/crc-t10dif.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized CRC-T10DIF function 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include <asm/hwcap.h> 9 + #include <asm/alternative-macros.h> 10 + #include <linux/crc-t10dif.h> 11 + #include <linux/module.h> 12 + 13 + #include "crc-clmul.h" 14 + 15 + u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) 16 + { 17 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 18 + return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts); 19 + return crc_t10dif_generic(crc, p, len); 20 + } 21 + EXPORT_SYMBOL(crc_t10dif_arch); 22 + 23 + MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function"); 24 + MODULE_LICENSE("GPL");
+18
arch/riscv/lib/crc16_msb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized most-significant-bit-first CRC16 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include "crc-clmul.h" 9 + 10 + typedef u16 crc_t; 11 + #define LSB_CRC 0 12 + #include "crc-clmul-template.h" 13 + 14 + u16 crc16_msb_clmul(u16 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts) 16 + { 17 + return crc_clmul(crc, p, len, consts); 18 + }
-311
arch/riscv/lib/crc32-riscv.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Accelerated CRC32 implementation with Zbc extension. 4 - * 5 - * Copyright (C) 2024 Intel Corporation 6 - */ 7 - 8 - #include <asm/hwcap.h> 9 - #include <asm/alternative-macros.h> 10 - #include <asm/byteorder.h> 11 - 12 - #include <linux/types.h> 13 - #include <linux/minmax.h> 14 - #include <linux/crc32poly.h> 15 - #include <linux/crc32.h> 16 - #include <linux/byteorder/generic.h> 17 - #include <linux/module.h> 18 - 19 - /* 20 - * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for 21 - * better understanding of how this math works. 22 - * 23 - * let "+" denotes polynomial add (XOR) 24 - * let "-" denotes polynomial sub (XOR) 25 - * let "*" denotes polynomial multiplication 26 - * let "/" denotes polynomial floor division 27 - * let "S" denotes source data, XLEN bit wide 28 - * let "P" denotes CRC32 polynomial 29 - * let "T" denotes 2^(XLEN+32) 30 - * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit 31 - * 32 - * crc32(S, P) 33 - * => S * (2^32) - S * (2^32) / P * P 34 - * => lowest 32 bits of: S * (2^32) / P * P 35 - * => lowest 32 bits of: S * (2^32) * (T / P) / T * P 36 - * => lowest 32 bits of: S * (2^32) * quotient / T * P 37 - * => lowest 32 bits of: S * quotient / 2^XLEN * P 38 - * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P 39 - * => clmul_low_part(clmul_high_part(S, QT) + S, P) 40 - * 41 - * In terms of below implementations, the BE case is more intuitive, since the 42 - * higher order bit sits at more significant position. 
43 - */ 44 - 45 - #if __riscv_xlen == 64 46 - /* Slide by XLEN bits per iteration */ 47 - # define STEP_ORDER 3 48 - 49 - /* Each below polynomial quotient has an implicit bit for 2^XLEN */ 50 - 51 - /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ 52 - # define CRC32_POLY_QT_LE 0x5a72d812fb808b20 53 - 54 - /* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ 55 - # define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 56 - 57 - /* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be 58 - * the same as the bit-reversed version of CRC32_POLY_QT_LE 59 - */ 60 - # define CRC32_POLY_QT_BE 0x04d101df481b4e5a 61 - 62 - static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) 63 - { 64 - return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); 65 - } 66 - 67 - static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) 68 - { 69 - u32 crc; 70 - 71 - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ 72 - asm volatile (".option push\n" 73 - ".option arch,+zbc\n" 74 - "clmul %0, %1, %2\n" 75 - "slli %0, %0, 1\n" 76 - "xor %0, %0, %1\n" 77 - "clmulr %0, %0, %3\n" 78 - "srli %0, %0, 32\n" 79 - ".option pop\n" 80 - : "=&r" (crc) 81 - : "r" (s), 82 - "r" (poly_qt), 83 - "r" ((u64)poly << 32) 84 - :); 85 - return crc; 86 - } 87 - 88 - static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) 89 - { 90 - return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); 91 - } 92 - 93 - #elif __riscv_xlen == 32 94 - # define STEP_ORDER 2 95 - /* Each quotient should match the upper half of its analog in RV64 */ 96 - # define CRC32_POLY_QT_LE 0xfb808b20 97 - # define CRC32C_POLY_QT_LE 0x6f5389f8 98 - # define CRC32_POLY_QT_BE 0x04d101df 99 - 100 - static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) 101 - { 102 - return crc ^ (__force u32)__cpu_to_le32(*ptr); 103 - } 104 - 105 - static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) 106 - { 107 - u32 
crc; 108 - 109 - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ 110 - asm volatile (".option push\n" 111 - ".option arch,+zbc\n" 112 - "clmul %0, %1, %2\n" 113 - "slli %0, %0, 1\n" 114 - "xor %0, %0, %1\n" 115 - "clmulr %0, %0, %3\n" 116 - ".option pop\n" 117 - : "=&r" (crc) 118 - : "r" (s), 119 - "r" (poly_qt), 120 - "r" (poly) 121 - :); 122 - return crc; 123 - } 124 - 125 - static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) 126 - { 127 - return crc ^ (__force u32)__cpu_to_be32(*ptr); 128 - } 129 - 130 - #else 131 - # error "Unexpected __riscv_xlen" 132 - #endif 133 - 134 - static inline u32 crc32_be_zbc(unsigned long s) 135 - { 136 - u32 crc; 137 - 138 - asm volatile (".option push\n" 139 - ".option arch,+zbc\n" 140 - "clmulh %0, %1, %2\n" 141 - "xor %0, %0, %1\n" 142 - "clmul %0, %0, %3\n" 143 - ".option pop\n" 144 - : "=&r" (crc) 145 - : "r" (s), 146 - "r" (CRC32_POLY_QT_BE), 147 - "r" (CRC32_POLY_BE) 148 - :); 149 - return crc; 150 - } 151 - 152 - #define STEP (1 << STEP_ORDER) 153 - #define OFFSET_MASK (STEP - 1) 154 - 155 - typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); 156 - 157 - static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, 158 - size_t len, u32 poly, 159 - unsigned long poly_qt) 160 - { 161 - size_t bits = len * 8; 162 - unsigned long s = 0; 163 - u32 crc_low = 0; 164 - 165 - for (int i = 0; i < len; i++) 166 - s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); 167 - 168 - s ^= (unsigned long)crc << (__riscv_xlen - bits); 169 - if (__riscv_xlen == 32 || len < sizeof(u32)) 170 - crc_low = crc >> bits; 171 - 172 - crc = crc32_le_zbc(s, poly, poly_qt); 173 - crc ^= crc_low; 174 - 175 - return crc; 176 - } 177 - 178 - static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, 179 - size_t len, u32 poly, 180 - unsigned long poly_qt, 181 - fallback crc_fb) 182 - { 183 - size_t offset, head_len, tail_len; 184 - unsigned long const *p_ul; 185 - unsigned 
long s; 186 - 187 - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, 188 - RISCV_ISA_EXT_ZBC, 1) 189 - : : : : legacy); 190 - 191 - /* Handle the unaligned head. */ 192 - offset = (unsigned long)p & OFFSET_MASK; 193 - if (offset && len) { 194 - head_len = min(STEP - offset, len); 195 - crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); 196 - p += head_len; 197 - len -= head_len; 198 - } 199 - 200 - tail_len = len & OFFSET_MASK; 201 - len = len >> STEP_ORDER; 202 - p_ul = (unsigned long const *)p; 203 - 204 - for (int i = 0; i < len; i++) { 205 - s = crc32_le_prep(crc, p_ul); 206 - crc = crc32_le_zbc(s, poly, poly_qt); 207 - p_ul++; 208 - } 209 - 210 - /* Handle the tail bytes. */ 211 - p = (unsigned char const *)p_ul; 212 - if (tail_len) 213 - crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); 214 - 215 - return crc; 216 - 217 - legacy: 218 - return crc_fb(crc, p, len); 219 - } 220 - 221 - u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) 222 - { 223 - return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, 224 - crc32_le_base); 225 - } 226 - EXPORT_SYMBOL(crc32_le_arch); 227 - 228 - u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) 229 - { 230 - return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, 231 - CRC32C_POLY_QT_LE, crc32c_le_base); 232 - } 233 - EXPORT_SYMBOL(crc32c_le_arch); 234 - 235 - static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, 236 - size_t len) 237 - { 238 - size_t bits = len * 8; 239 - unsigned long s = 0; 240 - u32 crc_low = 0; 241 - 242 - s = 0; 243 - for (int i = 0; i < len; i++) 244 - s = *p++ | (s << 8); 245 - 246 - if (__riscv_xlen == 32 || len < sizeof(u32)) { 247 - s ^= crc >> (32 - bits); 248 - crc_low = crc << bits; 249 - } else { 250 - s ^= (unsigned long)crc << (bits - 32); 251 - } 252 - 253 - crc = crc32_be_zbc(s); 254 - crc ^= crc_low; 255 - 256 - return crc; 257 - } 258 - 259 - u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) 260 - { 261 - size_t offset, 
head_len, tail_len; 262 - unsigned long const *p_ul; 263 - unsigned long s; 264 - 265 - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, 266 - RISCV_ISA_EXT_ZBC, 1) 267 - : : : : legacy); 268 - 269 - /* Handle the unaligned head. */ 270 - offset = (unsigned long)p & OFFSET_MASK; 271 - if (offset && len) { 272 - head_len = min(STEP - offset, len); 273 - crc = crc32_be_unaligned(crc, p, head_len); 274 - p += head_len; 275 - len -= head_len; 276 - } 277 - 278 - tail_len = len & OFFSET_MASK; 279 - len = len >> STEP_ORDER; 280 - p_ul = (unsigned long const *)p; 281 - 282 - for (int i = 0; i < len; i++) { 283 - s = crc32_be_prep(crc, p_ul); 284 - crc = crc32_be_zbc(s); 285 - p_ul++; 286 - } 287 - 288 - /* Handle the tail bytes. */ 289 - p = (unsigned char const *)p_ul; 290 - if (tail_len) 291 - crc = crc32_be_unaligned(crc, p, tail_len); 292 - 293 - return crc; 294 - 295 - legacy: 296 - return crc32_be_base(crc, p, len); 297 - } 298 - EXPORT_SYMBOL(crc32_be_arch); 299 - 300 - u32 crc32_optimizations(void) 301 - { 302 - if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 303 - return CRC32_LE_OPTIMIZATION | 304 - CRC32_BE_OPTIMIZATION | 305 - CRC32C_OPTIMIZATION; 306 - return 0; 307 - } 308 - EXPORT_SYMBOL(crc32_optimizations); 309 - 310 - MODULE_LICENSE("GPL"); 311 - MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
+53
arch/riscv/lib/crc32.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized CRC32 functions 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include <asm/hwcap.h> 9 + #include <asm/alternative-macros.h> 10 + #include <linux/crc32.h> 11 + #include <linux/module.h> 12 + 13 + #include "crc-clmul.h" 14 + 15 + u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) 16 + { 17 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 18 + return crc32_lsb_clmul(crc, p, len, 19 + &crc32_lsb_0xedb88320_consts); 20 + return crc32_le_base(crc, p, len); 21 + } 22 + EXPORT_SYMBOL(crc32_le_arch); 23 + 24 + u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 25 + { 26 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 27 + return crc32_msb_clmul(crc, p, len, 28 + &crc32_msb_0x04c11db7_consts); 29 + return crc32_be_base(crc, p, len); 30 + } 31 + EXPORT_SYMBOL(crc32_be_arch); 32 + 33 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 34 + { 35 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 36 + return crc32_lsb_clmul(crc, p, len, 37 + &crc32_lsb_0x82f63b78_consts); 38 + return crc32c_base(crc, p, len); 39 + } 40 + EXPORT_SYMBOL(crc32c_arch); 41 + 42 + u32 crc32_optimizations(void) 43 + { 44 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 45 + return CRC32_LE_OPTIMIZATION | 46 + CRC32_BE_OPTIMIZATION | 47 + CRC32C_OPTIMIZATION; 48 + return 0; 49 + } 50 + EXPORT_SYMBOL(crc32_optimizations); 51 + 52 + MODULE_DESCRIPTION("RISC-V optimized CRC32 functions"); 53 + MODULE_LICENSE("GPL");
+18
arch/riscv/lib/crc32_lsb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized least-significant-bit-first CRC32 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include "crc-clmul.h" 9 + 10 + typedef u32 crc_t; 11 + #define LSB_CRC 1 12 + #include "crc-clmul-template.h" 13 + 14 + u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts) 16 + { 17 + return crc_clmul(crc, p, len, consts); 18 + }
+18
arch/riscv/lib/crc32_msb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized most-significant-bit-first CRC32 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include "crc-clmul.h" 9 + 10 + typedef u32 crc_t; 11 + #define LSB_CRC 0 12 + #include "crc-clmul-template.h" 13 + 14 + u32 crc32_msb_clmul(u32 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts) 16 + { 17 + return crc_clmul(crc, p, len, consts); 18 + }
+34
arch/riscv/lib/crc64.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized CRC64 functions 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include <asm/hwcap.h> 9 + #include <asm/alternative-macros.h> 10 + #include <linux/crc64.h> 11 + #include <linux/module.h> 12 + 13 + #include "crc-clmul.h" 14 + 15 + u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) 16 + { 17 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 18 + return crc64_msb_clmul(crc, p, len, 19 + &crc64_msb_0x42f0e1eba9ea3693_consts); 20 + return crc64_be_generic(crc, p, len); 21 + } 22 + EXPORT_SYMBOL(crc64_be_arch); 23 + 24 + u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) 25 + { 26 + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) 27 + return crc64_lsb_clmul(crc, p, len, 28 + &crc64_lsb_0x9a6c9329ac4bc9b5_consts); 29 + return crc64_nvme_generic(crc, p, len); 30 + } 31 + EXPORT_SYMBOL(crc64_nvme_arch); 32 + 33 + MODULE_DESCRIPTION("RISC-V optimized CRC64 functions"); 34 + MODULE_LICENSE("GPL");
+18
arch/riscv/lib/crc64_lsb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized least-significant-bit-first CRC64 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include "crc-clmul.h" 9 + 10 + typedef u64 crc_t; 11 + #define LSB_CRC 1 12 + #include "crc-clmul-template.h" 13 + 14 + u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts) 16 + { 17 + return crc_clmul(crc, p, len, consts); 18 + }
+18
arch/riscv/lib/crc64_msb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * RISC-V optimized most-significant-bit-first CRC64 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include "crc-clmul.h" 9 + 10 + typedef u64 crc_t; 11 + #define LSB_CRC 0 12 + #include "crc-clmul-template.h" 13 + 14 + u64 crc64_msb_clmul(u64 crc, const void *p, size_t len, 15 + const struct crc_clmul_consts *consts) 16 + { 17 + return crc_clmul(crc, p, len, consts); 18 + }
-3
arch/s390/configs/debug_defconfig
··· 815 815 CONFIG_CORDIC=m 816 816 CONFIG_CRYPTO_LIB_CURVE25519=m 817 817 CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m 818 - CONFIG_CRC4=m 819 - CONFIG_CRC7=m 820 - CONFIG_CRC8=m 821 818 CONFIG_RANDOM32_SELFTEST=y 822 819 CONFIG_XZ_DEC_MICROLZMA=y 823 820 CONFIG_DMA_CMA=y
-3
arch/s390/configs/defconfig
··· 803 803 CONFIG_PRIME_NUMBERS=m 804 804 CONFIG_CRYPTO_LIB_CURVE25519=m 805 805 CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m 806 - CONFIG_CRC4=m 807 - CONFIG_CRC7=m 808 - CONFIG_CRC8=m 809 806 CONFIG_XZ_DEC_MICROLZMA=y 810 807 CONFIG_DMA_CMA=y 811 808 CONFIG_CMA_SIZE_MBYTES=0
+1 -1
arch/s390/lib/crc32-glue.c
··· 62 62 63 63 DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) 64 64 DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) 65 - DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) 65 + DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) 66 66 67 67 static int __init crc32_s390_init(void) 68 68 {
-2
arch/sh/configs/se7206_defconfig
··· 104 104 CONFIG_CRC_CCITT=y 105 105 CONFIG_CRC16=y 106 106 CONFIG_CRC_ITU_T=y 107 - CONFIG_CRC7=y 108 - CONFIG_LIBCRC32C=y
-1
arch/sh/configs/sh2007_defconfig
··· 195 195 # CONFIG_CRYPTO_HW is not set 196 196 CONFIG_CRC_CCITT=y 197 197 CONFIG_CRC16=y 198 - CONFIG_LIBCRC32C=y
-1
arch/sh/configs/titan_defconfig
··· 266 266 CONFIG_CRYPTO_TWOFISH=m 267 267 # CONFIG_CRYPTO_ANSI_CPRNG is not set 268 268 CONFIG_CRC16=m 269 - CONFIG_LIBCRC32C=m
-1
arch/sparc/configs/sparc32_defconfig
··· 94 94 CONFIG_CRYPTO_TWOFISH=m 95 95 # CONFIG_CRYPTO_ANSI_CPRNG is not set 96 96 # CONFIG_CRYPTO_HW is not set 97 - CONFIG_LIBCRC32C=m
-1
arch/sparc/configs/sparc64_defconfig
··· 230 230 CONFIG_CRYPTO_TWOFISH=m 231 231 # CONFIG_CRYPTO_ANSI_CPRNG is not set 232 232 CONFIG_CRC16=m 233 - CONFIG_LIBCRC32C=m 234 233 CONFIG_VCC=m 235 234 CONFIG_PATA_CMD64X=y 236 235 CONFIG_IP_PNP=y
+5 -5
arch/sparc/lib/crc32_glue.c
··· 27 27 28 28 void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); 29 29 30 - u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) 30 + u32 crc32c_arch(u32 crc, const u8 *data, size_t len) 31 31 { 32 32 size_t n = -(uintptr_t)data & 7; 33 33 34 34 if (!static_branch_likely(&have_crc32c_opcode)) 35 - return crc32c_le_base(crc, data, len); 35 + return crc32c_base(crc, data, len); 36 36 37 37 if (n) { 38 38 /* Data isn't 8-byte aligned. Align it. */ 39 39 n = min(n, len); 40 - crc = crc32c_le_base(crc, data, n); 40 + crc = crc32c_base(crc, data, n); 41 41 data += n; 42 42 len -= n; 43 43 } ··· 48 48 len -= n; 49 49 } 50 50 if (len) 51 - crc = crc32c_le_base(crc, data, len); 51 + crc = crc32c_base(crc, data, len); 52 52 return crc; 53 53 } 54 - EXPORT_SYMBOL(crc32c_le_arch); 54 + EXPORT_SYMBOL(crc32c_arch); 55 55 56 56 u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) 57 57 {
+2 -1
arch/x86/Kconfig
··· 77 77 select ARCH_HAS_CPU_FINALIZE_INIT 78 78 select ARCH_HAS_CPU_PASID if IOMMU_SVA 79 79 select ARCH_HAS_CRC32 80 - select ARCH_HAS_CRC_T10DIF if X86_64 80 + select ARCH_HAS_CRC64 if X86_64 81 + select ARCH_HAS_CRC_T10DIF 81 82 select ARCH_HAS_CURRENT_STACK_POINTER 82 83 select ARCH_HAS_DEBUG_VIRTUAL 83 84 select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
+1 -21
arch/x86/crypto/aesni-intel_glue.c
··· 1536 1536 AES_GCM_KEY_AVX10_SIZE, 800); 1537 1537 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ 1538 1538 1539 - /* 1540 - * This is a list of CPU models that are known to suffer from downclocking when 1541 - * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode 1542 - * implementations with zmm registers won't be used by default. Implementations 1543 - * with ymm registers (256-bit vectors) will be used by default instead. 1544 - */ 1545 - static const struct x86_cpu_id zmm_exclusion_list[] = { 1546 - X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), 1547 - X86_MATCH_VFM(INTEL_ICELAKE_X, 0), 1548 - X86_MATCH_VFM(INTEL_ICELAKE_D, 0), 1549 - X86_MATCH_VFM(INTEL_ICELAKE, 0), 1550 - X86_MATCH_VFM(INTEL_ICELAKE_L, 0), 1551 - X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), 1552 - X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), 1553 - X86_MATCH_VFM(INTEL_TIGERLAKE, 0), 1554 - /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ 1555 - /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ 1556 - {}, 1557 - }; 1558 - 1559 1539 static int __init register_avx_algs(void) 1560 1540 { 1561 1541 int err; ··· 1580 1600 if (err) 1581 1601 return err; 1582 1602 1583 - if (x86_match_cpu(zmm_exclusion_list)) { 1603 + if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) { 1584 1604 int i; 1585 1605 1586 1606 aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
+1
arch/x86/include/asm/cpufeatures.h
··· 480 480 #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ 481 481 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ 482 482 #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ 483 + #define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ 483 484 484 485 /* 485 486 * BUG word(s)
+22
arch/x86/kernel/cpu/intel.c
··· 512 512 wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); 513 513 } 514 514 515 + /* 516 + * This is a list of Intel CPUs that are known to suffer from downclocking when 517 + * ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel 518 + * executes SIMD-optimized code such as cryptography functions or CRCs, it 519 + * should prefer 256-bit (YMM) code to 512-bit (ZMM) code. 520 + */ 521 + static const struct x86_cpu_id zmm_exclusion_list[] = { 522 + X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), 523 + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), 524 + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), 525 + X86_MATCH_VFM(INTEL_ICELAKE, 0), 526 + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), 527 + X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), 528 + X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), 529 + X86_MATCH_VFM(INTEL_TIGERLAKE, 0), 530 + /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ 531 + {}, 532 + }; 533 + 515 534 static void init_intel(struct cpuinfo_x86 *c) 516 535 { 517 536 early_init_intel(c); ··· 608 589 strcpy(c->x86_model_id, p); 609 590 } 610 591 #endif 592 + 593 + if (x86_match_cpu(zmm_exclusion_list)) 594 + set_cpu_cap(c, X86_FEATURE_PREFER_YMM); 611 595 612 596 /* Work around errata */ 613 597 srat_detect_node(c);
+4 -1
arch/x86/lib/Makefile
··· 42 42 crc32-x86-y := crc32-glue.o crc32-pclmul.o 43 43 crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o 44 44 45 + obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o 46 + crc64-x86-y := crc64-glue.o crc64-pclmul.o 47 + 45 48 obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o 46 - crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o 49 + crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o 47 50 48 51 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 49 52 obj-y += iomem.o
+195
arch/x86/lib/crc-pclmul-consts.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * CRC constants generated by: 4 + * 5 + * ./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5 6 + * 7 + * Do not edit manually. 8 + */ 9 + 10 + /* 11 + * CRC folding constants generated for most-significant-bit-first CRC-16 using 12 + * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 13 + */ 14 + static const struct { 15 + u8 bswap_mask[16]; 16 + u64 fold_across_2048_bits_consts[2]; 17 + u64 fold_across_1024_bits_consts[2]; 18 + u64 fold_across_512_bits_consts[2]; 19 + u64 fold_across_256_bits_consts[2]; 20 + u64 fold_across_128_bits_consts[2]; 21 + u8 shuf_table[48]; 22 + u64 barrett_reduction_consts[2]; 23 + } crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = { 24 + .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, 25 + .fold_across_2048_bits_consts = { 26 + 0xdccf000000000000, /* LO64_TERMS: (x^2000 mod G) * x^48 */ 27 + 0x4b0b000000000000, /* HI64_TERMS: (x^2064 mod G) * x^48 */ 28 + }, 29 + .fold_across_1024_bits_consts = { 30 + 0x9d9d000000000000, /* LO64_TERMS: (x^976 mod G) * x^48 */ 31 + 0x7cf5000000000000, /* HI64_TERMS: (x^1040 mod G) * x^48 */ 32 + }, 33 + .fold_across_512_bits_consts = { 34 + 0x044c000000000000, /* LO64_TERMS: (x^464 mod G) * x^48 */ 35 + 0xe658000000000000, /* HI64_TERMS: (x^528 mod G) * x^48 */ 36 + }, 37 + .fold_across_256_bits_consts = { 38 + 0x6ee3000000000000, /* LO64_TERMS: (x^208 mod G) * x^48 */ 39 + 0xe7b5000000000000, /* HI64_TERMS: (x^272 mod G) * x^48 */ 40 + }, 41 + .fold_across_128_bits_consts = { 42 + 0x2d56000000000000, /* LO64_TERMS: (x^80 mod G) * x^48 */ 43 + 0x06df000000000000, /* HI64_TERMS: (x^144 mod G) * x^48 */ 44 + }, 45 + .shuf_table = { 46 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 47 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 48 + -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, 49 + }, 50 + .barrett_reduction_consts = { 51 + 0x8bb7000000000000, /* LO64_TERMS: (G - x^16) * x^48 */ 52 + 0xf65a57f81d33a48a, /* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */ 53 + }, 54 + }; 55 + 56 + /* 57 + * CRC folding constants generated for least-significant-bit-first CRC-32 using 58 + * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + 59 + * x^5 + x^4 + x^2 + x^1 + x^0 60 + */ 61 + static const struct { 62 + u64 fold_across_2048_bits_consts[2]; 63 + u64 fold_across_1024_bits_consts[2]; 64 + u64 fold_across_512_bits_consts[2]; 65 + u64 fold_across_256_bits_consts[2]; 66 + u64 fold_across_128_bits_consts[2]; 67 + u8 shuf_table[48]; 68 + u64 barrett_reduction_consts[2]; 69 + } crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = { 70 + .fold_across_2048_bits_consts = { 71 + 0x00000000ce3371cb, /* HI64_TERMS: (x^2079 mod G) * x^32 */ 72 + 0x00000000e95c1271, /* LO64_TERMS: (x^2015 mod G) * x^32 */ 73 + }, 74 + .fold_across_1024_bits_consts = { 75 + 0x0000000033fff533, /* HI64_TERMS: (x^1055 mod G) * x^32 */ 76 + 0x00000000910eeec1, /* LO64_TERMS: (x^991 mod G) * x^32 */ 77 + }, 78 + .fold_across_512_bits_consts = { 79 + 0x000000008f352d95, /* HI64_TERMS: (x^543 mod G) * x^32 */ 80 + 0x000000001d9513d7, /* LO64_TERMS: (x^479 mod G) * x^32 */ 81 + }, 82 + .fold_across_256_bits_consts = { 83 + 0x00000000f1da05aa, /* HI64_TERMS: (x^287 mod G) * x^32 */ 84 + 0x0000000081256527, /* LO64_TERMS: (x^223 mod G) * x^32 */ 85 + }, 86 + .fold_across_128_bits_consts = { 87 + 0x00000000ae689191, /* HI64_TERMS: (x^159 mod G) * x^32 */ 88 + 0x00000000ccaa009e, /* LO64_TERMS: (x^95 mod G) * x^32 */ 89 + }, 90 + .shuf_table = { 91 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 92 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 93 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 94 + }, 95 + .barrett_reduction_consts = { 96 + 0xb4e5b025f7011641, /* HI64_TERMS: floor(x^95 
/ G) */ 97 + 0x00000001db710640, /* LO64_TERMS: (G - x^32) * x^31 */ 98 + }, 99 + }; 100 + 101 + /* 102 + * CRC folding constants generated for most-significant-bit-first CRC-64 using 103 + * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + 104 + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + 105 + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + 106 + * x^7 + x^4 + x^1 + x^0 107 + */ 108 + static const struct { 109 + u8 bswap_mask[16]; 110 + u64 fold_across_2048_bits_consts[2]; 111 + u64 fold_across_1024_bits_consts[2]; 112 + u64 fold_across_512_bits_consts[2]; 113 + u64 fold_across_256_bits_consts[2]; 114 + u64 fold_across_128_bits_consts[2]; 115 + u8 shuf_table[48]; 116 + u64 barrett_reduction_consts[2]; 117 + } crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = { 118 + .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, 119 + .fold_across_2048_bits_consts = { 120 + 0x7f52691a60ddc70d, /* LO64_TERMS: (x^2048 mod G) * x^0 */ 121 + 0x7036b0389f6a0c82, /* HI64_TERMS: (x^2112 mod G) * x^0 */ 122 + }, 123 + .fold_across_1024_bits_consts = { 124 + 0x05cf79dea9ac37d6, /* LO64_TERMS: (x^1024 mod G) * x^0 */ 125 + 0x001067e571d7d5c2, /* HI64_TERMS: (x^1088 mod G) * x^0 */ 126 + }, 127 + .fold_across_512_bits_consts = { 128 + 0x5f6843ca540df020, /* LO64_TERMS: (x^512 mod G) * x^0 */ 129 + 0xddf4b6981205b83f, /* HI64_TERMS: (x^576 mod G) * x^0 */ 130 + }, 131 + .fold_across_256_bits_consts = { 132 + 0x571bee0a227ef92b, /* LO64_TERMS: (x^256 mod G) * x^0 */ 133 + 0x44bef2a201b5200c, /* HI64_TERMS: (x^320 mod G) * x^0 */ 134 + }, 135 + .fold_across_128_bits_consts = { 136 + 0x05f5c3c7eb52fab6, /* LO64_TERMS: (x^128 mod G) * x^0 */ 137 + 0x4eb938a7d257740e, /* HI64_TERMS: (x^192 mod G) * x^0 */ 138 + }, 139 + .shuf_table = { 140 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 141 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 142 + 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 143 + }, 144 + .barrett_reduction_consts = { 145 + 0x42f0e1eba9ea3693, /* LO64_TERMS: (G - x^64) * x^0 */ 146 + 0x578d29d06cc4f872, /* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */ 147 + }, 148 + }; 149 + 150 + /* 151 + * CRC folding constants generated for least-significant-bit-first CRC-64 using 152 + * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + 153 + * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + 154 + * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + 155 + * x^4 + x^3 + x^0 156 + */ 157 + static const struct { 158 + u64 fold_across_2048_bits_consts[2]; 159 + u64 fold_across_1024_bits_consts[2]; 160 + u64 fold_across_512_bits_consts[2]; 161 + u64 fold_across_256_bits_consts[2]; 162 + u64 fold_across_128_bits_consts[2]; 163 + u8 shuf_table[48]; 164 + u64 barrett_reduction_consts[2]; 165 + } crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = { 166 + .fold_across_2048_bits_consts = { 167 + 0x37ccd3e14069cabc, /* HI64_TERMS: (x^2111 mod G) * x^0 */ 168 + 0xa043808c0f782663, /* LO64_TERMS: (x^2047 mod G) * x^0 */ 169 + }, 170 + .fold_across_1024_bits_consts = { 171 + 0xa1ca681e733f9c40, /* HI64_TERMS: (x^1087 mod G) * x^0 */ 172 + 0x5f852fb61e8d92dc, /* LO64_TERMS: (x^1023 mod G) * x^0 */ 173 + }, 174 + .fold_across_512_bits_consts = { 175 + 0x0c32cdb31e18a84a, /* HI64_TERMS: (x^575 mod G) * x^0 */ 176 + 0x62242240ace5045a, /* LO64_TERMS: (x^511 mod G) * x^0 */ 177 + }, 178 + .fold_across_256_bits_consts = { 179 + 0xb0bc2e589204f500, /* HI64_TERMS: (x^319 mod G) * x^0 */ 180 + 0xe1e0bb9d45d7a44c, /* LO64_TERMS: (x^255 mod G) * x^0 */ 181 + }, 182 + .fold_across_128_bits_consts = { 183 + 0xeadc41fd2ba3d420, /* HI64_TERMS: (x^191 mod G) * x^0 */ 184 + 0x21e9761e252621ac, /* LO64_TERMS: (x^127 mod G) * x^0 */ 185 + }, 186 + .shuf_table = { 187 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, 188 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 189 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 190 + }, 191 + .barrett_reduction_consts = { 192 + 0x27ecfa329aef9f77, /* HI64_TERMS: floor(x^127 / G) */ 193 + 0x34d926535897936a, /* LO64_TERMS: (G - x^64 - x^0) / x */ 194 + }, 195 + };
+582
arch/x86/lib/crc-pclmul-template.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + // 3 + // Template to generate [V]PCLMULQDQ-based CRC functions for x86 4 + // 5 + // Copyright 2025 Google LLC 6 + // 7 + // Author: Eric Biggers <ebiggers@google.com> 8 + 9 + #include <linux/linkage.h> 10 + #include <linux/objtool.h> 11 + 12 + // Offsets within the generated constants table 13 + .set OFFSETOF_BSWAP_MASK, -5*16 // msb-first CRCs only 14 + .set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, -4*16 // must precede next 15 + .set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS, -3*16 // must precede next 16 + .set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS, -2*16 // must precede next 17 + .set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS, -1*16 // must precede next 18 + .set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS, 0*16 // must be 0 19 + .set OFFSETOF_SHUF_TABLE, 1*16 20 + .set OFFSETOF_BARRETT_REDUCTION_CONSTS, 4*16 21 + 22 + // Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the 23 + // corresponding non-VEX instruction plus any needed moves. The supported 24 + // instruction formats are: 25 + // 26 + // - Two-arg [src, dst], where the non-VEX format is the same. 27 + // - Three-arg [src1, src2, dst] where the non-VEX format is 28 + // [src1, src2_and_dst]. If src2 != dst, then src1 must != dst too. 29 + // 30 + // \insn gives the instruction without a "v" prefix and including any immediate 31 + // argument if needed to make the instruction follow one of the above formats. 32 + // If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to 33 + // it first; this is needed when \arg1 is an unaligned mem operand. 34 + .macro _cond_vex insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp 35 + .if AVX_LEVEL == 0 36 + // VEX not allowed. Emulate it. 37 + .ifnb \arg3 // Three-arg [src1, src2, dst] 38 + .ifc "\arg2", "\arg3" // src2 == dst? 
39 + .ifnb \unaligned_mem_tmp 40 + movdqu \arg1, \unaligned_mem_tmp 41 + \insn \unaligned_mem_tmp, \arg3 42 + .else 43 + \insn \arg1, \arg3 44 + .endif 45 + .else // src2 != dst 46 + .ifc "\arg1", "\arg3" 47 + .error "Can't have src1 == dst when src2 != dst" 48 + .endif 49 + .ifnb \unaligned_mem_tmp 50 + movdqu \arg1, \unaligned_mem_tmp 51 + movdqa \arg2, \arg3 52 + \insn \unaligned_mem_tmp, \arg3 53 + .else 54 + movdqa \arg2, \arg3 55 + \insn \arg1, \arg3 56 + .endif 57 + .endif 58 + .else // Two-arg [src, dst] 59 + .ifnb \unaligned_mem_tmp 60 + movdqu \arg1, \unaligned_mem_tmp 61 + \insn \unaligned_mem_tmp, \arg2 62 + .else 63 + \insn \arg1, \arg2 64 + .endif 65 + .endif 66 + .else 67 + // VEX is allowed. Emit the desired instruction directly. 68 + .ifnb \arg3 69 + v\insn \arg1, \arg2, \arg3 70 + .else 71 + v\insn \arg1, \arg2 72 + .endif 73 + .endif 74 + .endm 75 + 76 + // Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector 77 + // register of length VL. 78 + .macro _vbroadcast src, dst 79 + .if VL == 16 80 + _cond_vex movdqa, \src, \dst 81 + .elseif VL == 32 82 + vbroadcasti128 \src, \dst 83 + .else 84 + vbroadcasti32x4 \src, \dst 85 + .endif 86 + .endm 87 + 88 + // Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC 89 + // is msb-first use \bswap_mask to reflect the bytes within each 128-bit lane. 
90 + .macro _load_data vl, src, bswap_mask, dst 91 + .if \vl < 64 92 + _cond_vex movdqu, "\src", \dst 93 + .else 94 + vmovdqu8 \src, \dst 95 + .endif 96 + .if !LSB_CRC 97 + _cond_vex pshufb, \bswap_mask, \dst, \dst 98 + .endif 99 + .endm 100 + 101 + .macro _prepare_v0 vl, v0, v1, bswap_mask 102 + .if LSB_CRC 103 + .if \vl < 64 104 + _cond_vex pxor, (BUF), \v0, \v0, unaligned_mem_tmp=\v1 105 + .else 106 + vpxorq (BUF), \v0, \v0 107 + .endif 108 + .else 109 + _load_data \vl, (BUF), \bswap_mask, \v1 110 + .if \vl < 64 111 + _cond_vex pxor, \v1, \v0, \v0 112 + .else 113 + vpxorq \v1, \v0, \v0 114 + .endif 115 + .endif 116 + .endm 117 + 118 + // The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for 119 + // msb-first order or the physically high qword for lsb-first order 120 + #define LO64_TERMS 0 121 + 122 + // The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high 123 + // qword for msb-first order or the physically low qword for lsb-first order 124 + #define HI64_TERMS 1 125 + 126 + // Multiply the given \src1_terms of each 128-bit lane of \src1 by the given 127 + // \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst. 128 + .macro _pclmulqdq src1, src1_terms, src2, src2_terms, dst 129 + _cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \ 130 + \src1, \src2, \dst 131 + .endm 132 + 133 + // Fold \acc into \data and store the result back into \acc. \data can be an 134 + // unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no 135 + // byte-reflection is needed; otherwise it must be a vector register. \consts 136 + // is a vector register containing the needed fold constants, and \tmp is a 137 + // temporary vector register. All arguments must be the same length. 
138 + .macro _fold_vec acc, data, consts, tmp 139 + _pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp 140 + _pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc 141 + .if AVX_LEVEL <= 2 142 + _cond_vex pxor, \data, \tmp, \tmp 143 + _cond_vex pxor, \tmp, \acc, \acc 144 + .else 145 + vpternlogq $0x96, \data, \tmp, \acc 146 + .endif 147 + .endm 148 + 149 + // Fold \acc into \data and store the result back into \acc. \data is an 150 + // unaligned mem operand, \consts is a vector register containing the needed 151 + // fold constants, \bswap_mask is a vector register containing the 152 + // byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are 153 + // temporary vector registers. All arguments must have length \vl. 154 + .macro _fold_vec_mem vl, acc, data, consts, bswap_mask, tmp1, tmp2 155 + .if AVX_LEVEL == 0 || !LSB_CRC 156 + _load_data \vl, \data, \bswap_mask, \tmp1 157 + _fold_vec \acc, \tmp1, \consts, \tmp2 158 + .else 159 + _fold_vec \acc, \data, \consts, \tmp1 160 + .endif 161 + .endm 162 + 163 + // Load the constants for folding across 2**i vectors of length VL at a time 164 + // into all 128-bit lanes of the vector register CONSTS. 165 + .macro _load_vec_folding_consts i 166 + _vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \ 167 + CONSTS 168 + .endm 169 + 170 + // Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store 171 + // the result back into \v0. If the remaining length mod \vl is nonzero, also 172 + // fold \vl data bytes from BUF. For both operations the fold distance is \vl. 173 + // \consts must be a register of length \vl containing the fold constants. 
174 + .macro _fold_vec_final vl, v0, v1, consts, bswap_mask, tmp1, tmp2 175 + _fold_vec \v0, \v1, \consts, \tmp1 176 + test $\vl, LEN8 177 + jz .Lfold_vec_final_done\@ 178 + _fold_vec_mem \vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2 179 + add $\vl, BUF 180 + .Lfold_vec_final_done\@: 181 + .endm 182 + 183 + // This macro generates the body of a CRC function with the following prototype: 184 + // 185 + // crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts); 186 + // 187 + // |crc| is the initial CRC, and crc_t is a data type wide enough to hold it. 188 + // |buf| is the data to checksum. |len| is the data length in bytes, which must 189 + // be at least 16. |consts| is a pointer to the fold_across_128_bits_consts 190 + // field of the constants struct that was generated for the chosen CRC variant. 191 + // 192 + // Moving onto the macro parameters, \n is the number of bits in the CRC, e.g. 193 + // 32 for a CRC-32. Currently the supported values are 8, 16, 32, and 64. If 194 + // the file is compiled in i386 mode, then the maximum supported value is 32. 195 + // 196 + // \lsb_crc is 1 if the CRC processes the least significant bit of each byte 197 + // first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0. \lsb_crc is 0 198 + // if the CRC processes the most significant bit of each byte first, i.e. maps 199 + // bit0 to x^0, bit1 to x^1, bit7 to x^7. 200 + // 201 + // \vl is the maximum length of vector register to use in bytes: 16, 32, or 64. 202 + // 203 + // \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or 204 + // 512 for AVX512. 205 + // 206 + // If \vl == 16 && \avx_level == 0, the generated code requires: 207 + // PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.) 208 + // 209 + // If \vl == 32 && \avx_level == 2, the generated code requires: 210 + // VPCLMULQDQ && AVX2. 
211 + // 212 + // If \vl == 64 && \avx_level == 512, the generated code requires: 213 + // VPCLMULQDQ && AVX512BW && AVX512VL. 214 + // 215 + // Other \vl and \avx_level combinations are either not supported or not useful. 216 + .macro _crc_pclmul n, lsb_crc, vl, avx_level 217 + .set LSB_CRC, \lsb_crc 218 + .set VL, \vl 219 + .set AVX_LEVEL, \avx_level 220 + 221 + // Define aliases for the xmm, ymm, or zmm registers according to VL. 222 + .irp i, 0,1,2,3,4,5,6,7 223 + .if VL == 16 224 + .set V\i, %xmm\i 225 + .set LOG2_VL, 4 226 + .elseif VL == 32 227 + .set V\i, %ymm\i 228 + .set LOG2_VL, 5 229 + .elseif VL == 64 230 + .set V\i, %zmm\i 231 + .set LOG2_VL, 6 232 + .else 233 + .error "Unsupported vector length" 234 + .endif 235 + .endr 236 + // Define aliases for the function parameters. 237 + // Note: when crc_t is shorter than u32, zero-extension to 32 bits is 238 + // guaranteed by the ABI. Zero-extension to 64 bits is *not* guaranteed 239 + // when crc_t is shorter than u64. 240 + #ifdef __x86_64__ 241 + .if \n <= 32 242 + .set CRC, %edi 243 + .else 244 + .set CRC, %rdi 245 + .endif 246 + .set BUF, %rsi 247 + .set LEN, %rdx 248 + .set LEN32, %edx 249 + .set LEN8, %dl 250 + .set CONSTS_PTR, %rcx 251 + #else 252 + // 32-bit support, assuming -mregparm=3 and not including support for 253 + // CRC-64 (which would use both eax and edx to pass the crc parameter). 254 + .set CRC, %eax 255 + .set BUF, %edx 256 + .set LEN, %ecx 257 + .set LEN32, %ecx 258 + .set LEN8, %cl 259 + .set CONSTS_PTR, %ebx // Passed on stack 260 + #endif 261 + 262 + // Define aliases for some local variables. V0-V5 are used without 263 + // aliases (for accumulators, data, temporary values, etc). Staying 264 + // within the first 8 vector registers keeps the code 32-bit SSE 265 + // compatible and reduces the size of 64-bit SSE code slightly. 
266 + .set BSWAP_MASK, V6 267 + .set BSWAP_MASK_YMM, %ymm6 268 + .set BSWAP_MASK_XMM, %xmm6 269 + .set CONSTS, V7 270 + .set CONSTS_YMM, %ymm7 271 + .set CONSTS_XMM, %xmm7 272 + 273 + // Use ANNOTATE_NOENDBR to suppress an objtool warning, since the 274 + // functions generated by this macro are called only by static_call. 275 + ANNOTATE_NOENDBR 276 + 277 + #ifdef __i386__ 278 + push CONSTS_PTR 279 + mov 8(%esp), CONSTS_PTR 280 + #endif 281 + 282 + // Create a 128-bit vector that contains the initial CRC in the end 283 + // representing the high-order polynomial coefficients, and the rest 0. 284 + // If the CRC is msb-first, also load the byte-reflection table. 285 + .if \n <= 32 286 + _cond_vex movd, CRC, %xmm0 287 + .else 288 + _cond_vex movq, CRC, %xmm0 289 + .endif 290 + .if !LSB_CRC 291 + _cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0 292 + _vbroadcast OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK 293 + .endif 294 + 295 + // Load the first vector of data and XOR the initial CRC into the 296 + // appropriate end of the first 128-bit lane of data. If LEN < VL, then 297 + // use a short vector and jump ahead to the final reduction. (LEN >= 16 298 + // is guaranteed here but not necessarily LEN >= VL.) 299 + .if VL >= 32 300 + cmp $VL, LEN 301 + jae .Lat_least_1vec\@ 302 + .if VL == 64 303 + cmp $32, LEN32 304 + jb .Lless_than_32bytes\@ 305 + _prepare_v0 32, %ymm0, %ymm1, BSWAP_MASK_YMM 306 + add $32, BUF 307 + jmp .Lreduce_256bits_to_128bits\@ 308 + .Lless_than_32bytes\@: 309 + .endif 310 + _prepare_v0 16, %xmm0, %xmm1, BSWAP_MASK_XMM 311 + add $16, BUF 312 + vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM 313 + jmp .Lcheck_for_partial_block\@ 314 + .Lat_least_1vec\@: 315 + .endif 316 + _prepare_v0 VL, V0, V1, BSWAP_MASK 317 + 318 + // Handle VL <= LEN < 4*VL. 319 + cmp $4*VL-1, LEN 320 + ja .Lat_least_4vecs\@ 321 + add $VL, BUF 322 + // If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector. 
323 + // If VL==16 then load fold_across_128_bits_consts first, as the final 324 + // reduction depends on it and it won't be loaded anywhere else. 325 + cmp $2*VL-1, LEN32 326 + .if VL == 16 327 + _cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM 328 + .endif 329 + jbe .Lreduce_1vec_to_128bits\@ 330 + // Otherwise 2*VL <= LEN < 4*VL. Load one more vector and jump ahead to 331 + // the reduction from 2 vectors. 332 + _load_data VL, (BUF), BSWAP_MASK, V1 333 + add $VL, BUF 334 + jmp .Lreduce_2vecs_to_1\@ 335 + 336 + .Lat_least_4vecs\@: 337 + // Load 3 more vectors of data. 338 + _load_data VL, 1*VL(BUF), BSWAP_MASK, V1 339 + _load_data VL, 2*VL(BUF), BSWAP_MASK, V2 340 + _load_data VL, 3*VL(BUF), BSWAP_MASK, V3 341 + sub $-4*VL, BUF // Shorter than 'add 4*VL' when VL=32 342 + add $-4*VL, LEN // Shorter than 'sub 4*VL' when VL=32 343 + 344 + // Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next 345 + // 4 vectors of data and write the result back to V0-V3. 346 + cmp $4*VL-1, LEN // Shorter than 'cmp 4*VL' when VL=32 347 + jbe .Lreduce_4vecs_to_2\@ 348 + _load_vec_folding_consts 2 349 + .Lfold_4vecs_loop\@: 350 + _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 351 + _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 352 + _fold_vec_mem VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 353 + _fold_vec_mem VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 354 + sub $-4*VL, BUF 355 + add $-4*VL, LEN 356 + cmp $4*VL-1, LEN 357 + ja .Lfold_4vecs_loop\@ 358 + 359 + // Fold V0,V1 into V2,V3 and write the result back to V0,V1. Then fold 360 + // two more vectors of data from BUF, if at least that much remains. 
361 + .Lreduce_4vecs_to_2\@: 362 + _load_vec_folding_consts 1 363 + _fold_vec V0, V2, CONSTS, V4 364 + _fold_vec V1, V3, CONSTS, V4 365 + test $2*VL, LEN8 366 + jz .Lreduce_2vecs_to_1\@ 367 + _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 368 + _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5 369 + sub $-2*VL, BUF 370 + 371 + // Fold V0 into V1 and write the result back to V0. Then fold one more 372 + // vector of data from BUF, if at least that much remains. 373 + .Lreduce_2vecs_to_1\@: 374 + _load_vec_folding_consts 0 375 + _fold_vec_final VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5 376 + 377 + .Lreduce_1vec_to_128bits\@: 378 + .if VL == 64 379 + // Reduce 512-bit %zmm0 to 256-bit %ymm0. Then fold 256 more bits of 380 + // data from BUF, if at least that much remains. 381 + vbroadcasti128 OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM 382 + vextracti64x4 $1, %zmm0, %ymm1 383 + _fold_vec_final 32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5 384 + .Lreduce_256bits_to_128bits\@: 385 + .endif 386 + .if VL >= 32 387 + // Reduce 256-bit %ymm0 to 128-bit %xmm0. Then fold 128 more bits of 388 + // data from BUF, if at least that much remains. 389 + vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM 390 + vextracti128 $1, %ymm0, %xmm1 391 + _fold_vec_final 16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5 392 + .Lcheck_for_partial_block\@: 393 + .endif 394 + and $15, LEN32 395 + jz .Lreduce_128bits_to_crc\@ 396 + 397 + // 1 <= LEN <= 15 data bytes remain in BUF. The polynomial is now 398 + // A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0 399 + // and B is the polynomial of the remaining LEN data bytes. To reduce 400 + // this to 128 bits without needing fold constants for each possible 401 + // LEN, rearrange this expression into C1*(x^128) + C2, where 402 + // C1 = floor(A / x^(128 - 8*LEN)) and C2 = A*x^(8*LEN) + B mod x^128. 
403 + // Then fold C1 into C2, which is just another fold across 128 bits. 404 + 405 + .if !LSB_CRC || AVX_LEVEL == 0 406 + // Load the last 16 data bytes. Note that originally LEN was >= 16. 407 + _load_data 16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2 408 + .endif // Else will use vpblendvb mem operand later. 409 + .if !LSB_CRC 410 + neg LEN // Needed for indexing shuf_table 411 + .endif 412 + 413 + // tmp = A*x^(8*LEN) mod x^128 414 + // lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1] 415 + // i.e. right-shift by LEN bytes. 416 + // msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN] 417 + // i.e. left-shift by LEN bytes. 418 + _cond_vex movdqu, "OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3 419 + _cond_vex pshufb, %xmm3, %xmm0, %xmm1 420 + 421 + // C1 = floor(A / x^(128 - 8*LEN)) 422 + // lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1] 423 + // i.e. left-shift by 16-LEN bytes. 424 + // msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1] 425 + // i.e. right-shift by 16-LEN bytes. 426 + _cond_vex pshufb, "OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \ 427 + %xmm0, %xmm0, unaligned_mem_tmp=%xmm4 428 + 429 + // C2 = tmp + B. This is just a blend of tmp with the last 16 data 430 + // bytes (reflected if msb-first). The blend mask is the shuffle table 431 + // that was used to create tmp. 0 selects tmp, and 1 last16databytes. 432 + .if AVX_LEVEL == 0 433 + movdqa %xmm0, %xmm4 434 + movdqa %xmm3, %xmm0 435 + pblendvb %xmm2, %xmm1 // uses %xmm0 as implicit operand 436 + movdqa %xmm4, %xmm0 437 + .elseif LSB_CRC 438 + vpblendvb %xmm3, -16(BUF,LEN), %xmm1, %xmm1 439 + .else 440 + vpblendvb %xmm3, %xmm2, %xmm1, %xmm1 441 + .endif 442 + 443 + // Fold C1 into C2 and store the 128-bit result in %xmm0. 444 + _fold_vec %xmm0, %xmm1, CONSTS_XMM, %xmm4 445 + 446 + .Lreduce_128bits_to_crc\@: 447 + // Compute the CRC as %xmm0 * x^n mod G. 
Here %xmm0 means the 128-bit 448 + // polynomial stored in %xmm0 (using either lsb-first or msb-first bit 449 + // order according to LSB_CRC), and G is the CRC's generator polynomial. 450 + 451 + // First, multiply %xmm0 by x^n and reduce the result to 64+n bits: 452 + // 453 + // t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) + 454 + // x^n * (%xmm0 mod x^64) 455 + // 456 + // Store t0 * x^(64-n) in %xmm0. I.e., actually do: 457 + // 458 + // %xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) + 459 + // x^64 * (%xmm0 mod x^64) 460 + // 461 + // The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned 462 + // to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily 463 + // select it. The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the 464 + // msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case 465 + // (considering the extra factor of x that gets implicitly introduced by 466 + // each pclmulqdq when using lsb-first order), is identical to the 467 + // constant that was used earlier for folding the LO64_TERMS across 128 468 + // bits. Thus it's already available in LO64_TERMS of CONSTS_XMM. 469 + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1 470 + .if LSB_CRC 471 + _cond_vex psrldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64) 472 + .else 473 + _cond_vex pslldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64) 474 + .endif 475 + _cond_vex pxor, %xmm1, %xmm0, %xmm0 476 + // The HI64_TERMS of %xmm0 now contain floor(t0 / x^n). 477 + // The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n). 478 + 479 + // First step of Barrett reduction: Compute floor(t0 / G). This is the 480 + // polynomial by which G needs to be multiplied to cancel out the x^n 481 + // and higher terms of t0, i.e. to reduce t0 mod G. First do: 482 + // 483 + // t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n) 484 + // 485 + // Then the desired value floor(t0 / G) is floor(t1 / x^64). 
The 63 in 486 + // x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest 487 + // value that makes enough precision be carried through the calculation. 488 + // 489 + // The '* x' makes it so the result is floor(t1 / x^64) rather than 490 + // floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it 491 + // can be extracted much more easily in the next step. In the lsb-first 492 + // case the '* x' happens implicitly. In the msb-first case it must be 493 + // done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the 494 + // constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and 495 + // the multiplication by the x^64 term is handled using a pxor. The 496 + // pxor causes the low 64 terms of t1 to be wrong, but they are unused. 497 + _cond_vex movdqa, OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM 498 + _pclmulqdq CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1 499 + .if !LSB_CRC 500 + _cond_vex pxor, %xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n) 501 + .endif 502 + // The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G). 503 + 504 + // Second step of Barrett reduction: Cancel out the x^n and higher terms 505 + // of t0 by subtracting the needed multiple of G. This gives the CRC: 506 + // 507 + // crc := t0 - (G * floor(t0 / G)) 508 + // 509 + // But %xmm0 contains t0 * x^(64-n), so it's more convenient to do: 510 + // 511 + // crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n) 512 + // 513 + // Furthermore, since the resulting CRC is n-bit, if mod x^n is 514 + // explicitly applied to it then the x^n term of G makes no difference 515 + // in the result and can be omitted. This helps keep the constant 516 + // multiplier in 64 bits in most cases. 
This gives the following: 517 + // 518 + // %xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G)) 519 + // crc := (%xmm0 / x^(64-n)) mod x^n 520 + // 521 + // In the lsb-first case, each pclmulqdq implicitly introduces 522 + // an extra factor of x, so in that case the constant that needs to be 523 + // passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63. 524 + // For lsb-first CRCs where n=64, the extra factor of x cannot be as 525 + // easily avoided. In that case, instead pass '(G - x^n - x^0) / x' to 526 + // pclmulqdq and handle the x^0 term (i.e. 1) separately. (All CRC 527 + // polynomials have nonzero x^n and x^0 terms.) It works out as: the 528 + // CRC has be XORed with the physically low qword of %xmm1, representing 529 + // floor(t0 / G). The most efficient way to do that is to move it to 530 + // the physically high qword and use a ternlog to combine the two XORs. 531 + .if LSB_CRC && \n == 64 532 + _cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2 533 + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1 534 + .if AVX_LEVEL <= 2 535 + _cond_vex pxor, %xmm2, %xmm0, %xmm0 536 + _cond_vex pxor, %xmm1, %xmm0, %xmm0 537 + .else 538 + vpternlogq $0x96, %xmm2, %xmm1, %xmm0 539 + .endif 540 + _cond_vex "pextrq $1,", %xmm0, %rax // (%xmm0 / x^0) mod x^64 541 + .else 542 + _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1 543 + _cond_vex pxor, %xmm1, %xmm0, %xmm0 544 + .if \n == 8 545 + _cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8 546 + .elseif \n == 16 547 + _cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16 548 + .elseif \n == 32 549 + _cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32 550 + .else // \n == 64 && !LSB_CRC 551 + _cond_vex movq, %xmm0, %rax // (%xmm0 / x^0) mod x^64 552 + .endif 553 + .endif 554 + 555 + .if VL > 16 556 + vzeroupper // Needed when ymm or zmm registers may have been used. 
557 + .endif 558 + #ifdef __i386__ 559 + pop CONSTS_PTR 560 + #endif 561 + RET 562 + .endm 563 + 564 + #ifdef CONFIG_AS_VPCLMULQDQ 565 + #define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \ 566 + SYM_FUNC_START(prefix##_pclmul_sse); \ 567 + _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \ 568 + SYM_FUNC_END(prefix##_pclmul_sse); \ 569 + \ 570 + SYM_FUNC_START(prefix##_vpclmul_avx2); \ 571 + _crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \ 572 + SYM_FUNC_END(prefix##_vpclmul_avx2); \ 573 + \ 574 + SYM_FUNC_START(prefix##_vpclmul_avx512); \ 575 + _crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \ 576 + SYM_FUNC_END(prefix##_vpclmul_avx512); 577 + #else 578 + #define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \ 579 + SYM_FUNC_START(prefix##_pclmul_sse); \ 580 + _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \ 581 + SYM_FUNC_END(prefix##_pclmul_sse); 582 + #endif // !CONFIG_AS_VPCLMULQDQ
+76
arch/x86/lib/crc-pclmul-template.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are 4 + * instantiated by crc-pclmul-template.S 5 + * 6 + * Copyright 2025 Google LLC 7 + * 8 + * Author: Eric Biggers <ebiggers@google.com> 9 + */ 10 + #ifndef _CRC_PCLMUL_TEMPLATE_H 11 + #define _CRC_PCLMUL_TEMPLATE_H 12 + 13 + #include <asm/cpufeatures.h> 14 + #include <asm/simd.h> 15 + #include <crypto/internal/simd.h> 16 + #include <linux/static_call.h> 17 + #include "crc-pclmul-consts.h" 18 + 19 + #define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \ 20 + crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \ 21 + const void *consts_ptr); \ 22 + crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \ 23 + const void *consts_ptr); \ 24 + crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \ 25 + const void *consts_ptr); \ 26 + DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse) 27 + 28 + #define INIT_CRC_PCLMUL(prefix) \ 29 + do { \ 30 + if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) && \ 31 + boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \ 32 + boot_cpu_has(X86_FEATURE_AVX2) && \ 33 + cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \ 34 + if (boot_cpu_has(X86_FEATURE_AVX512BW) && \ 35 + boot_cpu_has(X86_FEATURE_AVX512VL) && \ 36 + !boot_cpu_has(X86_FEATURE_PREFER_YMM) && \ 37 + cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \ 38 + static_call_update(prefix##_pclmul, \ 39 + prefix##_vpclmul_avx512); \ 40 + } else { \ 41 + static_call_update(prefix##_pclmul, \ 42 + prefix##_vpclmul_avx2); \ 43 + } \ 44 + } \ 45 + } while (0) 46 + 47 + /* 48 + * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16 49 + * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD. 50 + * 51 + * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions. 
52 + * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(), 53 + * varying by CPU and factors such as which parts of the "FPU" state userspace 54 + * has touched, which could result in a larger cutoff being better. Indeed, a 55 + * larger cutoff is usually better for a *single* message. However, the 56 + * overhead of the FPU section gets amortized if multiple FPU sections get 57 + * executed before returning to userspace, since the XSAVE and XRSTOR occur only 58 + * once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on 59 + * the dcache than the table-based code is, a 16-byte cutoff seems to work well. 60 + */ 61 + #define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \ 62 + do { \ 63 + if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \ 64 + crypto_simd_usable()) { \ 65 + const void *consts_ptr; \ 66 + \ 67 + consts_ptr = (consts).fold_across_128_bits_consts; \ 68 + kernel_fpu_begin(); \ 69 + crc = static_call(prefix##_pclmul)((crc), (p), (len), \ 70 + consts_ptr); \ 71 + kernel_fpu_end(); \ 72 + return crc; \ 73 + } \ 74 + } while (0) 75 + 76 + #endif /* _CRC_PCLMUL_TEMPLATE_H */
+9 -20
arch/x86/lib/crc-t10dif-glue.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 /* 3 - * CRC-T10DIF using PCLMULQDQ instructions 3 + * CRC-T10DIF using [V]PCLMULQDQ instructions 4 4 * 5 5 * Copyright 2024 Google LLC 6 6 */ 7 7 8 - #include <asm/cpufeatures.h> 9 - #include <asm/simd.h> 10 - #include <crypto/internal/simd.h> 11 8 #include <linux/crc-t10dif.h> 12 9 #include <linux/module.h> 10 + #include "crc-pclmul-template.h" 13 11 14 12 static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); 15 13 16 - asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); 14 + DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16); 17 15 18 16 u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) 19 17 { 20 - if (len >= 16 && 21 - static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) { 22 - kernel_fpu_begin(); 23 - crc = crc_t10dif_pcl(crc, p, len); 24 - kernel_fpu_end(); 25 - return crc; 26 - } 18 + CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts, 19 + have_pclmulqdq); 27 20 return crc_t10dif_generic(crc, p, len); 28 21 } 29 22 EXPORT_SYMBOL(crc_t10dif_arch); 30 23 31 24 static int __init crc_t10dif_x86_init(void) 32 25 { 33 - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) 26 + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { 34 27 static_branch_enable(&have_pclmulqdq); 28 + INIT_CRC_PCLMUL(crc16_msb); 29 + } 35 30 return 0; 36 31 } 37 32 arch_initcall(crc_t10dif_x86_init); ··· 36 41 } 37 42 module_exit(crc_t10dif_x86_exit); 38 43 39 - bool crc_t10dif_is_optimized(void) 40 - { 41 - return static_key_enabled(&have_pclmulqdq); 42 - } 43 - EXPORT_SYMBOL(crc_t10dif_is_optimized); 44 - 45 - MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions"); 44 + MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions"); 46 45 MODULE_LICENSE("GPL");
+6
arch/x86/lib/crc16-msb-pclmul.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + // Copyright 2025 Google LLC 3 + 4 + #include "crc-pclmul-template.S" 5 + 6 + DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0)
+22 -35
arch/x86/lib/crc32-glue.c
··· 7 7 * Copyright 2024 Google LLC 8 8 */ 9 9 10 - #include <asm/cpufeatures.h> 11 - #include <asm/simd.h> 12 - #include <crypto/internal/simd.h> 13 10 #include <linux/crc32.h> 14 - #include <linux/linkage.h> 15 11 #include <linux/module.h> 16 - 17 - /* minimum size of buffer for crc32_pclmul_le_16 */ 18 - #define CRC32_PCLMUL_MIN_LEN 64 12 + #include "crc-pclmul-template.h" 19 13 20 14 static DEFINE_STATIC_KEY_FALSE(have_crc32); 21 15 static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); 22 16 23 - u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); 17 + DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); 24 18 25 19 u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) 26 20 { 27 - if (len >= CRC32_PCLMUL_MIN_LEN + 15 && 28 - static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { 29 - size_t n = -(uintptr_t)p & 15; 30 - 31 - /* align p to 16-byte boundary */ 32 - if (n) { 33 - crc = crc32_le_base(crc, p, n); 34 - p += n; 35 - len -= n; 36 - } 37 - n = round_down(len, 16); 38 - kernel_fpu_begin(); 39 - crc = crc32_pclmul_le_16(crc, p, n); 40 - kernel_fpu_end(); 41 - p += n; 42 - len -= n; 43 - } 44 - if (len) 45 - crc = crc32_le_base(crc, p, len); 46 - return crc; 21 + CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts, 22 + have_pclmulqdq); 23 + return crc32_le_base(crc, p, len); 47 24 } 48 25 EXPORT_SYMBOL(crc32_le_arch); 49 26 ··· 38 61 39 62 asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); 40 63 41 - u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) 64 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 42 65 { 43 66 size_t num_longs; 44 67 45 68 if (!static_branch_likely(&have_crc32)) 46 - return crc32c_le_base(crc, p, len); 69 + return crc32c_base(crc, p, len); 47 70 48 71 if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && 49 72 static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { ··· 55 78 56 79 for (num_longs = len / sizeof(unsigned long); 57 80 num_longs != 0; num_longs--, p += 
sizeof(unsigned long)) 58 - asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p)); 81 + asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p)); 59 82 60 - for (len %= sizeof(unsigned long); len; len--, p++) 61 - asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p)); 83 + if (sizeof(unsigned long) > 4 && (len & 4)) { 84 + asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p)); 85 + p += 4; 86 + } 87 + if (len & 2) { 88 + asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p)); 89 + p += 2; 90 + } 91 + if (len & 1) 92 + asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p)); 62 93 63 94 return crc; 64 95 } 65 - EXPORT_SYMBOL(crc32c_le_arch); 96 + EXPORT_SYMBOL(crc32c_arch); 66 97 67 98 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) 68 99 { ··· 82 97 { 83 98 if (boot_cpu_has(X86_FEATURE_XMM4_2)) 84 99 static_branch_enable(&have_crc32); 85 - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) 100 + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { 86 101 static_branch_enable(&have_pclmulqdq); 102 + INIT_CRC_PCLMUL(crc32_lsb); 103 + } 87 104 return 0; 88 105 } 89 106 arch_initcall(crc32_x86_init);
+4 -215
arch/x86/lib/crc32-pclmul.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright 2012 Xyratex Technology Limited 4 - * 5 - * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 6 - * calculation. 7 - * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) 8 - * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found 9 - * at: 10 - * http://www.intel.com/products/processor/manuals/ 11 - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual 12 - * Volume 2B: Instruction Set Reference, N-Z 13 - * 14 - * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> 15 - * Alexander Boyko <Alexander_Boyko@xyratex.com> 16 - */ 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + // Copyright 2025 Google LLC 17 3 18 - #include <linux/linkage.h> 4 + #include "crc-pclmul-template.S" 19 5 20 - 21 - .section .rodata 22 - .align 16 23 - /* 24 - * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 25 - * #define CONSTANT_R1 0x154442bd4LL 26 - * 27 - * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 28 - * #define CONSTANT_R2 0x1c6e41596LL 29 - */ 30 - .Lconstant_R2R1: 31 - .octa 0x00000001c6e415960000000154442bd4 32 - /* 33 - * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 34 - * #define CONSTANT_R3 0x1751997d0LL 35 - * 36 - * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e 37 - * #define CONSTANT_R4 0x0ccaa009eLL 38 - */ 39 - .Lconstant_R4R3: 40 - .octa 0x00000000ccaa009e00000001751997d0 41 - /* 42 - * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 43 - * #define CONSTANT_R5 0x163cd6124LL 44 - */ 45 - .Lconstant_R5: 46 - .octa 0x00000000000000000000000163cd6124 47 - .Lconstant_mask32: 48 - .octa 0x000000000000000000000000FFFFFFFF 49 - /* 50 - * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL 51 - * 52 - * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL 53 - * #define CONSTANT_RU 0x1F7011641LL 54 - */ 55 - .Lconstant_RUpoly: 56 - .octa 0x00000001F701164100000001DB710641 57 - 58 - #define CONSTANT %xmm0 59 - 60 - #ifdef __x86_64__ 
61 - #define CRC %edi 62 - #define BUF %rsi 63 - #define LEN %rdx 64 - #else 65 - #define CRC %eax 66 - #define BUF %edx 67 - #define LEN %ecx 68 - #endif 69 - 70 - 71 - 72 - .text 73 - /** 74 - * Calculate crc32 75 - * CRC - initial crc32 76 - * BUF - buffer (16 bytes aligned) 77 - * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63 78 - * return %eax crc32 79 - * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); 80 - */ 81 - 82 - SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ 83 - movdqa (BUF), %xmm1 84 - movdqa 0x10(BUF), %xmm2 85 - movdqa 0x20(BUF), %xmm3 86 - movdqa 0x30(BUF), %xmm4 87 - movd CRC, CONSTANT 88 - pxor CONSTANT, %xmm1 89 - sub $0x40, LEN 90 - add $0x40, BUF 91 - cmp $0x40, LEN 92 - jb .Lless_64 93 - 94 - #ifdef __x86_64__ 95 - movdqa .Lconstant_R2R1(%rip), CONSTANT 96 - #else 97 - movdqa .Lconstant_R2R1, CONSTANT 98 - #endif 99 - 100 - .Lloop_64:/* 64 bytes Full cache line folding */ 101 - prefetchnta 0x40(BUF) 102 - movdqa %xmm1, %xmm5 103 - movdqa %xmm2, %xmm6 104 - movdqa %xmm3, %xmm7 105 - #ifdef __x86_64__ 106 - movdqa %xmm4, %xmm8 107 - #endif 108 - pclmulqdq $0x00, CONSTANT, %xmm1 109 - pclmulqdq $0x00, CONSTANT, %xmm2 110 - pclmulqdq $0x00, CONSTANT, %xmm3 111 - #ifdef __x86_64__ 112 - pclmulqdq $0x00, CONSTANT, %xmm4 113 - #endif 114 - pclmulqdq $0x11, CONSTANT, %xmm5 115 - pclmulqdq $0x11, CONSTANT, %xmm6 116 - pclmulqdq $0x11, CONSTANT, %xmm7 117 - #ifdef __x86_64__ 118 - pclmulqdq $0x11, CONSTANT, %xmm8 119 - #endif 120 - pxor %xmm5, %xmm1 121 - pxor %xmm6, %xmm2 122 - pxor %xmm7, %xmm3 123 - #ifdef __x86_64__ 124 - pxor %xmm8, %xmm4 125 - #else 126 - /* xmm8 unsupported for x32 */ 127 - movdqa %xmm4, %xmm5 128 - pclmulqdq $0x00, CONSTANT, %xmm4 129 - pclmulqdq $0x11, CONSTANT, %xmm5 130 - pxor %xmm5, %xmm4 131 - #endif 132 - 133 - pxor (BUF), %xmm1 134 - pxor 0x10(BUF), %xmm2 135 - pxor 0x20(BUF), %xmm3 136 - pxor 0x30(BUF), %xmm4 137 - 138 - sub $0x40, LEN 139 - 
add $0x40, BUF 140 - cmp $0x40, LEN 141 - jge .Lloop_64 142 - .Lless_64:/* Folding cache line into 128bit */ 143 - #ifdef __x86_64__ 144 - movdqa .Lconstant_R4R3(%rip), CONSTANT 145 - #else 146 - movdqa .Lconstant_R4R3, CONSTANT 147 - #endif 148 - prefetchnta (BUF) 149 - 150 - movdqa %xmm1, %xmm5 151 - pclmulqdq $0x00, CONSTANT, %xmm1 152 - pclmulqdq $0x11, CONSTANT, %xmm5 153 - pxor %xmm5, %xmm1 154 - pxor %xmm2, %xmm1 155 - 156 - movdqa %xmm1, %xmm5 157 - pclmulqdq $0x00, CONSTANT, %xmm1 158 - pclmulqdq $0x11, CONSTANT, %xmm5 159 - pxor %xmm5, %xmm1 160 - pxor %xmm3, %xmm1 161 - 162 - movdqa %xmm1, %xmm5 163 - pclmulqdq $0x00, CONSTANT, %xmm1 164 - pclmulqdq $0x11, CONSTANT, %xmm5 165 - pxor %xmm5, %xmm1 166 - pxor %xmm4, %xmm1 167 - 168 - cmp $0x10, LEN 169 - jb .Lfold_64 170 - .Lloop_16:/* Folding rest buffer into 128bit */ 171 - movdqa %xmm1, %xmm5 172 - pclmulqdq $0x00, CONSTANT, %xmm1 173 - pclmulqdq $0x11, CONSTANT, %xmm5 174 - pxor %xmm5, %xmm1 175 - pxor (BUF), %xmm1 176 - sub $0x10, LEN 177 - add $0x10, BUF 178 - cmp $0x10, LEN 179 - jge .Lloop_16 180 - 181 - .Lfold_64: 182 - /* perform the last 64 bit fold, also adds 32 zeroes 183 - * to the input stream */ 184 - pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ 185 - psrldq $0x08, %xmm1 186 - pxor CONSTANT, %xmm1 187 - 188 - /* final 32-bit fold */ 189 - movdqa %xmm1, %xmm2 190 - #ifdef __x86_64__ 191 - movdqa .Lconstant_R5(%rip), CONSTANT 192 - movdqa .Lconstant_mask32(%rip), %xmm3 193 - #else 194 - movdqa .Lconstant_R5, CONSTANT 195 - movdqa .Lconstant_mask32, %xmm3 196 - #endif 197 - psrldq $0x04, %xmm2 198 - pand %xmm3, %xmm1 199 - pclmulqdq $0x00, CONSTANT, %xmm1 200 - pxor %xmm2, %xmm1 201 - 202 - /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ 203 - #ifdef __x86_64__ 204 - movdqa .Lconstant_RUpoly(%rip), CONSTANT 205 - #else 206 - movdqa .Lconstant_RUpoly, CONSTANT 207 - #endif 208 - movdqa %xmm1, %xmm2 209 - pand %xmm3, %xmm1 210 - pclmulqdq $0x10, CONSTANT, %xmm1 211 
- pand %xmm3, %xmm1 212 - pclmulqdq $0x00, CONSTANT, %xmm1 213 - pxor %xmm2, %xmm1 214 - pextrd $0x01, %xmm1, %eax 215 - 216 - RET 217 - SYM_FUNC_END(crc32_pclmul_le_16) 6 + DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)
+50
arch/x86/lib/crc64-glue.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * CRC64 using [V]PCLMULQDQ instructions 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + 8 + #include <linux/crc64.h> 9 + #include <linux/module.h> 10 + #include "crc-pclmul-template.h" 11 + 12 + static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); 13 + 14 + DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64); 15 + DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64); 16 + 17 + u64 crc64_be_arch(u64 crc, const u8 *p, size_t len) 18 + { 19 + CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts, 20 + have_pclmulqdq); 21 + return crc64_be_generic(crc, p, len); 22 + } 23 + EXPORT_SYMBOL_GPL(crc64_be_arch); 24 + 25 + u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) 26 + { 27 + CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts, 28 + have_pclmulqdq); 29 + return crc64_nvme_generic(crc, p, len); 30 + } 31 + EXPORT_SYMBOL_GPL(crc64_nvme_arch); 32 + 33 + static int __init crc64_x86_init(void) 34 + { 35 + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { 36 + static_branch_enable(&have_pclmulqdq); 37 + INIT_CRC_PCLMUL(crc64_msb); 38 + INIT_CRC_PCLMUL(crc64_lsb); 39 + } 40 + return 0; 41 + } 42 + arch_initcall(crc64_x86_init); 43 + 44 + static void __exit crc64_x86_exit(void) 45 + { 46 + } 47 + module_exit(crc64_x86_exit); 48 + 49 + MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions"); 50 + MODULE_LICENSE("GPL");
+7
arch/x86/lib/crc64-pclmul.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + // Copyright 2025 Google LLC 3 + 4 + #include "crc-pclmul-template.S" 5 + 6 + DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0) 7 + DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1)
-332
arch/x86/lib/crct10dif-pcl-asm_64.S
··· 1 - ######################################################################## 2 - # Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions 3 - # 4 - # Copyright (c) 2013, Intel Corporation 5 - # 6 - # Authors: 7 - # Erdinc Ozturk <erdinc.ozturk@intel.com> 8 - # Vinodh Gopal <vinodh.gopal@intel.com> 9 - # James Guilford <james.guilford@intel.com> 10 - # Tim Chen <tim.c.chen@linux.intel.com> 11 - # 12 - # This software is available to you under a choice of one of two 13 - # licenses. You may choose to be licensed under the terms of the GNU 14 - # General Public License (GPL) Version 2, available from the file 15 - # COPYING in the main directory of this source tree, or the 16 - # OpenIB.org BSD license below: 17 - # 18 - # Redistribution and use in source and binary forms, with or without 19 - # modification, are permitted provided that the following conditions are 20 - # met: 21 - # 22 - # * Redistributions of source code must retain the above copyright 23 - # notice, this list of conditions and the following disclaimer. 24 - # 25 - # * Redistributions in binary form must reproduce the above copyright 26 - # notice, this list of conditions and the following disclaimer in the 27 - # documentation and/or other materials provided with the 28 - # distribution. 29 - # 30 - # * Neither the name of the Intel Corporation nor the names of its 31 - # contributors may be used to endorse or promote products derived from 32 - # this software without specific prior written permission. 33 - # 34 - # 35 - # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY 36 - # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 37 - # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 38 - # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL CORPORATION OR 39 - # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 40 - # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 41 - # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 42 - # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 43 - # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 44 - # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 45 - # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 - # 47 - # Reference paper titled "Fast CRC Computation for Generic 48 - # Polynomials Using PCLMULQDQ Instruction" 49 - # URL: http://www.intel.com/content/dam/www/public/us/en/documents 50 - # /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf 51 - # 52 - 53 - #include <linux/linkage.h> 54 - 55 - .text 56 - 57 - #define init_crc %edi 58 - #define buf %rsi 59 - #define len %rdx 60 - 61 - #define FOLD_CONSTS %xmm10 62 - #define BSWAP_MASK %xmm11 63 - 64 - # Fold reg1, reg2 into the next 32 data bytes, storing the result back into 65 - # reg1, reg2. 66 - .macro fold_32_bytes offset, reg1, reg2 67 - movdqu \offset(buf), %xmm9 68 - movdqu \offset+16(buf), %xmm12 69 - pshufb BSWAP_MASK, %xmm9 70 - pshufb BSWAP_MASK, %xmm12 71 - movdqa \reg1, %xmm8 72 - movdqa \reg2, %xmm13 73 - pclmulqdq $0x00, FOLD_CONSTS, \reg1 74 - pclmulqdq $0x11, FOLD_CONSTS, %xmm8 75 - pclmulqdq $0x00, FOLD_CONSTS, \reg2 76 - pclmulqdq $0x11, FOLD_CONSTS, %xmm13 77 - pxor %xmm9 , \reg1 78 - xorps %xmm8 , \reg1 79 - pxor %xmm12, \reg2 80 - xorps %xmm13, \reg2 81 - .endm 82 - 83 - # Fold src_reg into dst_reg. 
84 - .macro fold_16_bytes src_reg, dst_reg 85 - movdqa \src_reg, %xmm8 86 - pclmulqdq $0x11, FOLD_CONSTS, \src_reg 87 - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 88 - pxor %xmm8, \dst_reg 89 - xorps \src_reg, \dst_reg 90 - .endm 91 - 92 - # 93 - # u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len); 94 - # 95 - # Assumes len >= 16. 96 - # 97 - SYM_FUNC_START(crc_t10dif_pcl) 98 - 99 - movdqa .Lbswap_mask(%rip), BSWAP_MASK 100 - 101 - # For sizes less than 256 bytes, we can't fold 128 bytes at a time. 102 - cmp $256, len 103 - jl .Lless_than_256_bytes 104 - 105 - # Load the first 128 data bytes. Byte swapping is necessary to make the 106 - # bit order match the polynomial coefficient order. 107 - movdqu 16*0(buf), %xmm0 108 - movdqu 16*1(buf), %xmm1 109 - movdqu 16*2(buf), %xmm2 110 - movdqu 16*3(buf), %xmm3 111 - movdqu 16*4(buf), %xmm4 112 - movdqu 16*5(buf), %xmm5 113 - movdqu 16*6(buf), %xmm6 114 - movdqu 16*7(buf), %xmm7 115 - add $128, buf 116 - pshufb BSWAP_MASK, %xmm0 117 - pshufb BSWAP_MASK, %xmm1 118 - pshufb BSWAP_MASK, %xmm2 119 - pshufb BSWAP_MASK, %xmm3 120 - pshufb BSWAP_MASK, %xmm4 121 - pshufb BSWAP_MASK, %xmm5 122 - pshufb BSWAP_MASK, %xmm6 123 - pshufb BSWAP_MASK, %xmm7 124 - 125 - # XOR the first 16 data *bits* with the initial CRC value. 126 - pxor %xmm8, %xmm8 127 - pinsrw $7, init_crc, %xmm8 128 - pxor %xmm8, %xmm0 129 - 130 - movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS 131 - 132 - # Subtract 128 for the 128 data bytes just consumed. Subtract another 133 - # 128 to simplify the termination condition of the following loop. 134 - sub $256, len 135 - 136 - # While >= 128 data bytes remain (not counting xmm0-7), fold the 128 137 - # bytes xmm0-7 into them, storing the result back into xmm0-7. 
138 - .Lfold_128_bytes_loop: 139 - fold_32_bytes 0, %xmm0, %xmm1 140 - fold_32_bytes 32, %xmm2, %xmm3 141 - fold_32_bytes 64, %xmm4, %xmm5 142 - fold_32_bytes 96, %xmm6, %xmm7 143 - add $128, buf 144 - sub $128, len 145 - jge .Lfold_128_bytes_loop 146 - 147 - # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7. 148 - 149 - # Fold across 64 bytes. 150 - movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS 151 - fold_16_bytes %xmm0, %xmm4 152 - fold_16_bytes %xmm1, %xmm5 153 - fold_16_bytes %xmm2, %xmm6 154 - fold_16_bytes %xmm3, %xmm7 155 - # Fold across 32 bytes. 156 - movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS 157 - fold_16_bytes %xmm4, %xmm6 158 - fold_16_bytes %xmm5, %xmm7 159 - # Fold across 16 bytes. 160 - movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS 161 - fold_16_bytes %xmm6, %xmm7 162 - 163 - # Add 128 to get the correct number of data bytes remaining in 0...127 164 - # (not counting xmm7), following the previous extra subtraction by 128. 165 - # Then subtract 16 to simplify the termination condition of the 166 - # following loop. 167 - add $128-16, len 168 - 169 - # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes 170 - # xmm7 into them, storing the result back into xmm7. 171 - jl .Lfold_16_bytes_loop_done 172 - .Lfold_16_bytes_loop: 173 - movdqa %xmm7, %xmm8 174 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 175 - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 176 - pxor %xmm8, %xmm7 177 - movdqu (buf), %xmm0 178 - pshufb BSWAP_MASK, %xmm0 179 - pxor %xmm0 , %xmm7 180 - add $16, buf 181 - sub $16, len 182 - jge .Lfold_16_bytes_loop 183 - 184 - .Lfold_16_bytes_loop_done: 185 - # Add 16 to get the correct number of data bytes remaining in 0...15 186 - # (not counting xmm7), following the previous extra subtraction by 16. 
187 - add $16, len 188 - je .Lreduce_final_16_bytes 189 - 190 - .Lhandle_partial_segment: 191 - # Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16 192 - # bytes are in xmm7 and the rest are the remaining data in 'buf'. To do 193 - # this without needing a fold constant for each possible 'len', redivide 194 - # the bytes into a first chunk of 'len' bytes and a second chunk of 16 195 - # bytes, then fold the first chunk into the second. 196 - 197 - movdqa %xmm7, %xmm2 198 - 199 - # xmm1 = last 16 original data bytes 200 - movdqu -16(buf, len), %xmm1 201 - pshufb BSWAP_MASK, %xmm1 202 - 203 - # xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes. 204 - lea .Lbyteshift_table+16(%rip), %rax 205 - sub len, %rax 206 - movdqu (%rax), %xmm0 207 - pshufb %xmm0, %xmm2 208 - 209 - # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes. 210 - pxor .Lmask1(%rip), %xmm0 211 - pshufb %xmm0, %xmm7 212 - 213 - # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes), 214 - # then '16-len' bytes from xmm2 (high-order bytes). 215 - pblendvb %xmm2, %xmm1 #xmm0 is implicit 216 - 217 - # Fold the first chunk into the second chunk, storing the result in xmm7. 218 - movdqa %xmm7, %xmm8 219 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 220 - pclmulqdq $0x00, FOLD_CONSTS, %xmm8 221 - pxor %xmm8, %xmm7 222 - pxor %xmm1, %xmm7 223 - 224 - .Lreduce_final_16_bytes: 225 - # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC 226 - 227 - # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. 228 - movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS 229 - 230 - # Fold the high 64 bits into the low 64 bits, while also multiplying by 231 - # x^64. This produces a 128-bit value congruent to x^64 * M(x) and 232 - # whose low 48 bits are 0. 
233 - movdqa %xmm7, %xmm0 234 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x)) 235 - pslldq $8, %xmm0 236 - pxor %xmm0, %xmm7 # + low bits * x^64 237 - 238 - # Fold the high 32 bits into the low 96 bits. This produces a 96-bit 239 - # value congruent to x^64 * M(x) and whose low 48 bits are 0. 240 - movdqa %xmm7, %xmm0 241 - pand .Lmask2(%rip), %xmm0 # zero high 32 bits 242 - psrldq $12, %xmm7 # extract high 32 bits 243 - pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x)) 244 - pxor %xmm0, %xmm7 # + low bits 245 - 246 - # Load G(x) and floor(x^48 / G(x)). 247 - movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS 248 - 249 - # Use Barrett reduction to compute the final CRC value. 250 - movdqa %xmm7, %xmm0 251 - pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x)) 252 - psrlq $32, %xmm7 # /= x^32 253 - pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x) 254 - psrlq $48, %xmm0 255 - pxor %xmm7, %xmm0 # + low 16 nonzero bits 256 - # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. 257 - 258 - pextrw $0, %xmm0, %eax 259 - RET 260 - 261 - .align 16 262 - .Lless_than_256_bytes: 263 - # Checksumming a buffer of length 16...255 bytes 264 - 265 - # Load the first 16 data bytes. 266 - movdqu (buf), %xmm7 267 - pshufb BSWAP_MASK, %xmm7 268 - add $16, buf 269 - 270 - # XOR the first 16 data *bits* with the initial CRC value. 
271 - pxor %xmm0, %xmm0 272 - pinsrw $7, init_crc, %xmm0 273 - pxor %xmm0, %xmm7 274 - 275 - movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS 276 - cmp $16, len 277 - je .Lreduce_final_16_bytes # len == 16 278 - sub $32, len 279 - jge .Lfold_16_bytes_loop # 32 <= len <= 255 280 - add $16, len 281 - jmp .Lhandle_partial_segment # 17 <= len <= 31 282 - SYM_FUNC_END(crc_t10dif_pcl) 283 - 284 - .section .rodata, "a", @progbits 285 - .align 16 286 - 287 - # Fold constants precomputed from the polynomial 0x18bb7 288 - # G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 289 - .Lfold_across_128_bytes_consts: 290 - .quad 0x0000000000006123 # x^(8*128) mod G(x) 291 - .quad 0x0000000000002295 # x^(8*128+64) mod G(x) 292 - .Lfold_across_64_bytes_consts: 293 - .quad 0x0000000000001069 # x^(4*128) mod G(x) 294 - .quad 0x000000000000dd31 # x^(4*128+64) mod G(x) 295 - .Lfold_across_32_bytes_consts: 296 - .quad 0x000000000000857d # x^(2*128) mod G(x) 297 - .quad 0x0000000000007acc # x^(2*128+64) mod G(x) 298 - .Lfold_across_16_bytes_consts: 299 - .quad 0x000000000000a010 # x^(1*128) mod G(x) 300 - .quad 0x0000000000001faa # x^(1*128+64) mod G(x) 301 - .Lfinal_fold_consts: 302 - .quad 0x1368000000000000 # x^48 * (x^48 mod G(x)) 303 - .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x)) 304 - .Lbarrett_reduction_consts: 305 - .quad 0x0000000000018bb7 # G(x) 306 - .quad 0x00000001f65a57f8 # floor(x^48 / G(x)) 307 - 308 - .section .rodata.cst16.mask1, "aM", @progbits, 16 309 - .align 16 310 - .Lmask1: 311 - .octa 0x80808080808080808080808080808080 312 - 313 - .section .rodata.cst16.mask2, "aM", @progbits, 16 314 - .align 16 315 - .Lmask2: 316 - .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF 317 - 318 - .section .rodata.cst16.bswap_mask, "aM", @progbits, 16 319 - .align 16 320 - .Lbswap_mask: 321 - .octa 0x000102030405060708090A0B0C0D0E0F 322 - 323 - .section .rodata.cst32.byteshift_table, "aM", @progbits, 32 324 - .align 16 325 - # For 1 <= len <= 15, the 16-byte 
vector beginning at &byteshift_table[16 - len] 326 - # is the index vector to shift left by 'len' bytes, and is also {0x80, ..., 327 - # 0x80} XOR the index vector to shift right by '16 - len' bytes. 328 - .Lbyteshift_table: 329 - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 330 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f 331 - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 332 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0
+1 -1
block/Kconfig
··· 63 63 config BLK_DEV_INTEGRITY 64 64 bool "Block layer data integrity support" 65 65 select CRC_T10DIF 66 - select CRC64_ROCKSOFT 66 + select CRC64 67 67 help 68 68 Some storage devices allow extra information to be 69 69 stored/retrieved to help protect the data. The block layer
+1 -1
block/t10-pi.c
··· 210 210 211 211 static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len) 212 212 { 213 - return cpu_to_be64(crc64_rocksoft_update(crc, data, len)); 213 + return cpu_to_be64(crc64_nvme(crc, data, len)); 214 214 } 215 215 216 216 static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
-20
crypto/Kconfig
··· 1081 1081 1082 1082 Used by RoCEv2 and f2fs. 1083 1083 1084 - config CRYPTO_CRCT10DIF 1085 - tristate "CRCT10DIF" 1086 - select CRYPTO_HASH 1087 - select CRC_T10DIF 1088 - help 1089 - CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) 1090 - 1091 - CRC algorithm used by the SCSI Block Commands standard. 1092 - 1093 - config CRYPTO_CRC64_ROCKSOFT 1094 - tristate "CRC64 based on Rocksoft Model algorithm" 1095 - depends on CRC64 1096 - select CRYPTO_HASH 1097 - help 1098 - CRC64 CRC algorithm based on the Rocksoft Model CRC Algorithm 1099 - 1100 - Used by the NVMe implementation of T10 DIF (BLK_DEV_INTEGRITY) 1101 - 1102 - See https://zlib.net/crc_v3.txt 1103 - 1104 1084 endmenu 1105 1085 1106 1086 menu "Compression"
-3
crypto/Makefile
··· 155 155 obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o 156 156 CFLAGS_crc32c_generic.o += -DARCH=$(ARCH) 157 157 CFLAGS_crc32_generic.o += -DARCH=$(ARCH) 158 - obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o 159 - CFLAGS_crct10dif_generic.o += -DARCH=$(ARCH) 160 - obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o 161 158 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o 162 159 obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o 163 160 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+4 -4
crypto/crc32c_generic.c
··· 85 85 { 86 86 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 87 87 88 - ctx->crc = crc32c_le_base(ctx->crc, data, length); 88 + ctx->crc = crc32c_base(ctx->crc, data, length); 89 89 return 0; 90 90 } 91 91 ··· 94 94 { 95 95 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 96 96 97 - ctx->crc = __crc32c_le(ctx->crc, data, length); 97 + ctx->crc = crc32c(ctx->crc, data, length); 98 98 return 0; 99 99 } 100 100 ··· 108 108 109 109 static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) 110 110 { 111 - put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out); 111 + put_unaligned_le32(~crc32c_base(*crcp, data, len), out); 112 112 return 0; 113 113 } 114 114 115 115 static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, 116 116 u8 *out) 117 117 { 118 - put_unaligned_le32(~__crc32c_le(*crcp, data, len), out); 118 + put_unaligned_le32(~crc32c(*crcp, data, len), out); 119 119 return 0; 120 120 } 121 121
-89
crypto/crc64_rocksoft_generic.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - 3 - #include <linux/crc64.h> 4 - #include <linux/module.h> 5 - #include <crypto/internal/hash.h> 6 - #include <linux/unaligned.h> 7 - 8 - static int chksum_init(struct shash_desc *desc) 9 - { 10 - u64 *crc = shash_desc_ctx(desc); 11 - 12 - *crc = 0; 13 - 14 - return 0; 15 - } 16 - 17 - static int chksum_update(struct shash_desc *desc, const u8 *data, 18 - unsigned int length) 19 - { 20 - u64 *crc = shash_desc_ctx(desc); 21 - 22 - *crc = crc64_rocksoft_generic(*crc, data, length); 23 - 24 - return 0; 25 - } 26 - 27 - static int chksum_final(struct shash_desc *desc, u8 *out) 28 - { 29 - u64 *crc = shash_desc_ctx(desc); 30 - 31 - put_unaligned_le64(*crc, out); 32 - return 0; 33 - } 34 - 35 - static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out) 36 - { 37 - crc = crc64_rocksoft_generic(crc, data, len); 38 - put_unaligned_le64(crc, out); 39 - return 0; 40 - } 41 - 42 - static int chksum_finup(struct shash_desc *desc, const u8 *data, 43 - unsigned int len, u8 *out) 44 - { 45 - u64 *crc = shash_desc_ctx(desc); 46 - 47 - return __chksum_finup(*crc, data, len, out); 48 - } 49 - 50 - static int chksum_digest(struct shash_desc *desc, const u8 *data, 51 - unsigned int length, u8 *out) 52 - { 53 - return __chksum_finup(0, data, length, out); 54 - } 55 - 56 - static struct shash_alg alg = { 57 - .digestsize = sizeof(u64), 58 - .init = chksum_init, 59 - .update = chksum_update, 60 - .final = chksum_final, 61 - .finup = chksum_finup, 62 - .digest = chksum_digest, 63 - .descsize = sizeof(u64), 64 - .base = { 65 - .cra_name = CRC64_ROCKSOFT_STRING, 66 - .cra_driver_name = "crc64-rocksoft-generic", 67 - .cra_priority = 200, 68 - .cra_blocksize = 1, 69 - .cra_module = THIS_MODULE, 70 - } 71 - }; 72 - 73 - static int __init crc64_rocksoft_init(void) 74 - { 75 - return crypto_register_shash(&alg); 76 - } 77 - 78 - static void __exit crc64_rocksoft_exit(void) 79 - { 80 - crypto_unregister_shash(&alg); 81 - } 82 
- 83 - module_init(crc64_rocksoft_init); 84 - module_exit(crc64_rocksoft_exit); 85 - 86 - MODULE_LICENSE("GPL"); 87 - MODULE_DESCRIPTION("Rocksoft model CRC64 calculation."); 88 - MODULE_ALIAS_CRYPTO("crc64-rocksoft"); 89 - MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic");
-168
crypto/crct10dif_generic.c
··· 1 - /* 2 - * Cryptographic API. 3 - * 4 - * T10 Data Integrity Field CRC16 Crypto Transform 5 - * 6 - * Copyright (c) 2007 Oracle Corporation. All rights reserved. 7 - * Written by Martin K. Petersen <martin.petersen@oracle.com> 8 - * Copyright (C) 2013 Intel Corporation 9 - * Author: Tim Chen <tim.c.chen@linux.intel.com> 10 - * 11 - * This program is free software; you can redistribute it and/or modify it 12 - * under the terms of the GNU General Public License as published by the Free 13 - * Software Foundation; either version 2 of the License, or (at your option) 14 - * any later version. 15 - * 16 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 - * SOFTWARE. 24 - * 25 - */ 26 - 27 - #include <linux/module.h> 28 - #include <linux/crc-t10dif.h> 29 - #include <crypto/internal/hash.h> 30 - #include <linux/init.h> 31 - #include <linux/kernel.h> 32 - 33 - struct chksum_desc_ctx { 34 - __u16 crc; 35 - }; 36 - 37 - /* 38 - * Steps through buffer one byte at a time, calculates reflected 39 - * crc using table. 
40 - */ 41 - 42 - static int chksum_init(struct shash_desc *desc) 43 - { 44 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 45 - 46 - ctx->crc = 0; 47 - 48 - return 0; 49 - } 50 - 51 - static int chksum_update(struct shash_desc *desc, const u8 *data, 52 - unsigned int length) 53 - { 54 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 55 - 56 - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); 57 - return 0; 58 - } 59 - 60 - static int chksum_update_arch(struct shash_desc *desc, const u8 *data, 61 - unsigned int length) 62 - { 63 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 64 - 65 - ctx->crc = crc_t10dif_update(ctx->crc, data, length); 66 - return 0; 67 - } 68 - 69 - static int chksum_final(struct shash_desc *desc, u8 *out) 70 - { 71 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 72 - 73 - *(__u16 *)out = ctx->crc; 74 - return 0; 75 - } 76 - 77 - static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) 78 - { 79 - *(__u16 *)out = crc_t10dif_generic(crc, data, len); 80 - return 0; 81 - } 82 - 83 - static int __chksum_finup_arch(__u16 crc, const u8 *data, unsigned int len, 84 - u8 *out) 85 - { 86 - *(__u16 *)out = crc_t10dif_update(crc, data, len); 87 - return 0; 88 - } 89 - 90 - static int chksum_finup(struct shash_desc *desc, const u8 *data, 91 - unsigned int len, u8 *out) 92 - { 93 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 94 - 95 - return __chksum_finup(ctx->crc, data, len, out); 96 - } 97 - 98 - static int chksum_finup_arch(struct shash_desc *desc, const u8 *data, 99 - unsigned int len, u8 *out) 100 - { 101 - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); 102 - 103 - return __chksum_finup_arch(ctx->crc, data, len, out); 104 - } 105 - 106 - static int chksum_digest(struct shash_desc *desc, const u8 *data, 107 - unsigned int length, u8 *out) 108 - { 109 - return __chksum_finup(0, data, length, out); 110 - } 111 - 112 - static int chksum_digest_arch(struct shash_desc *desc, const u8 *data, 113 
- unsigned int length, u8 *out) 114 - { 115 - return __chksum_finup_arch(0, data, length, out); 116 - } 117 - 118 - static struct shash_alg algs[] = {{ 119 - .digestsize = CRC_T10DIF_DIGEST_SIZE, 120 - .init = chksum_init, 121 - .update = chksum_update, 122 - .final = chksum_final, 123 - .finup = chksum_finup, 124 - .digest = chksum_digest, 125 - .descsize = sizeof(struct chksum_desc_ctx), 126 - .base.cra_name = "crct10dif", 127 - .base.cra_driver_name = "crct10dif-generic", 128 - .base.cra_priority = 100, 129 - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, 130 - .base.cra_module = THIS_MODULE, 131 - }, { 132 - .digestsize = CRC_T10DIF_DIGEST_SIZE, 133 - .init = chksum_init, 134 - .update = chksum_update_arch, 135 - .final = chksum_final, 136 - .finup = chksum_finup_arch, 137 - .digest = chksum_digest_arch, 138 - .descsize = sizeof(struct chksum_desc_ctx), 139 - .base.cra_name = "crct10dif", 140 - .base.cra_driver_name = "crct10dif-" __stringify(ARCH), 141 - .base.cra_priority = 150, 142 - .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, 143 - .base.cra_module = THIS_MODULE, 144 - }}; 145 - 146 - static int num_algs; 147 - 148 - static int __init crct10dif_mod_init(void) 149 - { 150 - /* register the arch flavor only if it differs from the generic one */ 151 - num_algs = 1 + crc_t10dif_is_optimized(); 152 - 153 - return crypto_register_shashes(algs, num_algs); 154 - } 155 - 156 - static void __exit crct10dif_mod_fini(void) 157 - { 158 - crypto_unregister_shashes(algs, num_algs); 159 - } 160 - 161 - subsys_initcall(crct10dif_mod_init); 162 - module_exit(crct10dif_mod_fini); 163 - 164 - MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); 165 - MODULE_DESCRIPTION("T10 DIF CRC calculation."); 166 - MODULE_LICENSE("GPL"); 167 - MODULE_ALIAS_CRYPTO("crct10dif"); 168 - MODULE_ALIAS_CRYPTO("crct10dif-generic");
-8
crypto/tcrypt.c
··· 1654 1654 ret = min(ret, tcrypt_test("ghash")); 1655 1655 break; 1656 1656 1657 - case 47: 1658 - ret = min(ret, tcrypt_test("crct10dif")); 1659 - break; 1660 - 1661 1657 case 48: 1662 1658 ret = min(ret, tcrypt_test("sha3-224")); 1663 1659 break; ··· 2266 2270 fallthrough; 2267 2271 case 319: 2268 2272 test_hash_speed("crc32c", sec, generic_hash_speed_template); 2269 - if (mode > 300 && mode < 400) break; 2270 - fallthrough; 2271 - case 320: 2272 - test_hash_speed("crct10dif", sec, generic_hash_speed_template); 2273 2273 if (mode > 300 && mode < 400) break; 2274 2274 fallthrough; 2275 2275 case 321:
-14
crypto/testmgr.c
··· 4760 4760 .hash = __VECS(crc32c_tv_template) 4761 4761 } 4762 4762 }, { 4763 - .alg = "crc64-rocksoft", 4764 - .test = alg_test_hash, 4765 - .fips_allowed = 1, 4766 - .suite = { 4767 - .hash = __VECS(crc64_rocksoft_tv_template) 4768 - } 4769 - }, { 4770 - .alg = "crct10dif", 4771 - .test = alg_test_hash, 4772 - .fips_allowed = 1, 4773 - .suite = { 4774 - .hash = __VECS(crct10dif_tv_template) 4775 - } 4776 - }, { 4777 4763 .alg = "ctr(aes)", 4778 4764 .test = alg_test_skcipher, 4779 4765 .fips_allowed = 1,
-303
crypto/testmgr.h
··· 6017 6017 } 6018 6018 }; 6019 6019 6020 - static const u8 zeroes[4096] = { [0 ... 4095] = 0 }; 6021 - static const u8 ones[4096] = { [0 ... 4095] = 0xff }; 6022 - 6023 - static const struct hash_testvec crc64_rocksoft_tv_template[] = { 6024 - { 6025 - .plaintext = zeroes, 6026 - .psize = 4096, 6027 - .digest = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64", 6028 - }, { 6029 - .plaintext = ones, 6030 - .psize = 4096, 6031 - .digest = "\xac\xa3\xec\x02\x73\xba\xdd\xc0", 6032 - } 6033 - }; 6034 - 6035 - static const struct hash_testvec crct10dif_tv_template[] = { 6036 - { 6037 - .plaintext = "abc", 6038 - .psize = 3, 6039 - .digest = (u8 *)(u16 []){ 0x443b }, 6040 - }, { 6041 - .plaintext = "1234567890123456789012345678901234567890" 6042 - "123456789012345678901234567890123456789", 6043 - .psize = 79, 6044 - .digest = (u8 *)(u16 []){ 0x4b70 }, 6045 - }, { 6046 - .plaintext = "abcdddddddddddddddddddddddddddddddddddddddd" 6047 - "ddddddddddddd", 6048 - .psize = 56, 6049 - .digest = (u8 *)(u16 []){ 0x9ce3 }, 6050 - }, { 6051 - .plaintext = "1234567890123456789012345678901234567890" 6052 - "1234567890123456789012345678901234567890" 6053 - "1234567890123456789012345678901234567890" 6054 - "1234567890123456789012345678901234567890" 6055 - "1234567890123456789012345678901234567890" 6056 - "1234567890123456789012345678901234567890" 6057 - "1234567890123456789012345678901234567890" 6058 - "123456789012345678901234567890123456789", 6059 - .psize = 319, 6060 - .digest = (u8 *)(u16 []){ 0x44c6 }, 6061 - }, { 6062 - .plaintext = "\x6e\x05\x79\x10\xa7\x1b\xb2\x49" 6063 - "\xe0\x54\xeb\x82\x19\x8d\x24\xbb" 6064 - "\x2f\xc6\x5d\xf4\x68\xff\x96\x0a" 6065 - "\xa1\x38\xcf\x43\xda\x71\x08\x7c" 6066 - "\x13\xaa\x1e\xb5\x4c\xe3\x57\xee" 6067 - "\x85\x1c\x90\x27\xbe\x32\xc9\x60" 6068 - "\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2" 6069 - "\x46\xdd\x74\x0b\x7f\x16\xad\x21" 6070 - "\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93" 6071 - "\x2a\xc1\x35\xcc\x63\xfa\x6e\x05" 6072 - "\x9c\x10\xa7\x3e\xd5\x49\xe0\x77" 6073 - 
"\x0e\x82\x19\xb0\x24\xbb\x52\xe9" 6074 - "\x5d\xf4\x8b\x22\x96\x2d\xc4\x38" 6075 - "\xcf\x66\xfd\x71\x08\x9f\x13\xaa" 6076 - "\x41\xd8\x4c\xe3\x7a\x11\x85\x1c" 6077 - "\xb3\x27\xbe\x55\xec\x60\xf7\x8e" 6078 - "\x02\x99\x30\xc7\x3b\xd2\x69\x00" 6079 - "\x74\x0b\xa2\x16\xad\x44\xdb\x4f" 6080 - "\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1" 6081 - "\x58\xef\x63\xfa\x91\x05\x9c\x33" 6082 - "\xca\x3e\xd5\x6c\x03\x77\x0e\xa5" 6083 - "\x19\xb0\x47\xde\x52\xe9\x80\x17" 6084 - "\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66" 6085 - "\xfd\x94\x08\x9f\x36\xcd\x41\xd8" 6086 - "\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a" 6087 - "\xe1\x55\xec\x83\x1a\x8e\x25\xbc" 6088 - "\x30\xc7\x5e\xf5\x69\x00\x97\x0b" 6089 - "\xa2\x39\xd0\x44\xdb\x72\x09\x7d" 6090 - "\x14\xab\x1f\xb6\x4d\xe4\x58\xef" 6091 - "\x86\x1d\x91\x28\xbf\x33\xca\x61" 6092 - "\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3" 6093 - "\x47\xde\x75\x0c\x80\x17\xae\x22" 6094 - "\xb9\x50\xe7\x5b\xf2\x89\x20\x94" 6095 - "\x2b\xc2\x36\xcd\x64\xfb\x6f\x06" 6096 - "\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78" 6097 - "\x0f\x83\x1a\xb1\x25\xbc\x53\xea" 6098 - "\x5e\xf5\x8c\x00\x97\x2e\xc5\x39" 6099 - "\xd0\x67\xfe\x72\x09\xa0\x14\xab" 6100 - "\x42\xd9\x4d\xe4\x7b\x12\x86\x1d" 6101 - "\xb4\x28\xbf\x56\xed\x61\xf8\x8f" 6102 - "\x03\x9a\x31\xc8\x3c\xd3\x6a\x01" 6103 - "\x75\x0c\xa3\x17\xae\x45\xdc\x50" 6104 - "\xe7\x7e\x15\x89\x20\xb7\x2b\xc2" 6105 - "\x59\xf0\x64\xfb\x92\x06\x9d\x34" 6106 - "\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6" 6107 - "\x1a\xb1\x48\xdf\x53\xea\x81\x18" 6108 - "\x8c\x23\xba\x2e\xc5\x5c\xf3\x67" 6109 - "\xfe\x95\x09\xa0\x37\xce\x42\xd9" 6110 - "\x70\x07\x7b\x12\xa9\x1d\xb4\x4b" 6111 - "\xe2\x56\xed\x84\x1b\x8f\x26\xbd" 6112 - "\x31\xc8\x5f\xf6\x6a\x01\x98\x0c" 6113 - "\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e" 6114 - "\x15\xac\x20\xb7\x4e\xe5\x59\xf0" 6115 - "\x87\x1e\x92\x29\xc0\x34\xcb\x62" 6116 - "\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4" 6117 - "\x48\xdf\x76\x0d\x81\x18\xaf\x23" 6118 - "\xba\x51\xe8\x5c\xf3\x8a\x21\x95" 6119 - "\x2c\xc3\x37\xce\x65\xfc\x70\x07" 6120 - 
"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79" 6121 - "\x10\x84\x1b\xb2\x26\xbd\x54\xeb" 6122 - "\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a" 6123 - "\xd1\x68\xff\x73\x0a\xa1\x15\xac" 6124 - "\x43\xda\x4e\xe5\x7c\x13\x87\x1e" 6125 - "\xb5\x29\xc0\x57\xee\x62\xf9\x90" 6126 - "\x04\x9b\x32\xc9\x3d\xd4\x6b\x02" 6127 - "\x76\x0d\xa4\x18\xaf\x46\xdd\x51" 6128 - "\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3" 6129 - "\x5a\xf1\x65\xfc\x93\x07\x9e\x35" 6130 - "\xcc\x40\xd7\x6e\x05\x79\x10\xa7" 6131 - "\x1b\xb2\x49\xe0\x54\xeb\x82\x19" 6132 - "\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68" 6133 - "\xff\x96\x0a\xa1\x38\xcf\x43\xda" 6134 - "\x71\x08\x7c\x13\xaa\x1e\xb5\x4c" 6135 - "\xe3\x57\xee\x85\x1c\x90\x27\xbe" 6136 - "\x32\xc9\x60\xf7\x6b\x02\x99\x0d" 6137 - "\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f" 6138 - "\x16\xad\x21\xb8\x4f\xe6\x5a\xf1" 6139 - "\x88\x1f\x93\x2a\xc1\x35\xcc\x63" 6140 - "\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5" 6141 - "\x49\xe0\x77\x0e\x82\x19\xb0\x24" 6142 - "\xbb\x52\xe9\x5d\xf4\x8b\x22\x96" 6143 - "\x2d\xc4\x38\xcf\x66\xfd\x71\x08" 6144 - "\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a" 6145 - "\x11\x85\x1c\xb3\x27\xbe\x55\xec" 6146 - "\x60\xf7\x8e\x02\x99\x30\xc7\x3b" 6147 - "\xd2\x69\x00\x74\x0b\xa2\x16\xad" 6148 - "\x44\xdb\x4f\xe6\x7d\x14\x88\x1f" 6149 - "\xb6\x2a\xc1\x58\xef\x63\xfa\x91" 6150 - "\x05\x9c\x33\xca\x3e\xd5\x6c\x03" 6151 - "\x77\x0e\xa5\x19\xb0\x47\xde\x52" 6152 - "\xe9\x80\x17\x8b\x22\xb9\x2d\xc4" 6153 - "\x5b\xf2\x66\xfd\x94\x08\x9f\x36" 6154 - "\xcd\x41\xd8\x6f\x06\x7a\x11\xa8" 6155 - "\x1c\xb3\x4a\xe1\x55\xec\x83\x1a" 6156 - "\x8e\x25\xbc\x30\xc7\x5e\xf5\x69" 6157 - "\x00\x97\x0b\xa2\x39\xd0\x44\xdb" 6158 - "\x72\x09\x7d\x14\xab\x1f\xb6\x4d" 6159 - "\xe4\x58\xef\x86\x1d\x91\x28\xbf" 6160 - "\x33\xca\x61\xf8\x6c\x03\x9a\x0e" 6161 - "\xa5\x3c\xd3\x47\xde\x75\x0c\x80" 6162 - "\x17\xae\x22\xb9\x50\xe7\x5b\xf2" 6163 - "\x89\x20\x94\x2b\xc2\x36\xcd\x64" 6164 - "\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6" 6165 - "\x4a\xe1\x78\x0f\x83\x1a\xb1\x25" 6166 - "\xbc\x53\xea\x5e\xf5\x8c\x00\x97" 6167 - 
"\x2e\xc5\x39\xd0\x67\xfe\x72\x09" 6168 - "\xa0\x14\xab\x42\xd9\x4d\xe4\x7b" 6169 - "\x12\x86\x1d\xb4\x28\xbf\x56\xed" 6170 - "\x61\xf8\x8f\x03\x9a\x31\xc8\x3c" 6171 - "\xd3\x6a\x01\x75\x0c\xa3\x17\xae" 6172 - "\x45\xdc\x50\xe7\x7e\x15\x89\x20" 6173 - "\xb7\x2b\xc2\x59\xf0\x64\xfb\x92" 6174 - "\x06\x9d\x34\xcb\x3f\xd6\x6d\x04" 6175 - "\x78\x0f\xa6\x1a\xb1\x48\xdf\x53" 6176 - "\xea\x81\x18\x8c\x23\xba\x2e\xc5" 6177 - "\x5c\xf3\x67\xfe\x95\x09\xa0\x37" 6178 - "\xce\x42\xd9\x70\x07\x7b\x12\xa9" 6179 - "\x1d\xb4\x4b\xe2\x56\xed\x84\x1b" 6180 - "\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a" 6181 - "\x01\x98\x0c\xa3\x3a\xd1\x45\xdc" 6182 - "\x73\x0a\x7e\x15\xac\x20\xb7\x4e" 6183 - "\xe5\x59\xf0\x87\x1e\x92\x29\xc0" 6184 - "\x34\xcb\x62\xf9\x6d\x04\x9b\x0f" 6185 - "\xa6\x3d\xd4\x48\xdf\x76\x0d\x81" 6186 - "\x18\xaf\x23\xba\x51\xe8\x5c\xf3" 6187 - "\x8a\x21\x95\x2c\xc3\x37\xce\x65" 6188 - "\xfc\x70\x07\x9e\x12\xa9\x40\xd7" 6189 - "\x4b\xe2\x79\x10\x84\x1b\xb2\x26" 6190 - "\xbd\x54\xeb\x5f\xf6\x8d\x01\x98" 6191 - "\x2f\xc6\x3a\xd1\x68\xff\x73\x0a" 6192 - "\xa1\x15\xac\x43\xda\x4e\xe5\x7c" 6193 - "\x13\x87\x1e\xb5\x29\xc0\x57\xee" 6194 - "\x62\xf9\x90\x04\x9b\x32\xc9\x3d" 6195 - "\xd4\x6b\x02\x76\x0d\xa4\x18\xaf" 6196 - "\x46\xdd\x51\xe8\x7f\x16\x8a\x21" 6197 - "\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93" 6198 - "\x07\x9e\x35\xcc\x40\xd7\x6e\x05" 6199 - "\x79\x10\xa7\x1b\xb2\x49\xe0\x54" 6200 - "\xeb\x82\x19\x8d\x24\xbb\x2f\xc6" 6201 - "\x5d\xf4\x68\xff\x96\x0a\xa1\x38" 6202 - "\xcf\x43\xda\x71\x08\x7c\x13\xaa" 6203 - "\x1e\xb5\x4c\xe3\x57\xee\x85\x1c" 6204 - "\x90\x27\xbe\x32\xc9\x60\xf7\x6b" 6205 - "\x02\x99\x0d\xa4\x3b\xd2\x46\xdd" 6206 - "\x74\x0b\x7f\x16\xad\x21\xb8\x4f" 6207 - "\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1" 6208 - "\x35\xcc\x63\xfa\x6e\x05\x9c\x10" 6209 - "\xa7\x3e\xd5\x49\xe0\x77\x0e\x82" 6210 - "\x19\xb0\x24\xbb\x52\xe9\x5d\xf4" 6211 - "\x8b\x22\x96\x2d\xc4\x38\xcf\x66" 6212 - "\xfd\x71\x08\x9f\x13\xaa\x41\xd8" 6213 - "\x4c\xe3\x7a\x11\x85\x1c\xb3\x27" 6214 - 
"\xbe\x55\xec\x60\xf7\x8e\x02\x99" 6215 - "\x30\xc7\x3b\xd2\x69\x00\x74\x0b" 6216 - "\xa2\x16\xad\x44\xdb\x4f\xe6\x7d" 6217 - "\x14\x88\x1f\xb6\x2a\xc1\x58\xef" 6218 - "\x63\xfa\x91\x05\x9c\x33\xca\x3e" 6219 - "\xd5\x6c\x03\x77\x0e\xa5\x19\xb0" 6220 - "\x47\xde\x52\xe9\x80\x17\x8b\x22" 6221 - "\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94" 6222 - "\x08\x9f\x36\xcd\x41\xd8\x6f\x06" 6223 - "\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55" 6224 - "\xec\x83\x1a\x8e\x25\xbc\x30\xc7" 6225 - "\x5e\xf5\x69\x00\x97\x0b\xa2\x39" 6226 - "\xd0\x44\xdb\x72\x09\x7d\x14\xab" 6227 - "\x1f\xb6\x4d\xe4\x58\xef\x86\x1d" 6228 - "\x91\x28\xbf\x33\xca\x61\xf8\x6c" 6229 - "\x03\x9a\x0e\xa5\x3c\xd3\x47\xde" 6230 - "\x75\x0c\x80\x17\xae\x22\xb9\x50" 6231 - "\xe7\x5b\xf2\x89\x20\x94\x2b\xc2" 6232 - "\x36\xcd\x64\xfb\x6f\x06\x9d\x11" 6233 - "\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83" 6234 - "\x1a\xb1\x25\xbc\x53\xea\x5e\xf5" 6235 - "\x8c\x00\x97\x2e\xc5\x39\xd0\x67" 6236 - "\xfe\x72\x09\xa0\x14\xab\x42\xd9" 6237 - "\x4d\xe4\x7b\x12\x86\x1d\xb4\x28" 6238 - "\xbf\x56\xed\x61\xf8\x8f\x03\x9a" 6239 - "\x31\xc8\x3c\xd3\x6a\x01\x75\x0c" 6240 - "\xa3\x17\xae\x45\xdc\x50\xe7\x7e" 6241 - "\x15\x89\x20\xb7\x2b\xc2\x59\xf0" 6242 - "\x64\xfb\x92\x06\x9d\x34\xcb\x3f" 6243 - "\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1" 6244 - "\x48\xdf\x53\xea\x81\x18\x8c\x23" 6245 - "\xba\x2e\xc5\x5c\xf3\x67\xfe\x95" 6246 - "\x09\xa0\x37\xce\x42\xd9\x70\x07" 6247 - "\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56" 6248 - "\xed\x84\x1b\x8f\x26\xbd\x31\xc8" 6249 - "\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a" 6250 - "\xd1\x45\xdc\x73\x0a\x7e\x15\xac" 6251 - "\x20\xb7\x4e\xe5\x59\xf0\x87\x1e" 6252 - "\x92\x29\xc0\x34\xcb\x62\xf9\x6d" 6253 - "\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf" 6254 - "\x76\x0d\x81\x18\xaf\x23\xba\x51" 6255 - "\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3" 6256 - "\x37\xce\x65\xfc\x70\x07\x9e\x12" 6257 - "\xa9\x40\xd7\x4b\xe2\x79\x10\x84" 6258 - "\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6" 6259 - "\x8d\x01\x98\x2f\xc6\x3a\xd1\x68" 6260 - "\xff\x73\x0a\xa1\x15\xac\x43\xda" 6261 - 
"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29" 6262 - "\xc0\x57\xee\x62\xf9\x90\x04\x9b" 6263 - "\x32\xc9\x3d\xd4\x6b\x02\x76\x0d" 6264 - "\xa4\x18\xaf\x46\xdd\x51\xe8\x7f" 6265 - "\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1" 6266 - "\x65\xfc\x93\x07\x9e\x35\xcc\x40" 6267 - "\xd7\x6e\x05\x79\x10\xa7\x1b\xb2" 6268 - "\x49\xe0\x54\xeb\x82\x19\x8d\x24" 6269 - "\xbb\x2f\xc6\x5d\xf4\x68\xff\x96" 6270 - "\x0a\xa1\x38\xcf\x43\xda\x71\x08" 6271 - "\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57" 6272 - "\xee\x85\x1c\x90\x27\xbe\x32\xc9" 6273 - "\x60\xf7\x6b\x02\x99\x0d\xa4\x3b" 6274 - "\xd2\x46\xdd\x74\x0b\x7f\x16\xad" 6275 - "\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f" 6276 - "\x93\x2a\xc1\x35\xcc\x63\xfa\x6e" 6277 - "\x05\x9c\x10\xa7\x3e\xd5\x49\xe0" 6278 - "\x77\x0e\x82\x19\xb0\x24\xbb\x52" 6279 - "\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4" 6280 - "\x38\xcf\x66\xfd\x71\x08\x9f\x13" 6281 - "\xaa\x41\xd8\x4c\xe3\x7a\x11\x85" 6282 - "\x1c\xb3\x27\xbe\x55\xec\x60\xf7" 6283 - "\x8e\x02\x99\x30\xc7\x3b\xd2\x69" 6284 - "\x00\x74\x0b\xa2\x16\xad\x44\xdb" 6285 - "\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a" 6286 - "\xc1\x58\xef\x63\xfa\x91\x05\x9c" 6287 - "\x33\xca\x3e\xd5\x6c\x03\x77\x0e" 6288 - "\xa5\x19\xb0\x47\xde\x52\xe9\x80" 6289 - "\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2" 6290 - "\x66\xfd\x94\x08\x9f\x36\xcd\x41" 6291 - "\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3" 6292 - "\x4a\xe1\x55\xec\x83\x1a\x8e\x25" 6293 - "\xbc\x30\xc7\x5e\xf5\x69\x00\x97" 6294 - "\x0b\xa2\x39\xd0\x44\xdb\x72\x09" 6295 - "\x7d\x14\xab\x1f\xb6\x4d\xe4\x58" 6296 - "\xef\x86\x1d\x91\x28\xbf\x33\xca" 6297 - "\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c" 6298 - "\xd3\x47\xde\x75\x0c\x80\x17\xae" 6299 - "\x22\xb9\x50\xe7\x5b\xf2\x89\x20" 6300 - "\x94\x2b\xc2\x36\xcd\x64\xfb\x6f" 6301 - "\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1" 6302 - "\x78\x0f\x83\x1a\xb1\x25\xbc\x53" 6303 - "\xea\x5e\xf5\x8c\x00\x97\x2e\xc5" 6304 - "\x39\xd0\x67\xfe\x72\x09\xa0\x14" 6305 - "\xab\x42\xd9\x4d\xe4\x7b\x12\x86" 6306 - "\x1d\xb4\x28\xbf\x56\xed\x61\xf8" 6307 - "\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a" 6308 - 
"\x01\x75\x0c\xa3\x17\xae\x45\xdc" 6309 - "\x50\xe7\x7e\x15\x89\x20\xb7\x2b" 6310 - "\xc2\x59\xf0\x64\xfb\x92\x06\x9d" 6311 - "\x34\xcb\x3f\xd6\x6d\x04\x78\x0f" 6312 - "\xa6\x1a\xb1\x48\xdf\x53\xea\x81" 6313 - "\x18\x8c\x23\xba\x2e\xc5\x5c\xf3" 6314 - "\x67\xfe\x95\x09\xa0\x37\xce\x42" 6315 - "\xd9\x70\x07\x7b\x12\xa9\x1d\xb4" 6316 - "\x4b\xe2\x56\xed\x84\x1b\x8f\x26" 6317 - "\xbd\x31\xc8\x5f\xf6\x6a\x01\x98", 6318 - .psize = 2048, 6319 - .digest = (u8 *)(u16 []){ 0x23ca }, 6320 - } 6321 - }; 6322 - 6323 6020 /* 6324 6021 * Streebog test vectors from RFC 6986 and GOST R 34.11-2012 6325 6022 */
+1 -1
drivers/crypto/stm32/stm32-crc32.c
··· 162 162 if (mctx->poly == CRC32_POLY_LE) 163 163 ctx->partial = crc32_le(ctx->partial, d8, length); 164 164 else 165 - ctx->partial = __crc32c_le(ctx->partial, d8, length); 165 + ctx->partial = crc32c(ctx->partial, d8, length); 166 166 167 167 goto pm_out; 168 168 }
+2 -2
drivers/infiniband/sw/siw/siw.h
··· 676 676 static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset, 677 677 int len) 678 678 { 679 - return (__force __wsum)__crc32c_le_combine((__force __u32)csum, 680 - (__force __u32)csum2, len); 679 + return (__force __wsum)crc32c_combine((__force __u32)csum, 680 + (__force __u32)csum2, len); 681 681 } 682 682 683 683 static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
+15 -16
drivers/md/raid5-cache.c
··· 714 714 715 715 block = page_address(io->meta_page); 716 716 block->meta_size = cpu_to_le32(io->meta_offset); 717 - crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE); 717 + crc = crc32c(log->uuid_checksum, block, PAGE_SIZE); 718 718 block->checksum = cpu_to_le32(crc); 719 719 720 720 log->current_io = NULL; ··· 1020 1020 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) 1021 1021 continue; 1022 1022 addr = kmap_local_page(sh->dev[i].page); 1023 - sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum, 1024 - addr, PAGE_SIZE); 1023 + sh->dev[i].log_checksum = crc32c(log->uuid_checksum, 1024 + addr, PAGE_SIZE); 1025 1025 kunmap_local(addr); 1026 1026 } 1027 1027 parity_pages = 1 + !!(sh->qd_idx >= 0); ··· 1741 1741 le64_to_cpu(mb->position) != ctx->pos) 1742 1742 return -EINVAL; 1743 1743 1744 - crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); 1744 + crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE); 1745 1745 if (stored_crc != crc) 1746 1746 return -EINVAL; 1747 1747 ··· 1780 1780 return -ENOMEM; 1781 1781 r5l_recovery_create_empty_meta_block(log, page, pos, seq); 1782 1782 mb = page_address(page); 1783 - mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, 1784 - mb, PAGE_SIZE)); 1783 + mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE)); 1785 1784 if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE | 1786 1785 REQ_SYNC | REQ_FUA, false)) { 1787 1786 __free_page(page); ··· 1975 1976 1976 1977 r5l_recovery_read_page(log, ctx, page, log_offset); 1977 1978 addr = kmap_local_page(page); 1978 - checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE); 1979 + checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE); 1979 1980 kunmap_local(addr); 1980 1981 return (le32_to_cpu(log_checksum) == checksum) ? 
0 : -EINVAL; 1981 1982 } ··· 2378 2379 raid5_compute_blocknr(sh, i, 0)); 2379 2380 addr = kmap_local_page(dev->page); 2380 2381 payload->checksum[0] = cpu_to_le32( 2381 - crc32c_le(log->uuid_checksum, addr, 2382 - PAGE_SIZE)); 2382 + crc32c(log->uuid_checksum, addr, 2383 + PAGE_SIZE)); 2383 2384 kunmap_local(addr); 2384 2385 sync_page_io(log->rdev, write_pos, PAGE_SIZE, 2385 2386 dev->page, REQ_OP_WRITE, false); ··· 2391 2392 } 2392 2393 } 2393 2394 mb->meta_size = cpu_to_le32(offset); 2394 - mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, 2395 - mb, PAGE_SIZE)); 2395 + mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, 2396 + mb, PAGE_SIZE)); 2396 2397 sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, 2397 2398 REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false); 2398 2399 sh->log_start = ctx->pos; ··· 2884 2885 if (!test_bit(R5_Wantwrite, &sh->dev[i].flags)) 2885 2886 continue; 2886 2887 addr = kmap_local_page(sh->dev[i].page); 2887 - sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum, 2888 - addr, PAGE_SIZE); 2888 + sh->dev[i].log_checksum = crc32c(log->uuid_checksum, 2889 + addr, PAGE_SIZE); 2889 2890 kunmap_local(addr); 2890 2891 pages++; 2891 2892 } ··· 2968 2969 } 2969 2970 stored_crc = le32_to_cpu(mb->checksum); 2970 2971 mb->checksum = 0; 2971 - expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); 2972 + expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE); 2972 2973 if (stored_crc != expected_crc) { 2973 2974 create_super = true; 2974 2975 goto create; ··· 3076 3077 return -ENOMEM; 3077 3078 log->rdev = rdev; 3078 3079 log->need_cache_flush = bdev_write_cache(rdev->bdev); 3079 - log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, 3080 - sizeof(rdev->mddev->uuid)); 3080 + log->uuid_checksum = crc32c(~0, rdev->mddev->uuid, 3081 + sizeof(rdev->mddev->uuid)); 3081 3082 3082 3083 mutex_init(&log->io_mutex); 3083 3084
+8 -8
drivers/md/raid5-ppl.c
··· 346 346 if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) { 347 347 le32_add_cpu(&e->pp_size, PAGE_SIZE); 348 348 io->pp_size += PAGE_SIZE; 349 - e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum), 350 - page_address(sh->ppl_page), 351 - PAGE_SIZE)); 349 + e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum), 350 + page_address(sh->ppl_page), 351 + PAGE_SIZE)); 352 352 } 353 353 354 354 list_add_tail(&sh->log_list, &io->stripe_list); ··· 454 454 } 455 455 456 456 pplhdr->entries_count = cpu_to_le32(io->entries_count); 457 - pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE)); 457 + pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE)); 458 458 459 459 /* Rewind the buffer if current PPL is larger then remaining space */ 460 460 if (log->use_multippl && ··· 998 998 goto out; 999 999 } 1000 1000 1001 - crc = crc32c_le(crc, page_address(page), s); 1001 + crc = crc32c(crc, page_address(page), s); 1002 1002 1003 1003 pp_size -= s; 1004 1004 sector += s >> 9; ··· 1052 1052 log->rdev->ppl.size, GFP_NOIO, 0); 1053 1053 memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); 1054 1054 pplhdr->signature = cpu_to_le32(log->ppl_conf->signature); 1055 - pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); 1055 + pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE)); 1056 1056 1057 1057 if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, 1058 1058 PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC | ··· 1106 1106 /* check header validity */ 1107 1107 crc_stored = le32_to_cpu(pplhdr->checksum); 1108 1108 pplhdr->checksum = 0; 1109 - crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE); 1109 + crc = ~crc32c(~0, pplhdr, PAGE_SIZE); 1110 1110 1111 1111 if (crc_stored != crc) { 1112 1112 pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n", ··· 1390 1390 spin_lock_init(&ppl_conf->no_mem_stripes_lock); 1391 1391 1392 1392 if (!mddev->external) { 1393 - ppl_conf->signature = 
~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); 1393 + ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid)); 1394 1394 ppl_conf->block_size = 512; 1395 1395 } else { 1396 1396 ppl_conf->block_size =
+1 -1
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
··· 2593 2593 /********************* Multicast verbs: SET, CLEAR ****************************/ 2594 2594 static inline u8 bnx2x_mcast_bin_from_mac(u8 *mac) 2595 2595 { 2596 - return (crc32c_le(0, mac, ETH_ALEN) >> 24) & 0xff; 2596 + return (crc32c(0, mac, ETH_ALEN) >> 24) & 0xff; 2597 2597 } 2598 2598 2599 2599 struct bnx2x_mcast_mac_elem {
+1 -1
drivers/thunderbolt/ctl.c
··· 312 312 313 313 static __be32 tb_crc(const void *data, size_t len) 314 314 { 315 - return cpu_to_be32(~__crc32c_le(~0, data, len)); 315 + return cpu_to_be32(~crc32c(~0, data, len)); 316 316 } 317 317 318 318 static void tb_ctl_pkg_free(struct ctl_pkg *pkg)
+1 -1
drivers/thunderbolt/eeprom.c
··· 211 211 212 212 static u32 tb_crc32(void *data, size_t len) 213 213 { 214 - return ~__crc32c_le(~0, data, len); 214 + return ~crc32c(~0, data, len); 215 215 } 216 216 217 217 #define TB_DROM_DATA_START 13
-12
include/linux/crc-t10dif.h
··· 4 4 5 5 #include <linux/types.h> 6 6 7 - #define CRC_T10DIF_DIGEST_SIZE 2 8 - #define CRC_T10DIF_BLOCK_SIZE 1 9 - 10 7 u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); 11 8 u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); 12 9 ··· 18 21 { 19 22 return crc_t10dif_update(0, p, len); 20 23 } 21 - 22 - #if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH) 23 - bool crc_t10dif_is_optimized(void); 24 - #else 25 - static inline bool crc_t10dif_is_optimized(void) 26 - { 27 - return false; 28 - } 29 - #endif 30 24 31 25 #endif
+26 -29
include/linux/crc32.h
··· 8 8 #include <linux/types.h> 9 9 #include <linux/bitrev.h> 10 10 11 - u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); 12 - u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); 13 - u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); 14 - u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); 15 - u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); 16 - u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); 11 + u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); 12 + u32 crc32_le_base(u32 crc, const u8 *p, size_t len); 13 + u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); 14 + u32 crc32_be_base(u32 crc, const u8 *p, size_t len); 15 + u32 crc32c_arch(u32 crc, const u8 *p, size_t len); 16 + u32 crc32c_base(u32 crc, const u8 *p, size_t len); 17 17 18 - static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) 18 + static inline u32 crc32_le(u32 crc, const void *p, size_t len) 19 19 { 20 20 if (IS_ENABLED(CONFIG_CRC32_ARCH)) 21 21 return crc32_le_arch(crc, p, len); 22 22 return crc32_le_base(crc, p, len); 23 23 } 24 24 25 - static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) 25 + static inline u32 crc32_be(u32 crc, const void *p, size_t len) 26 26 { 27 27 if (IS_ENABLED(CONFIG_CRC32_ARCH)) 28 28 return crc32_be_arch(crc, p, len); 29 29 return crc32_be_base(crc, p, len); 30 30 } 31 31 32 - /* TODO: leading underscores should be dropped once callers have been updated */ 33 - static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) 32 + static inline u32 crc32c(u32 crc, const void *p, size_t len) 34 33 { 35 34 if (IS_ENABLED(CONFIG_CRC32_ARCH)) 36 - return crc32c_le_arch(crc, p, len); 37 - return crc32c_le_base(crc, p, len); 35 + return crc32c_arch(crc, p, len); 36 + return crc32c_base(crc, p, len); 38 37 } 39 38 40 39 /* ··· 44 45 */ 45 46 #define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ 46 47 #define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ 
47 - #define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ 48 + #define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ 48 49 #if IS_ENABLED(CONFIG_CRC32_ARCH) 49 50 u32 crc32_optimizations(void); 50 51 #else ··· 69 70 * with the same initializer as crc1, and crc2 seed was 0. See 70 71 * also crc32_combine_test(). 71 72 */ 72 - u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len); 73 + u32 crc32_le_shift(u32 crc, size_t len); 73 74 74 75 static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) 75 76 { 76 77 return crc32_le_shift(crc1, len2) ^ crc2; 77 78 } 78 79 80 + u32 crc32c_shift(u32 crc, size_t len); 81 + 79 82 /** 80 - * __crc32c_le_combine - Combine two crc32c check values into one. For two 81 - * sequences of bytes, seq1 and seq2 with lengths len1 82 - * and len2, __crc32c_le() check values were calculated 83 - * for each, crc1 and crc2. 83 + * crc32c_combine - Combine two crc32c check values into one. For two sequences 84 + * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() 85 + * check values were calculated for each, crc1 and crc2. 84 86 * 85 87 * @crc1: crc32c of the first block 86 88 * @crc2: crc32c of the second block 87 89 * @len2: length of the second block 88 90 * 89 - * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, 90 - * requiring only crc1, crc2, and len2. Note: If seq_full denotes 91 - * the concatenated memory area of seq1 with seq2, and crc_full 92 - * the __crc32c_le() value of seq_full, then crc_full == 93 - * __crc32c_le_combine(crc1, crc2, len2) when crc_full was 94 - * seeded with the same initializer as crc1, and crc2 seed 95 - * was 0. See also crc32c_combine_test(). 91 + * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring 92 + * only crc1, crc2, and len2. 
Note: If seq_full denotes the concatenated 93 + * memory area of seq1 with seq2, and crc_full the crc32c() value of 94 + * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when 95 + * crc_full was seeded with the same initializer as crc1, and crc2 seed 96 + * was 0. See also crc_combine_test(). 96 97 */ 97 - u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len); 98 - 99 - static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) 98 + static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) 100 99 { 101 - return __crc32c_le_shift(crc1, len2) ^ crc2; 100 + return crc32c_shift(crc1, len2) ^ crc2; 102 101 } 103 102 104 103 #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length)
-8
include/linux/crc32c.h
··· 4 4 5 5 #include <linux/crc32.h> 6 6 7 - static inline u32 crc32c(u32 crc, const void *address, unsigned int length) 8 - { 9 - return __crc32c_le(crc, address, length); 10 - } 11 - 12 - /* This macro exists for backwards-compatibility. */ 13 - #define crc32c_le crc32c 14 - 15 7 #endif /* _LINUX_CRC32C_H */
+33 -5
include/linux/crc64.h
··· 7 7 8 8 #include <linux/types.h> 9 9 10 - #define CRC64_ROCKSOFT_STRING "crc64-rocksoft" 10 + u64 crc64_be_arch(u64 crc, const u8 *p, size_t len); 11 + u64 crc64_be_generic(u64 crc, const u8 *p, size_t len); 12 + u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len); 13 + u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); 11 14 12 - u64 __pure crc64_be(u64 crc, const void *p, size_t len); 13 - u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); 15 + /** 16 + * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 17 + * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, 18 + * or the previous crc64 value if computing incrementally. 19 + * @p: pointer to buffer over which CRC64 is run 20 + * @len: length of buffer @p 21 + */ 22 + static inline u64 crc64_be(u64 crc, const void *p, size_t len) 23 + { 24 + if (IS_ENABLED(CONFIG_CRC64_ARCH)) 25 + return crc64_be_arch(crc, p, len); 26 + return crc64_be_generic(crc, p, len); 27 + } 14 28 15 - u64 crc64_rocksoft(const unsigned char *buffer, size_t len); 16 - u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); 29 + /** 30 + * crc64_nvme - Calculate CRC64-NVME 31 + * @crc: seed value for computation. 0 for a new CRC calculation, or the 32 + * previous crc64 value if computing incrementally. 33 + * @p: pointer to buffer over which CRC64 is run 34 + * @len: length of buffer @p 35 + * 36 + * This computes the CRC64 defined in the NVME NVM Command Set Specification, 37 + * *including the bitwise inversion at the beginning and end*. 38 + */ 39 + static inline u64 crc64_nvme(u64 crc, const void *p, size_t len) 40 + { 41 + if (IS_ENABLED(CONFIG_CRC64_ARCH)) 42 + return ~crc64_nvme_arch(~crc, p, len); 43 + return ~crc64_nvme_generic(~crc, p, len); 44 + } 17 45 18 46 #endif /* _LINUX_CRC64_H */
-7
include/linux/crc7.h
··· 3 3 #define _LINUX_CRC7_H 4 4 #include <linux/types.h> 5 5 6 - extern const u8 crc7_be_syndrome_table[256]; 7 - 8 - static inline u8 crc7_be_byte(u8 crc, u8 data) 9 - { 10 - return crc7_be_syndrome_table[crc ^ data]; 11 - } 12 - 13 6 extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); 14 7 15 8 #endif
+2 -5
include/net/sctp/checksum.h
··· 30 30 31 31 static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) 32 32 { 33 - /* This uses the crypto implementation of crc32c, which is either 34 - * implemented w/ hardware support or resolves to __crc32c_le(). 35 - */ 36 33 return (__force __wsum)crc32c((__force __u32)sum, buff, len); 37 34 } 38 35 39 36 static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, 40 37 int offset, int len) 41 38 { 42 - return (__force __wsum)__crc32c_le_combine((__force __u32)csum, 43 - (__force __u32)csum2, len); 39 + return (__force __wsum)crc32c_combine((__force __u32)csum, 40 + (__force __u32)csum2, len); 44 41 } 45 42 46 43 static const struct skb_checksum_ops sctp_csum_ops = {
+12 -33
lib/Kconfig
··· 168 168 tristate 169 169 default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS 170 170 171 - config CRC64_ROCKSOFT 172 - tristate "CRC calculation for the Rocksoft model CRC64" 173 - select CRC64 174 - select CRYPTO 175 - select CRYPTO_CRC64_ROCKSOFT 176 - help 177 - This option provides a CRC64 API to a registered crypto driver. 178 - This is used with the block layer's data integrity subsystem. 179 - 180 171 config CRC_ITU_T 181 172 tristate "CRC ITU-T V.41 functions" 182 173 help ··· 194 203 default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS 195 204 196 205 config CRC64 197 - tristate "CRC64 functions" 198 - help 199 - This option is provided for the case where no in-kernel-tree 200 - modules require CRC64 functions, but a module built outside 201 - the kernel tree does. Such modules that use library CRC64 202 - functions require M here. 206 + tristate 207 + 208 + config ARCH_HAS_CRC64 209 + bool 210 + 211 + config CRC64_ARCH 212 + tristate 213 + default CRC64 if ARCH_HAS_CRC64 && CRC_OPTIMIZATIONS 203 214 204 215 config CRC4 205 - tristate "CRC4 functions" 206 - help 207 - This option is provided for the case where no in-kernel-tree 208 - modules require CRC4 functions, but a module built outside 209 - the kernel tree does. Such modules that use library CRC4 210 - functions require M here. 216 + tristate 211 217 212 218 config CRC7 213 - tristate "CRC7 functions" 214 - help 215 - This option is provided for the case where no in-kernel-tree 216 - modules require CRC7 functions, but a module built outside 217 - the kernel tree does. Such modules that use library CRC7 218 - functions require M here. 219 + tristate 219 220 220 221 config LIBCRC32C 221 - tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" 222 + tristate 222 223 select CRC32 223 224 help 224 225 This option just selects CRC32 and is provided for compatibility 225 226 purposes until the users are updated to select CRC32 directly. 
226 227 227 228 config CRC8 228 - tristate "CRC8 function" 229 - help 230 - This option provides CRC8 function. Drivers may select this 231 - when they need to do cyclic redundancy check according CRC8 232 - algorithm. Module will be called crc8. 229 + tristate 233 230 234 231 config CRC_OPTIMIZATIONS 235 232 bool "Enable optimized CRC implementations" if EXPERT
+1
lib/Kconfig.debug
··· 2889 2889 tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS 2890 2890 depends on KUNIT 2891 2891 default KUNIT_ALL_TESTS 2892 + select CRC7 2892 2893 select CRC16 2893 2894 select CRC_T10DIF 2894 2895 select CRC32
-1
lib/Makefile
··· 159 159 obj-$(CONFIG_CRC4) += crc4.o 160 160 obj-$(CONFIG_CRC7) += crc7.o 161 161 obj-$(CONFIG_CRC8) += crc8.o 162 - obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o 163 162 obj-$(CONFIG_XXHASH) += xxhash.o 164 163 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o 165 164
+10 -11
lib/crc32.c
··· 37 37 MODULE_DESCRIPTION("Various CRC32 calculations"); 38 38 MODULE_LICENSE("GPL"); 39 39 40 - u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) 40 + u32 crc32_le_base(u32 crc, const u8 *p, size_t len) 41 41 { 42 42 while (len--) 43 43 crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; ··· 45 45 } 46 46 EXPORT_SYMBOL(crc32_le_base); 47 47 48 - u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) 48 + u32 crc32c_base(u32 crc, const u8 *p, size_t len) 49 49 { 50 50 while (len--) 51 51 crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; 52 52 return crc; 53 53 } 54 - EXPORT_SYMBOL(crc32c_le_base); 54 + EXPORT_SYMBOL(crc32c_base); 55 55 56 56 /* 57 57 * This multiplies the polynomials x and y modulo the given modulus. 58 58 * This follows the "little-endian" CRC convention that the lsbit 59 59 * represents the highest power of x, and the msbit represents x^0. 60 60 */ 61 - static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) 61 + static u32 gf2_multiply(u32 x, u32 y, u32 modulus) 62 62 { 63 63 u32 product = x & 1 ? y : 0; 64 64 int i; ··· 84 84 * as appending len bytes of zero to the data), in time proportional 85 85 * to log(len). 
86 86 */ 87 - static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, 88 - u32 polynomial) 87 + static u32 crc32_generic_shift(u32 crc, size_t len, u32 polynomial) 89 88 { 90 89 u32 power = polynomial; /* CRC of x^32 */ 91 90 int i; ··· 113 114 return crc; 114 115 } 115 116 116 - u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) 117 + u32 crc32_le_shift(u32 crc, size_t len) 117 118 { 118 119 return crc32_generic_shift(crc, len, CRC32_POLY_LE); 119 120 } 121 + EXPORT_SYMBOL(crc32_le_shift); 120 122 121 - u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) 123 + u32 crc32c_shift(u32 crc, size_t len) 122 124 { 123 125 return crc32_generic_shift(crc, len, CRC32C_POLY_LE); 124 126 } 125 - EXPORT_SYMBOL(crc32_le_shift); 126 - EXPORT_SYMBOL(__crc32c_le_shift); 127 + EXPORT_SYMBOL(crc32c_shift); 127 128 128 - u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) 129 + u32 crc32_be_base(u32 crc, const u8 *p, size_t len) 129 130 { 130 131 while (len--) 131 132 crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++];
-126
lib/crc64-rocksoft.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - 3 - #include <linux/types.h> 4 - #include <linux/module.h> 5 - #include <linux/crc64.h> 6 - #include <linux/err.h> 7 - #include <linux/init.h> 8 - #include <crypto/hash.h> 9 - #include <crypto/algapi.h> 10 - #include <linux/static_key.h> 11 - #include <linux/notifier.h> 12 - 13 - static struct crypto_shash __rcu *crc64_rocksoft_tfm; 14 - static DEFINE_STATIC_KEY_TRUE(crc64_rocksoft_fallback); 15 - static DEFINE_MUTEX(crc64_rocksoft_mutex); 16 - static struct work_struct crc64_rocksoft_rehash_work; 17 - 18 - static int crc64_rocksoft_notify(struct notifier_block *self, unsigned long val, void *data) 19 - { 20 - struct crypto_alg *alg = data; 21 - 22 - if (val != CRYPTO_MSG_ALG_LOADED || 23 - strcmp(alg->cra_name, CRC64_ROCKSOFT_STRING)) 24 - return NOTIFY_DONE; 25 - 26 - schedule_work(&crc64_rocksoft_rehash_work); 27 - return NOTIFY_OK; 28 - } 29 - 30 - static void crc64_rocksoft_rehash(struct work_struct *work) 31 - { 32 - struct crypto_shash *new, *old; 33 - 34 - mutex_lock(&crc64_rocksoft_mutex); 35 - old = rcu_dereference_protected(crc64_rocksoft_tfm, 36 - lockdep_is_held(&crc64_rocksoft_mutex)); 37 - new = crypto_alloc_shash(CRC64_ROCKSOFT_STRING, 0, 0); 38 - if (IS_ERR(new)) { 39 - mutex_unlock(&crc64_rocksoft_mutex); 40 - return; 41 - } 42 - rcu_assign_pointer(crc64_rocksoft_tfm, new); 43 - mutex_unlock(&crc64_rocksoft_mutex); 44 - 45 - if (old) { 46 - synchronize_rcu(); 47 - crypto_free_shash(old); 48 - } else { 49 - static_branch_disable(&crc64_rocksoft_fallback); 50 - } 51 - } 52 - 53 - static struct notifier_block crc64_rocksoft_nb = { 54 - .notifier_call = crc64_rocksoft_notify, 55 - }; 56 - 57 - u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len) 58 - { 59 - struct { 60 - struct shash_desc shash; 61 - u64 crc; 62 - } desc; 63 - int err; 64 - 65 - if (static_branch_unlikely(&crc64_rocksoft_fallback)) 66 - return crc64_rocksoft_generic(crc, buffer, len); 67 - 68 - 
rcu_read_lock(); 69 - desc.shash.tfm = rcu_dereference(crc64_rocksoft_tfm); 70 - desc.crc = crc; 71 - err = crypto_shash_update(&desc.shash, buffer, len); 72 - rcu_read_unlock(); 73 - 74 - BUG_ON(err); 75 - 76 - return desc.crc; 77 - } 78 - EXPORT_SYMBOL_GPL(crc64_rocksoft_update); 79 - 80 - u64 crc64_rocksoft(const unsigned char *buffer, size_t len) 81 - { 82 - return crc64_rocksoft_update(0, buffer, len); 83 - } 84 - EXPORT_SYMBOL_GPL(crc64_rocksoft); 85 - 86 - static int __init crc64_rocksoft_mod_init(void) 87 - { 88 - INIT_WORK(&crc64_rocksoft_rehash_work, crc64_rocksoft_rehash); 89 - crypto_register_notifier(&crc64_rocksoft_nb); 90 - crc64_rocksoft_rehash(&crc64_rocksoft_rehash_work); 91 - return 0; 92 - } 93 - 94 - static void __exit crc64_rocksoft_mod_fini(void) 95 - { 96 - crypto_unregister_notifier(&crc64_rocksoft_nb); 97 - cancel_work_sync(&crc64_rocksoft_rehash_work); 98 - crypto_free_shash(rcu_dereference_protected(crc64_rocksoft_tfm, 1)); 99 - } 100 - 101 - module_init(crc64_rocksoft_mod_init); 102 - module_exit(crc64_rocksoft_mod_fini); 103 - 104 - static int crc64_rocksoft_transform_show(char *buffer, const struct kernel_param *kp) 105 - { 106 - struct crypto_shash *tfm; 107 - int len; 108 - 109 - if (static_branch_unlikely(&crc64_rocksoft_fallback)) 110 - return sprintf(buffer, "fallback\n"); 111 - 112 - rcu_read_lock(); 113 - tfm = rcu_dereference(crc64_rocksoft_tfm); 114 - len = snprintf(buffer, PAGE_SIZE, "%s\n", 115 - crypto_shash_driver_name(tfm)); 116 - rcu_read_unlock(); 117 - 118 - return len; 119 - } 120 - 121 - module_param_call(transform, NULL, crc64_rocksoft_transform_show, NULL, 0444); 122 - 123 - MODULE_AUTHOR("Keith Busch <kbusch@kernel.org>"); 124 - MODULE_DESCRIPTION("Rocksoft model CRC64 calculation (library API)"); 125 - MODULE_LICENSE("GPL"); 126 - MODULE_SOFTDEP("pre: crc64");
+11 -38
lib/crc64.c
··· 22 22 * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + 23 23 * x^7 + x^4 + x + 1 24 24 * 25 - * crc64rocksoft[256] table is from the Rocksoft specification polynomial 26 - * defined as, 25 + * crc64nvmetable[256] uses the CRC64 polynomial from the NVME NVM Command Set 26 + * Specification and uses least-significant-bit first bit order: 27 27 * 28 28 * x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + x^47 + 29 29 * x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + x^26 + x^23 + ··· 41 41 MODULE_DESCRIPTION("CRC64 calculations"); 42 42 MODULE_LICENSE("GPL v2"); 43 43 44 - /** 45 - * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 46 - * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, 47 - * or the previous crc64 value if computing incrementally. 48 - * @p: pointer to buffer over which CRC64 is run 49 - * @len: length of buffer @p 50 - */ 51 - u64 __pure crc64_be(u64 crc, const void *p, size_t len) 44 + u64 crc64_be_generic(u64 crc, const u8 *p, size_t len) 52 45 { 53 - size_t i, t; 54 - 55 - const unsigned char *_p = p; 56 - 57 - for (i = 0; i < len; i++) { 58 - t = ((crc >> 56) ^ (*_p++)) & 0xFF; 59 - crc = crc64table[t] ^ (crc << 8); 60 - } 61 - 46 + while (len--) 47 + crc = (crc << 8) ^ crc64table[(crc >> 56) ^ *p++]; 62 48 return crc; 63 49 } 64 - EXPORT_SYMBOL_GPL(crc64_be); 50 + EXPORT_SYMBOL_GPL(crc64_be_generic); 65 51 66 - /** 67 - * crc64_rocksoft_generic - Calculate bitwise Rocksoft CRC64 68 - * @crc: seed value for computation. 0 for a new CRC calculation, or the 69 - * previous crc64 value if computing incrementally. 
70 - * @p: pointer to buffer over which CRC64 is run 71 - * @len: length of buffer @p 72 - */ 73 - u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len) 52 + u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len) 74 53 { 75 - const unsigned char *_p = p; 76 - size_t i; 77 - 78 - crc = ~crc; 79 - 80 - for (i = 0; i < len; i++) 81 - crc = (crc >> 8) ^ crc64rocksofttable[(crc & 0xff) ^ *_p++]; 82 - 83 - return ~crc; 54 + while (len--) 55 + crc = (crc >> 8) ^ crc64nvmetable[(crc & 0xff) ^ *p++]; 56 + return crc; 84 57 } 85 - EXPORT_SYMBOL_GPL(crc64_rocksoft_generic); 58 + EXPORT_SYMBOL_GPL(crc64_nvme_generic);
+2 -4
lib/crc7.c
··· 7 7 #include <linux/module.h> 8 8 #include <linux/crc7.h> 9 9 10 - 11 10 /* 12 11 * Table for CRC-7 (polynomial x^7 + x^3 + 1). 13 12 * This is a big-endian CRC (msbit is highest power of x), 14 13 * aligned so the msbit of the byte is the x^6 coefficient 15 14 * and the lsbit is not used. 16 15 */ 17 - const u8 crc7_be_syndrome_table[256] = { 16 + static const u8 crc7_be_syndrome_table[256] = { 18 17 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 19 18 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 20 19 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, ··· 47 48 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, 48 49 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 49 50 }; 50 - EXPORT_SYMBOL(crc7_be_syndrome_table); 51 51 52 52 /** 53 53 * crc7_be - update the CRC7 for the data buffer ··· 63 65 u8 crc7_be(u8 crc, const u8 *buffer, size_t len) 64 66 { 65 67 while (len--) 66 - crc = crc7_be_byte(crc, *buffer++); 68 + crc = crc7_be_syndrome_table[crc ^ *buffer++]; 67 69 return crc; 68 70 } 69 71 EXPORT_SYMBOL(crc7_be);
+5 -5
lib/gen_crc64table.c
··· 17 17 #include <stdio.h> 18 18 19 19 #define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL 20 - #define CRC64_ROCKSOFT_POLY 0x9A6C9329AC4BC9B5ULL 20 + #define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL 21 21 22 22 static uint64_t crc64_table[256] = {0}; 23 - static uint64_t crc64_rocksoft_table[256] = {0}; 23 + static uint64_t crc64_nvme_table[256] = {0}; 24 24 25 25 static void generate_reflected_crc64_table(uint64_t table[256], uint64_t poly) 26 26 { ··· 82 82 printf("static const u64 ____cacheline_aligned crc64table[256] = {\n"); 83 83 output_table(crc64_table); 84 84 85 - printf("\nstatic const u64 ____cacheline_aligned crc64rocksofttable[256] = {\n"); 86 - output_table(crc64_rocksoft_table); 85 + printf("\nstatic const u64 ____cacheline_aligned crc64nvmetable[256] = {\n"); 86 + output_table(crc64_nvme_table); 87 87 } 88 88 89 89 int main(int argc, char *argv[]) 90 90 { 91 91 generate_crc64_table(crc64_table, CRC64_ECMA182_POLY); 92 - generate_reflected_crc64_table(crc64_rocksoft_table, CRC64_ROCKSOFT_POLY); 92 + generate_reflected_crc64_table(crc64_nvme_table, CRC64_NVME_POLY); 93 93 print_crc64_tables(); 94 94 return 0; 95 95 }
+64 -4
lib/tests/crc_kunit.c
··· 7 7 * Author: Eric Biggers <ebiggers@google.com> 8 8 */ 9 9 #include <kunit/test.h> 10 + #include <linux/crc7.h> 10 11 #include <linux/crc16.h> 11 12 #include <linux/crc-t10dif.h> 12 13 #include <linux/crc32.h> ··· 33 32 * @poly: The generator polynomial with the highest-order term omitted. 34 33 * Bit-reversed if @le is true. 35 34 * @func: The function to compute a CRC. The type signature uses u64 so that it 36 - * can fit any CRC up to CRC-64. 35 + * can fit any CRC up to CRC-64. The CRC is passed in, and is expected 36 + * to be returned in, the least significant bits of the u64. The 37 + * function is expected to *not* invert the CRC at the beginning and end. 37 38 * @combine_func: Optional function to combine two CRCs. 38 39 */ 39 40 struct crc_variant { ··· 226 223 }; 227 224 size_t len, i, j, num_iters; 228 225 /* 229 - * Some of the CRC library functions are marked as __pure, so use 230 - * volatile to ensure that all calls are really made as intended. 226 + * The CRC value that this function computes in a series of calls to 227 + * crc_func is never actually used, so use volatile to ensure that the 228 + * computations are done as intended and don't all get optimized out. 231 229 */ 232 230 volatile u64 crc = 0; 233 231 u64 t; ··· 253 249 kunit_info(test, "len=%zu: %llu MB/s\n", 254 250 len, div64_u64((u64)len * num_iters * 1000, t)); 255 251 } 252 + } 253 + 254 + /* crc7_be */ 255 + 256 + static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len) 257 + { 258 + /* 259 + * crc7_be() left-aligns the 7-bit CRC in a u8, whereas the test wants a 260 + * right-aligned CRC (in a u64). Convert between the conventions. 
261 + */ 262 + return crc7_be(crc << 1, p, len) >> 1; 263 + } 264 + 265 + static const struct crc_variant crc_variant_crc7_be = { 266 + .bits = 7, 267 + .poly = 0x9, 268 + .func = crc7_be_wrapper, 269 + }; 270 + 271 + static void crc7_be_test(struct kunit *test) 272 + { 273 + crc_test(test, &crc_variant_crc7_be); 274 + } 275 + 276 + static void crc7_be_benchmark(struct kunit *test) 277 + { 278 + crc_benchmark(test, crc7_be_wrapper); 256 279 } 257 280 258 281 /* crc16 */ ··· 393 362 394 363 static u64 crc32c_combine_wrapper(u64 crc1, u64 crc2, size_t len2) 395 364 { 396 - return __crc32c_le_combine(crc1, crc2, len2); 365 + return crc32c_combine(crc1, crc2, len2); 397 366 } 398 367 399 368 static const struct crc_variant crc_variant_crc32c = { ··· 438 407 crc_benchmark(test, crc64_be_wrapper); 439 408 } 440 409 410 + /* crc64_nvme */ 411 + 412 + static u64 crc64_nvme_wrapper(u64 crc, const u8 *p, size_t len) 413 + { 414 + /* The inversions that crc64_nvme() does have to be undone here. */ 415 + return ~crc64_nvme(~crc, p, len); 416 + } 417 + 418 + static const struct crc_variant crc_variant_crc64_nvme = { 419 + .bits = 64, 420 + .le = true, 421 + .poly = 0x9a6c9329ac4bc9b5, 422 + .func = crc64_nvme_wrapper, 423 + }; 424 + 425 + static void crc64_nvme_test(struct kunit *test) 426 + { 427 + crc_test(test, &crc_variant_crc64_nvme); 428 + } 429 + 430 + static void crc64_nvme_benchmark(struct kunit *test) 431 + { 432 + crc_benchmark(test, crc64_nvme_wrapper); 433 + } 434 + 441 435 static struct kunit_case crc_test_cases[] = { 436 + KUNIT_CASE(crc7_be_test), 437 + KUNIT_CASE(crc7_be_benchmark), 442 438 KUNIT_CASE(crc16_test), 443 439 KUNIT_CASE(crc16_benchmark), 444 440 KUNIT_CASE(crc_t10dif_test), ··· 478 420 KUNIT_CASE(crc32c_benchmark), 479 421 KUNIT_CASE(crc64_be_test), 480 422 KUNIT_CASE(crc64_be_benchmark), 423 + KUNIT_CASE(crc64_nvme_test), 424 + KUNIT_CASE(crc64_nvme_benchmark), 481 425 {}, 482 426 }; 483 427
+291
scripts/gen-crc-consts.py
··· 1 + #!/usr/bin/env python3 2 + # SPDX-License-Identifier: GPL-2.0-or-later 3 + # 4 + # Script that generates constants for computing the given CRC variant(s). 5 + # 6 + # Copyright 2025 Google LLC 7 + # 8 + # Author: Eric Biggers <ebiggers@google.com> 9 + 10 + import sys 11 + 12 + # XOR (add) an iterable of polynomials. 13 + def xor(iterable): 14 + res = 0 15 + for val in iterable: 16 + res ^= val 17 + return res 18 + 19 + # Multiply two polynomials. 20 + def clmul(a, b): 21 + return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0) 22 + 23 + # Polynomial division floor(a / b). 24 + def div(a, b): 25 + q = 0 26 + while a.bit_length() >= b.bit_length(): 27 + q ^= 1 << (a.bit_length() - b.bit_length()) 28 + a ^= b << (a.bit_length() - b.bit_length()) 29 + return q 30 + 31 + # Reduce the polynomial 'a' modulo the polynomial 'b'. 32 + def reduce(a, b): 33 + return a ^ clmul(div(a, b), b) 34 + 35 + # Reflect the bits of a polynomial. 36 + def bitreflect(poly, num_bits): 37 + assert poly.bit_length() <= num_bits 38 + return xor(((poly >> i) & 1) << (num_bits - 1 - i) for i in range(num_bits)) 39 + 40 + # Format a polynomial as hex. Bit-reflect it if the CRC is lsb-first. 41 + def fmt_poly(variant, poly, num_bits): 42 + if variant.lsb: 43 + poly = bitreflect(poly, num_bits) 44 + return f'0x{poly:0{2*num_bits//8}x}' 45 + 46 + # Print a pair of 64-bit polynomial multipliers. They are always passed in the 47 + # order [HI64_TERMS, LO64_TERMS] but will be printed in the appropriate order. 48 + def print_mult_pair(variant, mults): 49 + mults = list(mults if variant.lsb else reversed(mults)) 50 + terms = ['HI64_TERMS', 'LO64_TERMS'] if variant.lsb else ['LO64_TERMS', 'HI64_TERMS'] 51 + for i in range(2): 52 + print(f'\t\t{fmt_poly(variant, mults[i]["val"], 64)},\t/* {terms[i]}: {mults[i]["desc"]} */') 53 + 54 + # Pretty-print a polynomial. 
55 + def pprint_poly(prefix, poly): 56 + terms = [f'x^{i}' for i in reversed(range(poly.bit_length())) 57 + if (poly & (1 << i)) != 0] 58 + j = 0 59 + while j < len(terms): 60 + s = prefix + terms[j] + (' +' if j < len(terms) - 1 else '') 61 + j += 1 62 + while j < len(terms) and len(s) < 73: 63 + s += ' ' + terms[j] + (' +' if j < len(terms) - 1 else '') 64 + j += 1 65 + print(s) 66 + prefix = ' * ' + (' ' * (len(prefix) - 3)) 67 + 68 + # Print a comment describing constants generated for the given CRC variant. 69 + def print_header(variant, what): 70 + print('/*') 71 + s = f'{"least" if variant.lsb else "most"}-significant-bit-first CRC-{variant.bits}' 72 + print(f' * {what} generated for {s} using') 73 + pprint_poly(' * G(x) = ', variant.G) 74 + print(' */') 75 + 76 + class CrcVariant: 77 + def __init__(self, bits, generator_poly, bit_order): 78 + self.bits = bits 79 + if bit_order not in ['lsb', 'msb']: 80 + raise ValueError('Invalid value for bit_order') 81 + self.lsb = bit_order == 'lsb' 82 + self.name = f'crc{bits}_{bit_order}_0x{generator_poly:0{(2*bits+7)//8}x}' 83 + if self.lsb: 84 + generator_poly = bitreflect(generator_poly, bits) 85 + self.G = generator_poly ^ (1 << bits) 86 + 87 + # Generate tables for CRC computation using the "slice-by-N" method. 88 + # N=1 corresponds to the traditional byte-at-a-time table. 89 + def gen_slicebyN_tables(variants, n): 90 + for v in variants: 91 + print('') 92 + print_header(v, f'Slice-by-{n} CRC table') 93 + print(f'static const u{v.bits} __maybe_unused {v.name}_table[{256*n}] = {{') 94 + s = '' 95 + for i in range(256 * n): 96 + # The i'th table entry is the CRC of the message consisting of byte 97 + # i % 256 followed by i // 256 zero bytes. 
98 + poly = (bitreflect(i % 256, 8) if v.lsb else i % 256) << (v.bits + 8*(i//256)) 99 + next_entry = fmt_poly(v, reduce(poly, v.G), v.bits) + ',' 100 + if len(s + next_entry) > 71: 101 + print(f'\t{s}') 102 + s = '' 103 + s += (' ' if s else '') + next_entry 104 + if s: 105 + print(f'\t{s}') 106 + print('};') 107 + 108 + def print_riscv_const(v, bits_per_long, name, val, desc): 109 + print(f'\t.{name} = {fmt_poly(v, val, bits_per_long)}, /* {desc} */') 110 + 111 + def do_gen_riscv_clmul_consts(v, bits_per_long): 112 + (G, n, lsb) = (v.G, v.bits, v.lsb) 113 + 114 + pow_of_x = 3 * bits_per_long - (1 if lsb else 0) 115 + print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_hi', 116 + reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G') 117 + pow_of_x = 2 * bits_per_long - (1 if lsb else 0) 118 + print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_lo', 119 + reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G') 120 + 121 + pow_of_x = bits_per_long - 1 + n 122 + print_riscv_const(v, bits_per_long, 'barrett_reduction_const_1', 123 + div(1 << pow_of_x, G), f'floor(x^{pow_of_x} / G)') 124 + 125 + val = G - (1 << n) 126 + desc = f'G - x^{n}' 127 + if lsb: 128 + val <<= bits_per_long - n 129 + desc = f'({desc}) * x^{bits_per_long - n}' 130 + print_riscv_const(v, bits_per_long, 'barrett_reduction_const_2', val, desc) 131 + 132 + def gen_riscv_clmul_consts(variants): 133 + print('') 134 + print('struct crc_clmul_consts {'); 135 + print('\tunsigned long fold_across_2_longs_const_hi;'); 136 + print('\tunsigned long fold_across_2_longs_const_lo;'); 137 + print('\tunsigned long barrett_reduction_const_1;'); 138 + print('\tunsigned long barrett_reduction_const_2;'); 139 + print('};'); 140 + for v in variants: 141 + print(''); 142 + if v.bits > 32: 143 + print_header(v, 'Constants') 144 + print('#ifdef CONFIG_64BIT') 145 + print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{') 146 + do_gen_riscv_clmul_consts(v, 64) 147 + print('};') 148 + 
print('#endif') 149 + else: 150 + print_header(v, 'Constants') 151 + print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{') 152 + print('#ifdef CONFIG_64BIT') 153 + do_gen_riscv_clmul_consts(v, 64) 154 + print('#else') 155 + do_gen_riscv_clmul_consts(v, 32) 156 + print('#endif') 157 + print('};') 158 + 159 + # Generate constants for carryless multiplication based CRC computation. 160 + def gen_x86_pclmul_consts(variants): 161 + # These are the distances, in bits, to generate folding constants for. 162 + FOLD_DISTANCES = [2048, 1024, 512, 256, 128] 163 + 164 + for v in variants: 165 + (G, n, lsb) = (v.G, v.bits, v.lsb) 166 + print('') 167 + print_header(v, 'CRC folding constants') 168 + print('static const struct {') 169 + if not lsb: 170 + print('\tu8 bswap_mask[16];') 171 + for i in FOLD_DISTANCES: 172 + print(f'\tu64 fold_across_{i}_bits_consts[2];') 173 + print('\tu8 shuf_table[48];') 174 + print('\tu64 barrett_reduction_consts[2];') 175 + print(f'}} {v.name}_consts ____cacheline_aligned __maybe_unused = {{') 176 + 177 + # Byte-reflection mask, needed for msb-first CRCs 178 + if not lsb: 179 + print('\t.bswap_mask = {' + ', '.join(str(i) for i in reversed(range(16))) + '},') 180 + 181 + # Fold constants for all distances down to 128 bits 182 + for i in FOLD_DISTANCES: 183 + print(f'\t.fold_across_{i}_bits_consts = {{') 184 + # Given 64x64 => 128 bit carryless multiplication instructions, two 185 + # 64-bit fold constants are needed per "fold distance" i: one for 186 + # HI64_TERMS that is basically x^(i+64) mod G and one for LO64_TERMS 187 + # that is basically x^i mod G. The exact values however undergo a 188 + # couple adjustments, described below. 
189 + mults = [] 190 + for j in [64, 0]: 191 + pow_of_x = i + j 192 + if lsb: 193 + # Each 64x64 => 128 bit carryless multiplication instruction 194 + # actually generates a 127-bit product in physical bits 0 195 + # through 126, which in the lsb-first case represent the 196 + # coefficients of x^1 through x^127, not x^0 through x^126. 197 + # Thus in the lsb-first case, each such instruction 198 + # implicitly adds an extra factor of x. The below removes a 199 + # factor of x from each constant to compensate for this. 200 + # For n < 64 the x could be removed from either the reduced 201 + # part or unreduced part, but for n == 64 the reduced part 202 + # is the only option. Just always use the reduced part. 203 + pow_of_x -= 1 204 + # Make a factor of x^(64-n) be applied unreduced rather than 205 + # reduced, to cause the product to use only the x^(64-n) and 206 + # higher terms and always be zero in the lower terms. Usually 207 + # this makes no difference as it does not affect the product's 208 + # congruence class mod G and the constant remains 64-bit, but 209 + # part of the final reduction from 128 bits does rely on this 210 + # property when it reuses one of the constants. 
211 + pow_of_x -= 64 - n 212 + mults.append({ 'val': reduce(1 << pow_of_x, G) << (64 - n), 213 + 'desc': f'(x^{pow_of_x} mod G) * x^{64-n}' }) 214 + print_mult_pair(v, mults) 215 + print('\t},') 216 + 217 + # Shuffle table for handling 1..15 bytes at end 218 + print('\t.shuf_table = {') 219 + print('\t\t' + (16*'-1, ').rstrip()) 220 + print('\t\t' + ''.join(f'{i:2}, ' for i in range(16)).rstrip()) 221 + print('\t\t' + (16*'-1, ').rstrip()) 222 + print('\t},') 223 + 224 + # Barrett reduction constants for reducing 128 bits to the final CRC 225 + print('\t.barrett_reduction_consts = {') 226 + mults = [] 227 + 228 + val = div(1 << (63+n), G) 229 + desc = f'floor(x^{63+n} / G)' 230 + if not lsb: 231 + val = (val << 1) - (1 << 64) 232 + desc = f'({desc} * x) - x^64' 233 + mults.append({ 'val': val, 'desc': desc }) 234 + 235 + val = G - (1 << n) 236 + desc = f'G - x^{n}' 237 + if lsb and n == 64: 238 + assert (val & 1) != 0 # The x^0 term should always be nonzero. 239 + val >>= 1 240 + desc = f'({desc} - x^0) / x' 241 + else: 242 + pow_of_x = 64 - n - (1 if lsb else 0) 243 + val <<= pow_of_x 244 + desc = f'({desc}) * x^{pow_of_x}' 245 + mults.append({ 'val': val, 'desc': desc }) 246 + 247 + print_mult_pair(v, mults) 248 + print('\t},') 249 + 250 + print('};') 251 + 252 + def parse_crc_variants(vars_string): 253 + variants = [] 254 + for var_string in vars_string.split(','): 255 + bits, bit_order, generator_poly = var_string.split('_') 256 + assert bits.startswith('crc') 257 + bits = int(bits.removeprefix('crc')) 258 + assert generator_poly.startswith('0x') 259 + generator_poly = generator_poly.removeprefix('0x') 260 + assert len(generator_poly) % 2 == 0 261 + generator_poly = int(generator_poly, 16) 262 + variants.append(CrcVariant(bits, generator_poly, bit_order)) 263 + return variants 264 + 265 + if len(sys.argv) != 3: 266 + sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... 
CRC_VARIANT[,CRC_VARIANT]...\n') 267 + sys.stderr.write(' CONSTS_TYPE can be sliceby[1-8], riscv_clmul, or x86_pclmul\n') 268 + sys.stderr.write(' CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n') 269 + sys.stderr.write(' E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n') 270 + sys.stderr.write(' Polynomial must use the given bit_order and exclude x^{num_bits}\n') 271 + sys.exit(1) 272 + 273 + print('/* SPDX-License-Identifier: GPL-2.0-or-later */') 274 + print('/*') 275 + print(' * CRC constants generated by:') 276 + print(' *') 277 + print(f' *\t{sys.argv[0]} {" ".join(sys.argv[1:])}') 278 + print(' *') 279 + print(' * Do not edit manually.') 280 + print(' */') 281 + consts_types = sys.argv[1].split(',') 282 + variants = parse_crc_variants(sys.argv[2]) 283 + for consts_type in consts_types: 284 + if consts_type.startswith('sliceby'): 285 + gen_slicebyN_tables(variants, int(consts_type.removeprefix('sliceby'))) 286 + elif consts_type == 'riscv_clmul': 287 + gen_riscv_clmul_consts(variants) 288 + elif consts_type == 'x86_pclmul': 289 + gen_x86_pclmul_consts(variants) 290 + else: 291 + raise ValueError(f'Unknown consts_type: {consts_type}')
+1 -1
sound/soc/codecs/aw88395/aw88395_device.c
··· 424 424 return -EINVAL; 425 425 } 426 426 427 - crc_value = __crc32c_le(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF; 427 + crc_value = crc32c(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF; 428 428 429 429 return aw_dev_dsp_write(aw_dev, AW88395_DSP_REG_CRC_ADDR, crc_value, 430 430 AW88395_DSP_32_DATA);
-1
tools/testing/selftests/arm64/fp/kernel-test.c
··· 46 46 } 47 47 48 48 static char *drivers[] = { 49 - "crct10dif-arm64", 50 49 "sha1-ce", 51 50 "sha224-arm64", 52 51 "sha224-arm64-neon",