Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch/x86/include/asm/xor_avx.h at v5.6 (180 lines, 4.4 kB)
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
        BLOCK(32 * i, 0) \
        BLOCK(32 * (i + 1), 1) \
        BLOCK(32 * (i + 2), 2) \
        BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
        BLOCK4(0) \
        BLOCK4(4) \
        BLOCK4(8) \
        BLOCK4(12)

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16();

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
                p4 = (unsigned long *)((uintptr_t)p4 + 512);
        }

        kernel_fpu_end();
}
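/*
 * The routines above are published through a struct xor_block_template
 * so that the generic XOR code can benchmark the available
 * implementations at boot and select the fastest one. Both checks
 * below require X86_FEATURE_OSXSAVE in addition to X86_FEATURE_AVX:
 * the OS must have enabled XSAVE, otherwise the YMM register state
 * used here would not be preserved across context switches.
 */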
asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ 135 "m" (p2[i / sizeof(*p2)])); \ 136 asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ 137 "m" (p1[i / sizeof(*p1)])); \ 138 asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \ 139 "m" (p0[i / sizeof(*p0)])); \ 140 asm volatile("vmovdqa %%ymm" #reg ", %0" : \ 141 "=m" (p0[i / sizeof(*p0)])); \ 142} while (0); 143 144 BLOCK16() 145 146 p0 = (unsigned long *)((uintptr_t)p0 + 512); 147 p1 = (unsigned long *)((uintptr_t)p1 + 512); 148 p2 = (unsigned long *)((uintptr_t)p2 + 512); 149 p3 = (unsigned long *)((uintptr_t)p3 + 512); 150 p4 = (unsigned long *)((uintptr_t)p4 + 512); 151 } 152 153 kernel_fpu_end(); 154} 155 156static struct xor_block_template xor_block_avx = { 157 .name = "avx", 158 .do_2 = xor_avx_2, 159 .do_3 = xor_avx_3, 160 .do_4 = xor_avx_4, 161 .do_5 = xor_avx_5, 162}; 163 164#define AVX_XOR_SPEED \ 165do { \ 166 if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ 167 xor_speed(&xor_block_avx); \ 168} while (0) 169 170#define AVX_SELECT(FASTEST) \ 171 (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) 172 173#else 174 175#define AVX_XOR_SPEED {} 176 177#define AVX_SELECT(FASTEST) (FASTEST) 178 179#endif 180#endif