Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.17-rc2 171 lines 5.2 kB view raw
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6sse1.c
 *
 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
 *
 * This is really an MMX implementation, but it requires SSE-1 or
 * AMD MMXEXT for prefetch support and a few other features.  The
 * support for nontemporal memory accesses is enough to make this
 * worthwhile as a separate implementation.
 */

#if defined(__i386__)

#include "raid6.h"
#include "raid6x86.h"

/* Defined in raid6mmx.c */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;

/*
 * Report whether the routines in this file may be used on this machine.
 *
 * Kernel build: nonzero when the boot CPU has MMX and additionally has
 * either XMM (SSE-1) or MMXEXT.
 * User-space build: nonzero when bits 23 and 25 of cpuid_features() are
 * both set.
 */
static int raid6_have_sse1_or_mmxext(void)
{
#ifdef __KERNEL__
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		(boot_cpu_has(X86_FEATURE_XMM) ||
		 boot_cpu_has(X86_FEATURE_MMXEXT));
#else
	/* User space test code - this incorrectly breaks on some Athlons */
	u32 features = cpuid_features();
	return ( (features & (5<<23)) == (5<<23) );
#endif
}

/*
 * Plain SSE1 implementation
 *
 * disks: number of entries in ptrs[]
 * bytes: number of bytes to process in each buffer; consumed 8 at a time
 *        (presumably callers always pass a multiple of 8 - TODO confirm)
 * ptrs:  ptrs[0 .. disks-3] are the data buffers, ptrs[disks-2] receives
 *        the XOR parity (P) and ptrs[disks-1] the RS syndrome (Q)
 *
 * Note that dptr[z0-1] is read unconditionally, so this routine needs
 * at least two data disks (disks >= 4).
 *
 * Register usage:
 *   mm0  constant loaded from raid6_mmx_constants.x1d
 *   mm2  running P
 *   mm4  running Q
 *   mm5  scratch mask, kept at zero between steps
 *   mm6  data of the next disk to fold in, loaded one step ahead of use
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	size_t d;		/* Byte offset; same type as "bytes" */
	int z, z0;
	raid6_mmx_save_t sa;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* This is really MMX code, not SSE */
	raid6_before_mmx(&sa);

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* mm5 = 0xff in every byte of Q whose top bit is set */
			asm volatile("pcmpgtb %mm4,%mm5");
			/* Q *= 2 bytewise, then fold in mm0 where it overflowed */
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");	/* Re-zero temp */
			/* Accumulate the previously loaded data into P and Q */
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			/* Load the data consumed by the next step */
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		/* Final step: consumes the last value loaded into mm6 */
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		/* Non-temporal stores of the results */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	raid6_after_mmx(&sa);
	asm volatile("sfence" : : : "memory");
}

/*
 * Positional initializer; see struct raid6_calls in raid6.h for the
 * authoritative field layout.
 */
const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 SSE1 implementation
 *
 * Same arguments as raid6_sse11_gen_syndrome(), but each outer iteration
 * handles 16 bytes as two independent 8-byte lanes (presumably callers
 * always pass a multiple of 16 - TODO confirm).
 *
 * Register usage:
 *   mm0      constant loaded from raid6_mmx_constants.x1d
 *   mm2/mm3  running P for lane 0 / lane 1
 *   mm4/mm6  running Q for lane 0 / lane 1
 *   mm5/mm7  scratch per lane: first the overflow mask, then the loaded
 *            data; zeroed again at the end of every step
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	size_t d;		/* Byte offset; same type as "bytes" */
	int z, z0;
	raid6_mmx_save_t sa;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	raid6_before_mmx(&sa);

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7");	/* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6");	/* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Masks of the Q bytes whose top bit is set */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			/* Q *= 2 bytewise, then fold in mm0 where it overflowed */
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Load this disk's data and accumulate into P and Q */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Re-zero temps for the next step */
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		/* Non-temporal stores of the results */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	raid6_after_mmx(&sa);
	asm volatile("sfence" : : : "memory");
}

/*
 * Positional initializer; see struct raid6_calls in raid6.h for the
 * authoritative field layout.
 */
const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};

#endif /* __i386__ */