Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RAID: add tilegx SIMD implementation of raid6

This change adds a TILE-Gx SIMD implementation to the software RAID
(md) code, modeled on the Altivec implementation. It covers only
syndrome generation; more could be done to improve recovery, as in
the recent Intel SSSE3 recovery implementation.

The code unrolls 8 times; this turns out to be the best on tilegx
hardware among the unroll factors 1, 2, 4, 8 and 16. The code reads
one cache-line of data from each disk, stores P and Q, then goes to
the next cache-line.

The test code in sys/linux/lib/raid6/test reports a 2008 MB/s data
read rate for syndrome generation using 18 disks (16 data and 2
parity). It was 1512 MB/s before this SIMD optimization. This was
measured running on 1 core with all the data in cache.

This is based on the paper "The Mathematics of RAID-6"
(http://kernel.org/pub/linux/kernel/people/hpa/raid6.pdf).

Signed-off-by: Ken Steele <ken@tilera.com>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: NeilBrown <neilb@suse.de>

authored by

Ken Steele and committed by
NeilBrown
ae77cbc1 275c51c4

+103
+1
include/linux/raid/pq.h
··· 101 101 extern const struct raid6_calls raid6_avx2x1; 102 102 extern const struct raid6_calls raid6_avx2x2; 103 103 extern const struct raid6_calls raid6_avx2x4; 104 + extern const struct raid6_calls raid6_tilegx8; 104 105 105 106 struct raid6_recov_calls { 106 107 void (*data2)(int, size_t, int, int, void **);
+6
lib/raid6/Makefile
··· 5 5 6 6 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 7 7 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 8 + raid6_pq-$(CONFIG_TILEGX) += tilegx8.o 8 9 9 10 hostprogs-y += mktables 10 11 ··· 69 68 targets += altivec8.c 70 69 $(obj)/altivec8.c: UNROLL := 8 71 70 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE 71 + $(call if_changed,unroll) 72 + 73 + targets += tilegx8.c 74 + $(obj)/tilegx8.c: UNROLL := 8 75 + $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE 72 76 $(call if_changed,unroll) 73 77 74 78 quiet_cmd_mktable = TABLE $@
+3
lib/raid6/algos.c
··· 66 66 &raid6_altivec4, 67 67 &raid6_altivec8, 68 68 #endif 69 + #if defined(CONFIG_TILEGX) 70 + &raid6_tilegx8, 71 + #endif 69 72 &raid6_intx1, 70 73 &raid6_intx2, 71 74 &raid6_intx4,
+7
lib/raid6/test/Makefile
··· 35 35 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o 36 36 endif 37 37 endif 38 + ifeq ($(ARCH),tilegx) 39 + OBJS += tilegx8.o 40 + endif 38 41 39 42 .c.o: 40 43 $(CC) $(CFLAGS) -c -o $@ $< ··· 88 85 int32.c: int.uc ../unroll.awk 89 86 $(AWK) ../unroll.awk -vN=32 < int.uc > $@ 90 87 88 + tilegx8.c: tilegx.uc ../unroll.awk 89 + $(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@ 90 + 91 91 tables.c: mktables 92 92 ./mktables > tables.c 93 93 94 94 clean: 95 95 rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test 96 + rm -f tilegx*.c 96 97 97 98 spotless: clean 98 99 rm -f *~
+86
lib/raid6/tilegx.uc
··· 1 + /* -*- linux-c -*- ------------------------------------------------------- * 2 + * 3 + * Copyright 2002 H. Peter Anvin - All Rights Reserved 4 + * Copyright 2012 Tilera Corporation - All Rights Reserved 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License as published by 8 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 9 + * Boston MA 02111-1307, USA; either version 2 of the License, or 10 + * (at your option) any later version; incorporated herein by reference. 11 + * 12 + * ----------------------------------------------------------------------- */ 13 + 14 + /* 15 + * tilegx$#.c 16 + * 17 + * $#-way unrolled TILE-Gx SIMD for RAID-6 math. 18 + * 19 + * This file is postprocessed using unroll.awk. 20 + * 21 + */ 22 + 23 + #include <linux/raid/pq.h> 24 + 25 + /* Create 8 byte copies of constant byte */ 26 + # define NBYTES(x) (__insn_v1addi(0, x)) 27 + # define NSIZE 8 28 + 29 + /* 30 + * The SHLBYTE() operation shifts each byte left by 1, *not* 31 + * rolling over into the next byte 32 + */ 33 + static inline __attribute_const__ u64 SHLBYTE(u64 v) 34 + { 35 + /* Vector One Byte Shift Left Immediate. */ 36 + return __insn_v1shli(v, 1); 37 + } 38 + 39 + /* 40 + * The MASK() operation returns 0xFF in any byte for which the high 41 + * bit is 1, 0x00 for any byte for which the high bit is 0. 42 + */ 43 + static inline __attribute_const__ u64 MASK(u64 v) 44 + { 45 + /* Vector One Byte Shift Right Signed Immediate. 
*/ 46 + return __insn_v1shrsi(v, 7); 47 + } 48 + 49 + 50 + void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) 51 + { 52 + u8 **dptr = (u8 **)ptrs; 53 + u64 *p, *q; 54 + int d, z, z0; 55 + 56 + u64 wd$$, wq$$, wp$$, w1$$, w2$$; 57 + u64 x1d = NBYTES(0x1d); 58 + u64 * z0ptr; 59 + 60 + z0 = disks - 3; /* Highest data disk */ 61 + p = (u64 *)dptr[z0+1]; /* XOR parity */ 62 + q = (u64 *)dptr[z0+2]; /* RS syndrome */ 63 + 64 + z0ptr = (u64 *)&dptr[z0][0]; 65 + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { 66 + wq$$ = wp$$ = *z0ptr++; 67 + for ( z = z0-1 ; z >= 0 ; z-- ) { 68 + wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE]; 69 + wp$$ = wp$$ ^ wd$$; 70 + w2$$ = MASK(wq$$); 71 + w1$$ = SHLBYTE(wq$$); 72 + w2$$ = w2$$ & x1d; 73 + w1$$ = w1$$ ^ w2$$; 74 + wq$$ = w1$$ ^ wd$$; 75 + } 76 + *p++ = wp$$; 77 + *q++ = wq$$; 78 + } 79 + } 80 + 81 + const struct raid6_calls raid6_tilegx$# = { 82 + raid6_tilegx$#_gen_syndrome, 83 + NULL, 84 + "tilegx$#", 85 + 0 86 + };