Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-rmk' of git://git.linaro.org/people/ardbiesheuvel/linux-arm into devel-stable

Comments from Ard Biesheuvel:

I have included two use cases that I have been using, XOR and RAID-6
checksumming. The former gets a 60% performance boost on the NEON, the
latter over 400%.

ARM: add support for kernel mode NEON

Adds kernel_neon_begin/end (renamed from kernel_vfp_begin/end in the
previous version to de-emphasize the VFP part, as VFP code that needs
software assistance is not currently supported).

Introduces <asm/neon.h> and the Kconfig symbol KERNEL_MODE_NEON. This
has been aligned with Catalin for arm64, so any NEON code that does
not use assembly but intrinsics or the GCC vectorizer (such as my
examples) can potentially be shared between arm and arm64 archs.

ARM: move VFP init to an earlier boot stage

This is needed so the NEON is enabled when the XOR and RAID-6 algo
boot time benchmarks are run.

ARM: be strict about FP exceptions in kernel mode

This adds a check to vfp_support_entry() to flag unsupported uses of
the NEON/VFP in kernel mode. FP exceptions (bounces) are flagged as
a bug; this is because of their potentially intermittent nature.
Exceptions caused by the fact that kernel_neon_begin has not been
called are just routed through the undef handler.

ARM: crypto: add NEON accelerated XOR implementation

This is the xor_blocks() implementation built with -ftree-vectorize,
60% faster than optimized ARM code. It calls in_interrupt() to check
whether the NEON flavor can be used: this should really not be
necessary, but due to xor_blocks' quite generic nature, there is no
telling how exactly people may be using it in the real world.

lib/raid6: add ARM-NEON accelerated syndrome calculation

This is a port of the RAID-6 checksumming code in altivec.uc, adapted
to use NEON intrinsics. It is about 4x faster than the sequential
code.

+452 -2
+7
arch/arm/Kconfig
··· 2176 2176 Say Y to include support code for NEON, the ARMv7 Advanced SIMD 2177 2177 Extension. 2178 2178 2179 + config KERNEL_MODE_NEON 2180 + bool "Support for NEON in kernel mode" 2181 + default n 2182 + depends on NEON 2183 + help 2184 + Say Y to include support for NEON in kernel mode. 2185 + 2179 2186 endmenu 2180 2187 2181 2188 menu "Userspace binary formats"
+36
arch/arm/include/asm/neon.h
··· 1 + /* 2 + * linux/arch/arm/include/asm/neon.h 3 + * 4 + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + 11 + #include <asm/hwcap.h> 12 + 13 + #define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON)) 14 + 15 + #ifdef __ARM_NEON__ 16 + 17 + /* 18 + * If you are affected by the BUILD_BUG below, it probably means that you are 19 + * using NEON code /and/ calling the kernel_neon_begin() function from the same 20 + * compilation unit. To prevent issues that may arise from GCC reordering or 21 + * generating(1) NEON instructions outside of these begin/end functions, the 22 + * only supported way of using NEON code in the kernel is by isolating it in a 23 + * separate compilation unit, and calling it from another unit from inside a 24 + * kernel_neon_begin/kernel_neon_end pair. 25 + * 26 + * (1) Current GCC (4.7) might generate NEON instructions at O3 level if 27 + * -mfpu=neon is set. 28 + */ 29 + 30 + #define kernel_neon_begin() \ 31 + BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code") 32 + 33 + #else 34 + void kernel_neon_begin(void); 35 + #endif 36 + void kernel_neon_end(void);
+73
arch/arm/include/asm/xor.h
··· 7 7 * it under the terms of the GNU General Public License version 2 as 8 8 * published by the Free Software Foundation. 9 9 */ 10 + #include <linux/hardirq.h> 10 11 #include <asm-generic/xor.h> 12 + #include <asm/hwcap.h> 13 + #include <asm/neon.h> 11 14 12 15 #define __XOR(a1, a2) a1 ^= a2 13 16 ··· 141 138 xor_speed(&xor_block_arm4regs); \ 142 139 xor_speed(&xor_block_8regs); \ 143 140 xor_speed(&xor_block_32regs); \ 141 + NEON_TEMPLATES; \ 144 142 } while (0) 143 + 144 + #ifdef CONFIG_KERNEL_MODE_NEON 145 + 146 + extern struct xor_block_template const xor_block_neon_inner; 147 + 148 + static void 149 + xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 150 + { 151 + if (in_interrupt()) { 152 + xor_arm4regs_2(bytes, p1, p2); 153 + } else { 154 + kernel_neon_begin(); 155 + xor_block_neon_inner.do_2(bytes, p1, p2); 156 + kernel_neon_end(); 157 + } 158 + } 159 + 160 + static void 161 + xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 162 + unsigned long *p3) 163 + { 164 + if (in_interrupt()) { 165 + xor_arm4regs_3(bytes, p1, p2, p3); 166 + } else { 167 + kernel_neon_begin(); 168 + xor_block_neon_inner.do_3(bytes, p1, p2, p3); 169 + kernel_neon_end(); 170 + } 171 + } 172 + 173 + static void 174 + xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 175 + unsigned long *p3, unsigned long *p4) 176 + { 177 + if (in_interrupt()) { 178 + xor_arm4regs_4(bytes, p1, p2, p3, p4); 179 + } else { 180 + kernel_neon_begin(); 181 + xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); 182 + kernel_neon_end(); 183 + } 184 + } 185 + 186 + static void 187 + xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 188 + unsigned long *p3, unsigned long *p4, unsigned long *p5) 189 + { 190 + if (in_interrupt()) { 191 + xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); 192 + } else { 193 + kernel_neon_begin(); 194 + xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); 195 + kernel_neon_end(); 196 + } 197 + } 198 + 199 + 
static struct xor_block_template xor_block_neon = { 200 + .name = "neon", 201 + .do_2 = xor_neon_2, 202 + .do_3 = xor_neon_3, 203 + .do_4 = xor_neon_4, 204 + .do_5 = xor_neon_5 205 + }; 206 + 207 + #define NEON_TEMPLATES \ 208 + do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) 209 + #else 210 + #define NEON_TEMPLATES 211 + #endif
+6
arch/arm/lib/Makefile
··· 45 45 46 46 $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S 47 47 $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S 48 + 49 + ifeq ($(CONFIG_KERNEL_MODE_NEON),y) 50 + NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon 51 + CFLAGS_xor-neon.o += $(NEON_FLAGS) 52 + lib-$(CONFIG_XOR_BLOCKS) += xor-neon.o 53 + endif
+42
arch/arm/lib/xor-neon.c
··· 1 + /* 2 + * linux/arch/arm/lib/xor-neon.c 3 + * 4 + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + 11 + #include <linux/raid/xor.h> 12 + 13 + #ifndef __ARM_NEON__ 14 + #error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' 15 + #endif 16 + 17 + /* 18 + * Pull in the reference implementations while instructing GCC (through 19 + * -ftree-vectorize) to attempt to exploit implicit parallelism and emit 20 + * NEON instructions. 21 + */ 22 + #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 23 + #pragma GCC optimize "tree-vectorize" 24 + #else 25 + /* 26 + * While older versions of GCC do not generate incorrect code, they fail to 27 + * recognize the parallel nature of these functions, and emit plain ARM code, 28 + * which is known to be slower than the optimized ARM code in asm-arm/xor.h. 29 + */ 30 + #warning This code requires at least version 4.6 of GCC 31 + #endif 32 + 33 + #pragma GCC diagnostic ignored "-Wunused-variable" 34 + #include <asm-generic/xor.h> 35 + 36 + struct xor_block_template const xor_block_neon_inner = { 37 + .name = "__inner_neon__", 38 + .do_2 = xor_8regs_2, 39 + .do_3 = xor_8regs_3, 40 + .do_4 = xor_8regs_4, 41 + .do_5 = xor_8regs_5, 42 + };
+5
arch/arm/vfp/vfphw.S
··· 78 78 ENTRY(vfp_support_entry) 79 79 DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 80 80 81 + ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions 82 + and r3, r3, #MODE_MASK @ are supported in kernel mode 83 + teq r3, #USR_MODE 84 + bne vfp_kmode_exception @ Returns through lr 85 + 81 86 VFPFMRX r1, FPEXC @ Is the VFP enabled? 82 87 DBGSTR1 "fpexc %08x", r1 83 88 tst r1, #FPEXC_EN
+68 -1
arch/arm/vfp/vfpmodule.c
··· 20 20 #include <linux/init.h> 21 21 #include <linux/uaccess.h> 22 22 #include <linux/user.h> 23 + #include <linux/export.h> 23 24 24 25 #include <asm/cp15.h> 25 26 #include <asm/cputype.h> ··· 649 648 return NOTIFY_OK; 650 649 } 651 650 651 + void vfp_kmode_exception(void) 652 + { 653 + /* 654 + * If we reach this point, a floating point exception has been raised 655 + * while running in kernel mode. If the NEON/VFP unit was enabled at the 656 + * time, it means a VFP instruction has been issued that requires 657 + * software assistance to complete, something which is not currently 658 + * supported in kernel mode. 659 + * If the NEON/VFP unit was disabled, and the location pointed to below 660 + * is properly preceded by a call to kernel_neon_begin(), something has 661 + * caused the task to be scheduled out and back in again. In this case, 662 + * rebuilding and running with CONFIG_DEBUG_ATOMIC_SLEEP enabled should 663 + * be helpful in localizing the problem. 664 + */ 665 + if (fmrx(FPEXC) & FPEXC_EN) 666 + pr_crit("BUG: unsupported FP instruction in kernel mode\n"); 667 + else 668 + pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); 669 + } 670 + 671 + #ifdef CONFIG_KERNEL_MODE_NEON 672 + 673 + /* 674 + * Kernel-side NEON support functions 675 + */ 676 + void kernel_neon_begin(void) 677 + { 678 + struct thread_info *thread = current_thread_info(); 679 + unsigned int cpu; 680 + u32 fpexc; 681 + 682 + /* 683 + * Kernel mode NEON is only allowed outside of interrupt context 684 + * with preemption disabled. This will make sure that the kernel 685 + * mode NEON register contents never need to be preserved. 686 + */ 687 + BUG_ON(in_interrupt()); 688 + cpu = get_cpu(); 689 + 690 + fpexc = fmrx(FPEXC) | FPEXC_EN; 691 + fmxr(FPEXC, fpexc); 692 + 693 + /* 694 + * Save the userland NEON/VFP state. 
Under UP, 695 + * the owner could be a task other than 'current' 696 + */ 697 + if (vfp_state_in_hw(cpu, thread)) 698 + vfp_save_state(&thread->vfpstate, fpexc); 699 + #ifndef CONFIG_SMP 700 + else if (vfp_current_hw_state[cpu] != NULL) 701 + vfp_save_state(vfp_current_hw_state[cpu], fpexc); 702 + #endif 703 + vfp_current_hw_state[cpu] = NULL; 704 + } 705 + EXPORT_SYMBOL(kernel_neon_begin); 706 + 707 + void kernel_neon_end(void) 708 + { 709 + /* Disable the NEON/VFP unit. */ 710 + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); 711 + put_cpu(); 712 + } 713 + EXPORT_SYMBOL(kernel_neon_end); 714 + 715 + #endif /* CONFIG_KERNEL_MODE_NEON */ 716 + 652 717 /* 653 718 * VFP support code initialisation. 654 719 */ ··· 798 731 return 0; 799 732 } 800 733 801 - late_initcall(vfp_init); 734 + core_initcall(vfp_init);
+5
include/linux/raid/pq.h
··· 114 114 extern const struct raid6_recov_calls raid6_recov_ssse3; 115 115 extern const struct raid6_recov_calls raid6_recov_avx2; 116 116 117 + extern const struct raid6_calls raid6_neonx1; 118 + extern const struct raid6_calls raid6_neonx2; 119 + extern const struct raid6_calls raid6_neonx4; 120 + extern const struct raid6_calls raid6_neonx8; 121 + 117 122 /* Algorithm list */ 118 123 extern const struct raid6_calls * const raid6_algos[]; 119 124 extern const struct raid6_recov_calls *const raid6_recov_algos[];
+1
lib/raid6/.gitignore
··· 2 2 altivec*.c 3 3 int*.c 4 4 tables.c 5 + neon?.c
+40
lib/raid6/Makefile
··· 5 5 6 6 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 7 7 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 8 + raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o 8 9 9 10 hostprogs-y += mktables 10 11 ··· 15 14 16 15 ifeq ($(CONFIG_ALTIVEC),y) 17 16 altivec_flags := -maltivec -mabi=altivec 17 + endif 18 + 19 + # The GCC option -ffreestanding is required in order to compile code containing 20 + # ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) 21 + ifeq ($(CONFIG_KERNEL_MODE_NEON),y) 22 + NEON_FLAGS := -ffreestanding 23 + ifeq ($(ARCH),arm) 24 + NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon 25 + endif 26 + ifeq ($(ARCH),arm64) 27 + CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only 28 + CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only 29 + CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only 30 + CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only 31 + endif 18 32 endif 19 33 20 34 targets += int1.c ··· 84 68 targets += altivec8.c 85 69 $(obj)/altivec8.c: UNROLL := 8 86 70 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE 71 + $(call if_changed,unroll) 72 + 73 + CFLAGS_neon1.o += $(NEON_FLAGS) 74 + targets += neon1.c 75 + $(obj)/neon1.c: UNROLL := 1 76 + $(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE 77 + $(call if_changed,unroll) 78 + 79 + CFLAGS_neon2.o += $(NEON_FLAGS) 80 + targets += neon2.c 81 + $(obj)/neon2.c: UNROLL := 2 82 + $(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE 83 + $(call if_changed,unroll) 84 + 85 + CFLAGS_neon4.o += $(NEON_FLAGS) 86 + targets += neon4.c 87 + $(obj)/neon4.c: UNROLL := 4 88 + $(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE 89 + $(call if_changed,unroll) 90 + 91 + CFLAGS_neon8.o += $(NEON_FLAGS) 92 + targets += neon8.c 93 + $(obj)/neon8.c: UNROLL := 8 94 + $(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE 87 95 $(call if_changed,unroll) 88 96 89 97 quiet_cmd_mktable = TABLE $@
+6
lib/raid6/algos.c
··· 70 70 &raid6_intx2, 71 71 &raid6_intx4, 72 72 &raid6_intx8, 73 + #ifdef CONFIG_KERNEL_MODE_NEON 74 + &raid6_neonx1, 75 + &raid6_neonx2, 76 + &raid6_neonx4, 77 + &raid6_neonx8, 78 + #endif 73 79 NULL 74 80 }; 75 81
+58
lib/raid6/neon.c
··· 1 + /* 2 + * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics 3 + * 4 + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + 11 + #include <linux/raid/pq.h> 12 + 13 + #ifdef __KERNEL__ 14 + #include <asm/neon.h> 15 + #else 16 + #define kernel_neon_begin() 17 + #define kernel_neon_end() 18 + #define cpu_has_neon() (1) 19 + #endif 20 + 21 + /* 22 + * There are 2 reasons these wrappers are kept in a separate compilation unit 23 + * from the actual implementations in neonN.c (generated from neon.uc by 24 + * unroll.awk): 25 + * - the actual implementations use NEON intrinsics, and the GCC support header 26 + * (arm_neon.h) is not fully compatible (type wise) with the kernel; 27 + * - the neonN.c files are compiled with -mfpu=neon and optimization enabled, 28 + * and we have to make sure that we never use *any* NEON/VFP instructions 29 + * outside a kernel_neon_begin()/kernel_neon_end() pair. 30 + */ 31 + 32 + #define RAID6_NEON_WRAPPER(_n) \ 33 + static void raid6_neon ## _n ## _gen_syndrome(int disks, \ 34 + size_t bytes, void **ptrs) \ 35 + { \ 36 + void raid6_neon ## _n ## _gen_syndrome_real(int, \ 37 + unsigned long, void**); \ 38 + kernel_neon_begin(); \ 39 + raid6_neon ## _n ## _gen_syndrome_real(disks, \ 40 + (unsigned long)bytes, ptrs); \ 41 + kernel_neon_end(); \ 42 + } \ 43 + struct raid6_calls const raid6_neonx ## _n = { \ 44 + raid6_neon ## _n ## _gen_syndrome, \ 45 + raid6_have_neon, \ 46 + "neonx" #_n, \ 47 + 0 \ 48 + } 49 + 50 + static int raid6_have_neon(void) 51 + { 52 + return cpu_has_neon(); 53 + } 54 + 55 + RAID6_NEON_WRAPPER(1); 56 + RAID6_NEON_WRAPPER(2); 57 + RAID6_NEON_WRAPPER(4); 58 + RAID6_NEON_WRAPPER(8);
+80
lib/raid6/neon.uc
··· 1 + /* ----------------------------------------------------------------------- 2 + * 3 + * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions 4 + * 5 + * Copyright (C) 2012 Rob Herring 6 + * 7 + * Based on altivec.uc: 8 + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License as published by 12 + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 13 + * Boston MA 02111-1307, USA; either version 2 of the License, or 14 + * (at your option) any later version; incorporated herein by reference. 15 + * 16 + * ----------------------------------------------------------------------- */ 17 + 18 + /* 19 + * neon$#.c 20 + * 21 + * $#-way unrolled NEON intrinsics math RAID-6 instruction set 22 + * 23 + * This file is postprocessed using unroll.awk 24 + */ 25 + 26 + #include <arm_neon.h> 27 + 28 + typedef uint8x16_t unative_t; 29 + 30 + #define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) 31 + #define NSIZE sizeof(unative_t) 32 + 33 + /* 34 + * The SHLBYTE() operation shifts each byte left by 1, *not* 35 + * rolling over into the next byte 36 + */ 37 + static inline unative_t SHLBYTE(unative_t v) 38 + { 39 + return vshlq_n_u8(v, 1); 40 + } 41 + 42 + /* 43 + * The MASK() operation returns 0xFF in any byte for which the high 44 + * bit is 1, 0x00 for any byte for which the high bit is 0. 
45 + */ 46 + static inline unative_t MASK(unative_t v) 47 + { 48 + const uint8x16_t temp = NBYTES(0); 49 + return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp); 50 + } 51 + 52 + void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 53 + { 54 + uint8_t **dptr = (uint8_t **)ptrs; 55 + uint8_t *p, *q; 56 + int d, z, z0; 57 + 58 + register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; 59 + const unative_t x1d = NBYTES(0x1d); 60 + 61 + z0 = disks - 3; /* Highest data disk */ 62 + p = dptr[z0+1]; /* XOR parity */ 63 + q = dptr[z0+2]; /* RS syndrome */ 64 + 65 + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { 66 + wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]); 67 + for ( z = z0-1 ; z >= 0 ; z-- ) { 68 + wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]); 69 + wp$$ = veorq_u8(wp$$, wd$$); 70 + w2$$ = MASK(wq$$); 71 + w1$$ = SHLBYTE(wq$$); 72 + 73 + w2$$ = vandq_u8(w2$$, x1d); 74 + w1$$ = veorq_u8(w1$$, w2$$); 75 + wq$$ = veorq_u8(w1$$, wd$$); 76 + } 77 + vst1q_u8(&p[d+NSIZE*$$], wp$$); 78 + vst1q_u8(&q[d+NSIZE*$$], wq$$); 79 + } 80 + }
+25 -1
lib/raid6/test/Makefile
··· 22 22 IS_X86 = yes 23 23 endif 24 24 25 + ifeq ($(ARCH),arm) 26 + CFLAGS += -I../../../arch/arm/include -mfpu=neon 27 + HAS_NEON = yes 28 + endif 29 + ifeq ($(ARCH),arm64) 30 + CFLAGS += -I../../../arch/arm64/include 31 + HAS_NEON = yes 32 + endif 33 + 25 34 ifeq ($(IS_X86),yes) 26 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 27 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 28 37 gcc -c -x assembler - >&/dev/null && \ 29 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 39 + else ifeq ($(HAS_NEON),yes) 40 + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o 41 + CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 30 42 else 31 43 HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ 32 44 gcc -c -x c - >&/dev/null && \ ··· 66 54 67 55 raid6test: test.c raid6.a 68 56 $(CC) $(CFLAGS) -o raid6test $^ 57 + 58 + neon1.c: neon.uc ../unroll.awk 59 + $(AWK) ../unroll.awk -vN=1 < neon.uc > $@ 60 + 61 + neon2.c: neon.uc ../unroll.awk 62 + $(AWK) ../unroll.awk -vN=2 < neon.uc > $@ 63 + 64 + neon4.c: neon.uc ../unroll.awk 65 + $(AWK) ../unroll.awk -vN=4 < neon.uc > $@ 66 + 67 + neon8.c: neon.uc ../unroll.awk 68 + $(AWK) ../unroll.awk -vN=8 < neon.uc > $@ 69 69 70 70 altivec1.c: altivec.uc ../unroll.awk 71 71 $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ ··· 113 89 ./mktables > tables.c 114 90 115 91 clean: 116 - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test 92 + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test 117 93 118 94 spotless: clean 119 95 rm -f *~