Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.3 220 lines 5.4 kB view raw
1/* 2 * Copyright 2011 Tilera Corporation. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation, version 2. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 * NON INFRINGEMENT. See the GNU General Public License for 12 * more details. 13 */ 14 15#include <linux/types.h> 16#include <linux/string.h> 17#include <linux/module.h> 18#define __memcpy memcpy 19/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ 20 21/* Must be 8 bytes in size. */ 22#define word_t uint64_t 23 24#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128 25#error "Assumes 64 or 128 byte line size" 26#endif 27 28/* How many cache lines ahead should we prefetch? */ 29#define PREFETCH_LINES_AHEAD 3 30 31/* 32 * Provide "base versions" of load and store for the normal code path. 33 * The kernel provides other versions for userspace copies. 34 */ 35#define ST(p, v) (*(p) = (v)) 36#define LD(p) (*(p)) 37 38#ifndef USERCOPY_FUNC 39#define ST1 ST 40#define ST2 ST 41#define ST4 ST 42#define ST8 ST 43#define LD1 LD 44#define LD2 LD 45#define LD4 LD 46#define LD8 LD 47#define RETVAL dstv 48void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n) 49#else 50/* 51 * Special kernel version will provide implementation of the LDn/STn 52 * macros to return a count of uncopied bytes due to mm fault. 53 */ 54#define RETVAL 0 55int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) 56#endif 57{ 58 char *__restrict dst1 = (char *)dstv; 59 const char *__restrict src1 = (const char *)srcv; 60 const char *__restrict src1_end; 61 const char *__restrict prefetch; 62 word_t *__restrict dst8; /* 8-byte pointer to destination memory. */ 63 word_t final; /* Final bytes to write to trailing word, if any */ 64 long i; 65 66 if (n < 16) { 67 for (; n; n--) 68 ST1(dst1++, LD1(src1++)); 69 return RETVAL; 70 } 71 72 /* 73 * Locate the end of source memory we will copy. Don't 74 * prefetch past this. 75 */ 76 src1_end = src1 + n - 1; 77 78 /* Prefetch ahead a few cache lines, but not past the end. */ 79 prefetch = src1; 80 for (i = 0; i < PREFETCH_LINES_AHEAD; i++) { 81 __insn_prefetch(prefetch); 82 prefetch += CHIP_L2_LINE_SIZE(); 83 prefetch = (prefetch > src1_end) ? prefetch : src1; 84 } 85 86 /* Copy bytes until dst is word-aligned. */ 87 for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--) 88 ST1(dst1++, LD1(src1++)); 89 90 /* 8-byte pointer to destination memory. */ 91 dst8 = (word_t *)dst1; 92 93 if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) { 94 /* 95 * Misaligned copy. Copy 8 bytes at a time, but don't 96 * bother with other fanciness. 97 * 98 * TODO: Consider prefetching and using wh64 as well. 99 */ 100 101 /* Create an aligned src8. */ 102 const word_t *__restrict src8 = 103 (const word_t *)((uintptr_t)src1 & -sizeof(word_t)); 104 word_t b; 105 106 word_t a = LD8(src8++); 107 for (; n >= sizeof(word_t); n -= sizeof(word_t)) { 108 b = LD8(src8++); 109 a = __insn_dblalign(a, b, src1); 110 ST8(dst8++, a); 111 a = b; 112 } 113 114 if (n == 0) 115 return RETVAL; 116 117 b = ((const char *)src8 <= src1_end) ? *src8 : 0; 118 119 /* 120 * Final source bytes to write to trailing partial 121 * word, if any. 122 */ 123 final = __insn_dblalign(a, b, src1); 124 } else { 125 /* Aligned copy. */ 126 127 const word_t* __restrict src8 = (const word_t *)src1; 128 129 /* src8 and dst8 are both word-aligned. */ 130 if (n >= CHIP_L2_LINE_SIZE()) { 131 /* Copy until 'dst' is cache-line-aligned. */ 132 for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1); 133 n -= sizeof(word_t)) 134 ST8(dst8++, LD8(src8++)); 135 136 for (; n >= CHIP_L2_LINE_SIZE(); ) { 137 __insn_wh64(dst8); 138 139 /* 140 * Prefetch and advance to next line 141 * to prefetch, but don't go past the end 142 */ 143 __insn_prefetch(prefetch); 144 prefetch += CHIP_L2_LINE_SIZE(); 145 prefetch = (prefetch > src1_end) ? prefetch : 146 (const char *)src8; 147 148 /* 149 * Copy an entire cache line. Manually 150 * unrolled to avoid idiosyncracies of 151 * compiler unrolling. 152 */ 153#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; }) 154 COPY_WORD(0); 155 COPY_WORD(1); 156 COPY_WORD(2); 157 COPY_WORD(3); 158 COPY_WORD(4); 159 COPY_WORD(5); 160 COPY_WORD(6); 161 COPY_WORD(7); 162#if CHIP_L2_LINE_SIZE() == 128 163 COPY_WORD(8); 164 COPY_WORD(9); 165 COPY_WORD(10); 166 COPY_WORD(11); 167 COPY_WORD(12); 168 COPY_WORD(13); 169 COPY_WORD(14); 170 COPY_WORD(15); 171#elif CHIP_L2_LINE_SIZE() != 64 172# error Fix code that assumes particular L2 cache line sizes 173#endif 174 175 dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t); 176 src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t); 177 } 178 } 179 180 for (; n >= sizeof(word_t); n -= sizeof(word_t)) 181 ST8(dst8++, LD8(src8++)); 182 183 if (__builtin_expect(n == 0, 1)) 184 return RETVAL; 185 186 final = LD8(src8); 187 } 188 189 /* n != 0 if we get here. Write out any trailing bytes. */ 190 dst1 = (char *)dst8; 191 if (n & 4) { 192 ST4((uint32_t *)dst1, final); 193 dst1 += 4; 194 final >>= 32; 195 n &= 3; 196 } 197 if (n & 2) { 198 ST2((uint16_t *)dst1, final); 199 dst1 += 2; 200 final >>= 16; 201 n &= 1; 202 } 203 if (n) 204 ST1((uint8_t *)dst1, final); 205 206 return RETVAL; 207} 208 209 210#ifdef USERCOPY_FUNC 211#undef ST1 212#undef ST2 213#undef ST4 214#undef ST8 215#undef LD1 216#undef LD2 217#undef LD4 218#undef LD8 219#undef USERCOPY_FUNC 220#endif