/*
 * Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 * kernel os linux
 * at v2.6.31-rc4 293 lines 7.5 kB view raw
 */
1/* 2 * SpanDSP - a series of DSP components for telephony 3 * 4 * fir.h - General telephony FIR routines 5 * 6 * Written by Steve Underwood <steveu@coppice.org> 7 * 8 * Copyright (C) 2002 Steve Underwood 9 * 10 * All rights reserved. 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2, as 14 * published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 24 */ 25 26/*! \page fir_page FIR filtering 27\section fir_page_sec_1 What does it do? 28???. 29 30\section fir_page_sec_2 How does it work? 31???. 32*/ 33 34#if !defined(_FIR_H_) 35#define _FIR_H_ 36 37/* 38 Blackfin NOTES & IDEAS: 39 40 A simple dot product function is used to implement the filter. This performs 41 just one MAC/cycle which is inefficient but was easy to implement as a first 42 pass. The current Blackfin code also uses an unrolled form of the filter 43 history to avoid 0 length hardware loop issues. This is wasteful of 44 memory. 45 46 Ideas for improvement: 47 48 1/ Rewrite filter for dual MAC inner loop. The issue here is handling 49 history sample offsets that are 16 bit aligned - the dual MAC needs 50 32 bit aligmnent. There are some good examples in libbfdsp. 51 52 2/ Use the hardware circular buffer facility tohalve memory usage. 53 54 3/ Consider using internal memory. 55 56 Using less memory might also improve speed as cache misses will be 57 reduced. A drop in MIPs and memory approaching 50% should be 58 possible. 
59 60 The foreground and background filters currenlty use a total of 61 about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo 62 can. 63*/ 64 65#if defined(USE_MMX) || defined(USE_SSE2) 66#include "mmx.h" 67#endif 68 69/*! 70 16 bit integer FIR descriptor. This defines the working state for a single 71 instance of an FIR filter using 16 bit integer coefficients. 72*/ 73struct fir16_state_t { 74 int taps; 75 int curr_pos; 76 const int16_t *coeffs; 77 int16_t *history; 78}; 79 80/*! 81 32 bit integer FIR descriptor. This defines the working state for a single 82 instance of an FIR filter using 32 bit integer coefficients, and filtering 83 16 bit integer data. 84*/ 85struct fir32_state_t { 86 int taps; 87 int curr_pos; 88 const int32_t *coeffs; 89 int16_t *history; 90}; 91 92/*! 93 Floating point FIR descriptor. This defines the working state for a single 94 instance of an FIR filter using floating point coefficients and data. 95*/ 96struct fir_float_state_t { 97 int taps; 98 int curr_pos; 99 const float *coeffs; 100 float *history; 101}; 102 103static inline const int16_t *fir16_create(struct fir16_state_t *fir, 104 const int16_t *coeffs, int taps) 105{ 106 fir->taps = taps; 107 fir->curr_pos = taps - 1; 108 fir->coeffs = coeffs; 109#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__) 110 fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); 111#else 112 fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); 113#endif 114 return fir->history; 115} 116 117static inline void fir16_flush(struct fir16_state_t *fir) 118{ 119#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__) 120 memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); 121#else 122 memset(fir->history, 0, fir->taps * sizeof(int16_t)); 123#endif 124} 125 126static inline void fir16_free(struct fir16_state_t *fir) 127{ 128 kfree(fir->history); 129} 130 131#ifdef __bfin__ 132static inline int32_t dot_asm(short *x, short *y, int len) 133{ 134 int dot; 135 136 len--; 
137 138 __asm__("I0 = %1;\n\t" 139 "I1 = %2;\n\t" 140 "A0 = 0;\n\t" 141 "R0.L = W[I0++] || R1.L = W[I1++];\n\t" 142 "LOOP dot%= LC0 = %3;\n\t" 143 "LOOP_BEGIN dot%=;\n\t" 144 "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" 145 "LOOP_END dot%=;\n\t" 146 "A0 += R0.L*R1.L (IS);\n\t" 147 "R0 = A0;\n\t" 148 "%0 = R0;\n\t" 149 : "=&d"(dot) 150 : "a"(x), "a"(y), "a"(len) 151 : "I0", "I1", "A1", "A0", "R0", "R1" 152 ); 153 154 return dot; 155} 156#endif 157 158static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) 159{ 160 int32_t y; 161#if defined(USE_MMX) 162 int i; 163 union mmx_t *mmx_coeffs; 164 union mmx_t *mmx_hist; 165 166 fir->history[fir->curr_pos] = sample; 167 fir->history[fir->curr_pos + fir->taps] = sample; 168 169 mmx_coeffs = (union mmx_t *)fir->coeffs; 170 mmx_hist = (union mmx_t *)&fir->history[fir->curr_pos]; 171 i = fir->taps; 172 pxor_r2r(mm4, mm4); 173 /* 8 samples per iteration, so the filter must be a multiple of 8 long. */ 174 while (i > 0) { 175 movq_m2r(mmx_coeffs[0], mm0); 176 movq_m2r(mmx_coeffs[1], mm2); 177 movq_m2r(mmx_hist[0], mm1); 178 movq_m2r(mmx_hist[1], mm3); 179 mmx_coeffs += 2; 180 mmx_hist += 2; 181 pmaddwd_r2r(mm1, mm0); 182 pmaddwd_r2r(mm3, mm2); 183 paddd_r2r(mm0, mm4); 184 paddd_r2r(mm2, mm4); 185 i -= 8; 186 } 187 movq_r2r(mm4, mm0); 188 psrlq_i2r(32, mm0); 189 paddd_r2r(mm0, mm4); 190 movd_r2m(mm4, y); 191 emms(); 192#elif defined(USE_SSE2) 193 int i; 194 union xmm_t *xmm_coeffs; 195 union xmm_t *xmm_hist; 196 197 fir->history[fir->curr_pos] = sample; 198 fir->history[fir->curr_pos + fir->taps] = sample; 199 200 xmm_coeffs = (union xmm_t *)fir->coeffs; 201 xmm_hist = (union xmm_t *)&fir->history[fir->curr_pos]; 202 i = fir->taps; 203 pxor_r2r(xmm4, xmm4); 204 /* 16 samples per iteration, so the filter must be a multiple of 16 long. 
*/ 205 while (i > 0) { 206 movdqu_m2r(xmm_coeffs[0], xmm0); 207 movdqu_m2r(xmm_coeffs[1], xmm2); 208 movdqu_m2r(xmm_hist[0], xmm1); 209 movdqu_m2r(xmm_hist[1], xmm3); 210 xmm_coeffs += 2; 211 xmm_hist += 2; 212 pmaddwd_r2r(xmm1, xmm0); 213 pmaddwd_r2r(xmm3, xmm2); 214 paddd_r2r(xmm0, xmm4); 215 paddd_r2r(xmm2, xmm4); 216 i -= 16; 217 } 218 movdqa_r2r(xmm4, xmm0); 219 psrldq_i2r(8, xmm0); 220 paddd_r2r(xmm0, xmm4); 221 movdqa_r2r(xmm4, xmm0); 222 psrldq_i2r(4, xmm0); 223 paddd_r2r(xmm0, xmm4); 224 movd_r2m(xmm4, y); 225#elif defined(__bfin__) 226 fir->history[fir->curr_pos] = sample; 227 fir->history[fir->curr_pos + fir->taps] = sample; 228 y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], 229 fir->taps); 230#else 231 int i; 232 int offset1; 233 int offset2; 234 235 fir->history[fir->curr_pos] = sample; 236 237 offset2 = fir->curr_pos; 238 offset1 = fir->taps - offset2; 239 y = 0; 240 for (i = fir->taps - 1; i >= offset1; i--) 241 y += fir->coeffs[i] * fir->history[i - offset1]; 242 for (; i >= 0; i--) 243 y += fir->coeffs[i] * fir->history[i + offset2]; 244#endif 245 if (fir->curr_pos <= 0) 246 fir->curr_pos = fir->taps; 247 fir->curr_pos--; 248 return (int16_t) (y >> 15); 249} 250 251static inline const int16_t *fir32_create(struct fir32_state_t *fir, 252 const int32_t *coeffs, int taps) 253{ 254 fir->taps = taps; 255 fir->curr_pos = taps - 1; 256 fir->coeffs = coeffs; 257 fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); 258 return fir->history; 259} 260 261static inline void fir32_flush(struct fir32_state_t *fir) 262{ 263 memset(fir->history, 0, fir->taps * sizeof(int16_t)); 264} 265 266static inline void fir32_free(struct fir32_state_t *fir) 267{ 268 kfree(fir->history); 269} 270 271static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) 272{ 273 int i; 274 int32_t y; 275 int offset1; 276 int offset2; 277 278 fir->history[fir->curr_pos] = sample; 279 offset2 = fir->curr_pos; 280 offset1 = fir->taps - offset2; 281 y = 0; 
282 for (i = fir->taps - 1; i >= offset1; i--) 283 y += fir->coeffs[i] * fir->history[i - offset1]; 284 for (; i >= 0; i--) 285 y += fir->coeffs[i] * fir->history[i + offset2]; 286 if (fir->curr_pos <= 0) 287 fir->curr_pos = fir->taps; 288 fir->curr_pos--; 289 return (int16_t) (y >> 15); 290} 291 292#endif 293/*- End of file ------------------------------------------------------------*/