A modern Music Player Daemon based on Rockbox, the open-source, high-quality audio player
libadwaita audio rust zig deno mpris rockbox mpd
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. It also enables
hacks to reduce stack usage.

This should fix crashes on the Sansa Clip, although some
files will not play due to failing allocations in the
codec buffer.

Speeds up decoding of the following test files:

Test file         H300 (cf)    C200 (arm7tdmi)   ipod classic (arm9e)
16 kbps (silk)    14.28 MHz    4.00 MHz          2.61 MHz
64 kbps (celt)    4.09 MHz     8.08 MHz          6.24 MHz
128 kbps (celt)   1.93 MHz     8.83 MHz          6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58

+1604 -1047
-4
lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
··· 65 65 do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 66 66 (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) 67 67 68 - # define C_MUL4(m,a,b) \ 69 - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ 70 - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) 71 - 72 68 # define C_MULBYSCALAR( c, s ) \ 73 69 do{ (c).r = S_MUL( (c).r , s ) ;\ 74 70 (c).i = S_MUL( (c).i , s ) ; }while(0)
+31 -3
lib/rbcodec/codecs/libopus/celt/arch.h
··· 69 69 70 70 #define IMUL32(a,b) ((a)*(b)) 71 71 72 - #define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ 73 - #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ 72 + #define ABS(x) ((x) < 0 ? (-(x)) : (x)) 74 73 #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ 75 74 #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ 76 - #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ 77 75 #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ 78 76 #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ 79 77 #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ ··· 108 106 #define SCALEIN(a) (a) 109 107 #define SCALEOUT(a) (a) 110 108 109 + #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) 110 + #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) 111 + 112 + static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 113 + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; 114 + } 115 + 111 116 #ifdef FIXED_DEBUG 112 117 #include "fixed_debug.h" 113 118 #else ··· 139 144 typedef float celt_norm; 140 145 typedef float celt_ener; 141 146 147 + #ifdef FLOAT_APPROX 148 + /* This code should reliably detect NaN/inf even when -ffast-math is used. 149 + Assumes IEEE 754 format. */ 150 + static OPUS_INLINE int celt_isnan(float x) 151 + { 152 + union {float f; opus_uint32 i;} in; 153 + in.f = x; 154 + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; 155 + } 156 + #else 157 + #ifdef __FAST_MATH__ 158 + #error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. 
NaN) input 159 + #endif 160 + #define celt_isnan(x) ((x)!=(x)) 161 + #endif 162 + 142 163 #define Q15ONE 1.0f 143 164 144 165 #define NORM_SCALING 1.f ··· 147 168 #define VERY_SMALL 1e-30f 148 169 #define VERY_LARGE16 1e15f 149 170 #define Q15_ONE ((opus_val16)1.f) 171 + 172 + /* This appears to be the same speed as C99's fabsf() but it's more portable. */ 173 + #define ABS16(x) ((float)fabs(x)) 174 + #define ABS32(x) ((float)fabs(x)) 150 175 151 176 #define QCONST16(x,bits) (x) 152 177 #define QCONST32(x,bits) (x) ··· 186 211 #define MULT32_32_Q31(a,b) ((a)*(b)) 187 212 188 213 #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) 214 + #define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) 189 215 190 216 #define MULT16_16_Q11_32(a,b) ((a)*(b)) 191 217 #define MULT16_16_Q11(a,b) ((a)*(b)) ··· 202 228 203 229 #define SCALEIN(a) ((a)*CELT_SIG_SCALE) 204 230 #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) 231 + 232 + #define SIG2WORD16(x) (x) 205 233 206 234 #endif /* !FIXED_POINT */ 207 235
+4
lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
··· 68 68 #undef MAC16_32_Q15 69 69 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) 70 70 71 + /** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. 72 + Result fits in 32 bits. */ 73 + #undef MAC16_32_Q16 74 + #define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) 71 75 72 76 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ 73 77 #undef MULT32_32_Q31
+35
lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
··· 82 82 } 83 83 #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) 84 84 85 + /** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. 86 + Result fits in 32 bits. */ 87 + #undef MAC16_32_Q16 88 + static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, 89 + opus_val32 b) 90 + { 91 + int res; 92 + __asm__( 93 + "#MAC16_32_Q16\n\t" 94 + "smlawb %0, %1, %2, %3;\n" 95 + : "=r"(res) 96 + : "r"(b), "r"(a), "r"(c) 97 + ); 98 + return res; 99 + } 100 + #define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) 101 + 85 102 /** 16x16 multiply-add where the result fits in 32 bits */ 86 103 #undef MAC16_16 87 104 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, ··· 112 129 return res; 113 130 } 114 131 #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) 132 + 133 + #ifdef OPUS_ARM_INLINE_MEDIA 134 + 135 + #undef SIG2WORD16 136 + static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) 137 + { 138 + celt_sig res; 139 + __asm__( 140 + "#SIG2WORD16\n\t" 141 + "ssat %0, #16, %1, ASR #12\n\t" 142 + : "=r"(res) 143 + : "r"(x+2048) 144 + ); 145 + return EXTRACT16(res); 146 + } 147 + #define SIG2WORD16(x) (SIG2WORD16_armv6(x)) 148 + 149 + #endif /* OPUS_ARM_INLINE_MEDIA */ 115 150 116 151 #endif
+111 -104
lib/rbcodec/codecs/libopus/celt/bands.c
··· 93 93 #if 0 94 94 #ifdef FIXED_POINT 95 95 /* Compute the amplitude (sqrt energy) in each of the bands */ 96 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) 96 + void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) 97 97 { 98 98 int i, c, N; 99 99 const opus_int16 *eBands = m->eBands; 100 - N = M*m->shortMdctSize; 100 + N = m->shortMdctSize<<LM; 101 101 c=0; do { 102 102 for (i=0;i<end;i++) 103 103 { ··· 105 105 opus_val32 maxval=0; 106 106 opus_val32 sum = 0; 107 107 108 - j=M*eBands[i]; do { 109 - maxval = MAX32(maxval, X[j+c*N]); 110 - maxval = MAX32(maxval, -X[j+c*N]); 111 - } while (++j<M*eBands[i+1]); 112 - 108 + maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); 113 109 if (maxval > 0) 114 110 { 115 - int shift = celt_ilog2(maxval)-10; 116 - j=M*eBands[i]; do { 117 - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), 118 - EXTRACT16(VSHR32(X[j+c*N],shift))); 119 - } while (++j<M*eBands[i+1]); 111 + int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); 112 + j=eBands[i]<<LM; 113 + if (shift>0) 114 + { 115 + do { 116 + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), 117 + EXTRACT16(SHR32(X[j+c*N],shift))); 118 + } while (++j<eBands[i+1]<<LM); 119 + } else { 120 + do { 121 + sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), 122 + EXTRACT16(SHL32(X[j+c*N],-shift))); 123 + } while (++j<eBands[i+1]<<LM); 124 + } 120 125 /* We're adding one here to ensure the normalized band isn't larger than unity norm */ 121 126 bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); 122 127 } else { ··· 151 156 152 157 #else /* FIXED_POINT */ 153 158 /* Compute the amplitude (sqrt energy) in each of the bands */ 154 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) 159 + void compute_band_energies(const CELTMode *m, const 
celt_sig *X, celt_ener *bandE, int end, int C, int LM) 155 160 { 156 161 int i, c, N; 157 162 const opus_int16 *eBands = m->eBands; 158 - N = M*m->shortMdctSize; 163 + N = m->shortMdctSize<<LM; 159 164 c=0; do { 160 165 for (i=0;i<end;i++) 161 166 { 162 - int j; 163 - opus_val32 sum = 1e-27f; 164 - for (j=M*eBands[i];j<M*eBands[i+1];j++) 165 - sum += X[j+c*N]*X[j+c*N]; 167 + opus_val32 sum; 168 + sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); 166 169 bandE[i+c*m->nbEBands] = celt_sqrt(sum); 167 170 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ 168 171 } ··· 192 195 193 196 /* De-normalise the energy to produce the synthesis from the unit-energy bands */ 194 197 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, 195 - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) 198 + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, 199 + int end, int M, int downsample, int silence) 196 200 { 197 - int i, c, N; 201 + int i, N; 202 + int bound; 203 + celt_sig * OPUS_RESTRICT f; 204 + const celt_norm * OPUS_RESTRICT x; 198 205 const opus_int16 *eBands = m->eBands; 199 206 N = M*m->shortMdctSize; 200 - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); 201 - c=0; do { 202 - celt_sig * OPUS_RESTRICT f; 203 - const celt_norm * OPUS_RESTRICT x; 204 - f = freq+c*N; 205 - x = X+c*N+M*eBands[start]; 206 - for (i=0;i<M*eBands[start];i++) 207 - *f++ = 0; 208 - for (i=start;i<end;i++) 209 - { 210 - int j, band_end; 211 - opus_val16 g; 212 - opus_val16 lg; 207 + bound = M*eBands[end]; 208 + if (downsample!=1) 209 + bound = IMIN(bound, N/downsample); 210 + if (silence) 211 + { 212 + bound = 0; 213 + start = end = 0; 214 + } 215 + f = freq; 216 + x = X+M*eBands[start]; 217 + for (i=0;i<M*eBands[start];i++) 218 + *f++ = 0; 219 + for (i=start;i<end;i++) 220 + { 221 + int j, band_end; 222 + opus_val16 g; 223 + opus_val16 
lg; 213 224 #ifdef FIXED_POINT 214 - int shift; 225 + int shift; 215 226 #endif 216 - j=M*eBands[i]; 217 - band_end = M*eBands[i+1]; 218 - lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); 227 + j=M*eBands[i]; 228 + band_end = M*eBands[i+1]; 229 + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); 219 230 #ifndef FIXED_POINT 220 - g = celt_exp2(lg); 231 + g = celt_exp2(lg); 221 232 #else 222 - /* Handle the integer part of the log energy */ 223 - shift = 16-(lg>>DB_SHIFT); 224 - if (shift>31) 233 + /* Handle the integer part of the log energy */ 234 + shift = 16-(lg>>DB_SHIFT); 235 + if (shift>31) 236 + { 237 + shift=0; 238 + g=0; 239 + } else { 240 + /* Handle the fractional part. */ 241 + g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); 242 + } 243 + /* Handle extreme gains with negative shift. */ 244 + if (shift<0) 245 + { 246 + /* For shift < -2 we'd be likely to overflow, so we're capping 247 + the gain here. This shouldn't happen unless the bitstream is 248 + already corrupted. */ 249 + if (shift < -2) 225 250 { 226 - shift=0; 227 - g=0; 228 - } else { 229 - /* Handle the fractional part. */ 230 - g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); 251 + g = 32767; 252 + shift = -2; 231 253 } 232 - /* Handle extreme gains with negative shift. */ 233 - if (shift<0) 234 - { 235 - /* For shift < -2 we'd be likely to overflow, so we're capping 236 - the gain here. This shouldn't happen unless the bitstream is 237 - already corrupted. 
*/ 238 - if (shift < -2) 239 - { 240 - g = 32767; 241 - shift = -2; 242 - } 243 - do { 244 - *f++ = SHL32(MULT16_16(*x++, g), -shift); 245 - } while (++j<band_end); 246 - } else 254 + do { 255 + *f++ = SHL32(MULT16_16(*x++, g), -shift); 256 + } while (++j<band_end); 257 + } else 247 258 #endif 248 259 /* Be careful of the fixed-point "else" just above when changing this code */ 249 260 do { 250 261 *f++ = SHR32(MULT16_16(*x++, g), shift); 251 262 } while (++j<band_end); 252 - } 253 - celt_assert(start <= end); 254 - for (i=M*eBands[end];i<N;i++) 255 - *f++ = 0; 256 - } while (++c<C); 263 + } 264 + celt_assert(start <= end); 265 + OPUS_CLEAR(&freq[bound], N-bound); 257 266 } 258 267 259 268 /* This prevents energy collapse for transients with multiple short MDCTs */ 260 269 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 261 - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, 262 - opus_val16 *prev2logE, int *pulses, opus_uint32 seed) 270 + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 271 + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed) 263 272 { 264 273 int c, i, j, k; 265 274 for (i=start;i<end;i++) ··· 274 283 275 284 N0 = m->eBands[i+1]-m->eBands[i]; 276 285 /* depth in 1/8 bits */ 277 - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); 286 + celt_assert(pulses[i]>=0); 287 + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; 278 288 279 289 #ifdef FIXED_POINT 280 290 thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); ··· 352 362 } 353 363 } 354 364 355 - static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) 365 + static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) 356 366 { 357 367 int i = bandID; 358 368 int j; ··· 372 382 celt_norm r, l; 373 383 l 
= X[j]; 374 384 r = Y[j]; 375 - X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); 385 + X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); 376 386 /* Side is not encoded, no need to calculate */ 377 387 } 378 388 } 379 389 380 - static void stereo_split(celt_norm *X, celt_norm *Y, int N) 390 + static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) 381 391 { 382 392 int j; 383 393 for (j=0;j<N;j++) 384 394 { 385 - celt_norm r, l; 386 - l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); 387 - r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); 388 - X[j] = l+r; 389 - Y[j] = r-l; 395 + opus_val32 r, l; 396 + l = MULT16_16(QCONST16(.70710678f, 15), X[j]); 397 + r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); 398 + X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); 399 + Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); 390 400 } 391 401 } 392 402 393 - static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) 403 + static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) 394 404 { 395 405 int j; 396 406 opus_val32 xp=0, side=0; ··· 411 421 Er = MULT16_16(mid2, mid2) + side + 2*xp; 412 422 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) 413 423 { 414 - for (j=0;j<N;j++) 415 - Y[j] = X[j]; 424 + OPUS_COPY(Y, X, N); 416 425 return; 417 426 } 418 427 ··· 436 445 { 437 446 celt_norm r, l; 438 447 /* Apply mid scaling (side is already scaled) */ 439 - l = MULT16_16_Q15(mid, X[j]); 448 + l = MULT16_16_P15(mid, X[j]); 440 449 r = Y[j]; 441 450 X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); 442 451 Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); ··· 445 454 446 455 #if 0 447 456 /* Decide whether we should spread the pulses in the current frame */ 448 - int spreading_decision(const CELTMode *m, celt_norm *X, int *average, 457 + int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 449 458 int last_decision, int *hf_average, 
int *tapset_decision, int update_hf, 450 459 int end, int C, int M) 451 460 { ··· 466 475 { 467 476 int j, N, tmp=0; 468 477 int tcount[3] = {0,0,0}; 469 - celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; 478 + const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; 470 479 N = M*(eBands[i+1]-eBands[i]); 471 480 if (N<=8) 472 481 continue; ··· 486 495 487 496 /* Only include four last bands (8 kHz and up) */ 488 497 if (i>m->nbEBands-4) 489 - hf_sum += 32*(tcount[1]+tcount[0])/N; 498 + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); 490 499 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); 491 500 sum += tmp*256; 492 501 nbBands++; ··· 496 505 if (update_hf) 497 506 { 498 507 if (hf_sum) 499 - hf_sum /= C*(4-m->nbEBands+end); 508 + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); 500 509 *hf_average = (*hf_average+hf_sum)>>1; 501 510 hf_sum = *hf_average; 502 511 if (*tapset_decision==2) ··· 512 521 } 513 522 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ 514 523 celt_assert(nbBands>0); /* end has to be non-zero */ 515 - sum /= nbBands; 524 + celt_assert(sum>=0); 525 + sum = celt_udiv(sum, nbBands); 516 526 /* Recursive averaging */ 517 527 sum = (sum+*average)>>1; 518 528 *average = sum; ··· 571 581 for (j=0;j<N0;j++) 572 582 tmp[i*N0+j] = X[j*stride+i]; 573 583 } 574 - for (j=0;j<N;j++) 575 - X[j] = tmp[j]; 584 + OPUS_COPY(X, tmp, N); 576 585 RESTORE_STACK; 577 586 } 578 587 ··· 595 604 for (j=0;j<N0;j++) 596 605 tmp[j*stride+i] = X[i*N0+j]; 597 606 } 598 - for (j=0;j<N;j++) 599 - X[j] = tmp[j]; 607 + OPUS_COPY(X, tmp, N); 600 608 RESTORE_STACK; 601 609 } 602 610 ··· 607 615 for (i=0;i<stride;i++) 608 616 for (j=0;j<N0;j++) 609 617 { 610 - celt_norm tmp1, tmp2; 611 - tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); 612 - tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); 613 - X[stride*2*j+i] = tmp1 + tmp2; 614 - X[stride*(2*j+1)+i] = tmp1 - tmp2; 618 + opus_val32 tmp1, tmp2; 619 + tmp1 = 
MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); 620 + tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); 621 + X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); 622 + X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); 615 623 } 616 624 } 617 625 ··· 626 634 /* The upper limit ensures that in a stereo split with itheta==16384, we'll 627 635 always have enough bits left over to code at least one pulse in the 628 636 side; otherwise it would collapse, since it doesn't get folded. */ 629 - qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); 637 + qb = celt_sudiv(b+N2*offset, N2); 638 + qb = IMIN(b-pulse_cap-(4<<BITRES), qb); 630 639 631 640 qb = IMIN(8<<BITRES, qb); 632 641 ··· 773 782 ec_dec_update(ec, fl, fl+fs, ft); 774 783 } 775 784 } 776 - itheta = (opus_int32)itheta*16384/qn; 785 + celt_assert(itheta>=0); 786 + itheta = celt_udiv((opus_int32)itheta*16384, qn); 777 787 if (encode && stereo) 778 788 { 779 789 if (itheta==0) ··· 1025 1035 fill &= cm_mask; 1026 1036 if (!fill) 1027 1037 { 1028 - for (j=0;j<N;j++) 1029 - X[j] = 0; 1038 + OPUS_CLEAR(X, N); 1030 1039 } else { 1031 1040 if (lowband == NULL) 1032 1041 { ··· 1088 1097 1089 1098 longBlocks = B0==1; 1090 1099 1091 - N_B /= B; 1100 + N_B = celt_udiv(N_B, B); 1092 1101 1093 1102 /* Special case for one sample */ 1094 1103 if (N==1) ··· 1102 1111 1103 1112 if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) 1104 1113 { 1105 - int j; 1106 - for (j=0;j<N;j++) 1107 - lowband_scratch[j] = lowband[j]; 1114 + OPUS_COPY(lowband_scratch, lowband, N); 1108 1115 lowband = lowband_scratch; 1109 1116 } 1110 1117 ··· 1432 1439 ctx.remaining_bits = remaining_bits; 1433 1440 if (i <= codedBands-1) 1434 1441 { 1435 - curr_balance = balance / IMIN(3, codedBands-i); 1442 + curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); 1436 1443 b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); 1437 1444 } else { 1438 1445 b = 0;
+6 -5
lib/rbcodec/codecs/libopus/celt/bands.h
··· 41 41 * @param X Spectrum 42 42 * @param bandE Square root of the energy for each band (returned) 43 43 */ 44 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); 44 + void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); 45 45 46 46 /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ 47 47 ··· 59 59 * @param bandE Square root of the energy for each band 60 60 */ 61 61 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, 62 - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M); 62 + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, 63 + int end, int M, int downsample, int silence); 63 64 64 65 #define SPREAD_NONE (0) 65 66 #define SPREAD_LIGHT (1) 66 67 #define SPREAD_NORMAL (2) 67 68 #define SPREAD_AGGRESSIVE (3) 68 69 69 - int spreading_decision(const CELTMode *m, celt_norm *X, int *average, 70 + int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 70 71 int last_decision, int *hf_average, int *tapset_decision, int update_hf, 71 72 int end, int C, int M); 72 73 ··· 104 105 opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); 105 106 106 107 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 107 - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, 108 - opus_val16 *prev2logE, int *pulses, opus_uint32 seed); 108 + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 109 + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed); 109 110 110 111 opus_uint32 celt_lcg_rand(opus_uint32 seed); 111 112
+73 -6
lib/rbcodec/codecs/libopus/celt/celt.c
··· 54 54 #define PACKAGE_VERSION "unknown" 55 55 #endif 56 56 57 + #if defined(MIPSr1_ASM) 58 + #include "mips/celt_mipsr1.h" 59 + #endif 60 + 57 61 58 62 int resampling_factor(opus_int32 rate) 59 63 { ··· 86 90 } 87 91 88 92 #ifndef OVERRIDE_COMB_FILTER_CONST 93 + /* This version should be faster on ARM */ 94 + #ifdef OPUS_ARM_ASM 95 + static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 96 + opus_val16 g10, opus_val16 g11, opus_val16 g12) 97 + { 98 + opus_val32 x0, x1, x2, x3, x4; 99 + int i; 100 + x4 = SHL32(x[-T-2], 1); 101 + x3 = SHL32(x[-T-1], 1); 102 + x2 = SHL32(x[-T], 1); 103 + x1 = SHL32(x[-T+1], 1); 104 + for (i=0;i<N-4;i+=5) 105 + { 106 + opus_val32 t; 107 + x0=SHL32(x[i-T+2],1); 108 + t = MAC16_32_Q16(x[i], g10, x2); 109 + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 110 + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 111 + y[i] = t; 112 + x4=SHL32(x[i-T+3],1); 113 + t = MAC16_32_Q16(x[i+1], g10, x1); 114 + t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); 115 + t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); 116 + y[i+1] = t; 117 + x3=SHL32(x[i-T+4],1); 118 + t = MAC16_32_Q16(x[i+2], g10, x0); 119 + t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); 120 + t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); 121 + y[i+2] = t; 122 + x2=SHL32(x[i-T+5],1); 123 + t = MAC16_32_Q16(x[i+3], g10, x4); 124 + t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); 125 + t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); 126 + y[i+3] = t; 127 + x1=SHL32(x[i-T+6],1); 128 + t = MAC16_32_Q16(x[i+4], g10, x3); 129 + t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); 130 + t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); 131 + y[i+4] = t; 132 + } 133 + #ifdef CUSTOM_MODES 134 + for (;i<N;i++) 135 + { 136 + opus_val32 t; 137 + x0=SHL32(x[i-T+2],1); 138 + t = MAC16_32_Q16(x[i], g10, x2); 139 + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 140 + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 141 + y[i] = t; 142 + x4=x3; 143 + x3=x2; 144 + x2=x1; 145 + x1=x0; 146 + } 147 + #endif 148 + } 149 + #else 89 150 static void comb_filter_const(opus_val32 *y, opus_val32 
*x, int T, int N, 90 151 opus_val16 g10, opus_val16 g11, opus_val16 g12) 91 152 { ··· 110 171 111 172 } 112 173 #endif 174 + #endif 113 175 176 + #ifndef OVERRIDE_comb_filter 114 177 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 115 178 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 116 179 const opus_val16 *window, int overlap) ··· 131 194 OPUS_MOVE(y, x, N); 132 195 return; 133 196 } 134 - g00 = MULT16_16_Q15(g0, gains[tapset0][0]); 135 - g01 = MULT16_16_Q15(g0, gains[tapset0][1]); 136 - g02 = MULT16_16_Q15(g0, gains[tapset0][2]); 137 - g10 = MULT16_16_Q15(g1, gains[tapset1][0]); 138 - g11 = MULT16_16_Q15(g1, gains[tapset1][1]); 139 - g12 = MULT16_16_Q15(g1, gains[tapset1][2]); 197 + g00 = MULT16_16_P15(g0, gains[tapset0][0]); 198 + g01 = MULT16_16_P15(g0, gains[tapset0][1]); 199 + g02 = MULT16_16_P15(g0, gains[tapset0][2]); 200 + g10 = MULT16_16_P15(g1, gains[tapset1][0]); 201 + g11 = MULT16_16_P15(g1, gains[tapset1][1]); 202 + g12 = MULT16_16_P15(g1, gains[tapset1][2]); 140 203 x1 = x[-T1+1]; 141 204 x2 = x[-T1 ]; 142 205 x3 = x[-T1-1]; 143 206 x4 = x[-T1-2]; 207 + /* If the filter didn't change, we don't need the overlap */ 208 + if (g0==g1 && T0==T1 && tapset0==tapset1) 209 + overlap=0; 144 210 for (i=0;i<overlap;i++) 145 211 { 146 212 opus_val16 f; ··· 170 236 /* Compute the part with the constant filter. */ 171 237 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); 172 238 } 239 + #endif /* OVERRIDE_comb_filter */ 173 240 174 241 const signed char tf_select_table[4][8] = { 175 242 {0, -1, 0, -1, 0,-1, 0,-1},
+6 -5
lib/rbcodec/codecs/libopus/celt/celt.h
··· 134 134 135 135 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); 136 136 137 - int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); 137 + int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, 138 + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); 138 139 139 140 #define celt_encoder_ctl opus_custom_encoder_ctl 140 141 #define celt_decoder_ctl opus_custom_decoder_ctl ··· 205 206 void init_caps(const CELTMode *m,int *cap,int LM,int C); 206 207 207 208 #ifdef RESYNTH 208 - void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch); 209 - 210 - void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, 211 - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM); 209 + void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem); 210 + void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], 211 + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, 212 + int LM, int downsample, int silence); 212 213 #endif 213 214 214 215 #ifdef __cplusplus
+180 -145
lib/rbcodec/codecs/libopus/celt/celt_decoder.c
··· 51 51 #include "celt_lpc.h" 52 52 #include "vq.h" 53 53 54 + #if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) 55 + #define NORM_ALIASING_HACK 56 + #endif 54 57 /**********************************************************************/ 55 58 /* */ 56 59 /* DECODER */ ··· 175 178 } 176 179 #endif /* CUSTOM_MODES */ 177 180 178 - static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) 179 - { 180 - #ifdef FIXED_POINT 181 - x = PSHR32(x, SIG_SHIFT); 182 - x = MAX32(x, -32768); 183 - x = MIN32(x, 32767); 184 - return EXTRACT16(x); 185 - #else 186 - return (opus_val16)x; 187 - #endif 188 - } 189 181 190 182 #ifndef RESYNTH 191 183 static 192 184 #endif 193 - void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) 185 + void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, 186 + celt_sig *mem, int accum) 194 187 { 195 188 int c; 196 189 int Nd; 197 190 int apply_downsampling=0; 198 191 opus_val16 coef0; 199 - 192 + VARDECL(celt_sig, scratch); 193 + SAVE_STACK; 194 + #ifndef FIXED_POINT 195 + (void)accum; 196 + celt_assert(accum==0); 197 + #endif 198 + ALLOC(scratch, N, celt_sig); 200 199 coef0 = coef[0]; 201 200 Nd = N/downsample; 202 201 c=0; do { ··· 234 233 apply_downsampling=1; 235 234 } else { 236 235 /* Shortcut for the standard (non-custom modes) case */ 237 - for (j=0;j<N;j++) 236 + #ifdef FIXED_POINT 237 + if (accum) 238 238 { 239 - celt_sig tmp = x[j] + m + VERY_SMALL; 240 - m = MULT16_32_Q15(coef0, tmp); 241 - y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 239 + for (j=0;j<N;j++) 240 + { 241 + celt_sig tmp = x[j] + m + VERY_SMALL; 242 + m = MULT16_32_Q15(coef0, tmp); 243 + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); 244 + } 245 + } else 246 + #endif 247 + { 248 + for (j=0;j<N;j++) 249 + { 250 + celt_sig tmp = x[j] + m + VERY_SMALL; 251 + m = MULT16_32_Q15(coef0, tmp); 252 + y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 253 
+ } 242 254 } 243 255 } 244 256 mem[c] = m; ··· 246 258 if (apply_downsampling) 247 259 { 248 260 /* Perform down-sampling */ 249 - for (j=0;j<Nd;j++) 250 - y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); 261 + #ifdef FIXED_POINT 262 + if (accum) 263 + { 264 + for (j=0;j<Nd;j++) 265 + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); 266 + } else 267 + #endif 268 + { 269 + for (j=0;j<Nd;j++) 270 + y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); 271 + } 251 272 } 252 273 } while (++c<C); 274 + RESTORE_STACK; 253 275 } 254 276 255 - /** Compute the IMDCT and apply window for all sub-frames and 256 - all channels in a frame */ 257 277 #ifndef RESYNTH 258 278 static 259 279 #endif 260 - void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, 261 - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) 280 + void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], 281 + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, 282 + int LM, int downsample, int silence) 262 283 { 263 - int b, c; 284 + int c, i; 285 + int M; 286 + int b; 264 287 int B; 265 - int N; 288 + int N, NB; 266 289 int shift; 267 - const int overlap = OVERLAP(mode); 290 + int nbEBands; 291 + int overlap; 292 + VARDECL(celt_sig, freq); 293 + SAVE_STACK; 294 + 295 + overlap = mode->overlap; 296 + nbEBands = mode->nbEBands; 297 + N = mode->shortMdctSize<<LM; 298 + ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */ 299 + M = 1<<LM; 268 300 269 - if (shortBlocks) 301 + if (isTransient) 270 302 { 271 - B = shortBlocks; 272 - N = mode->shortMdctSize; 303 + B = M; 304 + NB = mode->shortMdctSize; 273 305 shift = mode->maxLM; 274 306 } else { 275 307 B = 1; 276 - N = mode->shortMdctSize<<LM; 308 + NB = mode->shortMdctSize<<LM; 277 309 shift = mode->maxLM-LM; 278 310 } 279 - c=0; do { 280 - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ 311 + 312 + if (CC==2&&C==1) 313 + 
{ 314 + /* Copying a mono streams to two channels */ 315 + celt_sig *freq2; 316 + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, 317 + downsample, silence); 318 + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ 319 + freq2 = out_syn[1]+overlap/2; 320 + OPUS_COPY(freq2, freq, N); 321 + for (b=0;b<B;b++) 322 + clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 281 323 for (b=0;b<B;b++) 282 - clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); 283 - } while (++c<C); 324 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); 325 + } else if (CC==1&&C==2) 326 + { 327 + /* Downmixing a stereo stream to mono */ 328 + celt_sig *freq2; 329 + freq2 = out_syn[0]+overlap/2; 330 + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, 331 + downsample, silence); 332 + /* Use the output buffer as temp array before downmixing. */ 333 + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, 334 + downsample, silence); 335 + for (i=0;i<N;i++) 336 + freq[i] = HALF32(ADD32(freq[i],freq2[i])); 337 + for (b=0;b<B;b++) 338 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 339 + } else { 340 + /* Normal case (mono or stereo) */ 341 + c=0; do { 342 + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, 343 + downsample, silence); 344 + for (b=0;b<B;b++) 345 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); 346 + } while (++c<CC); 347 + } 348 + RESTORE_STACK; 284 349 } 285 350 286 351 static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) ··· 330 395 pitch of 480 Hz. 
*/ 331 396 #define PLC_PITCH_LAG_MIN (100) 332 397 333 - static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) 398 + static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) 399 + { 400 + int pitch_index; 401 + VARDECL( opus_val16, lp_pitch_buf ); 402 + SAVE_STACK; 403 + ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); 404 + pitch_downsample(decode_mem, lp_pitch_buf, 405 + DECODE_BUFFER_SIZE, C, arch); 406 + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, 407 + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, 408 + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); 409 + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; 410 + RESTORE_STACK; 411 + return pitch_index; 412 + } 413 + 414 + static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) 334 415 { 335 416 int c; 336 417 int i; ··· 343 424 int nbEBands; 344 425 int overlap; 345 426 int start; 346 - int downsample; 347 427 int loss_count; 348 428 int noise_based; 349 429 const opus_int16 *eBands; 350 - VARDECL(celt_sig, scratch); 351 430 SAVE_STACK; 352 431 353 432 mode = st->mode; ··· 367 446 368 447 loss_count = st->loss_count; 369 448 start = st->start; 370 - downsample = st->downsample; 371 449 noise_based = loss_count >= 5 || start != 0; 372 - ALLOC(scratch, noise_based?N*C:N, celt_sig); 373 450 if (noise_based) 374 451 { 375 452 /* Noise-based PLC/CNG */ 376 - celt_sig *freq; 453 + #ifdef NORM_ALIASING_HACK 454 + celt_norm *X; 455 + #else 377 456 VARDECL(celt_norm, X); 457 + #endif 378 458 opus_uint32 seed; 379 459 opus_val16 *plcLogE; 380 460 int end; ··· 383 463 end = st->end; 384 464 effEnd = IMAX(start, IMIN(end, mode->effEBands)); 385 465 386 - /* Share the interleaved signal MDCT coefficient buffer with the 387 - deemphasis scratch buffer. 
*/ 388 - freq = scratch; 466 + #ifdef NORM_ALIASING_HACK 467 + /* This is an ugly hack that breaks aliasing rules and would be easily broken, 468 + but it saves almost 4kB of stack. */ 469 + X = (celt_norm*)(out_syn[C-1]+overlap/2); 470 + #else 389 471 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 472 + #endif 390 473 391 474 if (loss_count >= 5) 392 475 plcLogE = backgroundLogE; ··· 421 504 } 422 505 st->rng = seed; 423 506 424 - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM); 425 - 426 - c=0; do { 427 - int bound = eBands[effEnd]<<LM; 428 - if (downsample!=1) 429 - bound = IMIN(bound, N/downsample); 430 - for (i=bound;i<N;i++) 431 - freq[c*N+i] = 0; 432 - } while (++c<C); 433 507 c=0; do { 434 508 OPUS_MOVE(decode_mem[c], decode_mem[c]+N, 435 509 DECODE_BUFFER_SIZE-N+(overlap>>1)); 436 510 } while (++c<C); 437 - compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); 511 + 512 + celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0); 438 513 } else { 439 514 /* Pitch-based PLC */ 440 515 const opus_val16 *window; ··· 445 520 446 521 if (loss_count == 0) 447 522 { 448 - VARDECL( opus_val16, lp_pitch_buf ); 449 - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); 450 - pitch_downsample(decode_mem, lp_pitch_buf, 451 - DECODE_BUFFER_SIZE, C, st->arch); 452 - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, 453 - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, 454 - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); 455 - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; 456 - st->last_pitch_index = pitch_index; 523 + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); 457 524 } else { 458 525 pitch_index = st->last_pitch_index; 459 526 fade = QCONST16(.8f,15); ··· 644 711 } while (++c<C); 645 712 } 646 713 647 - deemphasis(out_syn, pcm, N, C, downsample, 648 - mode->preemph, st->preemph_memD, scratch); 649 - 650 714 st->loss_count = loss_count+1; 651 
715 652 716 RESTORE_STACK; 653 717 } 654 718 655 - #define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ 656 - static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ 657 - static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ 658 - int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) 719 + int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, 720 + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) 659 721 { 660 722 int c, i, N; 661 723 int spread_decision; 662 724 opus_int32 bits; 663 725 ec_dec _dec; 664 - VARDECL(celt_sig, freq); 726 + #ifdef NORM_ALIASING_HACK 727 + celt_norm *X; 728 + #else 665 729 VARDECL(celt_norm, X); 730 + #endif 666 731 VARDECL(int, fine_quant); 667 732 VARDECL(int, pulses); 668 733 VARDECL(int, cap); ··· 680 745 int intra_ener; 681 746 const int CC = st->channels; 682 747 int LM, M; 748 + int start; 749 + int end; 683 750 int effEnd; 684 751 int codedBands; 685 752 int alloc_trim; ··· 706 773 nbEBands = mode->nbEBands; 707 774 overlap = mode->overlap; 708 775 eBands = mode->eBands; 776 + start = st->start; 777 + end = st->end; 709 778 frame_size *= st->downsample; 710 779 711 - c=0; do { 712 - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); 713 - } while (++c<CC); 714 780 lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); 715 781 oldBandE = lpc+CC*LPC_ORDER; 716 782 oldLogE = oldBandE + 2*nbEBands; ··· 728 794 if (data0<0) 729 795 return OPUS_INVALID_PACKET; 730 796 } 731 - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); 797 + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); 732 798 LM = (data0>>3)&0x3; 733 799 C = 1 + ((data0>>2)&0x1); 734 800 data++; ··· 755 821 return OPUS_BAD_ARG; 756 822 757 823 N = M*mode->shortMdctSize; 824 + c=0; do { 825 + decode_mem[c] 
= st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); 826 + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; 827 + } while (++c<CC); 758 828 759 - effEnd = st->end; 829 + effEnd = end; 760 830 if (effEnd > mode->effEBands) 761 831 effEnd = mode->effEBands; 762 832 763 833 if (data == NULL || len<=1) 764 834 { 765 - celt_decode_lost(st, pcm, N, LM); 835 + celt_decode_lost(st, N, LM); 836 + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); 766 837 RESTORE_STACK; 767 838 return frame_size/st->downsample; 768 839 } ··· 798 869 postfilter_gain = 0; 799 870 postfilter_pitch = 0; 800 871 postfilter_tapset = 0; 801 - if (st->start==0 && tell+16 <= total_bits) 872 + if (start==0 && tell+16 <= total_bits) 802 873 { 803 874 if(ec_dec_bit_logp(dec, 1)) 804 875 { ··· 829 900 /* Decode the global flags (first symbols in the stream) */ 830 901 intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; 831 902 /* Get band energies */ 832 - unquant_coarse_energy(mode, st->start, st->end, oldBandE, 903 + unquant_coarse_energy(mode, start, end, oldBandE, 833 904 intra_ener, dec, C, LM); 834 905 835 906 ALLOC(tf_res, nbEBands, int); 836 - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); 907 + tf_decode(start, end, isTransient, tf_res, LM, dec); 837 908 838 909 tell = ec_tell(dec); 839 910 spread_decision = SPREAD_NORMAL; ··· 849 920 dynalloc_logp = 6; 850 921 total_bits<<=BITRES; 851 922 tell = ec_tell_frac(dec); 852 - for (i=st->start;i<st->end;i++) 923 + for (i=start;i<end;i++) 853 924 { 854 925 int width, quanta; 855 926 int dynalloc_loop_logp; ··· 888 959 ALLOC(pulses, nbEBands, int); 889 960 ALLOC(fine_priority, nbEBands, int); 890 961 891 - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, 962 + codedBands = compute_allocation(mode, start, end, offsets, cap, 892 963 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, 893 964 fine_quant, fine_priority, C, LM, dec, 0, 0, 0); 894 965 895 - 
unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); 966 + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); 967 + 968 + c=0; do { 969 + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); 970 + } while (++c<CC); 896 971 897 972 /* Decode fixed codebook */ 898 973 ALLOC(collapse_masks, C*nbEBands, unsigned char); 899 - /**< Interleaved normalised MDCTs */ 900 - if (FREQ_X_BUF_SIZE >= C*N) 901 - X = s_X; 902 - else 903 - ALLOC(X, C*N, celt_norm); 904 974 905 - quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, 975 + #ifdef NORM_ALIASING_HACK 976 + /* This is an ugly hack that breaks aliasing rules and would be easily broken, 977 + but it saves almost 4kB of stack. */ 978 + X = (celt_norm*)(out_syn[CC-1]+overlap/2); 979 + #else 980 + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 981 + #endif 982 + 983 + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, 906 984 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 907 985 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); 908 986 ··· 911 989 anti_collapse_on = ec_dec_bits(dec, 1); 912 990 } 913 991 914 - unquant_energy_finalise(mode, st->start, st->end, oldBandE, 992 + unquant_energy_finalise(mode, start, end, oldBandE, 915 993 fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); 916 994 917 995 if (anti_collapse_on) 918 996 anti_collapse(mode, X, collapse_masks, LM, C, N, 919 - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 920 - 921 - /**< Interleaved signal MDCTs */ 922 - if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) 923 - freq = s_freq; 924 - else 925 - ALLOC(freq, IMAX(CC,C)*N, celt_sig); 997 + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 926 998 927 999 if (silence) 928 1000 { 929 1001 for (i=0;i<C*nbEBands;i++) 930 1002 oldBandE[i] = -QCONST16(28.f,DB_SHIFT); 931 - for (i=0;i<C*N;i++) 932 - freq[i] = 
0; 933 - } else { 934 - /* Synthesis */ 935 - denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); 936 1003 } 937 - c=0; do { 938 - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); 939 - } while (++c<CC); 940 1004 941 - c=0; do { 942 - int bound = M*eBands[effEnd]; 943 - if (st->downsample!=1) 944 - bound = IMIN(bound, N/st->downsample); 945 - for (i=bound;i<N;i++) 946 - freq[c*N+i] = 0; 947 - } while (++c<C); 948 - 949 - c=0; do { 950 - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; 951 - } while (++c<CC); 952 - 953 - if (CC==2&&C==1) 954 - { 955 - for (i=0;i<N;i++) 956 - freq[N+i] = freq[i]; 957 - } 958 - if (CC==1&&C==2) 959 - { 960 - for (i=0;i<N;i++) 961 - freq[i] = HALF32(ADD32(freq[i],freq[N+i])); 962 - } 963 - 964 - /* Compute inverse MDCTs */ 965 - compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM); 1005 + celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence); 966 1006 967 1007 c=0; do { 968 1008 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); ··· 989 1029 st->postfilter_tapset_old = st->postfilter_tapset; 990 1030 } 991 1031 992 - if (C==1) { 993 - for (i=0;i<nbEBands;i++) 994 - oldBandE[nbEBands+i]=oldBandE[i]; 995 - } 1032 + if (C==1) 1033 + OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); 996 1034 997 1035 /* In case start or end were to change */ 998 1036 if (!isTransient) 999 1037 { 1000 - for (i=0;i<2*nbEBands;i++) 1001 - oldLogE2[i] = oldLogE[i]; 1002 - for (i=0;i<2*nbEBands;i++) 1003 - oldLogE[i] = oldBandE[i]; 1038 + OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); 1039 + OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); 1004 1040 for (i=0;i<2*nbEBands;i++) 1005 1041 backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); 1006 1042 } else { ··· 1009 1045 } 1010 1046 c=0; do 1011 1047 { 1012 - for (i=0;i<st->start;i++) 1048 + for (i=0;i<start;i++) 1013 1049 { 1014 1050 oldBandE[c*nbEBands+i]=0; 
1015 1051 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 1016 1052 } 1017 - for (i=st->end;i<nbEBands;i++) 1053 + for (i=end;i<nbEBands;i++) 1018 1054 { 1019 1055 oldBandE[c*nbEBands+i]=0; 1020 1056 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); ··· 1022 1058 } while (++c<2); 1023 1059 st->rng = dec->rng; 1024 1060 1025 - /* We reuse freq[] as scratch space for the de-emphasis */ 1026 - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); 1061 + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); 1027 1062 st->loss_count = 0; 1028 1063 RESTORE_STACK; 1029 1064 if (ec_tell(dec) > 8*len) ··· 1039 1074 #ifdef FIXED_POINT 1040 1075 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) 1041 1076 { 1042 - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1077 + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); 1043 1078 } 1044 1079 1045 1080 #ifndef DISABLE_FLOAT_API ··· 1056 1091 N = frame_size; 1057 1092 1058 1093 ALLOC(out, C*N, opus_int16); 1059 - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1094 + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); 1060 1095 if (ret>0) 1061 1096 for (j=0;j<C*ret;j++) 1062 1097 pcm[j]=out[j]*(1.f/32768.f); ··· 1070 1105 1071 1106 int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) 1072 1107 { 1073 - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1108 + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); 1074 1109 } 1075 1110 1076 1111 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) ··· 1086 1121 N = frame_size; 1087 1122 ALLOC(out, C*N, celt_sig); 1088 
1123 1089 - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1124 + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); 1090 1125 1091 1126 if (ret>0) 1092 1127 for (j=0;j<C*ret;j++)
+29 -11
lib/rbcodec/codecs/libopus/celt/cwrs.c
··· 460 460 ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); 461 461 } 462 462 463 - static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ 463 + static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ 464 464 opus_uint32 p; 465 465 int s; 466 466 int k0; 467 + opus_int16 val; 468 + opus_val32 yy=0; 467 469 celt_assert(_k>0); 468 470 celt_assert(_n>1); 469 471 while(_n>2){ ··· 487 489 } 488 490 else for(p=row[_k];p>_i;p=row[_k])_k--; 489 491 _i-=p; 490 - *_y++=(k0-_k+s)^s; 492 + val=(k0-_k+s)^s; 493 + *_y++=val; 494 + yy=MAC16_16(yy,val,val); 491 495 } 492 496 /*Lots of dimensions case:*/ 493 497 else{ ··· 507 511 do p=CELT_PVQ_U_ROW[--_k][_n]; 508 512 while(p>_i); 509 513 _i-=p; 510 - *_y++=(k0-_k+s)^s; 514 + val=(k0-_k+s)^s; 515 + *_y++=val; 516 + yy=MAC16_16(yy,val,val); 511 517 } 512 518 } 513 519 _n--; ··· 519 525 k0=_k; 520 526 _k=(_i+1)>>1; 521 527 if(_k)_i-=2*_k-1; 522 - *_y++=(k0-_k+s)^s; 528 + val=(k0-_k+s)^s; 529 + *_y++=val; 530 + yy=MAC16_16(yy,val,val); 523 531 /*_n==1*/ 524 532 s=-(int)_i; 525 - *_y=(_k+s)^s; 533 + val=(_k+s)^s; 534 + *_y=val; 535 + yy=MAC16_16(yy,val,val); 536 + return yy; 526 537 } 527 538 528 - void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 529 - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); 539 + opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 540 + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); 530 541 } 531 542 532 543 #else /* SMALL_FOOTPRINT */ ··· 591 602 _y: Returns the vector of pulses. 592 603 _u: Must contain entries [0..._k+1] of row _n of U() on input. 
593 604 Its contents will be destructively modified.*/ 594 - static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ 605 + static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ 595 606 int j; 607 + opus_int16 val; 608 + opus_val32 yy=0; 596 609 celt_assert(_n>0); 597 610 j=0; 598 611 do{ ··· 607 620 while(p>_i)p=_u[--_k]; 608 621 _i-=p; 609 622 yj-=_k; 610 - _y[j]=(yj+s)^s; 623 + val=(yj+s)^s; 624 + _y[j]=val; 625 + yy=MAC16_16(yy,val,val); 611 626 uprev(_u,_k+2,0); 612 627 } 613 628 while(++j<_n); 629 + return yy; 614 630 } 615 631 616 632 /*Returns the index of the given combination of K elements chosen from a set ··· 685 701 RESTORE_STACK; 686 702 } 687 703 688 - void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 704 + opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 689 705 VARDECL(opus_uint32,u); 706 + int ret; 690 707 SAVE_STACK; 691 708 celt_assert(_k>0); 692 709 ALLOC(u,_k+2U,opus_uint32); 693 - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); 710 + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); 694 711 RESTORE_STACK; 712 + return ret; 695 713 } 696 714 697 715 #endif /* SMALL_FOOTPRINT */
+1 -1
lib/rbcodec/codecs/libopus/celt/cwrs.h
··· 43 43 44 44 void encode_pulses(const int *_y, int N, int K, ec_enc *enc); 45 45 46 - void decode_pulses(int *_y, int N, int K, ec_dec *dec); 46 + opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); 47 47 48 48 #endif /* CWRS_H */
+60
lib/rbcodec/codecs/libopus/celt/entcode.c
··· 62 62 } 63 63 #endif 64 64 65 + #if 1 66 + /* This is a faster version of ec_tell_frac() that takes advantage 67 + of the low (1/8 bit) resolution to use just a linear function 68 + followed by a lookup to determine the exact transition thresholds. */ 69 + opus_uint32 ec_tell_frac(ec_ctx *_this){ 70 + static const unsigned correction[8] = 71 + {35733, 38967, 42495, 46340, 72 + 50535, 55109, 60097, 65535}; 73 + opus_uint32 nbits; 74 + opus_uint32 r; 75 + int l; 76 + unsigned b; 77 + nbits=_this->nbits_total<<BITRES; 78 + l=EC_ILOG(_this->rng); 79 + r=_this->rng>>(l-16); 80 + b = (r>>12)-8; 81 + b += r>correction[b]; 82 + l = (l<<3)+b; 83 + return nbits-l; 84 + } 85 + #else 65 86 opus_uint32 ec_tell_frac(ec_ctx *_this){ 66 87 opus_uint32 nbits; 67 88 opus_uint32 r; ··· 91 112 } 92 113 return nbits-l; 93 114 } 115 + #endif 116 + 117 + #ifdef USE_SMALL_DIV_TABLE 118 + /* Result of 2^32/(2*i+1), except for i=0. */ 119 + const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { 120 + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, 121 + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, 122 + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, 123 + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, 124 + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, 125 + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, 126 + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, 127 + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, 128 + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, 129 + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, 130 + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, 131 + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, 132 + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, 133 + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, 134 + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, 135 + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, 136 + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, 137 + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, 138 + 0x01C3F8F0, 0x01BDD2B8, 
0x01B7D6C3, 0x01B20364, 139 + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, 140 + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, 141 + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, 142 + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, 143 + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, 144 + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, 145 + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, 146 + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, 147 + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, 148 + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, 149 + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, 150 + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, 151 + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 152 + }; 153 + #endif
+35
lib/rbcodec/codecs/libopus/celt/entcode.h
··· 34 34 # include <stddef.h> 35 35 # include "ecintrin.h" 36 36 37 + extern const opus_uint32 SMALL_DIV_TABLE[129]; 38 + 39 + #ifdef OPUS_ARM_ASM 40 + #define USE_SMALL_DIV_TABLE 41 + #endif 42 + 37 43 /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a 38 44 larger type, you can speed up the decoder by using it here.*/ 39 45 typedef opus_uint32 ec_window; ··· 113 119 This will always be slightly larger than the exact value (e.g., all 114 120 rounding error is in the positive direction).*/ 115 121 opus_uint32 ec_tell_frac(ec_ctx *_this); 122 + 123 + /* Tested exhaustively for all n and for 1<=d<=256 */ 124 + static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { 125 + celt_assert(d>0); 126 + #ifdef USE_SMALL_DIV_TABLE 127 + if (d>256) 128 + return n/d; 129 + else { 130 + opus_uint32 t, q; 131 + t = EC_ILOG(d&-d); 132 + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; 133 + return q+(n-q*d >= d); 134 + } 135 + #else 136 + return n/d; 137 + #endif 138 + } 139 + 140 + static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { 141 + celt_assert(d>0); 142 + #ifdef USE_SMALL_DIV_TABLE 143 + if (n<0) 144 + return -(opus_int32)celt_udiv(-n, d); 145 + else 146 + return celt_udiv(n, d); 147 + #else 148 + return n/d; 149 + #endif 150 + } 116 151 117 152 #endif
+1 -1
lib/rbcodec/codecs/libopus/celt/entdec.c
··· 138 138 139 139 unsigned ec_decode(ec_dec *_this,unsigned _ft){ 140 140 unsigned s; 141 - _this->ext=_this->rng/_ft; 141 + _this->ext=celt_udiv(_this->rng,_ft); 142 142 s=(unsigned)(_this->val/_this->ext); 143 143 return _ft-EC_MINI(s+1,_ft); 144 144 }
+1 -1
lib/rbcodec/codecs/libopus/celt/entenc.c
··· 127 127 128 128 void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ 129 129 opus_uint32 r; 130 - r=_this->rng/_ft; 130 + r=celt_udiv(_this->rng,_ft); 131 131 if(_fl>0){ 132 132 _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); 133 133 _this->rng=IMUL32(r,(_fh-_fl));
+18 -1
lib/rbcodec/codecs/libopus/celt/fixed_generic.h
··· 113 113 /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. 114 114 b must fit in 31 bits. 115 115 Result fits in 32 bits. */ 116 - #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) 116 + #define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) 117 + 118 + /** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. 119 + Results fits in 32 bits */ 120 + #define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) 117 121 118 122 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) 119 123 #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) ··· 130 134 131 135 /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ 132 136 #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) 137 + 138 + #if defined(MIPSr1_ASM) 139 + #include "mips/fixed_generic_mipsr1.h" 140 + #endif 141 + 142 + static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) 143 + { 144 + x = PSHR32(x, SIG_SHIFT); 145 + x = MAX32(x, -32768); 146 + x = MIN32(x, 32767); 147 + return EXTRACT16(x); 148 + } 149 + #define SIG2WORD16(x) (SIG2WORD16_generic(x)) 133 150 134 151 #endif
+169 -305
lib/rbcodec/codecs/libopus/celt/kiss_fft.c
··· 45 45 complex numbers. It also delares the kf_ internal functions. 46 46 */ 47 47 48 - #if 0 49 48 static void kf_bfly2( 50 49 kiss_fft_cpx * Fout, 51 - const size_t fstride, 52 - const kiss_fft_state *st, 53 50 int m, 54 - int N, 55 - int mm 51 + int N 56 52 ) 57 53 { 58 54 kiss_fft_cpx * Fout2; 59 - const kiss_twiddle_cpx * tw1; 60 - int i,j; 61 - kiss_fft_cpx * Fout_beg = Fout; 62 - for (i=0;i<N;i++) 55 + int i; 56 + (void)m; 57 + #ifdef CUSTOM_MODES 58 + if (m==1) 63 59 { 64 - Fout = Fout_beg + i*mm; 65 - Fout2 = Fout + m; 66 - tw1 = st->twiddles; 67 - for(j=0;j<m;j++) 60 + celt_assert(m==1); 61 + for (i=0;i<N;i++) 68 62 { 69 63 kiss_fft_cpx t; 70 - Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); 71 - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); 72 - C_MUL (t, *Fout2 , *tw1); 73 - tw1 += fstride; 64 + Fout2 = Fout + 1; 65 + t = *Fout2; 74 66 C_SUB( *Fout2 , *Fout , t ); 75 67 C_ADDTO( *Fout , t ); 76 - ++Fout2; 77 - ++Fout; 68 + Fout += 2; 78 69 } 79 - } 80 - } 70 + } else 81 71 #endif 82 - 83 - static void ki_bfly2( 84 - kiss_fft_cpx * Fout, 85 - const size_t fstride, 86 - const kiss_fft_state *st, 87 - int m, 88 - int N, 89 - int mm 90 - ) 91 - { 92 - kiss_fft_cpx * Fout2; 93 - const kiss_twiddle_cpx * tw1; 94 - kiss_fft_cpx t; 95 - int i,j; 96 - kiss_fft_cpx * Fout_beg = Fout; 97 - for (i=0;i<N;i++) 98 72 { 99 - Fout = Fout_beg + i*mm; 100 - Fout2 = Fout + m; 101 - tw1 = st->twiddles; 102 - for(j=0;j<m;j++) 73 + opus_val16 tw; 74 + tw = QCONST16(0.7071067812f, 15); 75 + /* We know that m==4 here because the radix-2 is just after a radix-4 */ 76 + celt_assert(m==4); 77 + for (i=0;i<N;i++) 103 78 { 104 - C_MULC (t, *Fout2 , *tw1); 105 - tw1 += fstride; 106 - C_SUB( *Fout2 , *Fout , t ); 107 - C_ADDTO( *Fout , t ); 108 - ++Fout2; 109 - ++Fout; 79 + kiss_fft_cpx t; 80 + Fout2 = Fout + 4; 81 + t = Fout2[0]; 82 + C_SUB( Fout2[0] , Fout[0] , t ); 83 + C_ADDTO( Fout[0] , t ); 84 + 85 + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); 86 + t.i = 
S_MUL(Fout2[1].i-Fout2[1].r, tw); 87 + C_SUB( Fout2[1] , Fout[1] , t ); 88 + C_ADDTO( Fout[1] , t ); 89 + 90 + t.r = Fout2[2].i; 91 + t.i = -Fout2[2].r; 92 + C_SUB( Fout2[2] , Fout[2] , t ); 93 + C_ADDTO( Fout[2] , t ); 94 + 95 + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); 96 + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); 97 + C_SUB( Fout2[3] , Fout[3] , t ); 98 + C_ADDTO( Fout[3] , t ); 99 + Fout += 8; 110 100 } 111 101 } 112 102 } 113 103 114 - #if 0 115 104 static void kf_bfly4( 116 105 kiss_fft_cpx * Fout, 117 106 const size_t fstride, ··· 121 110 int mm 122 111 ) 123 112 { 124 - const kiss_twiddle_cpx *tw1,*tw2,*tw3; 125 - kiss_fft_cpx scratch[6]; 126 - const size_t m2=2*m; 127 - const size_t m3=3*m; 128 - int i, j; 113 + int i; 129 114 130 - kiss_fft_cpx * Fout_beg = Fout; 131 - for (i=0;i<N;i++) 115 + if (m==1) 132 116 { 133 - Fout = Fout_beg + i*mm; 134 - tw3 = tw2 = tw1 = st->twiddles; 135 - for (j=0;j<m;j++) 117 + /* Degenerate case where all the twiddles are 1. */ 118 + for (i=0;i<N;i++) 136 119 { 137 - C_MUL4(scratch[0],Fout[m] , *tw1 ); 138 - C_MUL4(scratch[1],Fout[m2] , *tw2 ); 139 - C_MUL4(scratch[2],Fout[m3] , *tw3 ); 120 + kiss_fft_cpx scratch0, scratch1; 140 121 141 - Fout->r = PSHR32(Fout->r, 2); 142 - Fout->i = PSHR32(Fout->i, 2); 143 - C_SUB( scratch[5] , *Fout, scratch[1] ); 144 - C_ADDTO(*Fout, scratch[1]); 145 - C_ADD( scratch[3] , scratch[0] , scratch[2] ); 146 - C_SUB( scratch[4] , scratch[0] , scratch[2] ); 147 - C_SUB( Fout[m2], *Fout, scratch[3] ); 148 - tw1 += fstride; 149 - tw2 += fstride*2; 150 - tw3 += fstride*3; 151 - C_ADDTO( *Fout , scratch[3] ); 122 + C_SUB( scratch0 , *Fout, Fout[2] ); 123 + C_ADDTO(*Fout, Fout[2]); 124 + C_ADD( scratch1 , Fout[1] , Fout[3] ); 125 + C_SUB( Fout[2], *Fout, scratch1 ); 126 + C_ADDTO( *Fout , scratch1 ); 127 + C_SUB( scratch1 , Fout[1] , Fout[3] ); 152 128 153 - Fout[m].r = scratch[5].r + scratch[4].i; 154 - Fout[m].i = scratch[5].i - scratch[4].r; 155 - Fout[m3].r = scratch[5].r - scratch[4].i; 156 - 
Fout[m3].i = scratch[5].i + scratch[4].r; 157 - ++Fout; 129 + Fout[1].r = scratch0.r + scratch1.i; 130 + Fout[1].i = scratch0.i - scratch1.r; 131 + Fout[3].r = scratch0.r - scratch1.i; 132 + Fout[3].i = scratch0.i + scratch1.r; 133 + Fout+=4; 158 134 } 159 - } 160 - } 161 - #endif 162 - 163 - static void ki_bfly4( 164 - kiss_fft_cpx * Fout, 165 - const size_t fstride, 166 - const kiss_fft_state *st, 167 - int m, 168 - int N, 169 - int mm 170 - ) 171 - { 172 - const kiss_twiddle_cpx *tw1,*tw2,*tw3; 173 - kiss_fft_cpx scratch[6]; 174 - const size_t m2=2*m; 175 - const size_t m3=3*m; 176 - int i, j; 177 - 178 - kiss_fft_cpx * Fout_beg = Fout; 179 - for (i=0;i<N;i++) 180 - { 181 - Fout = Fout_beg + i*mm; 182 - tw3 = tw2 = tw1 = st->twiddles; 183 - for (j=0;j<m;j++) 135 + } else { 136 + int j; 137 + kiss_fft_cpx scratch[6]; 138 + const kiss_twiddle_cpx *tw1,*tw2,*tw3; 139 + const int m2=2*m; 140 + const int m3=3*m; 141 + kiss_fft_cpx * Fout_beg = Fout; 142 + for (i=0;i<N;i++) 184 143 { 185 - C_MULC(scratch[0],Fout[m] , *tw1 ); 186 - C_MULC(scratch[1],Fout[m2] , *tw2 ); 187 - C_MULC(scratch[2],Fout[m3] , *tw3 ); 144 + Fout = Fout_beg + i*mm; 145 + tw3 = tw2 = tw1 = st->twiddles; 146 + /* m is guaranteed to be a multiple of 4. 
*/ 147 + for (j=0;j<m;j++) 148 + { 149 + C_MUL(scratch[0],Fout[m] , *tw1 ); 150 + C_MUL(scratch[1],Fout[m2] , *tw2 ); 151 + C_MUL(scratch[2],Fout[m3] , *tw3 ); 188 152 189 - C_SUB( scratch[5] , *Fout, scratch[1] ); 190 - C_ADDTO(*Fout, scratch[1]); 191 - C_ADD( scratch[3] , scratch[0] , scratch[2] ); 192 - C_SUB( scratch[4] , scratch[0] , scratch[2] ); 193 - C_SUB( Fout[m2], *Fout, scratch[3] ); 194 - tw1 += fstride; 195 - tw2 += fstride*2; 196 - tw3 += fstride*3; 197 - C_ADDTO( *Fout , scratch[3] ); 153 + C_SUB( scratch[5] , *Fout, scratch[1] ); 154 + C_ADDTO(*Fout, scratch[1]); 155 + C_ADD( scratch[3] , scratch[0] , scratch[2] ); 156 + C_SUB( scratch[4] , scratch[0] , scratch[2] ); 157 + C_SUB( Fout[m2], *Fout, scratch[3] ); 158 + tw1 += fstride; 159 + tw2 += fstride*2; 160 + tw3 += fstride*3; 161 + C_ADDTO( *Fout , scratch[3] ); 198 162 199 - Fout[m].r = scratch[5].r - scratch[4].i; 200 - Fout[m].i = scratch[5].i + scratch[4].r; 201 - Fout[m3].r = scratch[5].r + scratch[4].i; 202 - Fout[m3].i = scratch[5].i - scratch[4].r; 203 - ++Fout; 163 + Fout[m].r = scratch[5].r + scratch[4].i; 164 + Fout[m].i = scratch[5].i - scratch[4].r; 165 + Fout[m3].r = scratch[5].r - scratch[4].i; 166 + Fout[m3].i = scratch[5].i + scratch[4].r; 167 + ++Fout; 168 + } 204 169 } 205 170 } 206 171 } 207 172 173 + 208 174 #ifndef RADIX_TWO_ONLY 209 175 210 - #if 0 211 176 static void kf_bfly3( 212 177 kiss_fft_cpx * Fout, 213 178 const size_t fstride, ··· 225 190 kiss_twiddle_cpx epi3; 226 191 227 192 kiss_fft_cpx * Fout_beg = Fout; 193 + #ifdef FIXED_POINT 194 + epi3.r = -16384; 195 + epi3.i = -28378; 196 + #else 228 197 epi3 = st->twiddles[fstride*m]; 198 + #endif 229 199 for (i=0;i<N;i++) 230 200 { 231 201 Fout = Fout_beg + i*mm; 232 202 tw1=tw2=st->twiddles; 203 + /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ 233 204 k=m; 234 205 do { 235 - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 236 206 237 207 C_MUL(scratch[1],Fout[m] , *tw1); 238 208 C_MUL(scratch[2],Fout[m2] , *tw2); ··· 259 229 } while(--k); 260 230 } 261 231 } 262 - #endif 263 232 264 - static void ki_bfly3( 265 - kiss_fft_cpx * Fout, 266 - const size_t fstride, 267 - const kiss_fft_state *st, 268 - int m, 269 - int N, 270 - int mm 271 - ) 272 - { 273 - int i, k; 274 - const size_t m2 = 2*m; 275 - const kiss_twiddle_cpx *tw1,*tw2; 276 - kiss_fft_cpx scratch[5]; 277 - kiss_twiddle_cpx epi3; 278 233 279 - kiss_fft_cpx * Fout_beg = Fout; 280 - epi3 = st->twiddles[fstride*m]; 281 - for (i=0;i<N;i++) 282 - { 283 - Fout = Fout_beg + i*mm; 284 - tw1=tw2=st->twiddles; 285 - k=m; 286 - do{ 287 - 288 - C_MULC(scratch[1],Fout[m] , *tw1); 289 - C_MULC(scratch[2],Fout[m2] , *tw2); 290 - 291 - C_ADD(scratch[3],scratch[1],scratch[2]); 292 - C_SUB(scratch[0],scratch[1],scratch[2]); 293 - tw1 += fstride; 294 - tw2 += fstride*2; 295 - 296 - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 297 - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 298 - 299 - C_MULBYSCALAR( scratch[0] , -epi3.i ); 300 - 301 - C_ADDTO(*Fout,scratch[3]); 302 - 303 - Fout[m2].r = Fout[m].r + scratch[0].i; 304 - Fout[m2].i = Fout[m].i - scratch[0].r; 305 - 306 - Fout[m].r -= scratch[0].i; 307 - Fout[m].i += scratch[0].r; 308 - 309 - ++Fout; 310 - }while(--k); 311 - } 312 - } 313 - 314 - #if 0 234 + #ifndef OVERRIDE_kf_bfly5 315 235 static void kf_bfly5( 316 236 kiss_fft_cpx * Fout, 317 237 const size_t fstride, ··· 324 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 325 245 int i, u; 326 246 kiss_fft_cpx scratch[13]; 327 - const kiss_twiddle_cpx * twiddles = st->twiddles; 328 247 const kiss_twiddle_cpx *tw; 329 248 kiss_twiddle_cpx ya,yb; 330 249 kiss_fft_cpx * Fout_beg = Fout; 331 250 332 - ya = twiddles[fstride*m]; 333 - yb = twiddles[fstride*2*m]; 251 + #ifdef FIXED_POINT 252 + ya.r = 10126; 253 + ya.i = -31164; 254 + yb.r = -26510; 
255 + yb.i = -19261; 256 + #else 257 + ya = st->twiddles[fstride*m]; 258 + yb = st->twiddles[fstride*2*m]; 259 + #endif 334 260 tw=st->twiddles; 335 261 336 262 for (i=0;i<N;i++) ··· 342 268 Fout3=Fout0+3*m; 343 269 Fout4=Fout0+4*m; 344 270 271 + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ 345 272 for ( u=0; u<m; ++u ) { 346 - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 347 273 scratch[0] = *Fout0; 348 274 349 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); ··· 380 306 } 381 307 } 382 308 } 383 - #endif 309 + #endif /* OVERRIDE_kf_bfly5 */ 384 310 385 - static void ki_bfly5( 386 - kiss_fft_cpx * Fout, 387 - const size_t fstride, 388 - const kiss_fft_state *st, 389 - int m, 390 - int N, 391 - int mm 392 - ) 393 - { 394 - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 395 - int i, u; 396 - kiss_fft_cpx scratch[13]; 397 - const kiss_twiddle_cpx * twiddles = st->twiddles; 398 - const kiss_twiddle_cpx *tw; 399 - kiss_twiddle_cpx ya,yb; 400 - kiss_fft_cpx * Fout_beg = Fout; 401 - 402 - ya = twiddles[fstride*m]; 403 - yb = twiddles[fstride*2*m]; 404 - tw=st->twiddles; 405 - 406 - for (i=0;i<N;i++) 407 - { 408 - Fout = Fout_beg + i*mm; 409 - Fout0=Fout; 410 - Fout1=Fout0+m; 411 - Fout2=Fout0+2*m; 412 - Fout3=Fout0+3*m; 413 - Fout4=Fout0+4*m; 414 - 415 - for ( u=0; u<m; ++u ) { 416 - scratch[0] = *Fout0; 417 - 418 - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); 419 - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); 420 - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); 421 - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); 422 - 423 - C_ADD( scratch[7],scratch[1],scratch[4]); 424 - C_SUB( scratch[10],scratch[1],scratch[4]); 425 - C_ADD( scratch[8],scratch[2],scratch[3]); 426 - C_SUB( scratch[9],scratch[2],scratch[3]); 427 - 428 - Fout0->r += scratch[7].r + scratch[8].r; 429 - Fout0->i += scratch[7].i + scratch[8].i; 430 - 431 - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + 
S_MUL(scratch[8].r,yb.r); 432 - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 433 - 434 - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); 435 - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); 436 - 437 - C_SUB(*Fout1,scratch[5],scratch[6]); 438 - C_ADD(*Fout4,scratch[5],scratch[6]); 439 - 440 - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 441 - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 442 - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); 443 - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); 444 - 445 - C_ADD(*Fout2,scratch[11],scratch[12]); 446 - C_SUB(*Fout3,scratch[11],scratch[12]); 447 - 448 - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 449 - } 450 - } 451 - } 452 311 453 312 #endif 454 313 ··· 496 355 int kf_factor(int n,opus_int16 * facbuf) 497 356 { 498 357 int p=4; 358 + int i; 359 + int stages=0; 360 + int nbak = n; 499 361 500 362 /*factor out powers of 4, powers of 2, then any remaining primes */ 501 363 do { ··· 517 379 { 518 380 return 0; 519 381 } 520 - *facbuf++ = p; 521 - *facbuf++ = n; 382 + facbuf[2*stages] = p; 383 + if (p==2 && stages > 1) 384 + { 385 + facbuf[2*stages] = 4; 386 + facbuf[2] = 2; 387 + } 388 + stages++; 522 389 } while (n > 1); 390 + n = nbak; 391 + /* Reverse the order to get the radix 4 at the end, so we can use the 392 + fast degenerate case. It turns out that reversing the order also 393 + improves the noise behaviour. 
*/ 394 + for (i=0;i<stages/2;i++) 395 + { 396 + int tmp; 397 + tmp = facbuf[2*i]; 398 + facbuf[2*i] = facbuf[2*(stages-i-1)]; 399 + facbuf[2*(stages-i-1)] = tmp; 400 + } 401 + for (i=0;i<stages;i++) 402 + { 403 + n /= facbuf[2*i]; 404 + facbuf[2*i+1] = n; 405 + } 523 406 return 1; 524 407 } 525 408 ··· 563 446 kiss_twiddle_cpx *twiddles; 564 447 565 448 st->nfft=nfft; 566 - #ifndef FIXED_POINT 449 + #ifdef FIXED_POINT 450 + st->scale_shift = celt_ilog2(st->nfft); 451 + if (st->nfft == 1<<st->scale_shift) 452 + st->scale = Q15ONE; 453 + else 454 + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); 455 + #else 567 456 st->scale = 1.f/nfft; 568 457 #endif 569 458 if (base != NULL) 570 459 { 571 460 st->twiddles = base->twiddles; 572 461 st->shift = 0; 573 - while (nfft<<st->shift != base->nfft && st->shift < 32) 462 + while (st->shift < 32 && nfft<<st->shift != base->nfft) 574 463 st->shift++; 575 464 if (st->shift>=32) 576 465 goto fail; ··· 614 503 615 504 #endif /* CUSTOM_MODES */ 616 505 617 - #if 0 618 - void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 506 + void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) 619 507 { 620 508 int m2, m; 621 509 int p; ··· 627 515 /* st->shift can be -1 */ 628 516 shift = st->shift>0 ? 
st->shift : 0; 629 517 630 - celt_assert2 (fin != fout, "In-place FFT not supported"); 631 - /* Bit-reverse the input */ 632 - for (i=0;i<st->nfft;i++) 633 - { 634 - fout[st->bitrev[i]] = fin[i]; 635 - #ifndef FIXED_POINT 636 - fout[st->bitrev[i]].r *= st->scale; 637 - fout[st->bitrev[i]].i *= st->scale; 638 - #endif 639 - } 640 - 641 518 fstride[0] = 1; 642 519 L=0; 643 520 do { ··· 656 533 switch (st->factors[2*i]) 657 534 { 658 535 case 2: 659 - kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 536 + kf_bfly2(fout, m, fstride[i]); 660 537 break; 661 538 case 4: 662 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); ··· 673 550 m = m2; 674 551 } 675 552 } 676 - #endif 677 553 678 - void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 554 + #if 0 555 + void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 679 556 { 680 - int m2, m; 681 - int p; 682 - int L; 683 - int fstride[MAXFACTORS]; 684 557 int i; 685 - int shift; 558 + opus_val16 scale; 559 + #ifdef FIXED_POINT 560 + /* Allows us to scale with MULT16_32_Q16(), which is faster than 561 + MULT16_32_Q15() on ARM. */ 562 + int scale_shift = st->scale_shift-1; 563 + #endif 564 + scale = st->scale; 686 565 687 - /* st->shift can be -1 */ 688 - shift = st->shift>0 ? 
st->shift : 0; 689 566 celt_assert2 (fin != fout, "In-place FFT not supported"); 690 567 /* Bit-reverse the input */ 691 568 for (i=0;i<st->nfft;i++) 692 - fout[st->bitrev[i]] = fin[i]; 693 - 694 - fstride[0] = 1; 695 - L=0; 696 - do { 697 - p = st->factors[2*L]; 698 - m = st->factors[2*L+1]; 699 - fstride[L+1] = fstride[L]*p; 700 - L++; 701 - } while(m!=1); 702 - m = st->factors[2*L-1]; 703 - for (i=L-1;i>=0;i--) 704 569 { 705 - if (i!=0) 706 - m2 = st->factors[2*i-1]; 707 - else 708 - m2 = 1; 709 - switch (st->factors[2*i]) 710 - { 711 - case 2: 712 - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 713 - break; 714 - case 4: 715 - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); 716 - break; 717 - #ifndef RADIX_TWO_ONLY 718 - case 3: 719 - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); 720 - break; 721 - case 5: 722 - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); 723 - break; 724 - #endif 725 - } 726 - m = m2; 570 + kiss_fft_cpx x = fin[i]; 571 + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); 572 + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); 727 573 } 574 + opus_fft_impl(st, fout); 728 575 } 576 + #endif 729 577 578 + 579 + #ifdef TEST_UNIT_DFT_C 580 + void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 581 + { 582 + int i; 583 + celt_assert2 (fin != fout, "In-place FFT not supported"); 584 + /* Bit-reverse the input */ 585 + for (i=0;i<st->nfft;i++) 586 + fout[st->bitrev[i]] = fin[i]; 587 + for (i=0;i<st->nfft;i++) 588 + fout[i].i = -fout[i].i; 589 + opus_fft_impl(st, fout); 590 + for (i=0;i<st->nfft;i++) 591 + fout[i].i = -fout[i].i; 592 + } 593 + #endif
+6 -9
lib/rbcodec/codecs/libopus/celt/kiss_fft.h
··· 79 79 80 80 typedef struct kiss_fft_state{ 81 81 int nfft; 82 - #ifndef FIXED_POINT 83 - kiss_fft_scalar scale; 82 + opus_val16 scale; 83 + #ifdef FIXED_POINT 84 + int scale_shift; 84 85 #endif 85 86 int shift; 86 87 opus_int16 factors[2*MAXFACTORS]; ··· 128 129 f[k].r and f[k].i 129 130 * */ 130 131 void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 131 - 132 - #if defined(CPU_COLDFIRE) 133 - #define IFFT_ICODE ICODE_ATTR 134 - #else 135 - #define IFFT_ICODE 136 - #endif 132 + void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 137 133 138 - void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE; 134 + void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 135 + void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 139 136 140 137 void opus_fft_free(const kiss_fft_state *cfg); 141 138
+110 -81
lib/rbcodec/codecs/libopus/celt/mdct.c
··· 53 53 #include "mathops.h" 54 54 #include "stack_alloc.h" 55 55 56 + #if defined(MIPSr1_ASM) 57 + #include "mips/mdct_mipsr1.h" 58 + #endif 59 + 60 + 56 61 #ifdef CUSTOM_MODES 57 62 58 63 int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 59 64 { 60 65 int i; 61 - int N4; 62 66 kiss_twiddle_scalar *trig; 63 - #if defined(FIXED_POINT) 67 + int shift; 64 68 int N2=N>>1; 65 - #endif 66 69 l->n = N; 67 - N4 = N>>2; 68 70 l->maxshift = maxshift; 69 71 for (i=0;i<=maxshift;i++) 70 72 { ··· 77 79 return 0; 78 80 #endif 79 81 } 80 - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 82 + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); 81 83 if (l->trig==NULL) 82 84 return 0; 83 - /* We have enough points that sine isn't necessary */ 85 + for (shift=0;shift<=maxshift;shift++) 86 + { 87 + /* We have enough points that sine isn't necessary */ 84 88 #if defined(FIXED_POINT) 85 - for (i=0;i<=N4;i++) 86 - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); 89 + #if 1 90 + for (i=0;i<N2;i++) 91 + trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N)); 87 92 #else 88 - for (i=0;i<=N4;i++) 89 - trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 93 + for (i=0;i<N2;i++) 94 + trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N)))); 90 95 #endif 96 + #else 97 + for (i=0;i<N2;i++) 98 + trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N); 99 + #endif 100 + trig += N2; 101 + N2 >>= 1; 102 + N >>= 1; 103 + } 91 104 return 1; 92 105 } 93 106 ··· 103 116 104 117 #if 0 105 118 /* Forward MDCT trashes the input array */ 119 + #ifndef OVERRIDE_clt_mdct_forward 106 120 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 107 121 const opus_val16 *window, int overlap, int shift, int stride) 108 122 { 109 123 int i; 110 124 int N, N2, N4; 111 - kiss_twiddle_scalar 
sine; 112 125 VARDECL(kiss_fft_scalar, f); 113 - VARDECL(kiss_fft_scalar, f2); 126 + VARDECL(kiss_fft_cpx, f2); 127 + const kiss_fft_state *st = l->kfft[shift]; 128 + const kiss_twiddle_scalar *trig; 129 + opus_val16 scale; 130 + #ifdef FIXED_POINT 131 + /* Allows us to scale with MULT16_32_Q16(), which is faster than 132 + MULT16_32_Q15() on ARM. */ 133 + int scale_shift = st->scale_shift-1; 134 + #endif 114 135 SAVE_STACK; 136 + scale = st->scale; 137 + 115 138 N = l->n; 116 - N >>= shift; 139 + trig = l->trig; 140 + for (i=0;i<shift;i++) 141 + { 142 + N >>= 1; 143 + trig += N; 144 + } 117 145 N2 = N>>1; 118 146 N4 = N>>2; 147 + 119 148 ALLOC(f, N2, kiss_fft_scalar); 120 - ALLOC(f2, N2, kiss_fft_scalar); 121 - /* sin(x) ~= x here */ 122 - #ifdef FIXED_POINT 123 - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 124 - #else 125 - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 126 - #endif 149 + ALLOC(f2, N4, kiss_fft_cpx); 127 150 128 151 /* Consider the input to be composed of four blocks: [a, b, c, d] */ 129 152 /* Window, shuffle, fold */ ··· 168 191 /* Pre-rotation */ 169 192 { 170 193 kiss_fft_scalar * OPUS_RESTRICT yp = f; 171 - const kiss_twiddle_scalar *t = &l->trig[0]; 194 + const kiss_twiddle_scalar *t = &trig[0]; 172 195 for(i=0;i<N4;i++) 173 196 { 197 + kiss_fft_cpx yc; 198 + kiss_twiddle_scalar t0, t1; 174 199 kiss_fft_scalar re, im, yr, yi; 175 - re = yp[0]; 176 - im = yp[1]; 177 - yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 178 - yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 179 - /* works because the cos is nearly one */ 180 - *yp++ = yr + S_MUL(yi,sine); 181 - *yp++ = yi - S_MUL(yr,sine); 200 + t0 = t[i]; 201 + t1 = t[N4+i]; 202 + re = *yp++; 203 + im = *yp++; 204 + yr = S_MUL(re,t0) - S_MUL(im,t1); 205 + yi = S_MUL(im,t0) + S_MUL(re,t1); 206 + yc.r = yr; 207 + yc.i = yi; 208 + yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); 209 + yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); 210 + f2[st->bitrev[i]] = 
yc; 182 211 } 183 212 } 184 213 185 - /* N/4 complex FFT, down-scales by 4/N */ 186 - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); 214 + /* N/4 complex FFT, does not downscale anymore */ 215 + opus_fft_impl(st, f2); 187 216 188 217 /* Post-rotate */ 189 218 { 190 219 /* Temp pointers to make it really clear to the compiler what we're doing */ 191 - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; 220 + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; 192 221 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 193 222 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 194 - const kiss_twiddle_scalar *t = &l->trig[0]; 223 + const kiss_twiddle_scalar *t = &trig[0]; 195 224 /* Temp pointers to make it really clear to the compiler what we're doing */ 196 225 for(i=0;i<N4;i++) 197 226 { 198 227 kiss_fft_scalar yr, yi; 199 - yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 200 - yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 201 - /* works because the cos is nearly one */ 202 - *yp1 = yr - S_MUL(yi,sine); 203 - *yp2 = yi + S_MUL(yr,sine);; 204 - fp += 2; 228 + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); 229 + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); 230 + *yp1 = yr; 231 + *yp2 = yi; 232 + fp++; 205 233 yp1 += 2*stride; 206 234 yp2 -= 2*stride; 207 235 } 208 236 } 209 237 RESTORE_STACK; 210 238 } 239 + #endif /* OVERRIDE_clt_mdct_forward */ 211 240 #endif 212 241 242 + #ifndef OVERRIDE_clt_mdct_backward 213 243 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 214 244 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 215 245 { 216 246 int i; 217 247 int N, N2, N4; 218 - kiss_twiddle_scalar sine; 219 - /* VARDECL(kiss_fft_scalar, f2); 220 - SAVE_STACK; */ 248 + const kiss_twiddle_scalar *trig; 249 + 221 250 N = l->n; 222 - N >>= shift; 251 + trig = l->trig; 252 + for (i=0;i<shift;i++) 253 + { 254 + N >>= 1; 255 + trig += N; 256 + } 223 257 N2 = 
N>>1; 224 258 N4 = N>>2; 225 - /* ALLOC(f2, N2, kiss_fft_scalar); */ 226 - kiss_fft_scalar f2[N2]; /* worst case 3840b */ 227 - /* sin(x) ~= x here */ 228 - #ifdef FIXED_POINT 229 - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 230 - #else 231 - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 232 - #endif 233 259 234 260 /* Pre-rotate */ 235 261 { 236 262 /* Temp pointers to make it really clear to the compiler what we're doing */ 237 263 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 238 264 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 239 - kiss_fft_scalar * OPUS_RESTRICT yp = f2; 240 - const kiss_twiddle_scalar *t = &l->trig[0]; 265 + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); 266 + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; 267 + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; 241 268 for(i=0;i<N4;i++) 242 269 { 270 + int rev; 243 271 kiss_fft_scalar yr, yi; 244 - yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 245 - yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 246 - /* works because the cos is nearly one */ 247 - *yp++ = yr - S_MUL(yi,sine); 248 - *yp++ = yi + S_MUL(yr,sine); 272 + rev = *bitrev++; 273 + yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); 274 + yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); 275 + /* We swap real and imag because we use an FFT instead of an IFFT. */ 276 + yp[2*rev+1] = yr; 277 + yp[2*rev] = yi; 278 + /* Storing the pre-rotation directly in the bitrev order. */ 249 279 xp1+=2*stride; 250 280 xp2-=2*stride; 251 281 } 252 282 } 253 283 254 - /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 255 - opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); 284 + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); 256 285 257 286 /* Post-rotate and de-shuffle from both ends of the buffer at once to make 258 287 it in-place. 
*/ 259 288 { 260 - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); 261 - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; 262 - const kiss_twiddle_scalar *t = &l->trig[0]; 289 + kiss_fft_scalar * yp0 = out+(overlap>>1); 290 + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; 291 + const kiss_twiddle_scalar *t = &trig[0]; 263 292 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 264 293 middle pair will be computed twice. */ 265 294 for(i=0;i<(N4+1)>>1;i++) 266 295 { 267 296 kiss_fft_scalar re, im, yr, yi; 268 297 kiss_twiddle_scalar t0, t1; 269 - re = yp0[0]; 270 - im = yp0[1]; 271 - t0 = t[i<<shift]; 272 - t1 = t[(N4-i)<<shift]; 298 + /* We swap real and imag because we're using an FFT instead of an IFFT. */ 299 + re = yp0[1]; 300 + im = yp0[0]; 301 + t0 = t[i]; 302 + t1 = t[N4+i]; 273 303 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 274 - yr = S_MUL(re,t0) - S_MUL(im,t1); 275 - yi = S_MUL(im,t0) + S_MUL(re,t1); 276 - re = yp1[0]; 277 - im = yp1[1]; 278 - /* works because the cos is nearly one */ 279 - yp0[0] = -(yr - S_MUL(yi,sine)); 280 - yp1[1] = yi + S_MUL(yr,sine); 304 + yr = S_MUL(re,t0) + S_MUL(im,t1); 305 + yi = S_MUL(re,t1) - S_MUL(im,t0); 306 + /* We swap real and imag because we're using an FFT instead of an IFFT. 
*/ 307 + re = yp1[1]; 308 + im = yp1[0]; 309 + yp0[0] = yr; 310 + yp1[1] = yi; 281 311 282 - t0 = t[(N4-i-1)<<shift]; 283 - t1 = t[(i+1)<<shift]; 312 + t0 = t[(N4-i-1)]; 313 + t1 = t[(N2-i-1)]; 284 314 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 285 - yr = S_MUL(re,t0) - S_MUL(im,t1); 286 - yi = S_MUL(im,t0) + S_MUL(re,t1); 287 - /* works because the cos is nearly one */ 288 - yp1[0] = -(yr - S_MUL(yi,sine)); 289 - yp0[1] = yi + S_MUL(yr,sine); 315 + yr = S_MUL(re,t0) + S_MUL(im,t1); 316 + yi = S_MUL(re,t1) - S_MUL(im,t0); 317 + yp1[0] = yr; 318 + yp0[1] = yi; 290 319 yp0 += 2; 291 320 yp1 -= 2; 292 321 } ··· 310 339 wp2--; 311 340 } 312 341 } 313 - /* RESTORE_STACK; */ 314 342 } 343 + #endif /* OVERRIDE_clt_mdct_backward */
-8
lib/rbcodec/codecs/libopus/celt/modes.h
··· 39 39 40 40 #define MAX_PERIOD 1024 41 41 42 - #ifndef OVERLAP 43 - #define OVERLAP(mode) ((mode)->overlap) 44 - #endif 45 - 46 - #ifndef FRAMESIZE 47 - #define FRAMESIZE(mode) ((mode)->mdctSize) 48 - #endif 49 - 50 42 typedef struct { 51 43 int size; 52 44 const opus_int16 *index;
+14 -16
lib/rbcodec/codecs/libopus/celt/pitch.c
··· 252 252 #endif 253 253 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) 254 254 { 255 - int i,j; 255 + int i; 256 256 /*The EDSP version requires that max_pitch is at least 1, and that _x is 257 257 32-bit aligned. 258 258 Since it's hard to put asserts in assembly, put them here.*/ 259 - celt_assert(max_pitch>0); 260 - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 261 259 #ifdef FIXED_POINT 262 260 opus_val32 maxcorr=1; 263 261 #endif 262 + celt_assert(max_pitch>0); 263 + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 264 264 for (i=0;i<max_pitch-3;i+=4) 265 265 { 266 266 opus_val32 sum[4]={0,0,0,0}; ··· 279 279 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ 280 280 for (;i<max_pitch;i++) 281 281 { 282 - opus_val32 sum = 0; 283 - for (j=0;j<len;j++) 284 - sum = MAC16_16(sum, _x[j],_y[i+j]); 282 + opus_val32 sum; 283 + sum = celt_inner_prod(_x, _y+i, len); 285 284 xcorr[i] = sum; 286 285 #ifdef FIXED_POINT 287 286 maxcorr = MAX32(maxcorr, sum); ··· 361 360 #endif 362 361 for (i=0;i<max_pitch>>1;i++) 363 362 { 364 - opus_val32 sum=0; 363 + opus_val32 sum; 365 364 xcorr[i] = 0; 366 365 if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) 367 366 continue; 367 + #ifdef FIXED_POINT 368 + sum = 0; 368 369 for (j=0;j<len>>1;j++) 369 370 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 371 + #else 372 + sum = celt_inner_prod(x_lp, y+i, len>>1); 373 + #endif 370 374 xcorr[i] = MAX32(-1, sum); 371 375 #ifdef FIXED_POINT 372 376 maxcorr = MAX32(maxcorr, sum); ··· 457 461 opus_val16 g1; 458 462 opus_val16 cont=0; 459 463 opus_val16 thresh; 460 - T1 = (2*T0+k)/(2*k); 464 + T1 = celt_udiv(2*T0+k, 2*k); 461 465 if (T1 < minperiod) 462 466 break; 463 467 /* Look for another strong correlation at T1b */ ··· 469 473 T1b = T0+T1; 470 474 } else 471 475 { 472 - T1b = (2*second_check[k]*T0+k)/(2*k); 476 + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); 473 477 } 
474 478 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); 475 479 xy += xy2; ··· 514 518 pg = SHR32(frac_div32(best_xy,best_yy+1),16); 515 519 516 520 for (k=0;k<3;k++) 517 - { 518 - int T1 = T+k-1; 519 - xy = 0; 520 - for (i=0;i<N;i++) 521 - xy = MAC16_16(xy, x[i], x[i-T1]); 522 - xcorr[k] = xy; 523 - } 521 + xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); 524 522 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) 525 523 offset = 1; 526 524 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+17 -1
lib/rbcodec/codecs/libopus/celt/pitch.h
··· 41 41 #include "x86/pitch_sse.h" 42 42 #endif 43 43 44 + #if defined(MIPSr1_ASM) 45 + #include "mips/pitch_mipsr1.h" 46 + #endif 47 + 44 48 #if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) 45 - # include "arm/pitch_arm.h" 49 + //# include "arm/pitch_arm.h" 46 50 #endif 47 51 48 52 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, ··· 138 142 } 139 143 *xy1 = xy01; 140 144 *xy2 = xy02; 145 + } 146 + #endif 147 + 148 + #ifndef OVERRIDE_CELT_INNER_PROD 149 + static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, 150 + int N) 151 + { 152 + int i; 153 + opus_val32 xy=0; 154 + for (i=0;i<N;i++) 155 + xy = MAC16_16(xy, x[i], y[i]); 156 + return xy; 141 157 } 142 158 #endif 143 159
+4 -3
lib/rbcodec/codecs/libopus/celt/rate.c
··· 333 333 /*Figure out how many left-over bits we would be adding to this band. 334 334 This can include bits we've stolen back from higher, skipped bands.*/ 335 335 left = total-psum; 336 - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); 336 + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); 337 337 left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; 338 338 rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); 339 339 band_width = m->eBands[codedBands]-m->eBands[j]; ··· 414 414 415 415 /* Allocate the remaining bits */ 416 416 left = total-psum; 417 - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); 417 + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); 418 418 left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; 419 419 for (j=start;j<codedBands;j++) 420 420 bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j])); ··· 465 465 offset += NClogN>>3; 466 466 467 467 /* Divide with rounding */ 468 - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES)); 468 + ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1)))); 469 + ebits[j] = celt_udiv(ebits[j], den)>>BITRES; 469 470 470 471 /* Make sure not to bust */ 471 472 if (C*ebits[j] > (bits[j]>>BITRES))
+7 -1
lib/rbcodec/codecs/libopus/celt/stack_alloc.h
··· 116 116 #else 117 117 118 118 #ifdef CELT_C 119 + char *scratch_ptr=0; 119 120 char *global_stack=0; 120 121 #else 121 122 extern char *global_stack; 123 + extern char *scratch_ptr; 122 124 #endif /* CELT_C */ 123 125 124 126 #ifdef ENABLE_VALGRIND ··· 140 142 141 143 #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) 142 144 #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) 145 + #if 0 /* Set this to 1 to instrument pseudostack usage */ 146 + #define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) 147 + #else 143 148 #define RESTORE_STACK (global_stack = _saved_stack) 144 - #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; 149 + #endif 150 + #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; 145 151 146 152 #endif /* ENABLE_VALGRIND */ 147 153
+434 -162
lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
··· 341 341 #ifndef FFT_BITREV480 342 342 #define FFT_BITREV480 343 343 static const opus_int16 fft_bitrev480[480] = { 344 - 0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, 345 - 450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, 346 - 345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, 347 - 215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, 348 - 110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, 349 - 430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, 350 - 325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, 351 - 181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, 352 - 76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, 353 - 396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, 354 - 291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, 355 - 161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, 356 - 56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, 357 - 362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, 358 - 257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, 359 - 127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, 360 - 22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, 361 - 472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, 362 - 342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, 363 - 237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, 364 - 93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, 365 - 438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, 366 - 308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, 367 - 203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, 368 - 73, 193, 313, 
433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, 369 - 418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, 370 - 274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, 371 - 169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, 372 - 39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, 373 - 384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, 374 - 254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, 375 - 149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, 344 + 0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, 345 + 8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, 346 + 16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, 347 + 24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, 348 + 4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, 349 + 12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, 350 + 20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, 351 + 28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, 352 + 1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, 353 + 9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, 354 + 17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, 355 + 25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, 356 + 5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, 357 + 13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, 358 + 21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, 359 + 29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, 360 + 2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, 361 + 10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, 362 
+ 18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, 363 + 26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, 364 + 6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, 365 + 14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, 366 + 22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, 367 + 30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, 368 + 3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, 369 + 11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, 370 + 19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, 371 + 27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, 372 + 7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, 373 + 15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, 374 + 23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, 375 + 31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, 376 376 }; 377 377 #endif 378 378 379 379 #ifndef FFT_BITREV240 380 380 #define FFT_BITREV240 381 381 static const opus_int16 fft_bitrev240[240] = { 382 - 0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, 383 - 225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, 384 - 170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, 385 - 115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, 386 - 46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, 387 - 216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, 388 - 161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, 389 - 92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, 390 - 37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, 391 - 207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, 392 - 138, 198, 33, 93, 
153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, 393 - 83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, 394 - 28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, 395 - 184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, 396 - 129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, 397 - 74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, 382 + 0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, 383 + 4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, 384 + 8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, 385 + 12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, 386 + 1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, 387 + 5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, 388 + 9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, 389 + 13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, 390 + 2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, 391 + 6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, 392 + 10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, 393 + 14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, 394 + 3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, 395 + 7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, 396 + 11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, 397 + 15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, 398 398 }; 399 399 #endif 400 400 401 401 #ifndef FFT_BITREV120 402 402 #define FFT_BITREV120 403 403 static const opus_int16 fft_bitrev120[120] = { 404 - 0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, 405 - 110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, 406 - 76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, 407 - 56, 86, 116, 2, 32, 62, 92, 17, 
47, 77, 107, 7, 37, 67, 97, 408 - 22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, 409 - 93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, 410 - 73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, 411 - 39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, 404 + 0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, 405 + 4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, 406 + 1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, 407 + 5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, 408 + 2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, 409 + 6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, 410 + 3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, 411 + 7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, 412 412 }; 413 413 #endif 414 414 415 415 #ifndef FFT_BITREV60 416 416 #define FFT_BITREV60 417 417 static const opus_int16 fft_bitrev60[60] = { 418 - 0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, 419 - 46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, 420 - 37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, 421 - 28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, 418 + 0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, 419 + 1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, 420 + 2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, 421 + 3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, 422 422 }; 423 423 #endif 424 424 ··· 426 426 #define FFT_STATE48000_960_0 427 427 static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 428 428 480, /* nfft */ 429 + 17476, /* scale */ 430 + 8, /* scale_shift */ 429 431 -1, /* shift */ 430 - {4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ 432 + {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ 431 433 fft_bitrev480, /* bitrev */ 432 434 fft_twiddles48000_960, /* bitrev */ 433 435 }; ··· 437 439 #define 
FFT_STATE48000_960_1 438 440 static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 439 441 240, /* nfft */ 442 + 17476, /* scale */ 443 + 7, /* scale_shift */ 440 444 1, /* shift */ 441 - {4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 445 + {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 442 446 fft_bitrev240, /* bitrev */ 443 447 fft_twiddles48000_960, /* bitrev */ 444 448 }; ··· 448 452 #define FFT_STATE48000_960_2 449 453 static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 450 454 120, /* nfft */ 455 + 17476, /* scale */ 456 + 6, /* scale_shift */ 451 457 2, /* shift */ 452 - {4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 458 + {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 453 459 fft_bitrev120, /* bitrev */ 454 460 fft_twiddles48000_960, /* bitrev */ 455 461 }; ··· 459 465 #define FFT_STATE48000_960_3 460 466 static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 461 467 60, /* nfft */ 468 + 17476, /* scale */ 469 + 5, /* scale_shift */ 462 470 3, /* shift */ 463 - {4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 471 + {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 464 472 fft_bitrev60, /* bitrev */ 465 473 fft_twiddles48000_960, /* bitrev */ 466 474 }; ··· 470 478 471 479 #ifndef MDCT_TWIDDLES960 472 480 #define MDCT_TWIDDLES960 473 - static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = { 474 - 32767, 32767, 32767, 32767, 32766, 475 - 32763, 32762, 32759, 32757, 32753, 476 - 32751, 32747, 32743, 32738, 32733, 477 - 32729, 32724, 32717, 32711, 32705, 478 - 32698, 32690, 32683, 32676, 32667, 479 - 32658, 32650, 32640, 32631, 32620, 480 - 32610, 32599, 32588, 32577, 32566, 481 - 32554, 32541, 32528, 32515, 32502, 482 - 32487, 32474, 32459, 32444, 32429, 483 - 32413, 32397, 32381, 32364, 32348, 484 - 32331, 32313, 32294, 32277, 32257, 485 - 32239, 32219, 32200, 32180, 32159, 486 - 32138, 32118, 32096, 
32074, 32051, 487 - 32029, 32006, 31984, 31960, 31936, 488 - 31912, 31888, 31863, 31837, 31812, 489 - 31786, 31760, 31734, 31707, 31679, 490 - 31652, 31624, 31596, 31567, 31539, 491 - 31508, 31479, 31450, 31419, 31388, 492 - 31357, 31326, 31294, 31262, 31230, 493 - 31198, 31164, 31131, 31097, 31063, 494 - 31030, 30994, 30959, 30924, 30889, 495 - 30853, 30816, 30779, 30743, 30705, 496 - 30668, 30629, 30592, 30553, 30515, 497 - 30475, 30435, 30396, 30356, 30315, 498 - 30274, 30233, 30191, 30149, 30107, 499 - 30065, 30022, 29979, 29936, 29891, 500 - 29847, 29803, 29758, 29713, 29668, 501 - 29622, 29577, 29529, 29483, 29436, 502 - 29390, 29341, 29293, 29246, 29197, 503 - 29148, 29098, 29050, 29000, 28949, 504 - 28899, 28848, 28797, 28746, 28694, 505 - 28642, 28590, 28537, 28485, 28432, 506 - 28378, 28324, 28271, 28217, 28162, 507 - 28106, 28051, 27995, 27940, 27884, 508 - 27827, 27770, 27713, 27657, 27598, 509 - 27540, 27481, 27423, 27365, 27305, 510 - 27246, 27187, 27126, 27066, 27006, 511 - 26945, 26883, 26822, 26760, 26698, 512 - 26636, 26574, 26510, 26448, 26383, 513 - 26320, 26257, 26191, 26127, 26062, 514 - 25997, 25931, 25866, 25800, 25734, 515 - 25667, 25601, 25533, 25466, 25398, 516 - 25330, 25262, 25194, 25125, 25056, 517 - 24987, 24917, 24848, 24778, 24707, 518 - 24636, 24566, 24495, 24424, 24352, 519 - 24280, 24208, 24135, 24063, 23990, 520 - 23917, 23842, 23769, 23695, 23622, 521 - 23546, 23472, 23398, 23322, 23246, 522 - 23171, 23095, 23018, 22942, 22866, 523 - 22788, 22711, 22634, 22557, 22478, 524 - 22400, 22322, 22244, 22165, 22085, 525 - 22006, 21927, 21846, 21766, 21687, 526 - 21606, 21524, 21443, 21363, 21282, 527 - 21199, 21118, 21035, 20954, 20870, 528 - 20788, 20705, 20621, 20538, 20455, 529 - 20371, 20286, 20202, 20118, 20034, 530 - 19947, 19863, 19777, 19692, 19606, 531 - 19520, 19434, 19347, 19260, 19174, 532 - 19088, 18999, 18911, 18825, 18737, 533 - 18648, 18560, 18472, 18384, 18294, 534 - 18205, 18116, 18025, 17936, 17846, 535 - 17757, 
17666, 17576, 17485, 17395, 536 - 17303, 17212, 17122, 17030, 16937, 537 - 16846, 16755, 16662, 16569, 16477, 538 - 16385, 16291, 16198, 16105, 16012, 539 - 15917, 15824, 15730, 15636, 15541, 540 - 15447, 15352, 15257, 15162, 15067, 541 - 14973, 14875, 14781, 14685, 14589, 542 - 14493, 14396, 14300, 14204, 14107, 543 - 14010, 13914, 13815, 13718, 13621, 544 - 13524, 13425, 13328, 13230, 13133, 545 - 13033, 12935, 12836, 12738, 12638, 546 - 12540, 12441, 12341, 12241, 12142, 547 - 12044, 11943, 11843, 11744, 11643, 548 - 11542, 11442, 11342, 11241, 11139, 549 - 11039, 10939, 10836, 10736, 10635, 550 - 10534, 10431, 10330, 10228, 10127, 551 - 10024, 9921, 9820, 9718, 9614, 552 - 9512, 9410, 9306, 9204, 9101, 553 - 8998, 8895, 8791, 8689, 8585, 554 - 8481, 8377, 8274, 8171, 8067, 555 - 7962, 7858, 7753, 7650, 7545, 556 - 7441, 7336, 7231, 7129, 7023, 557 - 6917, 6813, 6709, 6604, 6498, 558 - 6393, 6288, 6182, 6077, 5973, 559 - 5867, 5760, 5656, 5549, 5445, 560 - 5339, 5232, 5127, 5022, 4914, 561 - 4809, 4703, 4596, 4490, 4384, 562 - 4278, 4171, 4065, 3958, 3852, 563 - 3745, 3640, 3532, 3426, 3318, 564 - 3212, 3106, 2998, 2891, 2786, 565 - 2679, 2570, 2465, 2358, 2251, 566 - 2143, 2037, 1929, 1823, 1715, 567 - 1609, 1501, 1393, 1287, 1180, 568 - 1073, 964, 858, 751, 644, 569 - 535, 429, 322, 214, 107, 570 - 0, }; 481 + static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { 482 + 32767, 32767, 32767, 32766, 32765, 483 + 32763, 32761, 32759, 32756, 32753, 484 + 32750, 32746, 32742, 32738, 32733, 485 + 32728, 32722, 32717, 32710, 32704, 486 + 32697, 32690, 32682, 32674, 32666, 487 + 32657, 32648, 32639, 32629, 32619, 488 + 32609, 32598, 32587, 32576, 32564, 489 + 32552, 32539, 32526, 32513, 32500, 490 + 32486, 32472, 32457, 32442, 32427, 491 + 32411, 32395, 32379, 32362, 32345, 492 + 32328, 32310, 32292, 32274, 32255, 493 + 32236, 32217, 32197, 32177, 32157, 494 + 32136, 32115, 32093, 32071, 32049, 495 + 32027, 32004, 31981, 31957, 31933, 496 + 31909, 31884, 
31859, 31834, 31809, 497 + 31783, 31756, 31730, 31703, 31676, 498 + 31648, 31620, 31592, 31563, 31534, 499 + 31505, 31475, 31445, 31415, 31384, 500 + 31353, 31322, 31290, 31258, 31226, 501 + 31193, 31160, 31127, 31093, 31059, 502 + 31025, 30990, 30955, 30920, 30884, 503 + 30848, 30812, 30775, 30738, 30701, 504 + 30663, 30625, 30587, 30548, 30509, 505 + 30470, 30430, 30390, 30350, 30309, 506 + 30269, 30227, 30186, 30144, 30102, 507 + 30059, 30016, 29973, 29930, 29886, 508 + 29842, 29797, 29752, 29707, 29662, 509 + 29616, 29570, 29524, 29477, 29430, 510 + 29383, 29335, 29287, 29239, 29190, 511 + 29142, 29092, 29043, 28993, 28943, 512 + 28892, 28842, 28791, 28739, 28688, 513 + 28636, 28583, 28531, 28478, 28425, 514 + 28371, 28317, 28263, 28209, 28154, 515 + 28099, 28044, 27988, 27932, 27876, 516 + 27820, 27763, 27706, 27648, 27591, 517 + 27533, 27474, 27416, 27357, 27298, 518 + 27238, 27178, 27118, 27058, 26997, 519 + 26936, 26875, 26814, 26752, 26690, 520 + 26628, 26565, 26502, 26439, 26375, 521 + 26312, 26247, 26183, 26119, 26054, 522 + 25988, 25923, 25857, 25791, 25725, 523 + 25658, 25592, 25524, 25457, 25389, 524 + 25322, 25253, 25185, 25116, 25047, 525 + 24978, 24908, 24838, 24768, 24698, 526 + 24627, 24557, 24485, 24414, 24342, 527 + 24270, 24198, 24126, 24053, 23980, 528 + 23907, 23834, 23760, 23686, 23612, 529 + 23537, 23462, 23387, 23312, 23237, 530 + 23161, 23085, 23009, 22932, 22856, 531 + 22779, 22701, 22624, 22546, 22468, 532 + 22390, 22312, 22233, 22154, 22075, 533 + 21996, 21916, 21836, 21756, 21676, 534 + 21595, 21515, 21434, 21352, 21271, 535 + 21189, 21107, 21025, 20943, 20860, 536 + 20777, 20694, 20611, 20528, 20444, 537 + 20360, 20276, 20192, 20107, 20022, 538 + 19937, 19852, 19767, 19681, 19595, 539 + 19509, 19423, 19336, 19250, 19163, 540 + 19076, 18988, 18901, 18813, 18725, 541 + 18637, 18549, 18460, 18372, 18283, 542 + 18194, 18104, 18015, 17925, 17835, 543 + 17745, 17655, 17565, 17474, 17383, 544 + 17292, 17201, 17110, 17018, 16927, 545 + 
16835, 16743, 16650, 16558, 16465, 546 + 16372, 16279, 16186, 16093, 15999, 547 + 15906, 15812, 15718, 15624, 15529, 548 + 15435, 15340, 15245, 15150, 15055, 549 + 14960, 14864, 14769, 14673, 14577, 550 + 14481, 14385, 14288, 14192, 14095, 551 + 13998, 13901, 13804, 13706, 13609, 552 + 13511, 13414, 13316, 13218, 13119, 553 + 13021, 12923, 12824, 12725, 12626, 554 + 12527, 12428, 12329, 12230, 12130, 555 + 12030, 11930, 11831, 11730, 11630, 556 + 11530, 11430, 11329, 11228, 11128, 557 + 11027, 10926, 10824, 10723, 10622, 558 + 10520, 10419, 10317, 10215, 10113, 559 + 10011, 9909, 9807, 9704, 9602, 560 + 9499, 9397, 9294, 9191, 9088, 561 + 8985, 8882, 8778, 8675, 8572, 562 + 8468, 8364, 8261, 8157, 8053, 563 + 7949, 7845, 7741, 7637, 7532, 564 + 7428, 7323, 7219, 7114, 7009, 565 + 6905, 6800, 6695, 6590, 6485, 566 + 6380, 6274, 6169, 6064, 5958, 567 + 5853, 5747, 5642, 5536, 5430, 568 + 5325, 5219, 5113, 5007, 4901, 569 + 4795, 4689, 4583, 4476, 4370, 570 + 4264, 4157, 4051, 3945, 3838, 571 + 3732, 3625, 3518, 3412, 3305, 572 + 3198, 3092, 2985, 2878, 2771, 573 + 2664, 2558, 2451, 2344, 2237, 574 + 2130, 2023, 1916, 1809, 1702, 575 + 1594, 1487, 1380, 1273, 1166, 576 + 1059, 952, 844, 737, 630, 577 + 523, 416, 308, 201, 94, 578 + -13, -121, -228, -335, -442, 579 + -550, -657, -764, -871, -978, 580 + -1086, -1193, -1300, -1407, -1514, 581 + -1621, -1728, -1835, -1942, -2049, 582 + -2157, -2263, -2370, -2477, -2584, 583 + -2691, -2798, -2905, -3012, -3118, 584 + -3225, -3332, -3439, -3545, -3652, 585 + -3758, -3865, -3971, -4078, -4184, 586 + -4290, -4397, -4503, -4609, -4715, 587 + -4821, -4927, -5033, -5139, -5245, 588 + -5351, -5457, -5562, -5668, -5774, 589 + -5879, -5985, -6090, -6195, -6301, 590 + -6406, -6511, -6616, -6721, -6826, 591 + -6931, -7036, -7140, -7245, -7349, 592 + -7454, -7558, -7663, -7767, -7871, 593 + -7975, -8079, -8183, -8287, -8390, 594 + -8494, -8597, -8701, -8804, -8907, 595 + -9011, -9114, -9217, -9319, -9422, 596 + -9525, -9627, -9730, 
-9832, -9934, 597 + -10037, -10139, -10241, -10342, -10444, 598 + -10546, -10647, -10748, -10850, -10951, 599 + -11052, -11153, -11253, -11354, -11455, 600 + -11555, -11655, -11756, -11856, -11955, 601 + -12055, -12155, -12254, -12354, -12453, 602 + -12552, -12651, -12750, -12849, -12947, 603 + -13046, -13144, -13242, -13340, -13438, 604 + -13536, -13633, -13731, -13828, -13925, 605 + -14022, -14119, -14216, -14312, -14409, 606 + -14505, -14601, -14697, -14793, -14888, 607 + -14984, -15079, -15174, -15269, -15364, 608 + -15459, -15553, -15647, -15741, -15835, 609 + -15929, -16023, -16116, -16210, -16303, 610 + -16396, -16488, -16581, -16673, -16766, 611 + -16858, -16949, -17041, -17133, -17224, 612 + -17315, -17406, -17497, -17587, -17678, 613 + -17768, -17858, -17948, -18037, -18127, 614 + -18216, -18305, -18394, -18483, -18571, 615 + -18659, -18747, -18835, -18923, -19010, 616 + -19098, -19185, -19271, -19358, -19444, 617 + -19531, -19617, -19702, -19788, -19873, 618 + -19959, -20043, -20128, -20213, -20297, 619 + -20381, -20465, -20549, -20632, -20715, 620 + -20798, -20881, -20963, -21046, -21128, 621 + -21210, -21291, -21373, -21454, -21535, 622 + -21616, -21696, -21776, -21856, -21936, 623 + -22016, -22095, -22174, -22253, -22331, 624 + -22410, -22488, -22566, -22643, -22721, 625 + -22798, -22875, -22951, -23028, -23104, 626 + -23180, -23256, -23331, -23406, -23481, 627 + -23556, -23630, -23704, -23778, -23852, 628 + -23925, -23998, -24071, -24144, -24216, 629 + -24288, -24360, -24432, -24503, -24574, 630 + -24645, -24716, -24786, -24856, -24926, 631 + -24995, -25064, -25133, -25202, -25270, 632 + -25339, -25406, -25474, -25541, -25608, 633 + -25675, -25742, -25808, -25874, -25939, 634 + -26005, -26070, -26135, -26199, -26264, 635 + -26327, -26391, -26455, -26518, -26581, 636 + -26643, -26705, -26767, -26829, -26891, 637 + -26952, -27013, -27073, -27133, -27193, 638 + -27253, -27312, -27372, -27430, -27489, 639 + -27547, -27605, -27663, -27720, -27777, 640 + 
-27834, -27890, -27946, -28002, -28058, 641 + -28113, -28168, -28223, -28277, -28331, 642 + -28385, -28438, -28491, -28544, -28596, 643 + -28649, -28701, -28752, -28803, -28854, 644 + -28905, -28955, -29006, -29055, -29105, 645 + -29154, -29203, -29251, -29299, -29347, 646 + -29395, -29442, -29489, -29535, -29582, 647 + -29628, -29673, -29719, -29764, -29808, 648 + -29853, -29897, -29941, -29984, -30027, 649 + -30070, -30112, -30154, -30196, -30238, 650 + -30279, -30320, -30360, -30400, -30440, 651 + -30480, -30519, -30558, -30596, -30635, 652 + -30672, -30710, -30747, -30784, -30821, 653 + -30857, -30893, -30929, -30964, -30999, 654 + -31033, -31068, -31102, -31135, -31168, 655 + -31201, -31234, -31266, -31298, -31330, 656 + -31361, -31392, -31422, -31453, -31483, 657 + -31512, -31541, -31570, -31599, -31627, 658 + -31655, -31682, -31710, -31737, -31763, 659 + -31789, -31815, -31841, -31866, -31891, 660 + -31915, -31939, -31963, -31986, -32010, 661 + -32032, -32055, -32077, -32099, -32120, 662 + -32141, -32162, -32182, -32202, -32222, 663 + -32241, -32260, -32279, -32297, -32315, 664 + -32333, -32350, -32367, -32383, -32399, 665 + -32415, -32431, -32446, -32461, -32475, 666 + -32489, -32503, -32517, -32530, -32542, 667 + -32555, -32567, -32579, -32590, -32601, 668 + -32612, -32622, -32632, -32641, -32651, 669 + -32659, -32668, -32676, -32684, -32692, 670 + -32699, -32706, -32712, -32718, -32724, 671 + -32729, -32734, -32739, -32743, -32747, 672 + -32751, -32754, -32757, -32760, -32762, 673 + -32764, -32765, -32767, -32767, -32767, 674 + 32767, 32767, 32765, 32761, 32756, 675 + 32750, 32742, 32732, 32722, 32710, 676 + 32696, 32681, 32665, 32647, 32628, 677 + 32608, 32586, 32562, 32538, 32512, 678 + 32484, 32455, 32425, 32393, 32360, 679 + 32326, 32290, 32253, 32214, 32174, 680 + 32133, 32090, 32046, 32001, 31954, 681 + 31906, 31856, 31805, 31753, 31700, 682 + 31645, 31588, 31530, 31471, 31411, 683 + 31349, 31286, 31222, 31156, 31089, 684 + 31020, 30951, 30880, 
30807, 30733, 685 + 30658, 30582, 30504, 30425, 30345, 686 + 30263, 30181, 30096, 30011, 29924, 687 + 29836, 29747, 29656, 29564, 29471, 688 + 29377, 29281, 29184, 29086, 28987, 689 + 28886, 28784, 28681, 28577, 28471, 690 + 28365, 28257, 28147, 28037, 27925, 691 + 27812, 27698, 27583, 27467, 27349, 692 + 27231, 27111, 26990, 26868, 26744, 693 + 26620, 26494, 26367, 26239, 26110, 694 + 25980, 25849, 25717, 25583, 25449, 695 + 25313, 25176, 25038, 24900, 24760, 696 + 24619, 24477, 24333, 24189, 24044, 697 + 23898, 23751, 23602, 23453, 23303, 698 + 23152, 22999, 22846, 22692, 22537, 699 + 22380, 22223, 22065, 21906, 21746, 700 + 21585, 21423, 21261, 21097, 20933, 701 + 20767, 20601, 20434, 20265, 20096, 702 + 19927, 19756, 19584, 19412, 19239, 703 + 19065, 18890, 18714, 18538, 18361, 704 + 18183, 18004, 17824, 17644, 17463, 705 + 17281, 17098, 16915, 16731, 16546, 706 + 16361, 16175, 15988, 15800, 15612, 707 + 15423, 15234, 15043, 14852, 14661, 708 + 14469, 14276, 14083, 13889, 13694, 709 + 13499, 13303, 13107, 12910, 12713, 710 + 12515, 12317, 12118, 11918, 11718, 711 + 11517, 11316, 11115, 10913, 10710, 712 + 10508, 10304, 10100, 9896, 9691, 713 + 9486, 9281, 9075, 8869, 8662, 714 + 8455, 8248, 8040, 7832, 7623, 715 + 7415, 7206, 6996, 6787, 6577, 716 + 6366, 6156, 5945, 5734, 5523, 717 + 5311, 5100, 4888, 4675, 4463, 718 + 4251, 4038, 3825, 3612, 3399, 719 + 3185, 2972, 2758, 2544, 2330, 720 + 2116, 1902, 1688, 1474, 1260, 721 + 1045, 831, 617, 402, 188, 722 + -27, -241, -456, -670, -885, 723 + -1099, -1313, -1528, -1742, -1956, 724 + -2170, -2384, -2598, -2811, -3025, 725 + -3239, -3452, -3665, -3878, -4091, 726 + -4304, -4516, -4728, -4941, -5153, 727 + -5364, -5576, -5787, -5998, -6209, 728 + -6419, -6629, -6839, -7049, -7258, 729 + -7467, -7676, -7884, -8092, -8300, 730 + -8507, -8714, -8920, -9127, -9332, 731 + -9538, -9743, -9947, -10151, -10355, 732 + -10558, -10761, -10963, -11165, -11367, 733 + -11568, -11768, -11968, -12167, -12366, 734 + -12565, -12762, 
-12960, -13156, -13352, 735 + -13548, -13743, -13937, -14131, -14324, 736 + -14517, -14709, -14900, -15091, -15281, 737 + -15470, -15659, -15847, -16035, -16221, 738 + -16407, -16593, -16777, -16961, -17144, 739 + -17326, -17508, -17689, -17869, -18049, 740 + -18227, -18405, -18582, -18758, -18934, 741 + -19108, -19282, -19455, -19627, -19799, 742 + -19969, -20139, -20308, -20475, -20642, 743 + -20809, -20974, -21138, -21301, -21464, 744 + -21626, -21786, -21946, -22105, -22263, 745 + -22420, -22575, -22730, -22884, -23037, 746 + -23189, -23340, -23490, -23640, -23788, 747 + -23935, -24080, -24225, -24369, -24512, 748 + -24654, -24795, -24934, -25073, -25211, 749 + -25347, -25482, -25617, -25750, -25882, 750 + -26013, -26143, -26272, -26399, -26526, 751 + -26651, -26775, -26898, -27020, -27141, 752 + -27260, -27379, -27496, -27612, -27727, 753 + -27841, -27953, -28065, -28175, -28284, 754 + -28391, -28498, -28603, -28707, -28810, 755 + -28911, -29012, -29111, -29209, -29305, 756 + -29401, -29495, -29587, -29679, -29769, 757 + -29858, -29946, -30032, -30118, -30201, 758 + -30284, -30365, -30445, -30524, -30601, 759 + -30677, -30752, -30825, -30897, -30968, 760 + -31038, -31106, -31172, -31238, -31302, 761 + -31365, -31426, -31486, -31545, -31602, 762 + -31658, -31713, -31766, -31818, -31869, 763 + -31918, -31966, -32012, -32058, -32101, 764 + -32144, -32185, -32224, -32262, -32299, 765 + -32335, -32369, -32401, -32433, -32463, 766 + -32491, -32518, -32544, -32568, -32591, 767 + -32613, -32633, -32652, -32669, -32685, 768 + -32700, -32713, -32724, -32735, -32744, 769 + -32751, -32757, -32762, -32766, -32767, 770 + 32767, 32764, 32755, 32741, 32720, 771 + 32694, 32663, 32626, 32583, 32535, 772 + 32481, 32421, 32356, 32286, 32209, 773 + 32128, 32041, 31948, 31850, 31747, 774 + 31638, 31523, 31403, 31278, 31148, 775 + 31012, 30871, 30724, 30572, 30415, 776 + 30253, 30086, 29913, 29736, 29553, 777 + 29365, 29172, 28974, 28771, 28564, 778 + 28351, 28134, 27911, 27684, 
27452, 779 + 27216, 26975, 26729, 26478, 26223, 780 + 25964, 25700, 25432, 25159, 24882, 781 + 24601, 24315, 24026, 23732, 23434, 782 + 23133, 22827, 22517, 22204, 21886, 783 + 21565, 21240, 20912, 20580, 20244, 784 + 19905, 19563, 19217, 18868, 18516, 785 + 18160, 17802, 17440, 17075, 16708, 786 + 16338, 15964, 15588, 15210, 14829, 787 + 14445, 14059, 13670, 13279, 12886, 788 + 12490, 12093, 11693, 11291, 10888, 789 + 10482, 10075, 9666, 9255, 8843, 790 + 8429, 8014, 7597, 7180, 6760, 791 + 6340, 5919, 5496, 5073, 4649, 792 + 4224, 3798, 3372, 2945, 2517, 793 + 2090, 1661, 1233, 804, 375, 794 + -54, -483, -911, -1340, -1768, 795 + -2197, -2624, -3052, -3479, -3905, 796 + -4330, -4755, -5179, -5602, -6024, 797 + -6445, -6865, -7284, -7702, -8118, 798 + -8533, -8946, -9358, -9768, -10177, 799 + -10584, -10989, -11392, -11793, -12192, 800 + -12589, -12984, -13377, -13767, -14155, 801 + -14541, -14924, -15305, -15683, -16058, 802 + -16430, -16800, -17167, -17531, -17892, 803 + -18249, -18604, -18956, -19304, -19649, 804 + -19990, -20329, -20663, -20994, -21322, 805 + -21646, -21966, -22282, -22595, -22904, 806 + -23208, -23509, -23806, -24099, -24387, 807 + -24672, -24952, -25228, -25499, -25766, 808 + -26029, -26288, -26541, -26791, -27035, 809 + -27275, -27511, -27741, -27967, -28188, 810 + -28405, -28616, -28823, -29024, -29221, 811 + -29412, -29599, -29780, -29957, -30128, 812 + -30294, -30455, -30611, -30761, -30906, 813 + -31046, -31181, -31310, -31434, -31552, 814 + -31665, -31773, -31875, -31972, -32063, 815 + -32149, -32229, -32304, -32373, -32437, 816 + -32495, -32547, -32594, -32635, -32671, 817 + -32701, -32726, -32745, -32758, -32766, 818 + 32767, 32754, 32717, 32658, 32577, 819 + 32473, 32348, 32200, 32029, 31837, 820 + 31624, 31388, 31131, 30853, 30553, 821 + 30232, 29891, 29530, 29148, 28746, 822 + 28324, 27883, 27423, 26944, 26447, 823 + 25931, 25398, 24847, 24279, 23695, 824 + 23095, 22478, 21846, 21199, 20538, 825 + 19863, 19174, 18472, 17757, 
17030, 826 + 16291, 15541, 14781, 14010, 13230, 827 + 12441, 11643, 10837, 10024, 9204, 828 + 8377, 7545, 6708, 5866, 5020, 829 + 4171, 3319, 2464, 1608, 751, 830 + -107, -965, -1822, -2678, -3532, 831 + -4383, -5232, -6077, -6918, -7754, 832 + -8585, -9409, -10228, -11039, -11843, 833 + -12639, -13426, -14204, -14972, -15730, 834 + -16477, -17213, -17937, -18648, -19347, 835 + -20033, -20705, -21363, -22006, -22634, 836 + -23246, -23843, -24423, -24986, -25533, 837 + -26062, -26573, -27066, -27540, -27995, 838 + -28431, -28848, -29245, -29622, -29979, 839 + -30315, -30630, -30924, -31197, -31449, 840 + -31679, -31887, -32074, -32239, -32381, 841 + -32501, -32600, -32675, -32729, -32759, 842 + }; 571 843 #endif 572 844 573 845 static const CELTMode mode48000_960_120 ICONST_ATTR = {
+31 -38
lib/rbcodec/codecs/libopus/celt/vq.c
··· 37 37 #include "os_support.h" 38 38 #include "bands.h" 39 39 #include "rate.h" 40 + #include "pitch.h" 40 41 42 + #if defined(MIPSr1_ASM) 43 + #include "mips/vq_mipsr1.h" 44 + #endif 45 + 46 + #ifndef OVERRIDE_vq_exp_rotation1 41 47 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 42 48 { 43 49 int i; 50 + opus_val16 ms; 44 51 celt_norm *Xptr; 45 52 Xptr = X; 53 + ms = NEG16(s); 46 54 for (i=0;i<len-stride;i++) 47 55 { 48 56 celt_norm x1, x2; 49 57 x1 = Xptr[0]; 50 58 x2 = Xptr[stride]; 51 - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 52 - *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 59 + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 60 + *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 53 61 } 54 62 Xptr = &X[len-2*stride-1]; 55 63 for (i=len-2*stride-1;i>=0;i--) ··· 57 65 celt_norm x1, x2; 58 66 x1 = Xptr[0]; 59 67 x2 = Xptr[stride]; 60 - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 61 - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 68 + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 69 + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 62 70 } 63 71 } 72 + #endif /* OVERRIDE_vq_exp_rotation1 */ 64 73 65 74 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 66 75 { ··· 91 100 } 92 101 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 93 102 extract_collapse_mask().*/ 94 - len /= stride; 103 + len = celt_udiv(len, stride); 95 104 for (i=0;i<stride;i++) 96 105 { 97 106 if (dir < 0) ··· 140 149 return 1; 141 150 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 142 151 exp_rotation().*/ 143 - N0 = N/B; 152 + N0 = celt_udiv(N, B); 144 153 collapse_mask = 0; 145 154 i=0; do { 146 155 int j; 156 + unsigned 
tmp=0; 147 157 j=0; do { 148 - collapse_mask |= (iy[i*N0+j]!=0)<<i; 158 + tmp |= iy[i*N0+j]; 149 159 } while (++j<N0); 160 + collapse_mask |= (tmp!=0)<<i; 150 161 } while (++i<B); 151 162 return collapse_mask; 152 163 } ··· 322 333 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 323 334 ec_dec *dec, opus_val16 gain) 324 335 { 325 - int i; 326 336 opus_val32 Ryy; 327 337 unsigned collapse_mask; 328 - /* VARDECL(int, iy); 329 - SAVE_STACK; */ 330 - 331 - /* the difference between the last two values of eband5ms shifted by maxLM 332 - which is 22 << 3 with the static mode */ 333 - int iy[176]; 338 + VARDECL(int, iy); 339 + SAVE_STACK; 334 340 335 341 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); 336 342 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); 337 - /* ALLOC(iy, N, int); */ 338 - decode_pulses(iy, N, K, dec); 339 - Ryy = 0; 340 - i=0; 341 - do { 342 - Ryy = MAC16_16(Ryy, iy[i], iy[i]); 343 - } while (++i < N); 343 + ALLOC(iy, N, int); 344 + Ryy = decode_pulses(iy, N, K, dec); 344 345 normalise_residual(iy, X, N, Ryy, gain); 345 346 exp_rotation(X, N, -1, B, K, spread); 346 347 collapse_mask = extract_collapse_mask(iy, N, B); 347 - /* RESTORE_STACK; */ 348 + RESTORE_STACK; 348 349 return collapse_mask; 349 350 } 350 351 352 + #ifndef OVERRIDE_renormalise_vector 351 353 void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 352 354 { 353 355 int i; 354 356 #ifdef FIXED_POINT 355 357 int k; 356 358 #endif 357 - opus_val32 E = EPSILON; 359 + opus_val32 E; 358 360 opus_val16 g; 359 361 opus_val32 t; 360 - celt_norm *xptr = X; 361 - for (i=0;i<N;i++) 362 - { 363 - E = MAC16_16(E, *xptr, *xptr); 364 - xptr++; 365 - } 362 + celt_norm *xptr; 363 + E = EPSILON + celt_inner_prod(X, X, N); 366 364 #ifdef FIXED_POINT 367 365 k = celt_ilog2(E)>>1; 368 366 #endif ··· 377 375 } 378 376 /*return celt_sqrt(E);*/ 379 377 } 378 + #endif /* OVERRIDE_renormalise_vector */ 380 379 381 - int stereo_itheta(celt_norm *X, 
celt_norm *Y, int stereo, int N) 380 + int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) 382 381 { 383 382 int i; 384 383 int itheta; ··· 397 396 Eside = MAC16_16(Eside, s, s); 398 397 } 399 398 } else { 400 - for (i=0;i<N;i++) 401 - { 402 - celt_norm m, s; 403 - m = X[i]; 404 - s = Y[i]; 405 - Emid = MAC16_16(Emid, m, m); 406 - Eside = MAC16_16(Eside, s, s); 407 - } 399 + Emid += celt_inner_prod(X, X, N); 400 + Eside += celt_inner_prod(Y, Y, N); 408 401 } 409 402 mid = celt_sqrt(Emid); 410 403 side = celt_sqrt(Eside);
+1 -1
lib/rbcodec/codecs/libopus/celt/vq.h
··· 65 65 66 66 void renormalise_vector(celt_norm *X, int N, opus_val16 gain); 67 67 68 - int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N); 68 + int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N); 69 69 70 70 #endif /* VQ_H */
+3 -1
lib/rbcodec/codecs/libopus/config.h
··· 9 9 #define OPUS_BUILD 10 10 11 11 /* alloc stuff */ 12 - #define NONTHREADSAFE_PSEUDOSTACK 12 + #define VAR_ARRAYS 13 + #define NORM_ALIASING_HACK 13 14 14 15 #define OVERRIDE_OPUS_ALLOC 15 16 #define OVERRIDE_OPUS_FREE ··· 40 41 #endif 41 42 42 43 #if defined(CPU_ARM) 44 + #define OPUS_ARM_ASM 43 45 #if ARM_ARCH == 4 44 46 #define OPUS_ARM_INLINE_ASM 45 47 #elif ARM_ARCH > 4
+21
lib/rbcodec/codecs/libopus/opus.c
··· 168 168 } 169 169 } 170 170 171 + int opus_packet_get_samples_per_frame(const unsigned char *data, 172 + opus_int32 Fs) 173 + { 174 + int audiosize; 175 + if (data[0]&0x80) 176 + { 177 + audiosize = ((data[0]>>3)&0x3); 178 + audiosize = (Fs<<audiosize)/400; 179 + } else if ((data[0]&0x60) == 0x60) 180 + { 181 + audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; 182 + } else { 183 + audiosize = ((data[0]>>3)&0x3); 184 + if (audiosize == 3) 185 + audiosize = Fs*60/1000; 186 + else 187 + audiosize = (Fs<<audiosize)/100; 188 + } 189 + return audiosize; 190 + } 191 + 171 192 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, 172 193 int self_delimited, unsigned char *out_toc, 173 194 const unsigned char *frames[48], opus_int16 size[48],
+47 -38
lib/rbcodec/codecs/libopus/opus_decoder.c
··· 77 77 opus_uint32 rangeFinal; 78 78 }; 79 79 80 - #ifdef FIXED_POINT 81 - static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 82 - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; 83 - } 84 - #endif 85 - 86 80 87 81 int opus_decoder_get_size(int channels) 88 82 { ··· 222 216 VARDECL(opus_val16, pcm_transition_silk); 223 217 int pcm_transition_celt_size; 224 218 VARDECL(opus_val16, pcm_transition_celt); 225 - opus_val16 *pcm_transition = NULL; /* Silence false positive "may be used uninitialized" warning */ 219 + opus_val16 *pcm_transition=NULL; 226 220 int redundant_audio_size; 227 221 VARDECL(opus_val16, redundant_audio); 228 222 ··· 237 231 int F2_5, F5, F10, F20; 238 232 const opus_val16 *window; 239 233 opus_uint32 redundant_rng = 0; 234 + int celt_accum; 240 235 ALLOC_STACK; 241 236 242 237 silk_dec = (char*)st+st->silk_dec_offset; ··· 302 297 } 303 298 } 304 299 300 + /* In fixed-point, we can tell CELT to do the accumulation on top of the 301 + SILK PCM buffer. This saves some stack space. */ 302 + #ifdef FIXED_POINT 303 + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); 304 + #else 305 + celt_accum = 0; 306 + #endif 307 + 305 308 pcm_transition_silk_size = ALLOC_NONE; 306 309 pcm_transition_celt_size = ALLOC_NONE; 307 310 if (data!=NULL && st->prev_mode > 0 && ( ··· 332 335 } 333 336 334 337 /* Don't allocate any memory when in CELT-only mode */ 335 - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; 338 + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? 
IMAX(F10, frame_size)*st->channels : ALLOC_NONE; 336 339 ALLOC(pcm_silk, pcm_silk_size, opus_int16); 337 340 338 341 /* SILK processing */ 339 342 if (mode != MODE_CELT_ONLY) 340 343 { 341 344 int lost_flag, decoded_samples; 342 - opus_int16 *pcm_ptr = pcm_silk; 345 + opus_int16 *pcm_ptr; 346 + #ifdef FIXED_POINT 347 + if (celt_accum) 348 + pcm_ptr = pcm; 349 + else 350 + #endif 351 + pcm_ptr = pcm_silk; 343 352 344 353 if (st->prev_mode==MODE_CELT_ONLY) 345 354 silk_InitDecoder( silk_dec ); ··· 469 478 { 470 479 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 471 480 celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, 472 - redundant_audio, F5, NULL); 481 + redundant_audio, F5, NULL, 0); 473 482 celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); 474 483 } 475 484 ··· 484 493 celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); 485 494 /* Decode CELT */ 486 495 celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, 487 - len, pcm, celt_frame_size, &dec); 496 + len, pcm, celt_frame_size, &dec, celt_accum); 488 497 } else { 489 498 unsigned char silence[2] = {0xFF, 0xFF}; 490 - for (i=0;i<frame_size*st->channels;i++) 491 - pcm[i] = 0; 499 + if (!celt_accum) 500 + { 501 + for (i=0;i<frame_size*st->channels;i++) 502 + pcm[i] = 0; 503 + } 492 504 /* For hybrid -> SILK transitions, we let the CELT MDCT 493 505 do a fade-out by decoding a silence frame */ 494 506 if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) 495 507 { 496 508 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 497 - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); 509 + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); 498 510 } 499 511 } 500 512 501 - if (mode != MODE_CELT_ONLY) 513 + if (mode != MODE_CELT_ONLY && !celt_accum) 502 514 { 503 515 #ifdef FIXED_POINT 504 516 for (i=0;i<frame_size*st->channels;i++) 505 - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); 517 + pcm[i] = SAT16(ADD32(pcm[i], 
pcm_silk[i])); 506 518 #else 507 519 for (i=0;i<frame_size*st->channels;i++) 508 520 pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); ··· 521 533 celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); 522 534 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 523 535 524 - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); 536 + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); 525 537 celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); 526 538 smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, 527 539 pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); ··· 717 729 { 718 730 VARDECL(opus_int16, out); 719 731 int ret, i; 732 + int nb_samples; 720 733 ALLOC_STACK; 721 734 722 735 if(frame_size<=0) 723 736 { 724 737 RESTORE_STACK; 725 738 return OPUS_BAD_ARG; 739 + } 740 + if (data != NULL && len > 0 && !decode_fec) 741 + { 742 + nb_samples = opus_decoder_get_nb_samples(st, data, len); 743 + if (nb_samples>0) 744 + frame_size = IMIN(frame_size, nb_samples); 745 + else 746 + return OPUS_INVALID_PACKET; 726 747 } 727 748 ALLOC(out, frame_size*st->channels, opus_int16); 728 749 ··· 744 765 { 745 766 VARDECL(float, out); 746 767 int ret, i; 768 + int nb_samples; 747 769 ALLOC_STACK; 748 770 749 771 if(frame_size<=0) ··· 752 774 return OPUS_BAD_ARG; 753 775 } 754 776 777 + if (data != NULL && len > 0 && !decode_fec) 778 + { 779 + nb_samples = opus_decoder_get_nb_samples(st, data, len); 780 + if (nb_samples>0) 781 + frame_size = IMIN(frame_size, nb_samples); 782 + else 783 + return OPUS_INVALID_PACKET; 784 + } 755 785 ALLOC(out, frame_size*st->channels, float); 756 786 757 787 ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); ··· 909 939 bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3); 910 940 } 911 941 return bandwidth; 912 - } 913 - 914 - int opus_packet_get_samples_per_frame(const 
unsigned char *data, 915 - opus_int32 Fs) 916 - { 917 - int audiosize; 918 - if (data[0]&0x80) 919 - { 920 - audiosize = ((data[0]>>3)&0x3); 921 - audiosize = (Fs<<audiosize)/400; 922 - } else if ((data[0]&0x60) == 0x60) 923 - { 924 - audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; 925 - } else { 926 - audiosize = ((data[0]>>3)&0x3); 927 - if (audiosize == 3) 928 - audiosize = Fs*60/1000; 929 - else 930 - audiosize = (Fs<<audiosize)/100; 931 - } 932 - return audiosize; 933 942 } 934 943 935 944 int opus_packet_get_nb_channels(const unsigned char *data)
+25 -25
lib/rbcodec/codecs/libopus/opus_defines.h
··· 454 454 * @hideinitializer */ 455 455 #define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) 456 456 457 - /** Gets the sampling rate the encoder or decoder was initialized with. 458 - * This simply returns the <code>Fs</code> value passed to opus_encoder_init() 459 - * or opus_decoder_init(). 460 - * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. 461 - * @hideinitializer 462 - */ 463 - #define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) 464 - 465 457 /** Gets the total samples of delay added by the entire codec. 466 458 * This can be queried by the encoder and then the provided number of samples can be 467 459 * skipped on from the start of the decoder's output to provide time aligned input ··· 544 536 * 24 (default: 24). 545 537 * @hideinitializer */ 546 538 #define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) 547 - 548 - /** Gets the duration (in samples) of the last packet successfully decoded or concealed. 549 - * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). 550 - * @hideinitializer */ 551 - #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) 552 539 553 540 /** Configures the encoder's use of variable duration frames. 554 541 * When variable duration is enabled, the encoder is free to use a shorter frame ··· 648 635 * 649 636 * @hideinitializer */ 650 637 #define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) 651 - 652 - /** Gets the pitch of the last decoded frame, if available. 653 - * This can be used for any post-processing algorithm requiring the use of pitch, 654 - * e.g. time stretching/shortening. If the last frame was not voiced, or if the 655 - * pitch was not coded in the frame, then zero is returned. 656 - * 657 - * This CTL is only implemented for decoder instances. 
658 - * 659 - * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) 660 - * 661 - * @hideinitializer */ 662 - #define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) 663 638 664 639 /** Gets the encoder's configured bandpass or the decoder's last bandpass. 665 640 * @see OPUS_SET_BANDWIDTH ··· 675 650 * @hideinitializer */ 676 651 #define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) 677 652 653 + /** Gets the sampling rate the encoder or decoder was initialized with. 654 + * This simply returns the <code>Fs</code> value passed to opus_encoder_init() 655 + * or opus_decoder_init(). 656 + * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. 657 + * @hideinitializer 658 + */ 659 + #define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) 660 + 678 661 /**@}*/ 679 662 680 663 /** @defgroup opus_decoderctls Decoder related CTLs ··· 698 681 * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units. 699 682 * @hideinitializer */ 700 683 #define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) 684 + 685 + /** Gets the duration (in samples) of the last packet successfully decoded or concealed. 686 + * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). 687 + * @hideinitializer */ 688 + #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) 689 + 690 + /** Gets the pitch of the last decoded frame, if available. 691 + * This can be used for any post-processing algorithm requiring the use of pitch, 692 + * e.g. time stretching/shortening. If the last frame was not voiced, or if the 693 + * pitch was not coded in the frame, then zero is returned. 694 + * 695 + * This CTL is only implemented for decoder instances. 
696 + * 697 + * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) 698 + * 699 + * @hideinitializer */ 700 + #define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) 701 701 702 702 /**@}*/ 703 703
+2 -5
lib/rbcodec/codecs/libopus/opus_private.h
··· 86 86 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); 87 87 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); 88 88 89 - int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, 90 - int bitrate, opus_val16 tonality, float *mem, int buffering, 91 - downmix_func downmix); 92 - 93 89 int encode_size(int size, unsigned char *data); 94 90 95 91 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); ··· 104 100 105 101 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, 106 102 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, 107 - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); 103 + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, 104 + int analysis_channels, downmix_func downmix, int float_api); 108 105 109 106 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, 110 107 opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+15 -6
lib/rbcodec/codecs/libopus/silk/CNG.c
··· 34 34 35 35 /* Generates excitation for CNG LPC synthesis */ 36 36 static OPUS_INLINE void silk_CNG_exc( 37 - opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ 37 + opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ 38 38 opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ 39 39 opus_int32 Gain_Q16, /* I Gain to apply */ 40 40 opus_int length, /* I Length */ ··· 55 55 idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); 56 56 silk_assert( idx >= 0 ); 57 57 silk_assert( idx <= CNG_BUF_MASK_MAX ); 58 - residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); 58 + exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); 59 59 } 60 60 *rand_seed = seed; 61 61 } ··· 85 85 ) 86 86 { 87 87 opus_int i, subfr; 88 - opus_int32 sum_Q6, max_Gain_Q16; 88 + opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; 89 89 opus_int16 A_Q12[ MAX_LPC_ORDER ]; 90 90 silk_CNG_struct *psCNG = &psDec->sCNG; 91 91 SAVE_STACK; ··· 125 125 /* Add CNG when packet is lost or during DTX */ 126 126 if( psDec->lossCnt ) { 127 127 VARDECL( opus_int32, CNG_sig_Q10 ); 128 - 129 128 ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); 130 129 131 130 /* Generate CNG excitation */ 132 - silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed ); 131 + gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); 132 + if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { 133 + gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); 134 + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); 135 + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); 136 + } else { 137 + gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); 138 + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); 139 + gain_Q16 = 
silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); 140 + } 141 + silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); 133 142 134 143 /* Convert CNG NLSF to filter representation */ 135 144 silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); ··· 162 171 /* Update states */ 163 172 CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); 164 173 165 - frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); 174 + frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); 166 175 } 167 176 silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); 168 177 } else {
+36 -17
lib/rbcodec/codecs/libopus/silk/PLC.c
··· 165 165 psPLC->nb_subfr = psDec->nb_subfr; 166 166 } 167 167 168 + static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, 169 + const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) 170 + { 171 + int i, k; 172 + VARDECL( opus_int16, exc_buf ); 173 + opus_int16 *exc_buf_ptr; 174 + SAVE_STACK; 175 + ALLOC( exc_buf, 2*subfr_length, opus_int16 ); 176 + /* Find random noise component */ 177 + /* Scale previous excitation signal */ 178 + exc_buf_ptr = exc_buf; 179 + for( k = 0; k < 2; k++ ) { 180 + for( i = 0; i < subfr_length; i++ ) { 181 + exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( 182 + silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); 183 + } 184 + exc_buf_ptr += subfr_length; 185 + } 186 + /* Find the subframe with lowest energy of the last two and use that as random noise generator */ 187 + silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); 188 + silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); 189 + RESTORE_STACK; 190 + } 191 + 168 192 static OPUS_INLINE void silk_PLC_conceal( 169 193 silk_decoder_state *psDec, /* I/O Decoder state */ 170 194 silk_decoder_control *psDecCtrl, /* I/O Decoder control */ ··· 177 201 opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; 178 202 opus_int32 LPC_pred_Q10, LTP_pred_Q12; 179 203 opus_int16 rand_scale_Q14; 180 - opus_int16 *B_Q14, *exc_buf_ptr; 204 + opus_int16 *B_Q14; 181 205 opus_int32 *sLPC_Q14_ptr; 182 - VARDECL( opus_int16, exc_buf ); 183 206 opus_int16 A_Q12[ MAX_LPC_ORDER ]; 207 + #ifdef SMALL_FOOTPRINT 208 + opus_int16 *sLTP; 209 + #else 184 210 VARDECL( opus_int16, sLTP ); 211 + #endif 185 212 VARDECL( opus_int32, sLTP_Q14 ); 186 213 silk_PLC_struct *psPLC = &psDec->sPLC; 187 214 opus_int32 prevGain_Q10[2]; 188 215 SAVE_STACK; 189 216 190 - ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); 217 + ALLOC( sLTP_Q14, 
psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 218 + #ifdef SMALL_FOOTPRINT 219 + /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */ 220 + sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; 221 + #else 191 222 ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); 192 - ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 223 + #endif 193 224 194 225 prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); 195 226 prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); ··· 198 229 silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); 199 230 } 200 231 201 - /* Find random noise component */ 202 - /* Scale previous excitation signal */ 203 - exc_buf_ptr = exc_buf; 204 - for( k = 0; k < 2; k++ ) { 205 - for( i = 0; i < psPLC->subfr_length; i++ ) { 206 - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( 207 - silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) ); 208 - } 209 - exc_buf_ptr += psPLC->subfr_length; 210 - } 211 - /* Find the subframe with lowest energy of the last two and use that as random noise generator */ 212 - silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); 213 - silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); 232 + silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); 214 233 215 234 if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { 216 235 /* First sub-frame has lowest energy */
+5
lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
··· 587 587 #include "arm/SigProc_FIX_armv5e.h" 588 588 #endif 589 589 590 + #if defined(MIPSr1_ASM) 591 + #include "mips/sigproc_fix_mipsr1.h" 592 + #endif 593 + 594 + 590 595 #ifdef __cplusplus 591 596 } 592 597 #endif
+2 -2
lib/rbcodec/codecs/libopus/silk/code_signs.c
··· 76 76 /* Decodes signs of excitation */ 77 77 void silk_decode_signs( 78 78 ec_dec *psRangeDec, /* I/O Compressor data structure */ 79 - opus_int pulses[], /* I/O pulse signal */ 79 + opus_int16 pulses[], /* I/O pulse signal */ 80 80 opus_int length, /* I length of input */ 81 81 const opus_int signalType, /* I Signal type */ 82 82 const opus_int quantOffsetType, /* I Quantization offset type */ ··· 85 85 { 86 86 opus_int i, j, p; 87 87 opus_uint8 icdf[ 2 ]; 88 - opus_int *q_ptr; 88 + opus_int16 *q_ptr; 89 89 const opus_uint8 *icdf_ptr; 90 90 91 91 icdf[ 1 ] = 0;
+29 -8
lib/rbcodec/codecs/libopus/silk/dec_API.c
··· 31 31 #include "API.h" 32 32 #include "main.h" 33 33 #include "stack_alloc.h" 34 + #include "os_support.h" 34 35 35 36 /************************/ 36 37 /* Decoder Super Struct */ ··· 90 91 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 91 92 opus_int32 nSamplesOutDec, LBRR_symbol; 92 93 opus_int16 *samplesOut1_tmp[ 2 ]; 93 - VARDECL( opus_int16, samplesOut1_tmp_storage ); 94 + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); 95 + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); 94 96 VARDECL( opus_int16, samplesOut2_tmp ); 95 97 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 96 98 opus_int16 *resample_out_ptr; ··· 98 100 silk_decoder_state *channel_state = psDec->channel_state; 99 101 opus_int has_side; 100 102 opus_int stereo_to_mono; 103 + int delay_stack_alloc; 101 104 SAVE_STACK; 102 105 103 106 silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); ··· 196 199 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 197 200 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 198 201 if( channel_state[ n ].LBRR_flags[ i ] ) { 199 - opus_int pulses[ MAX_FRAME_LENGTH ]; 202 + opus_int16 pulses[ MAX_FRAME_LENGTH ]; 200 203 opus_int condCoding; 201 204 202 205 if( decControl->nChannelsInternal == 2 && n == 0 ) { ··· 251 254 psDec->channel_state[ 1 ].first_frame_after_reset = 1; 252 255 } 253 256 254 - ALLOC( samplesOut1_tmp_storage, 255 - decControl->nChannelsInternal*( 256 - channel_state[ 0 ].frame_length + 2 ), 257 + /* Check if the temp buffer fits into the output PCM buffer. If it fits, 258 + we can delay allocating the temp buffer until after the SILK peak stack 259 + usage. We need to use a < and not a <= because of the two extra samples. */ 260 + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal 261 + < decControl->API_sampleRate*decControl->nChannelsAPI; 262 + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? 
ALLOC_NONE 263 + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), 257 264 opus_int16 ); 258 - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; 259 - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage 260 - + channel_state[ 0 ].frame_length + 2; 265 + if ( delay_stack_alloc ) 266 + { 267 + samplesOut1_tmp[ 0 ] = samplesOut; 268 + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; 269 + } else { 270 + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; 271 + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; 272 + } 261 273 262 274 if( lostFlag == FLAG_DECODE_NORMAL ) { 263 275 has_side = !decode_only_middle; ··· 312 324 resample_out_ptr = samplesOut; 313 325 } 314 326 327 + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc 328 + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) 329 + : ALLOC_NONE, 330 + opus_int16 ); 331 + if ( delay_stack_alloc ) { 332 + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); 333 + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; 334 + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; 335 + } 315 336 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 316 337 317 338 /* Resample decoded signal to API_sampleRate */
+3 -4
lib/rbcodec/codecs/libopus/silk/decode_core.c
··· 39 39 silk_decoder_state *psDec, /* I/O Decoder state */ 40 40 silk_decoder_control *psDecCtrl, /* I Decoder control */ 41 41 opus_int16 xq[], /* O Decoded speech */ 42 - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 42 + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 43 43 ) 44 44 { 45 45 opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; ··· 49 49 opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; 50 50 opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; 51 51 VARDECL( opus_int32, res_Q14 ); 52 - /* VARDECL( opus_int32, sLPC_Q14 ); */ 52 + VARDECL( opus_int32, sLPC_Q14 ); 53 53 SAVE_STACK; 54 54 55 55 silk_assert( psDec->prev_gain_Q16 != 0 ); ··· 57 57 ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); 58 58 ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 59 59 ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); 60 - /* ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); */ 61 - opus_int32 sLPC_Q14[psDec->subfr_length + MAX_LPC_ORDER]; /* worst case is 80 + 16 */ 60 + ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); 62 61 63 62 offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; 64 63
+8 -8
lib/rbcodec/codecs/libopus/silk/decode_frame.c
··· 47 47 { 48 48 VARDECL( silk_decoder_control, psDecCtrl ); 49 49 opus_int L, mv_len, ret = 0; 50 - VARDECL( opus_int, pulses ); 51 50 SAVE_STACK; 52 51 53 52 L = psDec->frame_length; 54 53 ALLOC( psDecCtrl, 1, silk_decoder_control ); 55 - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & 56 - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int ); 57 54 psDecCtrl->LTP_scale_Q14 = 0; 58 55 59 56 /* Safety checks */ ··· 62 59 if( lostFlag == FLAG_DECODE_NORMAL || 63 60 ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) 64 61 { 62 + VARDECL( opus_int16, pulses ); 63 + ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & 64 + ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); 65 65 /*********************************************/ 66 66 /* Decode quantization indices of side info */ 67 67 /*********************************************/ ··· 107 107 silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); 108 108 silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); 109 109 110 - /****************************************************************/ 111 - /* Ensure smooth connection of extrapolated and good frames */ 112 - /****************************************************************/ 113 - silk_PLC_glue_frames( psDec, pOut, L ); 114 - 115 110 /************************************************/ 116 111 /* Comfort noise generation / estimation */ 117 112 /************************************************/ 118 113 silk_CNG( psDec, psDecCtrl, pOut, L ); 114 + 115 + /****************************************************************/ 116 + /* Ensure smooth connection of extrapolated and good frames */ 117 + /****************************************************************/ 118 + silk_PLC_glue_frames( psDec, pOut, L ); 119 119 120 120 /* Update some decoder state variables */ 121 121 psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
+3 -3
lib/rbcodec/codecs/libopus/silk/decode_pulses.c
··· 36 36 /*********************************************/ 37 37 void silk_decode_pulses( 38 38 ec_dec *psRangeDec, /* I/O Compressor data structure */ 39 - opus_int pulses[], /* O Excitation signal */ 39 + opus_int16 pulses[], /* O Excitation signal */ 40 40 const opus_int signalType, /* I Sigtype */ 41 41 const opus_int quantOffsetType, /* I quantOffsetType */ 42 42 const opus_int frame_length /* I Frame length */ ··· 44 44 { 45 45 opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; 46 46 opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; 47 - opus_int *pulses_ptr; 47 + opus_int16 *pulses_ptr; 48 48 const opus_uint8 *cdf_ptr; 49 49 50 50 /*********************/ ··· 84 84 if( sum_pulses[ i ] > 0 ) { 85 85 silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); 86 86 } else { 87 - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) ); 87 + silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); 88 88 } 89 89 } 90 90
+8 -1
lib/rbcodec/codecs/libopus/silk/macros.h
··· 79 79 (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ 80 80 ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) 81 81 82 + #if defined(MIPSr1_ASM) 83 + #include "mips/macros_mipsr1.h" 84 + #endif 85 + 82 86 #include "ecintrin.h" 83 - 87 + #ifndef OVERRIDE_silk_CLZ16 84 88 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) 85 89 { 86 90 return 32 - EC_ILOG(in16<<16|0x8000); 87 91 } 92 + #endif 88 93 94 + #ifndef OVERRIDE_silk_CLZ32 89 95 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) 90 96 { 91 97 return in32 ? 32 - EC_ILOG(in32) : 32; 92 98 } 99 + #endif 93 100 94 101 /* Row based */ 95 102 #define matrix_ptr(Matrix_base_adr, row, column, N) \
+4 -4
lib/rbcodec/codecs/libopus/silk/main.h
··· 116 116 /* Decodes signs of excitation */ 117 117 void silk_decode_signs( 118 118 ec_dec *psRangeDec, /* I/O Compressor data structure */ 119 - opus_int pulses[], /* I/O pulse signal */ 119 + opus_int16 pulses[], /* I/O pulse signal */ 120 120 opus_int length, /* I length of input */ 121 121 const opus_int signalType, /* I Signal type */ 122 122 const opus_int quantOffsetType, /* I Quantization offset type */ ··· 161 161 162 162 /* Shell decoder, operates on one shell code frame of 16 pulses */ 163 163 void silk_shell_decoder( 164 - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ 164 + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ 165 165 ec_dec *psRangeDec, /* I/O Compressor data structure */ 166 166 const opus_int pulses4 /* I number of pulses per pulse-subframe */ 167 167 ); ··· 397 397 silk_decoder_state *psDec, /* I/O Decoder state */ 398 398 silk_decoder_control *psDecCtrl, /* I Decoder control */ 399 399 opus_int16 xq[], /* O Decoded speech */ 400 - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 400 + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 401 401 ); 402 402 403 403 /* Decode quantization indices of excitation (Shell coding) */ 404 404 void silk_decode_pulses( 405 405 ec_dec *psRangeDec, /* I/O Compressor data structure */ 406 - opus_int pulses[], /* O Excitation signal */ 406 + opus_int16 pulses[], /* O Excitation signal */ 407 407 const opus_int signalType, /* I Sigtype */ 408 408 const opus_int quantOffsetType, /* I quantOffsetType */ 409 409 const opus_int frame_length /* I Frame length */
+4 -7
lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
··· 72 72 silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; 73 73 opus_int32 nSamplesIn; 74 74 opus_int32 max_index_Q16, index_increment_Q16; 75 - /* VARDECL( opus_int16, buf ); 76 - SAVE_STACK; */ 75 + VARDECL( opus_int16, buf ); 76 + SAVE_STACK; 77 77 78 - /* ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); */ 79 - 80 - /* worst case = 2*16*10+8 = 328 * 2 = 656bytes */ 81 - opus_int16 buf[2 * S->batchSize + RESAMPLER_ORDER_FIR_12]; 78 + ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); 82 79 83 80 /* Copy buffered samples to start of buffer */ 84 81 silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); ··· 106 103 107 104 /* Copy last part of filtered signal to the state for the next call */ 108 105 silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); 109 - /* RESTORE_STACK; */ 106 + RESTORE_STACK; 110 107 }
+4 -4
lib/rbcodec/codecs/libopus/silk/shell_coder.c
··· 60 60 #endif 61 61 62 62 static OPUS_INLINE void decode_split( 63 - opus_int *p_child1, /* O pulse amplitude of first child subframe */ 64 - opus_int *p_child2, /* O pulse amplitude of second child subframe */ 63 + opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ 64 + opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ 65 65 ec_dec *psRangeDec, /* I/O Compressor data structure */ 66 66 const opus_int p, /* I pulse amplitude of current subframe */ 67 67 const opus_uint8 *shell_table /* I table of shell cdfs */ ··· 121 121 122 122 /* Shell decoder, operates on one shell code frame of 16 pulses */ 123 123 void silk_shell_decoder( 124 - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ 124 + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ 125 125 ec_dec *psRangeDec, /* I/O Compressor data structure */ 126 126 const opus_int pulses4 /* I number of pulses per pulse-subframe */ 127 127 ) 128 128 { 129 - opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; 129 + opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; 130 130 131 131 /* this function operates on one shell code frame of 16 pulses */ 132 132 silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+1
lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
··· 53 53 /* Scale down */ 54 54 nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); 55 55 shft = 2; 56 + i+=2; 56 57 break; 57 58 } 58 59 }
-2
lib/rbcodec/codecs/opus.c
··· 337 337 param = ci->id3->elapsed; 338 338 strtoffset = ci->id3->offset; 339 339 340 - global_stack = 0; 341 - 342 340 #if defined(CPU_COLDFIRE) 343 341 /* EMAC rounding is disabled because of MULT16_32_Q15, which will be 344 342 inaccurate with rounding in its current incarnation */