Sync to upstream libopus · tsiry-sandratraina.com/rockbox-zig@9b7ec42

-4

lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h

··· 65 65 do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 66 66 (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) 67 67 68 - # define C_MUL4(m,a,b) \ 69 - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ 70 - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) 71 - 72 68 # define C_MULBYSCALAR( c, s ) \ 73 69 do{ (c).r = S_MUL( (c).r , s ) ;\ 74 70 (c).i = S_MUL( (c).i , s ) ; }while(0)

+31 -3

lib/rbcodec/codecs/libopus/celt/arch.h

··· 69 69 70 70 #define IMUL32(a,b) ((a)*(b)) 71 71 72 - #define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ 73 - #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ 72 + #define ABS(x) ((x) < 0 ? (-(x)) : (x)) 74 73 #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ 75 74 #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ 76 - #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ 77 75 #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ 78 76 #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ 79 77 #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ ··· 108 106 #define SCALEIN(a) (a) 109 107 #define SCALEOUT(a) (a) 110 108 109 + #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) 110 + #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) 111 + 112 + static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 113 + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; 114 + } 115 + 111 116 #ifdef FIXED_DEBUG 112 117 #include "fixed_debug.h" 113 118 #else ··· 139 144 typedef float celt_norm; 140 145 typedef float celt_ener; 141 146 147 + #ifdef FLOAT_APPROX 148 + /* This code should reliably detect NaN/inf even when -ffast-math is used. 149 + Assumes IEEE 754 format. */ 150 + static OPUS_INLINE int celt_isnan(float x) 151 + { 152 + union {float f; opus_uint32 i;} in; 153 + in.f = x; 154 + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; 155 + } 156 + #else 157 + #ifdef __FAST_MATH__ 158 + #error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input 159 + #endif 160 + #define celt_isnan(x) ((x)!=(x)) 161 + #endif 162 + 142 163 #define Q15ONE 1.0f 143 164 144 165 #define NORM_SCALING 1.f ··· 147 168 #define VERY_SMALL 1e-30f 148 169 #define VERY_LARGE16 1e15f 149 170 #define Q15_ONE ((opus_val16)1.f) 171 + 172 + /* This appears to be the same speed as C99's fabsf() but it's more portable. */ 173 + #define ABS16(x) ((float)fabs(x)) 174 + #define ABS32(x) ((float)fabs(x)) 150 175 151 176 #define QCONST16(x,bits) (x) 152 177 #define QCONST32(x,bits) (x) ··· 186 211 #define MULT32_32_Q31(a,b) ((a)*(b)) 187 212 188 213 #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) 214 + #define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) 189 215 190 216 #define MULT16_16_Q11_32(a,b) ((a)*(b)) 191 217 #define MULT16_16_Q11(a,b) ((a)*(b)) ··· 202 228 203 229 #define SCALEIN(a) ((a)*CELT_SIG_SCALE) 204 230 #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) 231 + 232 + #define SIG2WORD16(x) (x) 205 233 206 234 #endif /* !FIXED_POINT */ 207 235

+4

lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h

··· 68 68 #undef MAC16_32_Q15 69 69 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) 70 70 71 + /** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. 72 + Result fits in 32 bits. */ 73 + #undef MAC16_32_Q16 74 + #define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) 71 75 72 76 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ 73 77 #undef MULT32_32_Q31

+35

lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h

··· 82 82 } 83 83 #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) 84 84 85 + /** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. 86 + Result fits in 32 bits. */ 87 + #undef MAC16_32_Q16 88 + static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, 89 + opus_val32 b) 90 + { 91 + int res; 92 + __asm__( 93 + "#MAC16_32_Q16\n\t" 94 + "smlawb %0, %1, %2, %3;\n" 95 + : "=r"(res) 96 + : "r"(b), "r"(a), "r"(c) 97 + ); 98 + return res; 99 + } 100 + #define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) 101 + 85 102 /** 16x16 multiply-add where the result fits in 32 bits */ 86 103 #undef MAC16_16 87 104 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, ··· 112 129 return res; 113 130 } 114 131 #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) 132 + 133 + #ifdef OPUS_ARM_INLINE_MEDIA 134 + 135 + #undef SIG2WORD16 136 + static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) 137 + { 138 + celt_sig res; 139 + __asm__( 140 + "#SIG2WORD16\n\t" 141 + "ssat %0, #16, %1, ASR #12\n\t" 142 + : "=r"(res) 143 + : "r"(x+2048) 144 + ); 145 + return EXTRACT16(res); 146 + } 147 + #define SIG2WORD16(x) (SIG2WORD16_armv6(x)) 148 + 149 + #endif /* OPUS_ARM_INLINE_MEDIA */ 115 150 116 151 #endif

+111 -104

lib/rbcodec/codecs/libopus/celt/bands.c

··· 93 93 #if 0 94 94 #ifdef FIXED_POINT 95 95 /* Compute the amplitude (sqrt energy) in each of the bands */ 96 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) 96 + void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) 97 97 { 98 98 int i, c, N; 99 99 const opus_int16 *eBands = m->eBands; 100 - N = M*m->shortMdctSize; 100 + N = m->shortMdctSize<<LM; 101 101 c=0; do { 102 102 for (i=0;i<end;i++) 103 103 { ··· 105 105 opus_val32 maxval=0; 106 106 opus_val32 sum = 0; 107 107 108 - j=M*eBands[i]; do { 109 - maxval = MAX32(maxval, X[j+c*N]); 110 - maxval = MAX32(maxval, -X[j+c*N]); 111 - } while (++j<M*eBands[i+1]); 112 - 108 + maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); 113 109 if (maxval > 0) 114 110 { 115 - int shift = celt_ilog2(maxval)-10; 116 - j=M*eBands[i]; do { 117 - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), 118 - EXTRACT16(VSHR32(X[j+c*N],shift))); 119 - } while (++j<M*eBands[i+1]); 111 + int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); 112 + j=eBands[i]<<LM; 113 + if (shift>0) 114 + { 115 + do { 116 + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), 117 + EXTRACT16(SHR32(X[j+c*N],shift))); 118 + } while (++j<eBands[i+1]<<LM); 119 + } else { 120 + do { 121 + sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), 122 + EXTRACT16(SHL32(X[j+c*N],-shift))); 123 + } while (++j<eBands[i+1]<<LM); 124 + } 120 125 /* We're adding one here to ensure the normalized band isn't larger than unity norm */ 121 126 bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); 122 127 } else { ··· 151 156 152 157 #else /* FIXED_POINT */ 153 158 /* Compute the amplitude (sqrt energy) in each of the bands */ 154 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) 159 + void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) 155 160 { 156 161 int i, c, N; 157 162 const opus_int16 *eBands = m->eBands; 158 - N = M*m->shortMdctSize; 163 + N = m->shortMdctSize<<LM; 159 164 c=0; do { 160 165 for (i=0;i<end;i++) 161 166 { 162 - int j; 163 - opus_val32 sum = 1e-27f; 164 - for (j=M*eBands[i];j<M*eBands[i+1];j++) 165 - sum += X[j+c*N]*X[j+c*N]; 167 + opus_val32 sum; 168 + sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); 166 169 bandE[i+c*m->nbEBands] = celt_sqrt(sum); 167 170 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ 168 171 } ··· 192 195 193 196 /* De-normalise the energy to produce the synthesis from the unit-energy bands */ 194 197 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, 195 - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) 198 + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, 199 + int end, int M, int downsample, int silence) 196 200 { 197 - int i, c, N; 201 + int i, N; 202 + int bound; 203 + celt_sig * OPUS_RESTRICT f; 204 + const celt_norm * OPUS_RESTRICT x; 198 205 const opus_int16 *eBands = m->eBands; 199 206 N = M*m->shortMdctSize; 200 - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); 201 - c=0; do { 202 - celt_sig * OPUS_RESTRICT f; 203 - const celt_norm * OPUS_RESTRICT x; 204 - f = freq+c*N; 205 - x = X+c*N+M*eBands[start]; 206 - for (i=0;i<M*eBands[start];i++) 207 - *f++ = 0; 208 - for (i=start;i<end;i++) 209 - { 210 - int j, band_end; 211 - opus_val16 g; 212 - opus_val16 lg; 207 + bound = M*eBands[end]; 208 + if (downsample!=1) 209 + bound = IMIN(bound, N/downsample); 210 + if (silence) 211 + { 212 + bound = 0; 213 + start = end = 0; 214 + } 215 + f = freq; 216 + x = X+M*eBands[start]; 217 + for (i=0;i<M*eBands[start];i++) 218 + *f++ = 0; 219 + for (i=start;i<end;i++) 220 + { 221 + int j, band_end; 222 + opus_val16 g; 223 + opus_val16 lg; 213 224 #ifdef FIXED_POINT 214 - int shift; 225 + int shift; 215 226 #endif 216 - j=M*eBands[i]; 217 - band_end = M*eBands[i+1]; 218 - lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); 227 + j=M*eBands[i]; 228 + band_end = M*eBands[i+1]; 229 + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); 219 230 #ifndef FIXED_POINT 220 - g = celt_exp2(lg); 231 + g = celt_exp2(lg); 221 232 #else 222 - /* Handle the integer part of the log energy */ 223 - shift = 16-(lg>>DB_SHIFT); 224 - if (shift>31) 233 + /* Handle the integer part of the log energy */ 234 + shift = 16-(lg>>DB_SHIFT); 235 + if (shift>31) 236 + { 237 + shift=0; 238 + g=0; 239 + } else { 240 + /* Handle the fractional part. */ 241 + g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); 242 + } 243 + /* Handle extreme gains with negative shift. */ 244 + if (shift<0) 245 + { 246 + /* For shift < -2 we'd be likely to overflow, so we're capping 247 + the gain here. This shouldn't happen unless the bitstream is 248 + already corrupted. */ 249 + if (shift < -2) 225 250 { 226 - shift=0; 227 - g=0; 228 - } else { 229 - /* Handle the fractional part. */ 230 - g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); 251 + g = 32767; 252 + shift = -2; 231 253 } 232 - /* Handle extreme gains with negative shift. */ 233 - if (shift<0) 234 - { 235 - /* For shift < -2 we'd be likely to overflow, so we're capping 236 - the gain here. This shouldn't happen unless the bitstream is 237 - already corrupted. */ 238 - if (shift < -2) 239 - { 240 - g = 32767; 241 - shift = -2; 242 - } 243 - do { 244 - *f++ = SHL32(MULT16_16(*x++, g), -shift); 245 - } while (++j<band_end); 246 - } else 254 + do { 255 + *f++ = SHL32(MULT16_16(*x++, g), -shift); 256 + } while (++j<band_end); 257 + } else 247 258 #endif 248 259 /* Be careful of the fixed-point "else" just above when changing this code */ 249 260 do { 250 261 *f++ = SHR32(MULT16_16(*x++, g), shift); 251 262 } while (++j<band_end); 252 - } 253 - celt_assert(start <= end); 254 - for (i=M*eBands[end];i<N;i++) 255 - *f++ = 0; 256 - } while (++c<C); 263 + } 264 + celt_assert(start <= end); 265 + OPUS_CLEAR(&freq[bound], N-bound); 257 266 } 258 267 259 268 /* This prevents energy collapse for transients with multiple short MDCTs */ 260 269 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 261 - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, 262 - opus_val16 *prev2logE, int *pulses, opus_uint32 seed) 270 + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 271 + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed) 263 272 { 264 273 int c, i, j, k; 265 274 for (i=start;i<end;i++) ··· 274 283 275 284 N0 = m->eBands[i+1]-m->eBands[i]; 276 285 /* depth in 1/8 bits */ 277 - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); 286 + celt_assert(pulses[i]>=0); 287 + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; 278 288 279 289 #ifdef FIXED_POINT 280 290 thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); ··· 352 362 } 353 363 } 354 364 355 - static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) 365 + static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) 356 366 { 357 367 int i = bandID; 358 368 int j; ··· 372 382 celt_norm r, l; 373 383 l = X[j]; 374 384 r = Y[j]; 375 - X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); 385 + X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); 376 386 /* Side is not encoded, no need to calculate */ 377 387 } 378 388 } 379 389 380 - static void stereo_split(celt_norm *X, celt_norm *Y, int N) 390 + static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) 381 391 { 382 392 int j; 383 393 for (j=0;j<N;j++) 384 394 { 385 - celt_norm r, l; 386 - l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); 387 - r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); 388 - X[j] = l+r; 389 - Y[j] = r-l; 395 + opus_val32 r, l; 396 + l = MULT16_16(QCONST16(.70710678f, 15), X[j]); 397 + r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); 398 + X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); 399 + Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); 390 400 } 391 401 } 392 402 393 - static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) 403 + static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) 394 404 { 395 405 int j; 396 406 opus_val32 xp=0, side=0; ··· 411 421 Er = MULT16_16(mid2, mid2) + side + 2*xp; 412 422 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) 413 423 { 414 - for (j=0;j<N;j++) 415 - Y[j] = X[j]; 424 + OPUS_COPY(Y, X, N); 416 425 return; 417 426 } 418 427 ··· 436 445 { 437 446 celt_norm r, l; 438 447 /* Apply mid scaling (side is already scaled) */ 439 - l = MULT16_16_Q15(mid, X[j]); 448 + l = MULT16_16_P15(mid, X[j]); 440 449 r = Y[j]; 441 450 X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); 442 451 Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); ··· 445 454 446 455 #if 0 447 456 /* Decide whether we should spread the pulses in the current frame */ 448 - int spreading_decision(const CELTMode *m, celt_norm *X, int *average, 457 + int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 449 458 int last_decision, int *hf_average, int *tapset_decision, int update_hf, 450 459 int end, int C, int M) 451 460 { ··· 466 475 { 467 476 int j, N, tmp=0; 468 477 int tcount[3] = {0,0,0}; 469 - celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; 478 + const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; 470 479 N = M*(eBands[i+1]-eBands[i]); 471 480 if (N<=8) 472 481 continue; ··· 486 495 487 496 /* Only include four last bands (8 kHz and up) */ 488 497 if (i>m->nbEBands-4) 489 - hf_sum += 32*(tcount[1]+tcount[0])/N; 498 + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); 490 499 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); 491 500 sum += tmp*256; 492 501 nbBands++; ··· 496 505 if (update_hf) 497 506 { 498 507 if (hf_sum) 499 - hf_sum /= C*(4-m->nbEBands+end); 508 + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); 500 509 *hf_average = (*hf_average+hf_sum)>>1; 501 510 hf_sum = *hf_average; 502 511 if (*tapset_decision==2) ··· 512 521 } 513 522 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ 514 523 celt_assert(nbBands>0); /* end has to be non-zero */ 515 - sum /= nbBands; 524 + celt_assert(sum>=0); 525 + sum = celt_udiv(sum, nbBands); 516 526 /* Recursive averaging */ 517 527 sum = (sum+*average)>>1; 518 528 *average = sum; ··· 571 581 for (j=0;j<N0;j++) 572 582 tmp[i*N0+j] = X[j*stride+i]; 573 583 } 574 - for (j=0;j<N;j++) 575 - X[j] = tmp[j]; 584 + OPUS_COPY(X, tmp, N); 576 585 RESTORE_STACK; 577 586 } 578 587 ··· 595 604 for (j=0;j<N0;j++) 596 605 tmp[j*stride+i] = X[i*N0+j]; 597 606 } 598 - for (j=0;j<N;j++) 599 - X[j] = tmp[j]; 607 + OPUS_COPY(X, tmp, N); 600 608 RESTORE_STACK; 601 609 } 602 610 ··· 607 615 for (i=0;i<stride;i++) 608 616 for (j=0;j<N0;j++) 609 617 { 610 - celt_norm tmp1, tmp2; 611 - tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); 612 - tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); 613 - X[stride*2*j+i] = tmp1 + tmp2; 614 - X[stride*(2*j+1)+i] = tmp1 - tmp2; 618 + opus_val32 tmp1, tmp2; 619 + tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); 620 + tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); 621 + X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); 622 + X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); 615 623 } 616 624 } 617 625 ··· 626 634 /* The upper limit ensures that in a stereo split with itheta==16384, we'll 627 635 always have enough bits left over to code at least one pulse in the 628 636 side; otherwise it would collapse, since it doesn't get folded. */ 629 - qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); 637 + qb = celt_sudiv(b+N2*offset, N2); 638 + qb = IMIN(b-pulse_cap-(4<<BITRES), qb); 630 639 631 640 qb = IMIN(8<<BITRES, qb); 632 641 ··· 773 782 ec_dec_update(ec, fl, fl+fs, ft); 774 783 } 775 784 } 776 - itheta = (opus_int32)itheta*16384/qn; 785 + celt_assert(itheta>=0); 786 + itheta = celt_udiv((opus_int32)itheta*16384, qn); 777 787 if (encode && stereo) 778 788 { 779 789 if (itheta==0) ··· 1025 1035 fill &= cm_mask; 1026 1036 if (!fill) 1027 1037 { 1028 - for (j=0;j<N;j++) 1029 - X[j] = 0; 1038 + OPUS_CLEAR(X, N); 1030 1039 } else { 1031 1040 if (lowband == NULL) 1032 1041 { ··· 1088 1097 1089 1098 longBlocks = B0==1; 1090 1099 1091 - N_B /= B; 1100 + N_B = celt_udiv(N_B, B); 1092 1101 1093 1102 /* Special case for one sample */ 1094 1103 if (N==1) ··· 1102 1111 1103 1112 if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) 1104 1113 { 1105 - int j; 1106 - for (j=0;j<N;j++) 1107 - lowband_scratch[j] = lowband[j]; 1114 + OPUS_COPY(lowband_scratch, lowband, N); 1108 1115 lowband = lowband_scratch; 1109 1116 } 1110 1117 ··· 1432 1439 ctx.remaining_bits = remaining_bits; 1433 1440 if (i <= codedBands-1) 1434 1441 { 1435 - curr_balance = balance / IMIN(3, codedBands-i); 1442 + curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); 1436 1443 b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); 1437 1444 } else { 1438 1445 b = 0;

+6 -5

lib/rbcodec/codecs/libopus/celt/bands.h

··· 41 41 * @param X Spectrum 42 42 * @param bandE Square root of the energy for each band (returned) 43 43 */ 44 - void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); 44 + void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); 45 45 46 46 /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ 47 47 ··· 59 59 * @param bandE Square root of the energy for each band 60 60 */ 61 61 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, 62 - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M); 62 + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, 63 + int end, int M, int downsample, int silence); 63 64 64 65 #define SPREAD_NONE (0) 65 66 #define SPREAD_LIGHT (1) 66 67 #define SPREAD_NORMAL (2) 67 68 #define SPREAD_AGGRESSIVE (3) 68 69 69 - int spreading_decision(const CELTMode *m, celt_norm *X, int *average, 70 + int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 70 71 int last_decision, int *hf_average, int *tapset_decision, int update_hf, 71 72 int end, int C, int M); 72 73 ··· 104 105 opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); 105 106 106 107 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 107 - int start, int end, opus_val16 *logE, opus_val16 *prev1logE, 108 - opus_val16 *prev2logE, int *pulses, opus_uint32 seed); 108 + int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 109 + const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed); 109 110 110 111 opus_uint32 celt_lcg_rand(opus_uint32 seed); 111 112

+73 -6

lib/rbcodec/codecs/libopus/celt/celt.c

··· 54 54 #define PACKAGE_VERSION "unknown" 55 55 #endif 56 56 57 + #if defined(MIPSr1_ASM) 58 + #include "mips/celt_mipsr1.h" 59 + #endif 60 + 57 61 58 62 int resampling_factor(opus_int32 rate) 59 63 { ··· 86 90 } 87 91 88 92 #ifndef OVERRIDE_COMB_FILTER_CONST 93 + /* This version should be faster on ARM */ 94 + #ifdef OPUS_ARM_ASM 95 + static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 96 + opus_val16 g10, opus_val16 g11, opus_val16 g12) 97 + { 98 + opus_val32 x0, x1, x2, x3, x4; 99 + int i; 100 + x4 = SHL32(x[-T-2], 1); 101 + x3 = SHL32(x[-T-1], 1); 102 + x2 = SHL32(x[-T], 1); 103 + x1 = SHL32(x[-T+1], 1); 104 + for (i=0;i<N-4;i+=5) 105 + { 106 + opus_val32 t; 107 + x0=SHL32(x[i-T+2],1); 108 + t = MAC16_32_Q16(x[i], g10, x2); 109 + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 110 + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 111 + y[i] = t; 112 + x4=SHL32(x[i-T+3],1); 113 + t = MAC16_32_Q16(x[i+1], g10, x1); 114 + t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); 115 + t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); 116 + y[i+1] = t; 117 + x3=SHL32(x[i-T+4],1); 118 + t = MAC16_32_Q16(x[i+2], g10, x0); 119 + t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); 120 + t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); 121 + y[i+2] = t; 122 + x2=SHL32(x[i-T+5],1); 123 + t = MAC16_32_Q16(x[i+3], g10, x4); 124 + t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); 125 + t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); 126 + y[i+3] = t; 127 + x1=SHL32(x[i-T+6],1); 128 + t = MAC16_32_Q16(x[i+4], g10, x3); 129 + t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); 130 + t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); 131 + y[i+4] = t; 132 + } 133 + #ifdef CUSTOM_MODES 134 + for (;i<N;i++) 135 + { 136 + opus_val32 t; 137 + x0=SHL32(x[i-T+2],1); 138 + t = MAC16_32_Q16(x[i], g10, x2); 139 + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 140 + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 141 + y[i] = t; 142 + x4=x3; 143 + x3=x2; 144 + x2=x1; 145 + x1=x0; 146 + } 147 + #endif 148 + } 149 + #else 89 150 static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 90 151 opus_val16 g10, opus_val16 g11, opus_val16 g12) 91 152 { ··· 110 171 111 172 } 112 173 #endif 174 + #endif 113 175 176 + #ifndef OVERRIDE_comb_filter 114 177 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 115 178 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 116 179 const opus_val16 *window, int overlap) ··· 131 194 OPUS_MOVE(y, x, N); 132 195 return; 133 196 } 134 - g00 = MULT16_16_Q15(g0, gains[tapset0][0]); 135 - g01 = MULT16_16_Q15(g0, gains[tapset0][1]); 136 - g02 = MULT16_16_Q15(g0, gains[tapset0][2]); 137 - g10 = MULT16_16_Q15(g1, gains[tapset1][0]); 138 - g11 = MULT16_16_Q15(g1, gains[tapset1][1]); 139 - g12 = MULT16_16_Q15(g1, gains[tapset1][2]); 197 + g00 = MULT16_16_P15(g0, gains[tapset0][0]); 198 + g01 = MULT16_16_P15(g0, gains[tapset0][1]); 199 + g02 = MULT16_16_P15(g0, gains[tapset0][2]); 200 + g10 = MULT16_16_P15(g1, gains[tapset1][0]); 201 + g11 = MULT16_16_P15(g1, gains[tapset1][1]); 202 + g12 = MULT16_16_P15(g1, gains[tapset1][2]); 140 203 x1 = x[-T1+1]; 141 204 x2 = x[-T1 ]; 142 205 x3 = x[-T1-1]; 143 206 x4 = x[-T1-2]; 207 + /* If the filter didn't change, we don't need the overlap */ 208 + if (g0==g1 && T0==T1 && tapset0==tapset1) 209 + overlap=0; 144 210 for (i=0;i<overlap;i++) 145 211 { 146 212 opus_val16 f; ··· 170 236 /* Compute the part with the constant filter. */ 171 237 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); 172 238 } 239 + #endif /* OVERRIDE_comb_filter */ 173 240 174 241 const signed char tf_select_table[4][8] = { 175 242 {0, -1, 0, -1, 0,-1, 0,-1},

+6 -5

lib/rbcodec/codecs/libopus/celt/celt.h

··· 134 134 135 135 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); 136 136 137 - int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); 137 + int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, 138 + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); 138 139 139 140 #define celt_encoder_ctl opus_custom_encoder_ctl 140 141 #define celt_decoder_ctl opus_custom_decoder_ctl ··· 205 206 void init_caps(const CELTMode *m,int *cap,int LM,int C); 206 207 207 208 #ifdef RESYNTH 208 - void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch); 209 - 210 - void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, 211 - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM); 209 + void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem); 210 + void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], 211 + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, 212 + int LM, int downsample, int silence); 212 213 #endif 213 214 214 215 #ifdef __cplusplus

+180 -145

lib/rbcodec/codecs/libopus/celt/celt_decoder.c

··· 51 51 #include "celt_lpc.h" 52 52 #include "vq.h" 53 53 54 + #if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) 55 + #define NORM_ALIASING_HACK 56 + #endif 54 57 /**********************************************************************/ 55 58 /* */ 56 59 /* DECODER */ ··· 175 178 } 176 179 #endif /* CUSTOM_MODES */ 177 180 178 - static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) 179 - { 180 - #ifdef FIXED_POINT 181 - x = PSHR32(x, SIG_SHIFT); 182 - x = MAX32(x, -32768); 183 - x = MIN32(x, 32767); 184 - return EXTRACT16(x); 185 - #else 186 - return (opus_val16)x; 187 - #endif 188 - } 189 181 190 182 #ifndef RESYNTH 191 183 static 192 184 #endif 193 - void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) 185 + void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, 186 + celt_sig *mem, int accum) 194 187 { 195 188 int c; 196 189 int Nd; 197 190 int apply_downsampling=0; 198 191 opus_val16 coef0; 199 - 192 + VARDECL(celt_sig, scratch); 193 + SAVE_STACK; 194 + #ifndef FIXED_POINT 195 + (void)accum; 196 + celt_assert(accum==0); 197 + #endif 198 + ALLOC(scratch, N, celt_sig); 200 199 coef0 = coef[0]; 201 200 Nd = N/downsample; 202 201 c=0; do { ··· 234 233 apply_downsampling=1; 235 234 } else { 236 235 /* Shortcut for the standard (non-custom modes) case */ 237 - for (j=0;j<N;j++) 236 + #ifdef FIXED_POINT 237 + if (accum) 238 238 { 239 - celt_sig tmp = x[j] + m + VERY_SMALL; 240 - m = MULT16_32_Q15(coef0, tmp); 241 - y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 239 + for (j=0;j<N;j++) 240 + { 241 + celt_sig tmp = x[j] + m + VERY_SMALL; 242 + m = MULT16_32_Q15(coef0, tmp); 243 + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); 244 + } 245 + } else 246 + #endif 247 + { 248 + for (j=0;j<N;j++) 249 + { 250 + celt_sig tmp = x[j] + m + VERY_SMALL; 251 + m = MULT16_32_Q15(coef0, tmp); 252 + y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 253 + } 242 254 } 243 255 } 244 256 mem[c] = m; ··· 246 258 if (apply_downsampling) 247 259 { 248 260 /* Perform down-sampling */ 249 - for (j=0;j<Nd;j++) 250 - y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); 261 + #ifdef FIXED_POINT 262 + if (accum) 263 + { 264 + for (j=0;j<Nd;j++) 265 + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); 266 + } else 267 + #endif 268 + { 269 + for (j=0;j<Nd;j++) 270 + y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); 271 + } 251 272 } 252 273 } while (++c<C); 274 + RESTORE_STACK; 253 275 } 254 276 255 - /** Compute the IMDCT and apply window for all sub-frames and 256 - all channels in a frame */ 257 277 #ifndef RESYNTH 258 278 static 259 279 #endif 260 - void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, 261 - celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) 280 + void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], 281 + opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, 282 + int LM, int downsample, int silence) 262 283 { 263 - int b, c; 284 + int c, i; 285 + int M; 286 + int b; 264 287 int B; 265 - int N; 288 + int N, NB; 266 289 int shift; 267 - const int overlap = OVERLAP(mode); 290 + int nbEBands; 291 + int overlap; 292 + VARDECL(celt_sig, freq); 293 + SAVE_STACK; 294 + 295 + overlap = mode->overlap; 296 + nbEBands = mode->nbEBands; 297 + N = mode->shortMdctSize<<LM; 298 + ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */ 299 + M = 1<<LM; 268 300 269 - if (shortBlocks) 301 + if (isTransient) 270 302 { 271 - B = shortBlocks; 272 - N = mode->shortMdctSize; 303 + B = M; 304 + NB = mode->shortMdctSize; 273 305 shift = mode->maxLM; 274 306 } else { 275 307 B = 1; 276 - N = mode->shortMdctSize<<LM; 308 + NB = mode->shortMdctSize<<LM; 277 309 shift = mode->maxLM-LM; 278 310 } 279 - c=0; do { 280 - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ 311 + 312 + if (CC==2&&C==1) 313 + { 314 + /* Copying a mono streams to two channels */ 315 + celt_sig *freq2; 316 + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, 317 + downsample, silence); 318 + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ 319 + freq2 = out_syn[1]+overlap/2; 320 + OPUS_COPY(freq2, freq, N); 321 + for (b=0;b<B;b++) 322 + clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 281 323 for (b=0;b<B;b++) 282 - clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); 283 - } while (++c<C); 324 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); 325 + } else if (CC==1&&C==2) 326 + { 327 + /* Downmixing a stereo stream to mono */ 328 + celt_sig *freq2; 329 + freq2 = out_syn[0]+overlap/2; 330 + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, 331 + downsample, silence); 332 + /* Use the output buffer as temp array before downmixing. */ 333 + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, 334 + downsample, silence); 335 + for (i=0;i<N;i++) 336 + freq[i] = HALF32(ADD32(freq[i],freq2[i])); 337 + for (b=0;b<B;b++) 338 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 339 + } else { 340 + /* Normal case (mono or stereo) */ 341 + c=0; do { 342 + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, 343 + downsample, silence); 344 + for (b=0;b<B;b++) 345 + clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); 346 + } while (++c<CC); 347 + } 348 + RESTORE_STACK; 284 349 } 285 350 286 351 static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) ··· 330 395 pitch of 480 Hz. */ 331 396 #define PLC_PITCH_LAG_MIN (100) 332 397 333 - static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) 398 + static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) 399 + { 400 + int pitch_index; 401 + VARDECL( opus_val16, lp_pitch_buf ); 402 + SAVE_STACK; 403 + ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); 404 + pitch_downsample(decode_mem, lp_pitch_buf, 405 + DECODE_BUFFER_SIZE, C, arch); 406 + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, 407 + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, 408 + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); 409 + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; 410 + RESTORE_STACK; 411 + return pitch_index; 412 + } 413 + 414 + static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) 334 415 { 335 416 int c; 336 417 int i; ··· 343 424 int nbEBands; 344 425 int overlap; 345 426 int start; 346 - int downsample; 347 427 int loss_count; 348 428 int noise_based; 349 429 const opus_int16 *eBands; 350 - VARDECL(celt_sig, scratch); 351 430 SAVE_STACK; 352 431 353 432 mode = st->mode; ··· 367 446 368 447 loss_count = st->loss_count; 369 448 start = st->start; 370 - downsample = st->downsample; 371 449 noise_based = loss_count >= 5 || start != 0; 372 - ALLOC(scratch, noise_based?N*C:N, celt_sig); 373 450 if (noise_based) 374 451 { 375 452 /* Noise-based PLC/CNG */ 376 - celt_sig *freq; 453 + #ifdef NORM_ALIASING_HACK 454 + celt_norm *X; 455 + #else 377 456 VARDECL(celt_norm, X); 457 + #endif 378 458 opus_uint32 seed; 379 459 opus_val16 *plcLogE; 380 460 int end; ··· 383 463 end = st->end; 384 464 effEnd = IMAX(start, IMIN(end, mode->effEBands)); 385 465 386 - /* Share the interleaved signal MDCT coefficient buffer with the 387 - deemphasis scratch buffer. */ 388 - freq = scratch; 466 + #ifdef NORM_ALIASING_HACK 467 + /* This is an ugly hack that breaks aliasing rules and would be easily broken, 468 + but it saves almost 4kB of stack. */ 469 + X = (celt_norm*)(out_syn[C-1]+overlap/2); 470 + #else 389 471 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 472 + #endif 390 473 391 474 if (loss_count >= 5) 392 475 plcLogE = backgroundLogE; ··· 421 504 } 422 505 st->rng = seed; 423 506 424 - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM); 425 - 426 - c=0; do { 427 - int bound = eBands[effEnd]<<LM; 428 - if (downsample!=1) 429 - bound = IMIN(bound, N/downsample); 430 - for (i=bound;i<N;i++) 431 - freq[c*N+i] = 0; 432 - } while (++c<C); 433 507 c=0; do { 434 508 OPUS_MOVE(decode_mem[c], decode_mem[c]+N, 435 509 DECODE_BUFFER_SIZE-N+(overlap>>1)); 436 510 } while (++c<C); 437 - compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); 511 + 512 + celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0); 438 513 } else { 439 514 /* Pitch-based PLC */ 440 515 const opus_val16 *window; ··· 445 520 446 521 if (loss_count == 0) 447 522 { 448 - VARDECL( opus_val16, lp_pitch_buf ); 449 - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); 450 - pitch_downsample(decode_mem, lp_pitch_buf, 451 - DECODE_BUFFER_SIZE, C, st->arch); 452 - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, 453 - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, 454 - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); 455 - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; 456 - st->last_pitch_index = pitch_index; 523 + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); 457 524 } else { 458 525 pitch_index = st->last_pitch_index; 459 526 fade = QCONST16(.8f,15); ··· 644 711 } while (++c<C); 645 712 } 646 713 647 - deemphasis(out_syn, pcm, N, C, downsample, 648 - mode->preemph, st->preemph_memD, scratch); 649 - 650 714 st->loss_count = loss_count+1; 651 715 652 716 RESTORE_STACK; 653 717 } 654 718 655 - #define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ 656 - static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ 657 - static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ 658 - int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) 719 + int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, 720 + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) 659 721 { 660 722 int c, i, N; 661 723 int spread_decision; 662 724 opus_int32 bits; 663 725 ec_dec _dec; 664 - VARDECL(celt_sig, freq); 726 + #ifdef NORM_ALIASING_HACK 727 + celt_norm *X; 728 + #else 665 729 VARDECL(celt_norm, X); 730 + #endif 666 731 VARDECL(int, fine_quant); 667 732 VARDECL(int, pulses); 668 733 VARDECL(int, cap); ··· 680 745 int intra_ener; 681 746 const int CC = st->channels; 682 747 int LM, M; 748 + int start; 749 + int end; 683 750 int effEnd; 684 751 int codedBands; 685 752 int alloc_trim; ··· 706 773 nbEBands = mode->nbEBands; 707 774 overlap = mode->overlap; 708 775 eBands = mode->eBands; 776 + start = st->start; 777 + end = st->end; 709 778 frame_size *= st->downsample; 710 779 711 - c=0; do { 712 - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); 713 - } while (++c<CC); 714 780 lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); 715 781 oldBandE = lpc+CC*LPC_ORDER; 716 782 oldLogE = oldBandE + 2*nbEBands; ··· 728 794 if (data0<0) 729 795 return OPUS_INVALID_PACKET; 730 796 } 731 - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); 797 + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); 732 798 LM = (data0>>3)&0x3; 733 799 C = 1 + ((data0>>2)&0x1); 734 800 data++; ··· 755 821 return OPUS_BAD_ARG; 756 822 757 823 N = M*mode->shortMdctSize; 824 + c=0; do { 825 + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); 826 + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; 827 + } while (++c<CC); 758 828 759 - effEnd = st->end; 829 + effEnd = end; 760 830 if (effEnd > mode->effEBands) 761 831 effEnd = mode->effEBands; 762 832 763 833 if (data == NULL || len<=1) 764 834 { 765 - celt_decode_lost(st, pcm, N, LM); 835 + celt_decode_lost(st, N, LM); 836 + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); 766 837 RESTORE_STACK; 767 838 return frame_size/st->downsample; 768 839 } ··· 798 869 postfilter_gain = 0; 799 870 postfilter_pitch = 0; 800 871 postfilter_tapset = 0; 801 - if (st->start==0 && tell+16 <= total_bits) 872 + if (start==0 && tell+16 <= total_bits) 802 873 { 803 874 if(ec_dec_bit_logp(dec, 1)) 804 875 { ··· 829 900 /* Decode the global flags (first symbols in the stream) */ 830 901 intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; 831 902 /* Get band energies */ 832 - unquant_coarse_energy(mode, st->start, st->end, oldBandE, 903 + unquant_coarse_energy(mode, start, end, oldBandE, 833 904 intra_ener, dec, C, LM); 834 905 835 906 ALLOC(tf_res, nbEBands, int); 836 - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); 907 + tf_decode(start, end, isTransient, tf_res, LM, dec); 837 908 838 909 tell = ec_tell(dec); 839 910 spread_decision = SPREAD_NORMAL; ··· 849 920 dynalloc_logp = 6; 850 921 total_bits<<=BITRES; 851 922 tell = ec_tell_frac(dec); 852 - for (i=st->start;i<st->end;i++) 923 + for (i=start;i<end;i++) 853 924 { 854 925 int width, quanta; 855 926 int dynalloc_loop_logp; ··· 888 959 ALLOC(pulses, nbEBands, int); 889 960 ALLOC(fine_priority, nbEBands, int); 890 961 891 - codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, 962 + codedBands = compute_allocation(mode, start, end, offsets, cap, 892 963 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, 893 964 fine_quant, fine_priority, C, LM, dec, 0, 0, 0); 894 965 895 - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); 966 + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); 967 + 968 + c=0; do { 969 + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); 970 + } while (++c<CC); 896 971 897 972 /* Decode fixed codebook */ 898 973 ALLOC(collapse_masks, C*nbEBands, unsigned char); 899 - /**< Interleaved normalised MDCTs */ 900 - if (FREQ_X_BUF_SIZE >= C*N) 901 - X = s_X; 902 - else 903 - ALLOC(X, C*N, celt_norm); 904 974 905 - quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, 975 + #ifdef NORM_ALIASING_HACK 976 + /* This is an ugly hack that breaks aliasing rules and would be easily broken, 977 + but it saves almost 4kB of stack. */ 978 + X = (celt_norm*)(out_syn[CC-1]+overlap/2); 979 + #else 980 + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 981 + #endif 982 + 983 + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, 906 984 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 907 985 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); 908 986 ··· 911 989 anti_collapse_on = ec_dec_bits(dec, 1); 912 990 } 913 991 914 - unquant_energy_finalise(mode, st->start, st->end, oldBandE, 992 + unquant_energy_finalise(mode, start, end, oldBandE, 915 993 fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); 916 994 917 995 if (anti_collapse_on) 918 996 anti_collapse(mode, X, collapse_masks, LM, C, N, 919 - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 920 - 921 - /**< Interleaved signal MDCTs */ 922 - if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) 923 - freq = s_freq; 924 - else 925 - ALLOC(freq, IMAX(CC,C)*N, celt_sig); 997 + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 926 998 927 999 if (silence) 928 1000 { 929 1001 for (i=0;i<C*nbEBands;i++) 930 1002 oldBandE[i] = -QCONST16(28.f,DB_SHIFT); 931 - for (i=0;i<C*N;i++) 932 - freq[i] = 0; 933 - } else { 934 - /* Synthesis */ 935 - denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); 936 1003 } 937 - c=0; do { 938 - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); 939 - } while (++c<CC); 940 1004 941 - c=0; do { 942 - int bound = M*eBands[effEnd]; 943 - if (st->downsample!=1) 944 - bound = IMIN(bound, N/st->downsample); 945 - for (i=bound;i<N;i++) 946 - freq[c*N+i] = 0; 947 - } while (++c<C); 948 - 949 - c=0; do { 950 - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; 951 - } while (++c<CC); 952 - 953 - if (CC==2&&C==1) 954 - { 955 - for (i=0;i<N;i++) 956 - freq[N+i] = freq[i]; 957 - } 958 - if (CC==1&&C==2) 959 - { 960 - for (i=0;i<N;i++) 961 - freq[i] = HALF32(ADD32(freq[i],freq[N+i])); 962 - } 963 - 964 - /* Compute inverse MDCTs */ 965 - compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM); 1005 + celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence); 966 1006 967 1007 c=0; do { 968 1008 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); ··· 989 1029 st->postfilter_tapset_old = st->postfilter_tapset; 990 1030 } 991 1031 992 - if (C==1) { 993 - for (i=0;i<nbEBands;i++) 994 - oldBandE[nbEBands+i]=oldBandE[i]; 995 - } 1032 + if (C==1) 1033 + OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); 996 1034 997 1035 /* In case start or end were to change */ 998 1036 if (!isTransient) 999 1037 { 1000 - for (i=0;i<2*nbEBands;i++) 1001 - oldLogE2[i] = oldLogE[i]; 1002 - for (i=0;i<2*nbEBands;i++) 1003 - oldLogE[i] = oldBandE[i]; 1038 + OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); 1039 + OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); 1004 1040 for (i=0;i<2*nbEBands;i++) 1005 1041 backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); 1006 1042 } else { ··· 1009 1045 } 1010 1046 c=0; do 1011 1047 { 1012 - for (i=0;i<st->start;i++) 1048 + for (i=0;i<start;i++) 1013 1049 { 1014 1050 oldBandE[c*nbEBands+i]=0; 1015 1051 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 1016 1052 } 1017 - for (i=st->end;i<nbEBands;i++) 1053 + for (i=end;i<nbEBands;i++) 1018 1054 { 1019 1055 oldBandE[c*nbEBands+i]=0; 1020 1056 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); ··· 1022 1058 } while (++c<2); 1023 1059 st->rng = dec->rng; 1024 1060 1025 - /* We reuse freq[] as scratch space for the de-emphasis */ 1026 - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); 1061 + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); 1027 1062 st->loss_count = 0; 1028 1063 RESTORE_STACK; 1029 1064 if (ec_tell(dec) > 8*len) ··· 1039 1074 #ifdef FIXED_POINT 1040 1075 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) 1041 1076 { 1042 - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1077 + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); 1043 1078 } 1044 1079 1045 1080 #ifndef DISABLE_FLOAT_API ··· 1056 1091 N = frame_size; 1057 1092 1058 1093 ALLOC(out, C*N, opus_int16); 1059 - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1094 + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); 1060 1095 if (ret>0) 1061 1096 for (j=0;j<C*ret;j++) 1062 1097 pcm[j]=out[j]*(1.f/32768.f); ··· 1070 1105 1071 1106 int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) 1072 1107 { 1073 - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1108 + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); 1074 1109 } 1075 1110 1076 1111 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) ··· 1086 1121 N = frame_size; 1087 1122 ALLOC(out, C*N, celt_sig); 1088 1123 1089 - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1124 + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); 1090 1125 1091 1126 if (ret>0) 1092 1127 for (j=0;j<C*ret;j++)

+29 -11

lib/rbcodec/codecs/libopus/celt/cwrs.c

··· 460 460 ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); 461 461 } 462 462 463 - static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ 463 + static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ 464 464 opus_uint32 p; 465 465 int s; 466 466 int k0; 467 + opus_int16 val; 468 + opus_val32 yy=0; 467 469 celt_assert(_k>0); 468 470 celt_assert(_n>1); 469 471 while(_n>2){ ··· 487 489 } 488 490 else for(p=row[_k];p>_i;p=row[_k])_k--; 489 491 _i-=p; 490 - *_y++=(k0-_k+s)^s; 492 + val=(k0-_k+s)^s; 493 + *_y++=val; 494 + yy=MAC16_16(yy,val,val); 491 495 } 492 496 /*Lots of dimensions case:*/ 493 497 else{ ··· 507 511 do p=CELT_PVQ_U_ROW[--_k][_n]; 508 512 while(p>_i); 509 513 _i-=p; 510 - *_y++=(k0-_k+s)^s; 514 + val=(k0-_k+s)^s; 515 + *_y++=val; 516 + yy=MAC16_16(yy,val,val); 511 517 } 512 518 } 513 519 _n--; ··· 519 525 k0=_k; 520 526 _k=(_i+1)>>1; 521 527 if(_k)_i-=2*_k-1; 522 - *_y++=(k0-_k+s)^s; 528 + val=(k0-_k+s)^s; 529 + *_y++=val; 530 + yy=MAC16_16(yy,val,val); 523 531 /*_n==1*/ 524 532 s=-(int)_i; 525 - *_y=(_k+s)^s; 533 + val=(_k+s)^s; 534 + *_y=val; 535 + yy=MAC16_16(yy,val,val); 536 + return yy; 526 537 } 527 538 528 - void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 529 - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); 539 + opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 540 + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); 530 541 } 531 542 532 543 #else /* SMALL_FOOTPRINT */ ··· 591 602 _y: Returns the vector of pulses. 592 603 _u: Must contain entries [0..._k+1] of row _n of U() on input. 593 604 Its contents will be destructively modified.*/ 594 - static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ 605 + static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ 595 606 int j; 607 + opus_int16 val; 608 + opus_val32 yy=0; 596 609 celt_assert(_n>0); 597 610 j=0; 598 611 do{ ··· 607 620 while(p>_i)p=_u[--_k]; 608 621 _i-=p; 609 622 yj-=_k; 610 - _y[j]=(yj+s)^s; 623 + val=(yj+s)^s; 624 + _y[j]=val; 625 + yy=MAC16_16(yy,val,val); 611 626 uprev(_u,_k+2,0); 612 627 } 613 628 while(++j<_n); 629 + return yy; 614 630 } 615 631 616 632 /*Returns the index of the given combination of K elements chosen from a set ··· 685 701 RESTORE_STACK; 686 702 } 687 703 688 - void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 704 + opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 689 705 VARDECL(opus_uint32,u); 706 + int ret; 690 707 SAVE_STACK; 691 708 celt_assert(_k>0); 692 709 ALLOC(u,_k+2U,opus_uint32); 693 - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); 710 + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); 694 711 RESTORE_STACK; 712 + return ret; 695 713 } 696 714 697 715 #endif /* SMALL_FOOTPRINT */

+1 -1

lib/rbcodec/codecs/libopus/celt/cwrs.h

··· 43 43 44 44 void encode_pulses(const int *_y, int N, int K, ec_enc *enc); 45 45 46 - void decode_pulses(int *_y, int N, int K, ec_dec *dec); 46 + opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); 47 47 48 48 #endif /* CWRS_H */

+60

lib/rbcodec/codecs/libopus/celt/entcode.c

··· 62 62 } 63 63 #endif 64 64 65 + #if 1 66 + /* This is a faster version of ec_tell_frac() that takes advantage 67 + of the low (1/8 bit) resolution to use just a linear function 68 + followed by a lookup to determine the exact transition thresholds. */ 69 + opus_uint32 ec_tell_frac(ec_ctx *_this){ 70 + static const unsigned correction[8] = 71 + {35733, 38967, 42495, 46340, 72 + 50535, 55109, 60097, 65535}; 73 + opus_uint32 nbits; 74 + opus_uint32 r; 75 + int l; 76 + unsigned b; 77 + nbits=_this->nbits_total<<BITRES; 78 + l=EC_ILOG(_this->rng); 79 + r=_this->rng>>(l-16); 80 + b = (r>>12)-8; 81 + b += r>correction[b]; 82 + l = (l<<3)+b; 83 + return nbits-l; 84 + } 85 + #else 65 86 opus_uint32 ec_tell_frac(ec_ctx *_this){ 66 87 opus_uint32 nbits; 67 88 opus_uint32 r; ··· 91 112 } 92 113 return nbits-l; 93 114 } 115 + #endif 116 + 117 + #ifdef USE_SMALL_DIV_TABLE 118 + /* Result of 2^32/(2*i+1), except for i=0. */ 119 + const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { 120 + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, 121 + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, 122 + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, 123 + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, 124 + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, 125 + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, 126 + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, 127 + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, 128 + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, 129 + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, 130 + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, 131 + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, 132 + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, 133 + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, 134 + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, 135 + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, 136 + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, 137 + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, 138 + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, 139 + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, 140 + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, 141 + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, 142 + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, 143 + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, 144 + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, 145 + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, 146 + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, 147 + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, 148 + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, 149 + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, 150 + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, 151 + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 152 + }; 153 + #endif

+35

lib/rbcodec/codecs/libopus/celt/entcode.h

··· 34 34 # include <stddef.h> 35 35 # include "ecintrin.h" 36 36 37 + extern const opus_uint32 SMALL_DIV_TABLE[129]; 38 + 39 + #ifdef OPUS_ARM_ASM 40 + #define USE_SMALL_DIV_TABLE 41 + #endif 42 + 37 43 /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a 38 44 larger type, you can speed up the decoder by using it here.*/ 39 45 typedef opus_uint32 ec_window; ··· 113 119 This will always be slightly larger than the exact value (e.g., all 114 120 rounding error is in the positive direction).*/ 115 121 opus_uint32 ec_tell_frac(ec_ctx *_this); 122 + 123 + /* Tested exhaustively for all n and for 1<=d<=256 */ 124 + static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { 125 + celt_assert(d>0); 126 + #ifdef USE_SMALL_DIV_TABLE 127 + if (d>256) 128 + return n/d; 129 + else { 130 + opus_uint32 t, q; 131 + t = EC_ILOG(d&-d); 132 + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; 133 + return q+(n-q*d >= d); 134 + } 135 + #else 136 + return n/d; 137 + #endif 138 + } 139 + 140 + static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { 141 + celt_assert(d>0); 142 + #ifdef USE_SMALL_DIV_TABLE 143 + if (n<0) 144 + return -(opus_int32)celt_udiv(-n, d); 145 + else 146 + return celt_udiv(n, d); 147 + #else 148 + return n/d; 149 + #endif 150 + } 116 151 117 152 #endif

+1 -1

lib/rbcodec/codecs/libopus/celt/entdec.c

··· 138 138 139 139 unsigned ec_decode(ec_dec *_this,unsigned _ft){ 140 140 unsigned s; 141 - _this->ext=_this->rng/_ft; 141 + _this->ext=celt_udiv(_this->rng,_ft); 142 142 s=(unsigned)(_this->val/_this->ext); 143 143 return _ft-EC_MINI(s+1,_ft); 144 144 }

+1 -1

lib/rbcodec/codecs/libopus/celt/entenc.c

··· 127 127 128 128 void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ 129 129 opus_uint32 r; 130 - r=_this->rng/_ft; 130 + r=celt_udiv(_this->rng,_ft); 131 131 if(_fl>0){ 132 132 _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); 133 133 _this->rng=IMUL32(r,(_fh-_fl));

+18 -1

lib/rbcodec/codecs/libopus/celt/fixed_generic.h

··· 113 113 /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. 114 114 b must fit in 31 bits. 115 115 Result fits in 32 bits. */ 116 - #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) 116 + #define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) 117 + 118 + /** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. 119 + Results fits in 32 bits */ 120 + #define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) 117 121 118 122 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) 119 123 #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) ··· 130 134 131 135 /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ 132 136 #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) 137 + 138 + #if defined(MIPSr1_ASM) 139 + #include "mips/fixed_generic_mipsr1.h" 140 + #endif 141 + 142 + static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) 143 + { 144 + x = PSHR32(x, SIG_SHIFT); 145 + x = MAX32(x, -32768); 146 + x = MIN32(x, 32767); 147 + return EXTRACT16(x); 148 + } 149 + #define SIG2WORD16(x) (SIG2WORD16_generic(x)) 133 150 134 151 #endif

+169 -305

lib/rbcodec/codecs/libopus/celt/kiss_fft.c

··· 45 45 complex numbers. It also delares the kf_ internal functions. 46 46 */ 47 47 48 - #if 0 49 48 static void kf_bfly2( 50 49 kiss_fft_cpx * Fout, 51 - const size_t fstride, 52 - const kiss_fft_state *st, 53 50 int m, 54 - int N, 55 - int mm 51 + int N 56 52 ) 57 53 { 58 54 kiss_fft_cpx * Fout2; 59 - const kiss_twiddle_cpx * tw1; 60 - int i,j; 61 - kiss_fft_cpx * Fout_beg = Fout; 62 - for (i=0;i<N;i++) 55 + int i; 56 + (void)m; 57 + #ifdef CUSTOM_MODES 58 + if (m==1) 63 59 { 64 - Fout = Fout_beg + i*mm; 65 - Fout2 = Fout + m; 66 - tw1 = st->twiddles; 67 - for(j=0;j<m;j++) 60 + celt_assert(m==1); 61 + for (i=0;i<N;i++) 68 62 { 69 63 kiss_fft_cpx t; 70 - Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); 71 - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); 72 - C_MUL (t, *Fout2 , *tw1); 73 - tw1 += fstride; 64 + Fout2 = Fout + 1; 65 + t = *Fout2; 74 66 C_SUB( *Fout2 , *Fout , t ); 75 67 C_ADDTO( *Fout , t ); 76 - ++Fout2; 77 - ++Fout; 68 + Fout += 2; 78 69 } 79 - } 80 - } 70 + } else 81 71 #endif 82 - 83 - static void ki_bfly2( 84 - kiss_fft_cpx * Fout, 85 - const size_t fstride, 86 - const kiss_fft_state *st, 87 - int m, 88 - int N, 89 - int mm 90 - ) 91 - { 92 - kiss_fft_cpx * Fout2; 93 - const kiss_twiddle_cpx * tw1; 94 - kiss_fft_cpx t; 95 - int i,j; 96 - kiss_fft_cpx * Fout_beg = Fout; 97 - for (i=0;i<N;i++) 98 72 { 99 - Fout = Fout_beg + i*mm; 100 - Fout2 = Fout + m; 101 - tw1 = st->twiddles; 102 - for(j=0;j<m;j++) 73 + opus_val16 tw; 74 + tw = QCONST16(0.7071067812f, 15); 75 + /* We know that m==4 here because the radix-2 is just after a radix-4 */ 76 + celt_assert(m==4); 77 + for (i=0;i<N;i++) 103 78 { 104 - C_MULC (t, *Fout2 , *tw1); 105 - tw1 += fstride; 106 - C_SUB( *Fout2 , *Fout , t ); 107 - C_ADDTO( *Fout , t ); 108 - ++Fout2; 109 - ++Fout; 79 + kiss_fft_cpx t; 80 + Fout2 = Fout + 4; 81 + t = Fout2[0]; 82 + C_SUB( Fout2[0] , Fout[0] , t ); 83 + C_ADDTO( Fout[0] , t ); 84 + 85 + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); 86 + t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); 87 + C_SUB( Fout2[1] , Fout[1] , t ); 88 + C_ADDTO( Fout[1] , t ); 89 + 90 + t.r = Fout2[2].i; 91 + t.i = -Fout2[2].r; 92 + C_SUB( Fout2[2] , Fout[2] , t ); 93 + C_ADDTO( Fout[2] , t ); 94 + 95 + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); 96 + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); 97 + C_SUB( Fout2[3] , Fout[3] , t ); 98 + C_ADDTO( Fout[3] , t ); 99 + Fout += 8; 110 100 } 111 101 } 112 102 } 113 103 114 - #if 0 115 104 static void kf_bfly4( 116 105 kiss_fft_cpx * Fout, 117 106 const size_t fstride, ··· 121 110 int mm 122 111 ) 123 112 { 124 - const kiss_twiddle_cpx *tw1,*tw2,*tw3; 125 - kiss_fft_cpx scratch[6]; 126 - const size_t m2=2*m; 127 - const size_t m3=3*m; 128 - int i, j; 113 + int i; 129 114 130 - kiss_fft_cpx * Fout_beg = Fout; 131 - for (i=0;i<N;i++) 115 + if (m==1) 132 116 { 133 - Fout = Fout_beg + i*mm; 134 - tw3 = tw2 = tw1 = st->twiddles; 135 - for (j=0;j<m;j++) 117 + /* Degenerate case where all the twiddles are 1. */ 118 + for (i=0;i<N;i++) 136 119 { 137 - C_MUL4(scratch[0],Fout[m] , *tw1 ); 138 - C_MUL4(scratch[1],Fout[m2] , *tw2 ); 139 - C_MUL4(scratch[2],Fout[m3] , *tw3 ); 120 + kiss_fft_cpx scratch0, scratch1; 140 121 141 - Fout->r = PSHR32(Fout->r, 2); 142 - Fout->i = PSHR32(Fout->i, 2); 143 - C_SUB( scratch[5] , *Fout, scratch[1] ); 144 - C_ADDTO(*Fout, scratch[1]); 145 - C_ADD( scratch[3] , scratch[0] , scratch[2] ); 146 - C_SUB( scratch[4] , scratch[0] , scratch[2] ); 147 - C_SUB( Fout[m2], *Fout, scratch[3] ); 148 - tw1 += fstride; 149 - tw2 += fstride*2; 150 - tw3 += fstride*3; 151 - C_ADDTO( *Fout , scratch[3] ); 122 + C_SUB( scratch0 , *Fout, Fout[2] ); 123 + C_ADDTO(*Fout, Fout[2]); 124 + C_ADD( scratch1 , Fout[1] , Fout[3] ); 125 + C_SUB( Fout[2], *Fout, scratch1 ); 126 + C_ADDTO( *Fout , scratch1 ); 127 + C_SUB( scratch1 , Fout[1] , Fout[3] ); 152 128 153 - Fout[m].r = scratch[5].r + scratch[4].i; 154 - Fout[m].i = scratch[5].i - scratch[4].r; 155 - Fout[m3].r = scratch[5].r - scratch[4].i; 156 - Fout[m3].i = scratch[5].i + scratch[4].r; 157 - ++Fout; 129 + Fout[1].r = scratch0.r + scratch1.i; 130 + Fout[1].i = scratch0.i - scratch1.r; 131 + Fout[3].r = scratch0.r - scratch1.i; 132 + Fout[3].i = scratch0.i + scratch1.r; 133 + Fout+=4; 158 134 } 159 - } 160 - } 161 - #endif 162 - 163 - static void ki_bfly4( 164 - kiss_fft_cpx * Fout, 165 - const size_t fstride, 166 - const kiss_fft_state *st, 167 - int m, 168 - int N, 169 - int mm 170 - ) 171 - { 172 - const kiss_twiddle_cpx *tw1,*tw2,*tw3; 173 - kiss_fft_cpx scratch[6]; 174 - const size_t m2=2*m; 175 - const size_t m3=3*m; 176 - int i, j; 177 - 178 - kiss_fft_cpx * Fout_beg = Fout; 179 - for (i=0;i<N;i++) 180 - { 181 - Fout = Fout_beg + i*mm; 182 - tw3 = tw2 = tw1 = st->twiddles; 183 - for (j=0;j<m;j++) 135 + } else { 136 + int j; 137 + kiss_fft_cpx scratch[6]; 138 + const kiss_twiddle_cpx *tw1,*tw2,*tw3; 139 + const int m2=2*m; 140 + const int m3=3*m; 141 + kiss_fft_cpx * Fout_beg = Fout; 142 + for (i=0;i<N;i++) 184 143 { 185 - C_MULC(scratch[0],Fout[m] , *tw1 ); 186 - C_MULC(scratch[1],Fout[m2] , *tw2 ); 187 - C_MULC(scratch[2],Fout[m3] , *tw3 ); 144 + Fout = Fout_beg + i*mm; 145 + tw3 = tw2 = tw1 = st->twiddles; 146 + /* m is guaranteed to be a multiple of 4. */ 147 + for (j=0;j<m;j++) 148 + { 149 + C_MUL(scratch[0],Fout[m] , *tw1 ); 150 + C_MUL(scratch[1],Fout[m2] , *tw2 ); 151 + C_MUL(scratch[2],Fout[m3] , *tw3 ); 188 152 189 - C_SUB( scratch[5] , *Fout, scratch[1] ); 190 - C_ADDTO(*Fout, scratch[1]); 191 - C_ADD( scratch[3] , scratch[0] , scratch[2] ); 192 - C_SUB( scratch[4] , scratch[0] , scratch[2] ); 193 - C_SUB( Fout[m2], *Fout, scratch[3] ); 194 - tw1 += fstride; 195 - tw2 += fstride*2; 196 - tw3 += fstride*3; 197 - C_ADDTO( *Fout , scratch[3] ); 153 + C_SUB( scratch[5] , *Fout, scratch[1] ); 154 + C_ADDTO(*Fout, scratch[1]); 155 + C_ADD( scratch[3] , scratch[0] , scratch[2] ); 156 + C_SUB( scratch[4] , scratch[0] , scratch[2] ); 157 + C_SUB( Fout[m2], *Fout, scratch[3] ); 158 + tw1 += fstride; 159 + tw2 += fstride*2; 160 + tw3 += fstride*3; 161 + C_ADDTO( *Fout , scratch[3] ); 198 162 199 - Fout[m].r = scratch[5].r - scratch[4].i; 200 - Fout[m].i = scratch[5].i + scratch[4].r; 201 - Fout[m3].r = scratch[5].r + scratch[4].i; 202 - Fout[m3].i = scratch[5].i - scratch[4].r; 203 - ++Fout; 163 + Fout[m].r = scratch[5].r + scratch[4].i; 164 + Fout[m].i = scratch[5].i - scratch[4].r; 165 + Fout[m3].r = scratch[5].r - scratch[4].i; 166 + Fout[m3].i = scratch[5].i + scratch[4].r; 167 + ++Fout; 168 + } 204 169 } 205 170 } 206 171 } 207 172 173 + 208 174 #ifndef RADIX_TWO_ONLY 209 175 210 - #if 0 211 176 static void kf_bfly3( 212 177 kiss_fft_cpx * Fout, 213 178 const size_t fstride, ··· 225 190 kiss_twiddle_cpx epi3; 226 191 227 192 kiss_fft_cpx * Fout_beg = Fout; 193 + #ifdef FIXED_POINT 194 + epi3.r = -16384; 195 + epi3.i = -28378; 196 + #else 228 197 epi3 = st->twiddles[fstride*m]; 198 + #endif 229 199 for (i=0;i<N;i++) 230 200 { 231 201 Fout = Fout_beg + i*mm; 232 202 tw1=tw2=st->twiddles; 203 + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ 233 204 k=m; 234 205 do { 235 - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 236 206 237 207 C_MUL(scratch[1],Fout[m] , *tw1); 238 208 C_MUL(scratch[2],Fout[m2] , *tw2); ··· 259 229 } while(--k); 260 230 } 261 231 } 262 - #endif 263 232 264 - static void ki_bfly3( 265 - kiss_fft_cpx * Fout, 266 - const size_t fstride, 267 - const kiss_fft_state *st, 268 - int m, 269 - int N, 270 - int mm 271 - ) 272 - { 273 - int i, k; 274 - const size_t m2 = 2*m; 275 - const kiss_twiddle_cpx *tw1,*tw2; 276 - kiss_fft_cpx scratch[5]; 277 - kiss_twiddle_cpx epi3; 278 233 279 - kiss_fft_cpx * Fout_beg = Fout; 280 - epi3 = st->twiddles[fstride*m]; 281 - for (i=0;i<N;i++) 282 - { 283 - Fout = Fout_beg + i*mm; 284 - tw1=tw2=st->twiddles; 285 - k=m; 286 - do{ 287 - 288 - C_MULC(scratch[1],Fout[m] , *tw1); 289 - C_MULC(scratch[2],Fout[m2] , *tw2); 290 - 291 - C_ADD(scratch[3],scratch[1],scratch[2]); 292 - C_SUB(scratch[0],scratch[1],scratch[2]); 293 - tw1 += fstride; 294 - tw2 += fstride*2; 295 - 296 - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 297 - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 298 - 299 - C_MULBYSCALAR( scratch[0] , -epi3.i ); 300 - 301 - C_ADDTO(*Fout,scratch[3]); 302 - 303 - Fout[m2].r = Fout[m].r + scratch[0].i; 304 - Fout[m2].i = Fout[m].i - scratch[0].r; 305 - 306 - Fout[m].r -= scratch[0].i; 307 - Fout[m].i += scratch[0].r; 308 - 309 - ++Fout; 310 - }while(--k); 311 - } 312 - } 313 - 314 - #if 0 234 + #ifndef OVERRIDE_kf_bfly5 315 235 static void kf_bfly5( 316 236 kiss_fft_cpx * Fout, 317 237 const size_t fstride, ··· 324 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 325 245 int i, u; 326 246 kiss_fft_cpx scratch[13]; 327 - const kiss_twiddle_cpx * twiddles = st->twiddles; 328 247 const kiss_twiddle_cpx *tw; 329 248 kiss_twiddle_cpx ya,yb; 330 249 kiss_fft_cpx * Fout_beg = Fout; 331 250 332 - ya = twiddles[fstride*m]; 333 - yb = twiddles[fstride*2*m]; 251 + #ifdef FIXED_POINT 252 + ya.r = 10126; 253 + ya.i = -31164; 254 + yb.r = -26510; 255 + yb.i = -19261; 256 + #else 257 + ya = st->twiddles[fstride*m]; 258 + yb = st->twiddles[fstride*2*m]; 259 + #endif 334 260 tw=st->twiddles; 335 261 336 262 for (i=0;i<N;i++) ··· 342 268 Fout3=Fout0+3*m; 343 269 Fout4=Fout0+4*m; 344 270 271 + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ 345 272 for ( u=0; u<m; ++u ) { 346 - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 347 273 scratch[0] = *Fout0; 348 274 349 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); ··· 380 306 } 381 307 } 382 308 } 383 - #endif 309 + #endif /* OVERRIDE_kf_bfly5 */ 384 310 385 - static void ki_bfly5( 386 - kiss_fft_cpx * Fout, 387 - const size_t fstride, 388 - const kiss_fft_state *st, 389 - int m, 390 - int N, 391 - int mm 392 - ) 393 - { 394 - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 395 - int i, u; 396 - kiss_fft_cpx scratch[13]; 397 - const kiss_twiddle_cpx * twiddles = st->twiddles; 398 - const kiss_twiddle_cpx *tw; 399 - kiss_twiddle_cpx ya,yb; 400 - kiss_fft_cpx * Fout_beg = Fout; 401 - 402 - ya = twiddles[fstride*m]; 403 - yb = twiddles[fstride*2*m]; 404 - tw=st->twiddles; 405 - 406 - for (i=0;i<N;i++) 407 - { 408 - Fout = Fout_beg + i*mm; 409 - Fout0=Fout; 410 - Fout1=Fout0+m; 411 - Fout2=Fout0+2*m; 412 - Fout3=Fout0+3*m; 413 - Fout4=Fout0+4*m; 414 - 415 - for ( u=0; u<m; ++u ) { 416 - scratch[0] = *Fout0; 417 - 418 - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); 419 - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); 420 - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); 421 - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); 422 - 423 - C_ADD( scratch[7],scratch[1],scratch[4]); 424 - C_SUB( scratch[10],scratch[1],scratch[4]); 425 - C_ADD( scratch[8],scratch[2],scratch[3]); 426 - C_SUB( scratch[9],scratch[2],scratch[3]); 427 - 428 - Fout0->r += scratch[7].r + scratch[8].r; 429 - Fout0->i += scratch[7].i + scratch[8].i; 430 - 431 - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 432 - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 433 - 434 - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); 435 - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); 436 - 437 - C_SUB(*Fout1,scratch[5],scratch[6]); 438 - C_ADD(*Fout4,scratch[5],scratch[6]); 439 - 440 - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 441 - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 442 - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); 443 - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); 444 - 445 - C_ADD(*Fout2,scratch[11],scratch[12]); 446 - C_SUB(*Fout3,scratch[11],scratch[12]); 447 - 448 - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 449 - } 450 - } 451 - } 452 311 453 312 #endif 454 313 ··· 496 355 int kf_factor(int n,opus_int16 * facbuf) 497 356 { 498 357 int p=4; 358 + int i; 359 + int stages=0; 360 + int nbak = n; 499 361 500 362 /*factor out powers of 4, powers of 2, then any remaining primes */ 501 363 do { ··· 517 379 { 518 380 return 0; 519 381 } 520 - *facbuf++ = p; 521 - *facbuf++ = n; 382 + facbuf[2*stages] = p; 383 + if (p==2 && stages > 1) 384 + { 385 + facbuf[2*stages] = 4; 386 + facbuf[2] = 2; 387 + } 388 + stages++; 522 389 } while (n > 1); 390 + n = nbak; 391 + /* Reverse the order to get the radix 4 at the end, so we can use the 392 + fast degenerate case. It turns out that reversing the order also 393 + improves the noise behaviour. */ 394 + for (i=0;i<stages/2;i++) 395 + { 396 + int tmp; 397 + tmp = facbuf[2*i]; 398 + facbuf[2*i] = facbuf[2*(stages-i-1)]; 399 + facbuf[2*(stages-i-1)] = tmp; 400 + } 401 + for (i=0;i<stages;i++) 402 + { 403 + n /= facbuf[2*i]; 404 + facbuf[2*i+1] = n; 405 + } 523 406 return 1; 524 407 } 525 408 ··· 563 446 kiss_twiddle_cpx *twiddles; 564 447 565 448 st->nfft=nfft; 566 - #ifndef FIXED_POINT 449 + #ifdef FIXED_POINT 450 + st->scale_shift = celt_ilog2(st->nfft); 451 + if (st->nfft == 1<<st->scale_shift) 452 + st->scale = Q15ONE; 453 + else 454 + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); 455 + #else 567 456 st->scale = 1.f/nfft; 568 457 #endif 569 458 if (base != NULL) 570 459 { 571 460 st->twiddles = base->twiddles; 572 461 st->shift = 0; 573 - while (nfft<<st->shift != base->nfft && st->shift < 32) 462 + while (st->shift < 32 && nfft<<st->shift != base->nfft) 574 463 st->shift++; 575 464 if (st->shift>=32) 576 465 goto fail; ··· 614 503 615 504 #endif /* CUSTOM_MODES */ 616 505 617 - #if 0 618 - void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 506 + void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) 619 507 { 620 508 int m2, m; 621 509 int p; ··· 627 515 /* st->shift can be -1 */ 628 516 shift = st->shift>0 ? st->shift : 0; 629 517 630 - celt_assert2 (fin != fout, "In-place FFT not supported"); 631 - /* Bit-reverse the input */ 632 - for (i=0;i<st->nfft;i++) 633 - { 634 - fout[st->bitrev[i]] = fin[i]; 635 - #ifndef FIXED_POINT 636 - fout[st->bitrev[i]].r *= st->scale; 637 - fout[st->bitrev[i]].i *= st->scale; 638 - #endif 639 - } 640 - 641 518 fstride[0] = 1; 642 519 L=0; 643 520 do { ··· 656 533 switch (st->factors[2*i]) 657 534 { 658 535 case 2: 659 - kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 536 + kf_bfly2(fout, m, fstride[i]); 660 537 break; 661 538 case 4: 662 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); ··· 673 550 m = m2; 674 551 } 675 552 } 676 - #endif 677 553 678 - void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 554 + #if 0 555 + void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 679 556 { 680 - int m2, m; 681 - int p; 682 - int L; 683 - int fstride[MAXFACTORS]; 684 557 int i; 685 - int shift; 558 + opus_val16 scale; 559 + #ifdef FIXED_POINT 560 + /* Allows us to scale with MULT16_32_Q16(), which is faster than 561 + MULT16_32_Q15() on ARM. */ 562 + int scale_shift = st->scale_shift-1; 563 + #endif 564 + scale = st->scale; 686 565 687 - /* st->shift can be -1 */ 688 - shift = st->shift>0 ? st->shift : 0; 689 566 celt_assert2 (fin != fout, "In-place FFT not supported"); 690 567 /* Bit-reverse the input */ 691 568 for (i=0;i<st->nfft;i++) 692 - fout[st->bitrev[i]] = fin[i]; 693 - 694 - fstride[0] = 1; 695 - L=0; 696 - do { 697 - p = st->factors[2*L]; 698 - m = st->factors[2*L+1]; 699 - fstride[L+1] = fstride[L]*p; 700 - L++; 701 - } while(m!=1); 702 - m = st->factors[2*L-1]; 703 - for (i=L-1;i>=0;i--) 704 569 { 705 - if (i!=0) 706 - m2 = st->factors[2*i-1]; 707 - else 708 - m2 = 1; 709 - switch (st->factors[2*i]) 710 - { 711 - case 2: 712 - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 713 - break; 714 - case 4: 715 - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); 716 - break; 717 - #ifndef RADIX_TWO_ONLY 718 - case 3: 719 - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); 720 - break; 721 - case 5: 722 - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); 723 - break; 724 - #endif 725 - } 726 - m = m2; 570 + kiss_fft_cpx x = fin[i]; 571 + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); 572 + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); 727 573 } 574 + opus_fft_impl(st, fout); 728 575 } 576 + #endif 729 577 578 + 579 + #ifdef TEST_UNIT_DFT_C 580 + void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 581 + { 582 + int i; 583 + celt_assert2 (fin != fout, "In-place FFT not supported"); 584 + /* Bit-reverse the input */ 585 + for (i=0;i<st->nfft;i++) 586 + fout[st->bitrev[i]] = fin[i]; 587 + for (i=0;i<st->nfft;i++) 588 + fout[i].i = -fout[i].i; 589 + opus_fft_impl(st, fout); 590 + for (i=0;i<st->nfft;i++) 591 + fout[i].i = -fout[i].i; 592 + } 593 + #endif

+6 -9

lib/rbcodec/codecs/libopus/celt/kiss_fft.h

··· 79 79 80 80 typedef struct kiss_fft_state{ 81 81 int nfft; 82 - #ifndef FIXED_POINT 83 - kiss_fft_scalar scale; 82 + opus_val16 scale; 83 + #ifdef FIXED_POINT 84 + int scale_shift; 84 85 #endif 85 86 int shift; 86 87 opus_int16 factors[2*MAXFACTORS]; ··· 128 129 f[k].r and f[k].i 129 130 * */ 130 131 void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 131 - 132 - #if defined(CPU_COLDFIRE) 133 - #define IFFT_ICODE ICODE_ATTR 134 - #else 135 - #define IFFT_ICODE 136 - #endif 132 + void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 137 133 138 - void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE; 134 + void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 135 + void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 139 136 140 137 void opus_fft_free(const kiss_fft_state *cfg); 141 138

+110 -81

lib/rbcodec/codecs/libopus/celt/mdct.c

··· 53 53 #include "mathops.h" 54 54 #include "stack_alloc.h" 55 55 56 + #if defined(MIPSr1_ASM) 57 + #include "mips/mdct_mipsr1.h" 58 + #endif 59 + 60 + 56 61 #ifdef CUSTOM_MODES 57 62 58 63 int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 59 64 { 60 65 int i; 61 - int N4; 62 66 kiss_twiddle_scalar *trig; 63 - #if defined(FIXED_POINT) 67 + int shift; 64 68 int N2=N>>1; 65 - #endif 66 69 l->n = N; 67 - N4 = N>>2; 68 70 l->maxshift = maxshift; 69 71 for (i=0;i<=maxshift;i++) 70 72 { ··· 77 79 return 0; 78 80 #endif 79 81 } 80 - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 82 + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); 81 83 if (l->trig==NULL) 82 84 return 0; 83 - /* We have enough points that sine isn't necessary */ 85 + for (shift=0;shift<=maxshift;shift++) 86 + { 87 + /* We have enough points that sine isn't necessary */ 84 88 #if defined(FIXED_POINT) 85 - for (i=0;i<=N4;i++) 86 - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); 89 + #if 1 90 + for (i=0;i<N2;i++) 91 + trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N)); 87 92 #else 88 - for (i=0;i<=N4;i++) 89 - trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 93 + for (i=0;i<N2;i++) 94 + trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N)))); 90 95 #endif 96 + #else 97 + for (i=0;i<N2;i++) 98 + trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N); 99 + #endif 100 + trig += N2; 101 + N2 >>= 1; 102 + N >>= 1; 103 + } 91 104 return 1; 92 105 } 93 106 ··· 103 116 104 117 #if 0 105 118 /* Forward MDCT trashes the input array */ 119 + #ifndef OVERRIDE_clt_mdct_forward 106 120 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 107 121 const opus_val16 *window, int overlap, int shift, int stride) 108 122 { 109 123 int i; 110 124 int N, N2, N4; 111 - kiss_twiddle_scalar sine; 112 125 VARDECL(kiss_fft_scalar, f); 113 - VARDECL(kiss_fft_scalar, f2); 126 + VARDECL(kiss_fft_cpx, f2); 127 + const kiss_fft_state *st = l->kfft[shift]; 128 + const kiss_twiddle_scalar *trig; 129 + opus_val16 scale; 130 + #ifdef FIXED_POINT 131 + /* Allows us to scale with MULT16_32_Q16(), which is faster than 132 + MULT16_32_Q15() on ARM. */ 133 + int scale_shift = st->scale_shift-1; 134 + #endif 114 135 SAVE_STACK; 136 + scale = st->scale; 137 + 115 138 N = l->n; 116 - N >>= shift; 139 + trig = l->trig; 140 + for (i=0;i<shift;i++) 141 + { 142 + N >>= 1; 143 + trig += N; 144 + } 117 145 N2 = N>>1; 118 146 N4 = N>>2; 147 + 119 148 ALLOC(f, N2, kiss_fft_scalar); 120 - ALLOC(f2, N2, kiss_fft_scalar); 121 - /* sin(x) ~= x here */ 122 - #ifdef FIXED_POINT 123 - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 124 - #else 125 - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 126 - #endif 149 + ALLOC(f2, N4, kiss_fft_cpx); 127 150 128 151 /* Consider the input to be composed of four blocks: [a, b, c, d] */ 129 152 /* Window, shuffle, fold */ ··· 168 191 /* Pre-rotation */ 169 192 { 170 193 kiss_fft_scalar * OPUS_RESTRICT yp = f; 171 - const kiss_twiddle_scalar *t = &l->trig[0]; 194 + const kiss_twiddle_scalar *t = &trig[0]; 172 195 for(i=0;i<N4;i++) 173 196 { 197 + kiss_fft_cpx yc; 198 + kiss_twiddle_scalar t0, t1; 174 199 kiss_fft_scalar re, im, yr, yi; 175 - re = yp[0]; 176 - im = yp[1]; 177 - yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 178 - yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 179 - /* works because the cos is nearly one */ 180 - *yp++ = yr + S_MUL(yi,sine); 181 - *yp++ = yi - S_MUL(yr,sine); 200 + t0 = t[i]; 201 + t1 = t[N4+i]; 202 + re = *yp++; 203 + im = *yp++; 204 + yr = S_MUL(re,t0) - S_MUL(im,t1); 205 + yi = S_MUL(im,t0) + S_MUL(re,t1); 206 + yc.r = yr; 207 + yc.i = yi; 208 + yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); 209 + yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); 210 + f2[st->bitrev[i]] = yc; 182 211 } 183 212 } 184 213 185 - /* N/4 complex FFT, down-scales by 4/N */ 186 - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); 214 + /* N/4 complex FFT, does not downscale anymore */ 215 + opus_fft_impl(st, f2); 187 216 188 217 /* Post-rotate */ 189 218 { 190 219 /* Temp pointers to make it really clear to the compiler what we're doing */ 191 - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; 220 + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; 192 221 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 193 222 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 194 - const kiss_twiddle_scalar *t = &l->trig[0]; 223 + const kiss_twiddle_scalar *t = &trig[0]; 195 224 /* Temp pointers to make it really clear to the compiler what we're doing */ 196 225 for(i=0;i<N4;i++) 197 226 { 198 227 kiss_fft_scalar yr, yi; 199 - yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 200 - yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 201 - /* works because the cos is nearly one */ 202 - *yp1 = yr - S_MUL(yi,sine); 203 - *yp2 = yi + S_MUL(yr,sine);; 204 - fp += 2; 228 + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); 229 + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); 230 + *yp1 = yr; 231 + *yp2 = yi; 232 + fp++; 205 233 yp1 += 2*stride; 206 234 yp2 -= 2*stride; 207 235 } 208 236 } 209 237 RESTORE_STACK; 210 238 } 239 + #endif /* OVERRIDE_clt_mdct_forward */ 211 240 #endif 212 241 242 + #ifndef OVERRIDE_clt_mdct_backward 213 243 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 214 244 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 215 245 { 216 246 int i; 217 247 int N, N2, N4; 218 - kiss_twiddle_scalar sine; 219 - /* VARDECL(kiss_fft_scalar, f2); 220 - SAVE_STACK; */ 248 + const kiss_twiddle_scalar *trig; 249 + 221 250 N = l->n; 222 - N >>= shift; 251 + trig = l->trig; 252 + for (i=0;i<shift;i++) 253 + { 254 + N >>= 1; 255 + trig += N; 256 + } 223 257 N2 = N>>1; 224 258 N4 = N>>2; 225 - /* ALLOC(f2, N2, kiss_fft_scalar); */ 226 - kiss_fft_scalar f2[N2]; /* worst case 3840b */ 227 - /* sin(x) ~= x here */ 228 - #ifdef FIXED_POINT 229 - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 230 - #else 231 - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 232 - #endif 233 259 234 260 /* Pre-rotate */ 235 261 { 236 262 /* Temp pointers to make it really clear to the compiler what we're doing */ 237 263 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 238 264 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 239 - kiss_fft_scalar * OPUS_RESTRICT yp = f2; 240 - const kiss_twiddle_scalar *t = &l->trig[0]; 265 + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); 266 + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; 267 + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; 241 268 for(i=0;i<N4;i++) 242 269 { 270 + int rev; 243 271 kiss_fft_scalar yr, yi; 244 - yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 245 - yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 246 - /* works because the cos is nearly one */ 247 - *yp++ = yr - S_MUL(yi,sine); 248 - *yp++ = yi + S_MUL(yr,sine); 272 + rev = *bitrev++; 273 + yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); 274 + yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); 275 + /* We swap real and imag because we use an FFT instead of an IFFT. */ 276 + yp[2*rev+1] = yr; 277 + yp[2*rev] = yi; 278 + /* Storing the pre-rotation directly in the bitrev order. */ 249 279 xp1+=2*stride; 250 280 xp2-=2*stride; 251 281 } 252 282 } 253 283 254 - /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 255 - opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); 284 + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); 256 285 257 286 /* Post-rotate and de-shuffle from both ends of the buffer at once to make 258 287 it in-place. */ 259 288 { 260 - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); 261 - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; 262 - const kiss_twiddle_scalar *t = &l->trig[0]; 289 + kiss_fft_scalar * yp0 = out+(overlap>>1); 290 + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; 291 + const kiss_twiddle_scalar *t = &trig[0]; 263 292 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 264 293 middle pair will be computed twice. */ 265 294 for(i=0;i<(N4+1)>>1;i++) 266 295 { 267 296 kiss_fft_scalar re, im, yr, yi; 268 297 kiss_twiddle_scalar t0, t1; 269 - re = yp0[0]; 270 - im = yp0[1]; 271 - t0 = t[i<<shift]; 272 - t1 = t[(N4-i)<<shift]; 298 + /* We swap real and imag because we're using an FFT instead of an IFFT. */ 299 + re = yp0[1]; 300 + im = yp0[0]; 301 + t0 = t[i]; 302 + t1 = t[N4+i]; 273 303 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 274 - yr = S_MUL(re,t0) - S_MUL(im,t1); 275 - yi = S_MUL(im,t0) + S_MUL(re,t1); 276 - re = yp1[0]; 277 - im = yp1[1]; 278 - /* works because the cos is nearly one */ 279 - yp0[0] = -(yr - S_MUL(yi,sine)); 280 - yp1[1] = yi + S_MUL(yr,sine); 304 + yr = S_MUL(re,t0) + S_MUL(im,t1); 305 + yi = S_MUL(re,t1) - S_MUL(im,t0); 306 + /* We swap real and imag because we're using an FFT instead of an IFFT. */ 307 + re = yp1[1]; 308 + im = yp1[0]; 309 + yp0[0] = yr; 310 + yp1[1] = yi; 281 311 282 - t0 = t[(N4-i-1)<<shift]; 283 - t1 = t[(i+1)<<shift]; 312 + t0 = t[(N4-i-1)]; 313 + t1 = t[(N2-i-1)]; 284 314 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 285 - yr = S_MUL(re,t0) - S_MUL(im,t1); 286 - yi = S_MUL(im,t0) + S_MUL(re,t1); 287 - /* works because the cos is nearly one */ 288 - yp1[0] = -(yr - S_MUL(yi,sine)); 289 - yp0[1] = yi + S_MUL(yr,sine); 315 + yr = S_MUL(re,t0) + S_MUL(im,t1); 316 + yi = S_MUL(re,t1) - S_MUL(im,t0); 317 + yp1[0] = yr; 318 + yp0[1] = yi; 290 319 yp0 += 2; 291 320 yp1 -= 2; 292 321 } ··· 310 339 wp2--; 311 340 } 312 341 } 313 - /* RESTORE_STACK; */ 314 342 } 343 + #endif /* OVERRIDE_clt_mdct_backward */

-8

lib/rbcodec/codecs/libopus/celt/modes.h

··· 39 39 40 40 #define MAX_PERIOD 1024 41 41 42 - #ifndef OVERLAP 43 - #define OVERLAP(mode) ((mode)->overlap) 44 - #endif 45 - 46 - #ifndef FRAMESIZE 47 - #define FRAMESIZE(mode) ((mode)->mdctSize) 48 - #endif 49 - 50 42 typedef struct { 51 43 int size; 52 44 const opus_int16 *index;

+14 -16

lib/rbcodec/codecs/libopus/celt/pitch.c

··· 252 252 #endif 253 253 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) 254 254 { 255 - int i,j; 255 + int i; 256 256 /*The EDSP version requires that max_pitch is at least 1, and that _x is 257 257 32-bit aligned. 258 258 Since it's hard to put asserts in assembly, put them here.*/ 259 - celt_assert(max_pitch>0); 260 - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 261 259 #ifdef FIXED_POINT 262 260 opus_val32 maxcorr=1; 263 261 #endif 262 + celt_assert(max_pitch>0); 263 + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 264 264 for (i=0;i<max_pitch-3;i+=4) 265 265 { 266 266 opus_val32 sum[4]={0,0,0,0}; ··· 279 279 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ 280 280 for (;i<max_pitch;i++) 281 281 { 282 - opus_val32 sum = 0; 283 - for (j=0;j<len;j++) 284 - sum = MAC16_16(sum, _x[j],_y[i+j]); 282 + opus_val32 sum; 283 + sum = celt_inner_prod(_x, _y+i, len); 285 284 xcorr[i] = sum; 286 285 #ifdef FIXED_POINT 287 286 maxcorr = MAX32(maxcorr, sum); ··· 361 360 #endif 362 361 for (i=0;i<max_pitch>>1;i++) 363 362 { 364 - opus_val32 sum=0; 363 + opus_val32 sum; 365 364 xcorr[i] = 0; 366 365 if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) 367 366 continue; 367 + #ifdef FIXED_POINT 368 + sum = 0; 368 369 for (j=0;j<len>>1;j++) 369 370 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 371 + #else 372 + sum = celt_inner_prod(x_lp, y+i, len>>1); 373 + #endif 370 374 xcorr[i] = MAX32(-1, sum); 371 375 #ifdef FIXED_POINT 372 376 maxcorr = MAX32(maxcorr, sum); ··· 457 461 opus_val16 g1; 458 462 opus_val16 cont=0; 459 463 opus_val16 thresh; 460 - T1 = (2*T0+k)/(2*k); 464 + T1 = celt_udiv(2*T0+k, 2*k); 461 465 if (T1 < minperiod) 462 466 break; 463 467 /* Look for another strong correlation at T1b */ ··· 469 473 T1b = T0+T1; 470 474 } else 471 475 { 472 - T1b = (2*second_check[k]*T0+k)/(2*k); 476 + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); 473 477 } 474 478 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); 475 479 xy += xy2; ··· 514 518 pg = SHR32(frac_div32(best_xy,best_yy+1),16); 515 519 516 520 for (k=0;k<3;k++) 517 - { 518 - int T1 = T+k-1; 519 - xy = 0; 520 - for (i=0;i<N;i++) 521 - xy = MAC16_16(xy, x[i], x[i-T1]); 522 - xcorr[k] = xy; 523 - } 521 + xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); 524 522 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) 525 523 offset = 1; 526 524 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))

+17 -1

lib/rbcodec/codecs/libopus/celt/pitch.h

··· 41 41 #include "x86/pitch_sse.h" 42 42 #endif 43 43 44 + #if defined(MIPSr1_ASM) 45 + #include "mips/pitch_mipsr1.h" 46 + #endif 47 + 44 48 #if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) 45 - # include "arm/pitch_arm.h" 49 + //# include "arm/pitch_arm.h" 46 50 #endif 47 51 48 52 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, ··· 138 142 } 139 143 *xy1 = xy01; 140 144 *xy2 = xy02; 145 + } 146 + #endif 147 + 148 + #ifndef OVERRIDE_CELT_INNER_PROD 149 + static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, 150 + int N) 151 + { 152 + int i; 153 + opus_val32 xy=0; 154 + for (i=0;i<N;i++) 155 + xy = MAC16_16(xy, x[i], y[i]); 156 + return xy; 141 157 } 142 158 #endif 143 159

+4 -3

lib/rbcodec/codecs/libopus/celt/rate.c

··· 333 333 /*Figure out how many left-over bits we would be adding to this band. 334 334 This can include bits we've stolen back from higher, skipped bands.*/ 335 335 left = total-psum; 336 - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); 336 + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); 337 337 left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; 338 338 rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); 339 339 band_width = m->eBands[codedBands]-m->eBands[j]; ··· 414 414 415 415 /* Allocate the remaining bits */ 416 416 left = total-psum; 417 - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); 417 + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); 418 418 left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; 419 419 for (j=start;j<codedBands;j++) 420 420 bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j])); ··· 465 465 offset += NClogN>>3; 466 466 467 467 /* Divide with rounding */ 468 - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES)); 468 + ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1)))); 469 + ebits[j] = celt_udiv(ebits[j], den)>>BITRES; 469 470 470 471 /* Make sure not to bust */ 471 472 if (C*ebits[j] > (bits[j]>>BITRES))

+7 -1

lib/rbcodec/codecs/libopus/celt/stack_alloc.h

··· 116 116 #else 117 117 118 118 #ifdef CELT_C 119 + char *scratch_ptr=0; 119 120 char *global_stack=0; 120 121 #else 121 122 extern char *global_stack; 123 + extern char *scratch_ptr; 122 124 #endif /* CELT_C */ 123 125 124 126 #ifdef ENABLE_VALGRIND ··· 140 142 141 143 #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) 142 144 #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) 145 + #if 0 /* Set this to 1 to instrument pseudostack usage */ 146 + #define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) 147 + #else 143 148 #define RESTORE_STACK (global_stack = _saved_stack) 144 - #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; 149 + #endif 150 + #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; 145 151 146 152 #endif /* ENABLE_VALGRIND */ 147 153

+434 -162

lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h

··· 341 341 #ifndef FFT_BITREV480 342 342 #define FFT_BITREV480 343 343 static const opus_int16 fft_bitrev480[480] = { 344 - 0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, 345 - 450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, 346 - 345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, 347 - 215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, 348 - 110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, 349 - 430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, 350 - 325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, 351 - 181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, 352 - 76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, 353 - 396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, 354 - 291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, 355 - 161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, 356 - 56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, 357 - 362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, 358 - 257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, 359 - 127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, 360 - 22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, 361 - 472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, 362 - 342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, 363 - 237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, 364 - 93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, 365 - 438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, 366 - 308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, 367 - 203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, 368 - 73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, 369 - 418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, 370 - 274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, 371 - 169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, 372 - 39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, 373 - 384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, 374 - 254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, 375 - 149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, 344 + 0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, 345 + 8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, 346 + 16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, 347 + 24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, 348 + 4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, 349 + 12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, 350 + 20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, 351 + 28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, 352 + 1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, 353 + 9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, 354 + 17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, 355 + 25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, 356 + 5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, 357 + 13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, 358 + 21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, 359 + 29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, 360 + 2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, 361 + 10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, 362 + 18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, 363 + 26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, 364 + 6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, 365 + 14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, 366 + 22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, 367 + 30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, 368 + 3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, 369 + 11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, 370 + 19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, 371 + 27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, 372 + 7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, 373 + 15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, 374 + 23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, 375 + 31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, 376 376 }; 377 377 #endif 378 378 379 379 #ifndef FFT_BITREV240 380 380 #define FFT_BITREV240 381 381 static const opus_int16 fft_bitrev240[240] = { 382 - 0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, 383 - 225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, 384 - 170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, 385 - 115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, 386 - 46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, 387 - 216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, 388 - 161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, 389 - 92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, 390 - 37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, 391 - 207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, 392 - 138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, 393 - 83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, 394 - 28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, 395 - 184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, 396 - 129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, 397 - 74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, 382 + 0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, 383 + 4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, 384 + 8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, 385 + 12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, 386 + 1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, 387 + 5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, 388 + 9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, 389 + 13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, 390 + 2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, 391 + 6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, 392 + 10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, 393 + 14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, 394 + 3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, 395 + 7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, 396 + 11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, 397 + 15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, 398 398 }; 399 399 #endif 400 400 401 401 #ifndef FFT_BITREV120 402 402 #define FFT_BITREV120 403 403 static const opus_int16 fft_bitrev120[120] = { 404 - 0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, 405 - 110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, 406 - 76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, 407 - 56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, 408 - 22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, 409 - 93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, 410 - 73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, 411 - 39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, 404 + 0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, 405 + 4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, 406 + 1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, 407 + 5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, 408 + 2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, 409 + 6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, 410 + 3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, 411 + 7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, 412 412 }; 413 413 #endif 414 414 415 415 #ifndef FFT_BITREV60 416 416 #define FFT_BITREV60 417 417 static const opus_int16 fft_bitrev60[60] = { 418 - 0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, 419 - 46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, 420 - 37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, 421 - 28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, 418 + 0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, 419 + 1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, 420 + 2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, 421 + 3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, 422 422 }; 423 423 #endif 424 424 ··· 426 426 #define FFT_STATE48000_960_0 427 427 static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 428 428 480, /* nfft */ 429 + 17476, /* scale */ 430 + 8, /* scale_shift */ 429 431 -1, /* shift */ 430 - {4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ 432 + {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ 431 433 fft_bitrev480, /* bitrev */ 432 434 fft_twiddles48000_960, /* bitrev */ 433 435 }; ··· 437 439 #define FFT_STATE48000_960_1 438 440 static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 439 441 240, /* nfft */ 442 + 17476, /* scale */ 443 + 7, /* scale_shift */ 440 444 1, /* shift */ 441 - {4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 445 + {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 442 446 fft_bitrev240, /* bitrev */ 443 447 fft_twiddles48000_960, /* bitrev */ 444 448 }; ··· 448 452 #define FFT_STATE48000_960_2 449 453 static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 450 454 120, /* nfft */ 455 + 17476, /* scale */ 456 + 6, /* scale_shift */ 451 457 2, /* shift */ 452 - {4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 458 + {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 453 459 fft_bitrev120, /* bitrev */ 454 460 fft_twiddles48000_960, /* bitrev */ 455 461 }; ··· 459 465 #define FFT_STATE48000_960_3 460 466 static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 461 467 60, /* nfft */ 468 + 17476, /* scale */ 469 + 5, /* scale_shift */ 462 470 3, /* shift */ 463 - {4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 471 + {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 464 472 fft_bitrev60, /* bitrev */ 465 473 fft_twiddles48000_960, /* bitrev */ 466 474 }; ··· 470 478 471 479 #ifndef MDCT_TWIDDLES960 472 480 #define MDCT_TWIDDLES960 473 - static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = { 474 - 32767, 32767, 32767, 32767, 32766, 475 - 32763, 32762, 32759, 32757, 32753, 476 - 32751, 32747, 32743, 32738, 32733, 477 - 32729, 32724, 32717, 32711, 32705, 478 - 32698, 32690, 32683, 32676, 32667, 479 - 32658, 32650, 32640, 32631, 32620, 480 - 32610, 32599, 32588, 32577, 32566, 481 - 32554, 32541, 32528, 32515, 32502, 482 - 32487, 32474, 32459, 32444, 32429, 483 - 32413, 32397, 32381, 32364, 32348, 484 - 32331, 32313, 32294, 32277, 32257, 485 - 32239, 32219, 32200, 32180, 32159, 486 - 32138, 32118, 32096, 32074, 32051, 487 - 32029, 32006, 31984, 31960, 31936, 488 - 31912, 31888, 31863, 31837, 31812, 489 - 31786, 31760, 31734, 31707, 31679, 490 - 31652, 31624, 31596, 31567, 31539, 491 - 31508, 31479, 31450, 31419, 31388, 492 - 31357, 31326, 31294, 31262, 31230, 493 - 31198, 31164, 31131, 31097, 31063, 494 - 31030, 30994, 30959, 30924, 30889, 495 - 30853, 30816, 30779, 30743, 30705, 496 - 30668, 30629, 30592, 30553, 30515, 497 - 30475, 30435, 30396, 30356, 30315, 498 - 30274, 30233, 30191, 30149, 30107, 499 - 30065, 30022, 29979, 29936, 29891, 500 - 29847, 29803, 29758, 29713, 29668, 501 - 29622, 29577, 29529, 29483, 29436, 502 - 29390, 29341, 29293, 29246, 29197, 503 - 29148, 29098, 29050, 29000, 28949, 504 - 28899, 28848, 28797, 28746, 28694, 505 - 28642, 28590, 28537, 28485, 28432, 506 - 28378, 28324, 28271, 28217, 28162, 507 - 28106, 28051, 27995, 27940, 27884, 508 - 27827, 27770, 27713, 27657, 27598, 509 - 27540, 27481, 27423, 27365, 27305, 510 - 27246, 27187, 27126, 27066, 27006, 511 - 26945, 26883, 26822, 26760, 26698, 512 - 26636, 26574, 26510, 26448, 26383, 513 - 26320, 26257, 26191, 26127, 26062, 514 - 25997, 25931, 25866, 25800, 25734, 515 - 25667, 25601, 25533, 25466, 25398, 516 - 25330, 25262, 25194, 25125, 25056, 517 - 24987, 24917, 24848, 24778, 24707, 518 - 24636, 24566, 24495, 24424, 24352, 519 - 24280, 24208, 24135, 24063, 23990, 520 - 23917, 23842, 23769, 23695, 23622, 521 - 23546, 23472, 23398, 23322, 23246, 522 - 23171, 23095, 23018, 22942, 22866, 523 - 22788, 22711, 22634, 22557, 22478, 524 - 22400, 22322, 22244, 22165, 22085, 525 - 22006, 21927, 21846, 21766, 21687, 526 - 21606, 21524, 21443, 21363, 21282, 527 - 21199, 21118, 21035, 20954, 20870, 528 - 20788, 20705, 20621, 20538, 20455, 529 - 20371, 20286, 20202, 20118, 20034, 530 - 19947, 19863, 19777, 19692, 19606, 531 - 19520, 19434, 19347, 19260, 19174, 532 - 19088, 18999, 18911, 18825, 18737, 533 - 18648, 18560, 18472, 18384, 18294, 534 - 18205, 18116, 18025, 17936, 17846, 535 - 17757, 17666, 17576, 17485, 17395, 536 - 17303, 17212, 17122, 17030, 16937, 537 - 16846, 16755, 16662, 16569, 16477, 538 - 16385, 16291, 16198, 16105, 16012, 539 - 15917, 15824, 15730, 15636, 15541, 540 - 15447, 15352, 15257, 15162, 15067, 541 - 14973, 14875, 14781, 14685, 14589, 542 - 14493, 14396, 14300, 14204, 14107, 543 - 14010, 13914, 13815, 13718, 13621, 544 - 13524, 13425, 13328, 13230, 13133, 545 - 13033, 12935, 12836, 12738, 12638, 546 - 12540, 12441, 12341, 12241, 12142, 547 - 12044, 11943, 11843, 11744, 11643, 548 - 11542, 11442, 11342, 11241, 11139, 549 - 11039, 10939, 10836, 10736, 10635, 550 - 10534, 10431, 10330, 10228, 10127, 551 - 10024, 9921, 9820, 9718, 9614, 552 - 9512, 9410, 9306, 9204, 9101, 553 - 8998, 8895, 8791, 8689, 8585, 554 - 8481, 8377, 8274, 8171, 8067, 555 - 7962, 7858, 7753, 7650, 7545, 556 - 7441, 7336, 7231, 7129, 7023, 557 - 6917, 6813, 6709, 6604, 6498, 558 - 6393, 6288, 6182, 6077, 5973, 559 - 5867, 5760, 5656, 5549, 5445, 560 - 5339, 5232, 5127, 5022, 4914, 561 - 4809, 4703, 4596, 4490, 4384, 562 - 4278, 4171, 4065, 3958, 3852, 563 - 3745, 3640, 3532, 3426, 3318, 564 - 3212, 3106, 2998, 2891, 2786, 565 - 2679, 2570, 2465, 2358, 2251, 566 - 2143, 2037, 1929, 1823, 1715, 567 - 1609, 1501, 1393, 1287, 1180, 568 - 1073, 964, 858, 751, 644, 569 - 535, 429, 322, 214, 107, 570 - 0, }; 481 + static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { 482 + 32767, 32767, 32767, 32766, 32765, 483 + 32763, 32761, 32759, 32756, 32753, 484 + 32750, 32746, 32742, 32738, 32733, 485 + 32728, 32722, 32717, 32710, 32704, 486 + 32697, 32690, 32682, 32674, 32666, 487 + 32657, 32648, 32639, 32629, 32619, 488 + 32609, 32598, 32587, 32576, 32564, 489 + 32552, 32539, 32526, 32513, 32500, 490 + 32486, 32472, 32457, 32442, 32427, 491 + 32411, 32395, 32379, 32362, 32345, 492 + 32328, 32310, 32292, 32274, 32255, 493 + 32236, 32217, 32197, 32177, 32157, 494 + 32136, 32115, 32093, 32071, 32049, 495 + 32027, 32004, 31981, 31957, 31933, 496 + 31909, 31884, 31859, 31834, 31809, 497 + 31783, 31756, 31730, 31703, 31676, 498 + 31648, 31620, 31592, 31563, 31534, 499 + 31505, 31475, 31445, 31415, 31384, 500 + 31353, 31322, 31290, 31258, 31226, 501 + 31193, 31160, 31127, 31093, 31059, 502 + 31025, 30990, 30955, 30920, 30884, 503 + 30848, 30812, 30775, 30738, 30701, 504 + 30663, 30625, 30587, 30548, 30509, 505 + 30470, 30430, 30390, 30350, 30309, 506 + 30269, 30227, 30186, 30144, 30102, 507 + 30059, 30016, 29973, 29930, 29886, 508 + 29842, 29797, 29752, 29707, 29662, 509 + 29616, 29570, 29524, 29477, 29430, 510 + 29383, 29335, 29287, 29239, 29190, 511 + 29142, 29092, 29043, 28993, 28943, 512 + 28892, 28842, 28791, 28739, 28688, 513 + 28636, 28583, 28531, 28478, 28425, 514 + 28371, 28317, 28263, 28209, 28154, 515 + 28099, 28044, 27988, 27932, 27876, 516 + 27820, 27763, 27706, 27648, 27591, 517 + 27533, 27474, 27416, 27357, 27298, 518 + 27238, 27178, 27118, 27058, 26997, 519 + 26936, 26875, 26814, 26752, 26690, 520 + 26628, 26565, 26502, 26439, 26375, 521 + 26312, 26247, 26183, 26119, 26054, 522 + 25988, 25923, 25857, 25791, 25725, 523 + 25658, 25592, 25524, 25457, 25389, 524 + 25322, 25253, 25185, 25116, 25047, 525 + 24978, 24908, 24838, 24768, 24698, 526 + 24627, 24557, 24485, 24414, 24342, 527 + 24270, 24198, 24126, 24053, 23980, 528 + 23907, 23834, 23760, 23686, 23612, 529 + 23537, 23462, 23387, 23312, 23237, 530 + 23161, 23085, 23009, 22932, 22856, 531 + 22779, 22701, 22624, 22546, 22468, 532 + 22390, 22312, 22233, 22154, 22075, 533 + 21996, 21916, 21836, 21756, 21676, 534 + 21595, 21515, 21434, 21352, 21271, 535 + 21189, 21107, 21025, 20943, 20860, 536 + 20777, 20694, 20611, 20528, 20444, 537 + 20360, 20276, 20192, 20107, 20022, 538 + 19937, 19852, 19767, 19681, 19595, 539 + 19509, 19423, 19336, 19250, 19163, 540 + 19076, 18988, 18901, 18813, 18725, 541 + 18637, 18549, 18460, 18372, 18283, 542 + 18194, 18104, 18015, 17925, 17835, 543 + 17745, 17655, 17565, 17474, 17383, 544 + 17292, 17201, 17110, 17018, 16927, 545 + 16835, 16743, 16650, 16558, 16465, 546 + 16372, 16279, 16186, 16093, 15999, 547 + 15906, 15812, 15718, 15624, 15529, 548 + 15435, 15340, 15245, 15150, 15055, 549 + 14960, 14864, 14769, 14673, 14577, 550 + 14481, 14385, 14288, 14192, 14095, 551 + 13998, 13901, 13804, 13706, 13609, 552 + 13511, 13414, 13316, 13218, 13119, 553 + 13021, 12923, 12824, 12725, 12626, 554 + 12527, 12428, 12329, 12230, 12130, 555 + 12030, 11930, 11831, 11730, 11630, 556 + 11530, 11430, 11329, 11228, 11128, 557 + 11027, 10926, 10824, 10723, 10622, 558 + 10520, 10419, 10317, 10215, 10113, 559 + 10011, 9909, 9807, 9704, 9602, 560 + 9499, 9397, 9294, 9191, 9088, 561 + 8985, 8882, 8778, 8675, 8572, 562 + 8468, 8364, 8261, 8157, 8053, 563 + 7949, 7845, 7741, 7637, 7532, 564 + 7428, 7323, 7219, 7114, 7009, 565 + 6905, 6800, 6695, 6590, 6485, 566 + 6380, 6274, 6169, 6064, 5958, 567 + 5853, 5747, 5642, 5536, 5430, 568 + 5325, 5219, 5113, 5007, 4901, 569 + 4795, 4689, 4583, 4476, 4370, 570 + 4264, 4157, 4051, 3945, 3838, 571 + 3732, 3625, 3518, 3412, 3305, 572 + 3198, 3092, 2985, 2878, 2771, 573 + 2664, 2558, 2451, 2344, 2237, 574 + 2130, 2023, 1916, 1809, 1702, 575 + 1594, 1487, 1380, 1273, 1166, 576 + 1059, 952, 844, 737, 630, 577 + 523, 416, 308, 201, 94, 578 + -13, -121, -228, -335, -442, 579 + -550, -657, -764, -871, -978, 580 + -1086, -1193, -1300, -1407, -1514, 581 + -1621, -1728, -1835, -1942, -2049, 582 + -2157, -2263, -2370, -2477, -2584, 583 + -2691, -2798, -2905, -3012, -3118, 584 + -3225, -3332, -3439, -3545, -3652, 585 + -3758, -3865, -3971, -4078, -4184, 586 + -4290, -4397, -4503, -4609, -4715, 587 + -4821, -4927, -5033, -5139, -5245, 588 + -5351, -5457, -5562, -5668, -5774, 589 + -5879, -5985, -6090, -6195, -6301, 590 + -6406, -6511, -6616, -6721, -6826, 591 + -6931, -7036, -7140, -7245, -7349, 592 + -7454, -7558, -7663, -7767, -7871, 593 + -7975, -8079, -8183, -8287, -8390, 594 + -8494, -8597, -8701, -8804, -8907, 595 + -9011, -9114, -9217, -9319, -9422, 596 + -9525, -9627, -9730, -9832, -9934, 597 + -10037, -10139, -10241, -10342, -10444, 598 + -10546, -10647, -10748, -10850, -10951, 599 + -11052, -11153, -11253, -11354, -11455, 600 + -11555, -11655, -11756, -11856, -11955, 601 + -12055, -12155, -12254, -12354, -12453, 602 + -12552, -12651, -12750, -12849, -12947, 603 + -13046, -13144, -13242, -13340, -13438, 604 + -13536, -13633, -13731, -13828, -13925, 605 + -14022, -14119, -14216, -14312, -14409, 606 + -14505, -14601, -14697, -14793, -14888, 607 + -14984, -15079, -15174, -15269, -15364, 608 + -15459, -15553, -15647, -15741, -15835, 609 + -15929, -16023, -16116, -16210, -16303, 610 + -16396, -16488, -16581, -16673, -16766, 611 + -16858, -16949, -17041, -17133, -17224, 612 + -17315, -17406, -17497, -17587, -17678, 613 + -17768, -17858, -17948, -18037, -18127, 614 + -18216, -18305, -18394, -18483, -18571, 615 + -18659, -18747, -18835, -18923, -19010, 616 + -19098, -19185, -19271, -19358, -19444, 617 + -19531, -19617, -19702, -19788, -19873, 618 + -19959, -20043, -20128, -20213, -20297, 619 + -20381, -20465, -20549, -20632, -20715, 620 + -20798, -20881, -20963, -21046, -21128, 621 + -21210, -21291, -21373, -21454, -21535, 622 + -21616, -21696, -21776, -21856, -21936, 623 + -22016, -22095, -22174, -22253, -22331, 624 + -22410, -22488, -22566, -22643, -22721, 625 + -22798, -22875, -22951, -23028, -23104, 626 + -23180, -23256, -23331, -23406, -23481, 627 + -23556, -23630, -23704, -23778, -23852, 628 + -23925, -23998, -24071, -24144, -24216, 629 + -24288, -24360, -24432, -24503, -24574, 630 + -24645, -24716, -24786, -24856, -24926, 631 + -24995, -25064, -25133, -25202, -25270, 632 + -25339, -25406, -25474, -25541, -25608, 633 + -25675, -25742, -25808, -25874, -25939, 634 + -26005, -26070, -26135, -26199, -26264, 635 + -26327, -26391, -26455, -26518, -26581, 636 + -26643, -26705, -26767, -26829, -26891, 637 + -26952, -27013, -27073, -27133, -27193, 638 + -27253, -27312, -27372, -27430, -27489, 639 + -27547, -27605, -27663, -27720, -27777, 640 + -27834, -27890, -27946, -28002, -28058, 641 + -28113, -28168, -28223, -28277, -28331, 642 + -28385, -28438, -28491, -28544, -28596, 643 + -28649, -28701, -28752, -28803, -28854, 644 + -28905, -28955, -29006, -29055, -29105, 645 + -29154, -29203, -29251, -29299, -29347, 646 + -29395, -29442, -29489, -29535, -29582, 647 + -29628, -29673, -29719, -29764, -29808, 648 + -29853, -29897, -29941, -29984, -30027, 649 + -30070, -30112, -30154, -30196, -30238, 650 + -30279, -30320, -30360, -30400, -30440, 651 + -30480, -30519, -30558, -30596, -30635, 652 + -30672, -30710, -30747, -30784, -30821, 653 + -30857, -30893, -30929, -30964, -30999, 654 + -31033, -31068, -31102, -31135, -31168, 655 + -31201, -31234, -31266, -31298, -31330, 656 + -31361, -31392, -31422, -31453, -31483, 657 + -31512, -31541, -31570, -31599, -31627, 658 + -31655, -31682, -31710, -31737, -31763, 659 + -31789, -31815, -31841, -31866, -31891, 660 + -31915, -31939, -31963, -31986, -32010, 661 + -32032, -32055, -32077, -32099, -32120, 662 + -32141, -32162, -32182, -32202, -32222, 663 + -32241, -32260, -32279, -32297, -32315, 664 + -32333, -32350, -32367, -32383, -32399, 665 + -32415, -32431, -32446, -32461, -32475, 666 + -32489, -32503, -32517, -32530, -32542, 667 + -32555, -32567, -32579, -32590, -32601, 668 + -32612, -32622, -32632, -32641, -32651, 669 + -32659, -32668, -32676, -32684, -32692, 670 + -32699, -32706, -32712, -32718, -32724, 671 + -32729, -32734, -32739, -32743, -32747, 672 + -32751, -32754, -32757, -32760, -32762, 673 + -32764, -32765, -32767, -32767, -32767, 674 + 32767, 32767, 32765, 32761, 32756, 675 + 32750, 32742, 32732, 32722, 32710, 676 + 32696, 32681, 32665, 32647, 32628, 677 + 32608, 32586, 32562, 32538, 32512, 678 + 32484, 32455, 32425, 32393, 32360, 679 + 32326, 32290, 32253, 32214, 32174, 680 + 32133, 32090, 32046, 32001, 31954, 681 + 31906, 31856, 31805, 31753, 31700, 682 + 31645, 31588, 31530, 31471, 31411, 683 + 31349, 31286, 31222, 31156, 31089, 684 + 31020, 30951, 30880, 30807, 30733, 685 + 30658, 30582, 30504, 30425, 30345, 686 + 30263, 30181, 30096, 30011, 29924, 687 + 29836, 29747, 29656, 29564, 29471, 688 + 29377, 29281, 29184, 29086, 28987, 689 + 28886, 28784, 28681, 28577, 28471, 690 + 28365, 28257, 28147, 28037, 27925, 691 + 27812, 27698, 27583, 27467, 27349, 692 + 27231, 27111, 26990, 26868, 26744, 693 + 26620, 26494, 26367, 26239, 26110, 694 + 25980, 25849, 25717, 25583, 25449, 695 + 25313, 25176, 25038, 24900, 24760, 696 + 24619, 24477, 24333, 24189, 24044, 697 + 23898, 23751, 23602, 23453, 23303, 698 + 23152, 22999, 22846, 22692, 22537, 699 + 22380, 22223, 22065, 21906, 21746, 700 + 21585, 21423, 21261, 21097, 20933, 701 + 20767, 20601, 20434, 20265, 20096, 702 + 19927, 19756, 19584, 19412, 19239, 703 + 19065, 18890, 18714, 18538, 18361, 704 + 18183, 18004, 17824, 17644, 17463, 705 + 17281, 17098, 16915, 16731, 16546, 706 + 16361, 16175, 15988, 15800, 15612, 707 + 15423, 15234, 15043, 14852, 14661, 708 + 14469, 14276, 14083, 13889, 13694, 709 + 13499, 13303, 13107, 12910, 12713, 710 + 12515, 12317, 12118, 11918, 11718, 711 + 11517, 11316, 11115, 10913, 10710, 712 + 10508, 10304, 10100, 9896, 9691, 713 + 9486, 9281, 9075, 8869, 8662, 714 + 8455, 8248, 8040, 7832, 7623, 715 + 7415, 7206, 6996, 6787, 6577, 716 + 6366, 6156, 5945, 5734, 5523, 717 + 5311, 5100, 4888, 4675, 4463, 718 + 4251, 4038, 3825, 3612, 3399, 719 + 3185, 2972, 2758, 2544, 2330, 720 + 2116, 1902, 1688, 1474, 1260, 721 + 1045, 831, 617, 402, 188, 722 + -27, -241, -456, -670, -885, 723 + -1099, -1313, -1528, -1742, -1956, 724 + -2170, -2384, -2598, -2811, -3025, 725 + -3239, -3452, -3665, -3878, -4091, 726 + -4304, -4516, -4728, -4941, -5153, 727 + -5364, -5576, -5787, -5998, -6209, 728 + -6419, -6629, -6839, -7049, -7258, 729 + -7467, -7676, -7884, -8092, -8300, 730 + -8507, -8714, -8920, -9127, -9332, 731 + -9538, -9743, -9947, -10151, -10355, 732 + -10558, -10761, -10963, -11165, -11367, 733 + -11568, -11768, -11968, -12167, -12366, 734 + -12565, -12762, -12960, -13156, -13352, 735 + -13548, -13743, -13937, -14131, -14324, 736 + -14517, -14709, -14900, -15091, -15281, 737 + -15470, -15659, -15847, -16035, -16221, 738 + -16407, -16593, -16777, -16961, -17144, 739 + -17326, -17508, -17689, -17869, -18049, 740 + -18227, -18405, -18582, -18758, -18934, 741 + -19108, -19282, -19455, -19627, -19799, 742 + -19969, -20139, -20308, -20475, -20642, 743 + -20809, -20974, -21138, -21301, -21464, 744 + -21626, -21786, -21946, -22105, -22263, 745 + -22420, -22575, -22730, -22884, -23037, 746 + -23189, -23340, -23490, -23640, -23788, 747 + -23935, -24080, -24225, -24369, -24512, 748 + -24654, -24795, -24934, -25073, -25211, 749 + -25347, -25482, -25617, -25750, -25882, 750 + -26013, -26143, -26272, -26399, -26526, 751 + -26651, -26775, -26898, -27020, -27141, 752 + -27260, -27379, -27496, -27612, -27727, 753 + -27841, -27953, -28065, -28175, -28284, 754 + -28391, -28498, -28603, -28707, -28810, 755 + -28911, -29012, -29111, -29209, -29305, 756 + -29401, -29495, -29587, -29679, -29769, 757 + -29858, -29946, -30032, -30118, -30201, 758 + -30284, -30365, -30445, -30524, -30601, 759 + -30677, -30752, -30825, -30897, -30968, 760 + -31038, -31106, -31172, -31238, -31302, 761 + -31365, -31426, -31486, -31545, -31602, 762 + -31658, -31713, -31766, -31818, -31869, 763 + -31918, -31966, -32012, -32058, -32101, 764 + -32144, -32185, -32224, -32262, -32299, 765 + -32335, -32369, -32401, -32433, -32463, 766 + -32491, -32518, -32544, -32568, -32591, 767 + -32613, -32633, -32652, -32669, -32685, 768 + -32700, -32713, -32724, -32735, -32744, 769 + -32751, -32757, -32762, -32766, -32767, 770 + 32767, 32764, 32755, 32741, 32720, 771 + 32694, 32663, 32626, 32583, 32535, 772 + 32481, 32421, 32356, 32286, 32209, 773 + 32128, 32041, 31948, 31850, 31747, 774 + 31638, 31523, 31403, 31278, 31148, 775 + 31012, 30871, 30724, 30572, 30415, 776 + 30253, 30086, 29913, 29736, 29553, 777 + 29365, 29172, 28974, 28771, 28564, 778 + 28351, 28134, 27911, 27684, 27452, 779 + 27216, 26975, 26729, 26478, 26223, 780 + 25964, 25700, 25432, 25159, 24882, 781 + 24601, 24315, 24026, 23732, 23434, 782 + 23133, 22827, 22517, 22204, 21886, 783 + 21565, 21240, 20912, 20580, 20244, 784 + 19905, 19563, 19217, 18868, 18516, 785 + 18160, 17802, 17440, 17075, 16708, 786 + 16338, 15964, 15588, 15210, 14829, 787 + 14445, 14059, 13670, 13279, 12886, 788 + 12490, 12093, 11693, 11291, 10888, 789 + 10482, 10075, 9666, 9255, 8843, 790 + 8429, 8014, 7597, 7180, 6760, 791 + 6340, 5919, 5496, 5073, 4649, 792 + 4224, 3798, 3372, 2945, 2517, 793 + 2090, 1661, 1233, 804, 375, 794 + -54, -483, -911, -1340, -1768, 795 + -2197, -2624, -3052, -3479, -3905, 796 + -4330, -4755, -5179, -5602, -6024, 797 + -6445, -6865, -7284, -7702, -8118, 798 + -8533, -8946, -9358, -9768, -10177, 799 + -10584, -10989, -11392, -11793, -12192, 800 + -12589, -12984, -13377, -13767, -14155, 801 + -14541, -14924, -15305, -15683, -16058, 802 + -16430, -16800, -17167, -17531, -17892, 803 + -18249, -18604, -18956, -19304, -19649, 804 + -19990, -20329, -20663, -20994, -21322, 805 + -21646, -21966, -22282, -22595, -22904, 806 + -23208, -23509, -23806, -24099, -24387, 807 + -24672, -24952, -25228, -25499, -25766, 808 + -26029, -26288, -26541, -26791, -27035, 809 + -27275, -27511, -27741, -27967, -28188, 810 + -28405, -28616, -28823, -29024, -29221, 811 + -29412, -29599, -29780, -29957, -30128, 812 + -30294, -30455, -30611, -30761, -30906, 813 + -31046, -31181, -31310, -31434, -31552, 814 + -31665, -31773, -31875, -31972, -32063, 815 + -32149, -32229, -32304, -32373, -32437, 816 + -32495, -32547, -32594, -32635, -32671, 817 + -32701, -32726, -32745, -32758, -32766, 818 + 32767, 32754, 32717, 32658, 32577, 819 + 32473, 32348, 32200, 32029, 31837, 820 + 31624, 31388, 31131, 30853, 30553, 821 + 30232, 29891, 29530, 29148, 28746, 822 + 28324, 27883, 27423, 26944, 26447, 823 + 25931, 25398, 24847, 24279, 23695, 824 + 23095, 22478, 21846, 21199, 20538, 825 + 19863, 19174, 18472, 17757, 17030, 826 + 16291, 15541, 14781, 14010, 13230, 827 + 12441, 11643, 10837, 10024, 9204, 828 + 8377, 7545, 6708, 5866, 5020, 829 + 4171, 3319, 2464, 1608, 751, 830 + -107, -965, -1822, -2678, -3532, 831 + -4383, -5232, -6077, -6918, -7754, 832 + -8585, -9409, -10228, -11039, -11843, 833 + -12639, -13426, -14204, -14972, -15730, 834 + -16477, -17213, -17937, -18648, -19347, 835 + -20033, -20705, -21363, -22006, -22634, 836 + -23246, -23843, -24423, -24986, -25533, 837 + -26062, -26573, -27066, -27540, -27995, 838 + -28431, -28848, -29245, -29622, -29979, 839 + -30315, -30630, -30924, -31197, -31449, 840 + -31679, -31887, -32074, -32239, -32381, 841 + -32501, -32600, -32675, -32729, -32759, 842 + }; 571 843 #endif 572 844 573 845 static const CELTMode mode48000_960_120 ICONST_ATTR = {

+31 -38

lib/rbcodec/codecs/libopus/celt/vq.c

··· 37 37 #include "os_support.h" 38 38 #include "bands.h" 39 39 #include "rate.h" 40 + #include "pitch.h" 40 41 42 + #if defined(MIPSr1_ASM) 43 + #include "mips/vq_mipsr1.h" 44 + #endif 45 + 46 + #ifndef OVERRIDE_vq_exp_rotation1 41 47 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 42 48 { 43 49 int i; 50 + opus_val16 ms; 44 51 celt_norm *Xptr; 45 52 Xptr = X; 53 + ms = NEG16(s); 46 54 for (i=0;i<len-stride;i++) 47 55 { 48 56 celt_norm x1, x2; 49 57 x1 = Xptr[0]; 50 58 x2 = Xptr[stride]; 51 - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 52 - *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 59 + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 60 + *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 53 61 } 54 62 Xptr = &X[len-2*stride-1]; 55 63 for (i=len-2*stride-1;i>=0;i--) ··· 57 65 celt_norm x1, x2; 58 66 x1 = Xptr[0]; 59 67 x2 = Xptr[stride]; 60 - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 61 - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 68 + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 69 + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 62 70 } 63 71 } 72 + #endif /* OVERRIDE_vq_exp_rotation1 */ 64 73 65 74 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 66 75 { ··· 91 100 } 92 101 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 93 102 extract_collapse_mask().*/ 94 - len /= stride; 103 + len = celt_udiv(len, stride); 95 104 for (i=0;i<stride;i++) 96 105 { 97 106 if (dir < 0) ··· 140 149 return 1; 141 150 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 142 151 exp_rotation().*/ 143 - N0 = N/B; 152 + N0 = celt_udiv(N, B); 144 153 collapse_mask = 0; 145 154 i=0; do { 146 155 int j; 156 + unsigned tmp=0; 147 157 j=0; do { 148 - collapse_mask |= (iy[i*N0+j]!=0)<<i; 158 + tmp |= iy[i*N0+j]; 149 159 } while (++j<N0); 160 + collapse_mask |= (tmp!=0)<<i; 150 161 } while (++i<B); 151 162 return collapse_mask; 152 163 } ··· 322 333 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 323 334 ec_dec *dec, opus_val16 gain) 324 335 { 325 - int i; 326 336 opus_val32 Ryy; 327 337 unsigned collapse_mask; 328 - /* VARDECL(int, iy); 329 - SAVE_STACK; */ 330 - 331 - /* the difference between the last two values of eband5ms shifted by maxLM 332 - which is 22 << 3 with the static mode */ 333 - int iy[176]; 338 + VARDECL(int, iy); 339 + SAVE_STACK; 334 340 335 341 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); 336 342 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); 337 - /* ALLOC(iy, N, int); */ 338 - decode_pulses(iy, N, K, dec); 339 - Ryy = 0; 340 - i=0; 341 - do { 342 - Ryy = MAC16_16(Ryy, iy[i], iy[i]); 343 - } while (++i < N); 343 + ALLOC(iy, N, int); 344 + Ryy = decode_pulses(iy, N, K, dec); 344 345 normalise_residual(iy, X, N, Ryy, gain); 345 346 exp_rotation(X, N, -1, B, K, spread); 346 347 collapse_mask = extract_collapse_mask(iy, N, B); 347 - /* RESTORE_STACK; */ 348 + RESTORE_STACK; 348 349 return collapse_mask; 349 350 } 350 351 352 + #ifndef OVERRIDE_renormalise_vector 351 353 void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 352 354 { 353 355 int i; 354 356 #ifdef FIXED_POINT 355 357 int k; 356 358 #endif 357 - opus_val32 E = EPSILON; 359 + opus_val32 E; 358 360 opus_val16 g; 359 361 opus_val32 t; 360 - celt_norm *xptr = X; 361 - for (i=0;i<N;i++) 362 - { 363 - E = MAC16_16(E, *xptr, *xptr); 364 - xptr++; 365 - } 362 + celt_norm *xptr; 363 + E = EPSILON + celt_inner_prod(X, X, N); 366 364 #ifdef FIXED_POINT 367 365 k = celt_ilog2(E)>>1; 368 366 #endif ··· 377 375 } 378 376 /*return celt_sqrt(E);*/ 379 377 } 378 + #endif /* OVERRIDE_renormalise_vector */ 380 379 381 - int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) 380 + int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) 382 381 { 383 382 int i; 384 383 int itheta; ··· 397 396 Eside = MAC16_16(Eside, s, s); 398 397 } 399 398 } else { 400 - for (i=0;i<N;i++) 401 - { 402 - celt_norm m, s; 403 - m = X[i]; 404 - s = Y[i]; 405 - Emid = MAC16_16(Emid, m, m); 406 - Eside = MAC16_16(Eside, s, s); 407 - } 399 + Emid += celt_inner_prod(X, X, N); 400 + Eside += celt_inner_prod(Y, Y, N); 408 401 } 409 402 mid = celt_sqrt(Emid); 410 403 side = celt_sqrt(Eside);

+1 -1

lib/rbcodec/codecs/libopus/celt/vq.h

··· 65 65 66 66 void renormalise_vector(celt_norm *X, int N, opus_val16 gain); 67 67 68 - int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N); 68 + int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N); 69 69 70 70 #endif /* VQ_H */

+3 -1

lib/rbcodec/codecs/libopus/config.h

··· 9 9 #define OPUS_BUILD 10 10 11 11 /* alloc stuff */ 12 - #define NONTHREADSAFE_PSEUDOSTACK 12 + #define VAR_ARRAYS 13 + #define NORM_ALIASING_HACK 13 14 14 15 #define OVERRIDE_OPUS_ALLOC 15 16 #define OVERRIDE_OPUS_FREE ··· 40 41 #endif 41 42 42 43 #if defined(CPU_ARM) 44 + #define OPUS_ARM_ASM 43 45 #if ARM_ARCH == 4 44 46 #define OPUS_ARM_INLINE_ASM 45 47 #elif ARM_ARCH > 4

+21

lib/rbcodec/codecs/libopus/opus.c

··· 168 168 } 169 169 } 170 170 171 + int opus_packet_get_samples_per_frame(const unsigned char *data, 172 + opus_int32 Fs) 173 + { 174 + int audiosize; 175 + if (data[0]&0x80) 176 + { 177 + audiosize = ((data[0]>>3)&0x3); 178 + audiosize = (Fs<<audiosize)/400; 179 + } else if ((data[0]&0x60) == 0x60) 180 + { 181 + audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; 182 + } else { 183 + audiosize = ((data[0]>>3)&0x3); 184 + if (audiosize == 3) 185 + audiosize = Fs*60/1000; 186 + else 187 + audiosize = (Fs<<audiosize)/100; 188 + } 189 + return audiosize; 190 + } 191 + 171 192 int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, 172 193 int self_delimited, unsigned char *out_toc, 173 194 const unsigned char *frames[48], opus_int16 size[48],

+47 -38

lib/rbcodec/codecs/libopus/opus_decoder.c

··· 77 77 opus_uint32 rangeFinal; 78 78 }; 79 79 80 - #ifdef FIXED_POINT 81 - static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 82 - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; 83 - } 84 - #endif 85 - 86 80 87 81 int opus_decoder_get_size(int channels) 88 82 { ··· 222 216 VARDECL(opus_val16, pcm_transition_silk); 223 217 int pcm_transition_celt_size; 224 218 VARDECL(opus_val16, pcm_transition_celt); 225 - opus_val16 *pcm_transition = NULL; /* Silence false positive "may be used uninitialized" warning */ 219 + opus_val16 *pcm_transition=NULL; 226 220 int redundant_audio_size; 227 221 VARDECL(opus_val16, redundant_audio); 228 222 ··· 237 231 int F2_5, F5, F10, F20; 238 232 const opus_val16 *window; 239 233 opus_uint32 redundant_rng = 0; 234 + int celt_accum; 240 235 ALLOC_STACK; 241 236 242 237 silk_dec = (char*)st+st->silk_dec_offset; ··· 302 297 } 303 298 } 304 299 300 + /* In fixed-point, we can tell CELT to do the accumulation on top of the 301 + SILK PCM buffer. This saves some stack space. */ 302 + #ifdef FIXED_POINT 303 + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); 304 + #else 305 + celt_accum = 0; 306 + #endif 307 + 305 308 pcm_transition_silk_size = ALLOC_NONE; 306 309 pcm_transition_celt_size = ALLOC_NONE; 307 310 if (data!=NULL && st->prev_mode > 0 && ( ··· 332 335 } 333 336 334 337 /* Don't allocate any memory when in CELT-only mode */ 335 - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; 338 + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; 336 339 ALLOC(pcm_silk, pcm_silk_size, opus_int16); 337 340 338 341 /* SILK processing */ 339 342 if (mode != MODE_CELT_ONLY) 340 343 { 341 344 int lost_flag, decoded_samples; 342 - opus_int16 *pcm_ptr = pcm_silk; 345 + opus_int16 *pcm_ptr; 346 + #ifdef FIXED_POINT 347 + if (celt_accum) 348 + pcm_ptr = pcm; 349 + else 350 + #endif 351 + pcm_ptr = pcm_silk; 343 352 344 353 if (st->prev_mode==MODE_CELT_ONLY) 345 354 silk_InitDecoder( silk_dec ); ··· 469 478 { 470 479 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 471 480 celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, 472 - redundant_audio, F5, NULL); 481 + redundant_audio, F5, NULL, 0); 473 482 celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); 474 483 } 475 484 ··· 484 493 celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); 485 494 /* Decode CELT */ 486 495 celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, 487 - len, pcm, celt_frame_size, &dec); 496 + len, pcm, celt_frame_size, &dec, celt_accum); 488 497 } else { 489 498 unsigned char silence[2] = {0xFF, 0xFF}; 490 - for (i=0;i<frame_size*st->channels;i++) 491 - pcm[i] = 0; 499 + if (!celt_accum) 500 + { 501 + for (i=0;i<frame_size*st->channels;i++) 502 + pcm[i] = 0; 503 + } 492 504 /* For hybrid -> SILK transitions, we let the CELT MDCT 493 505 do a fade-out by decoding a silence frame */ 494 506 if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) 495 507 { 496 508 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 497 - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); 509 + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); 498 510 } 499 511 } 500 512 501 - if (mode != MODE_CELT_ONLY) 513 + if (mode != MODE_CELT_ONLY && !celt_accum) 502 514 { 503 515 #ifdef FIXED_POINT 504 516 for (i=0;i<frame_size*st->channels;i++) 505 - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); 517 + pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); 506 518 #else 507 519 for (i=0;i<frame_size*st->channels;i++) 508 520 pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); ··· 521 533 celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); 522 534 celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); 523 535 524 - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); 536 + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); 525 537 celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); 526 538 smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, 527 539 pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); ··· 717 729 { 718 730 VARDECL(opus_int16, out); 719 731 int ret, i; 732 + int nb_samples; 720 733 ALLOC_STACK; 721 734 722 735 if(frame_size<=0) 723 736 { 724 737 RESTORE_STACK; 725 738 return OPUS_BAD_ARG; 739 + } 740 + if (data != NULL && len > 0 && !decode_fec) 741 + { 742 + nb_samples = opus_decoder_get_nb_samples(st, data, len); 743 + if (nb_samples>0) 744 + frame_size = IMIN(frame_size, nb_samples); 745 + else 746 + return OPUS_INVALID_PACKET; 726 747 } 727 748 ALLOC(out, frame_size*st->channels, opus_int16); 728 749 ··· 744 765 { 745 766 VARDECL(float, out); 746 767 int ret, i; 768 + int nb_samples; 747 769 ALLOC_STACK; 748 770 749 771 if(frame_size<=0) ··· 752 774 return OPUS_BAD_ARG; 753 775 } 754 776 777 + if (data != NULL && len > 0 && !decode_fec) 778 + { 779 + nb_samples = opus_decoder_get_nb_samples(st, data, len); 780 + if (nb_samples>0) 781 + frame_size = IMIN(frame_size, nb_samples); 782 + else 783 + return OPUS_INVALID_PACKET; 784 + } 755 785 ALLOC(out, frame_size*st->channels, float); 756 786 757 787 ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); ··· 909 939 bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3); 910 940 } 911 941 return bandwidth; 912 - } 913 - 914 - int opus_packet_get_samples_per_frame(const unsigned char *data, 915 - opus_int32 Fs) 916 - { 917 - int audiosize; 918 - if (data[0]&0x80) 919 - { 920 - audiosize = ((data[0]>>3)&0x3); 921 - audiosize = (Fs<<audiosize)/400; 922 - } else if ((data[0]&0x60) == 0x60) 923 - { 924 - audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; 925 - } else { 926 - audiosize = ((data[0]>>3)&0x3); 927 - if (audiosize == 3) 928 - audiosize = Fs*60/1000; 929 - else 930 - audiosize = (Fs<<audiosize)/100; 931 - } 932 - return audiosize; 933 942 } 934 943 935 944 int opus_packet_get_nb_channels(const unsigned char *data)

+25 -25

lib/rbcodec/codecs/libopus/opus_defines.h

··· 454 454 * @hideinitializer */ 455 455 #define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) 456 456 457 - /** Gets the sampling rate the encoder or decoder was initialized with. 458 - * This simply returns the <code>Fs</code> value passed to opus_encoder_init() 459 - * or opus_decoder_init(). 460 - * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. 461 - * @hideinitializer 462 - */ 463 - #define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) 464 - 465 457 /** Gets the total samples of delay added by the entire codec. 466 458 * This can be queried by the encoder and then the provided number of samples can be 467 459 * skipped on from the start of the decoder's output to provide time aligned input ··· 544 536 * 24 (default: 24). 545 537 * @hideinitializer */ 546 538 #define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) 547 - 548 - /** Gets the duration (in samples) of the last packet successfully decoded or concealed. 549 - * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). 550 - * @hideinitializer */ 551 - #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) 552 539 553 540 /** Configures the encoder's use of variable duration frames. 554 541 * When variable duration is enabled, the encoder is free to use a shorter frame ··· 648 635 * 649 636 * @hideinitializer */ 650 637 #define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) 651 - 652 - /** Gets the pitch of the last decoded frame, if available. 653 - * This can be used for any post-processing algorithm requiring the use of pitch, 654 - * e.g. time stretching/shortening. If the last frame was not voiced, or if the 655 - * pitch was not coded in the frame, then zero is returned. 656 - * 657 - * This CTL is only implemented for decoder instances. 658 - * 659 - * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) 660 - * 661 - * @hideinitializer */ 662 - #define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) 663 638 664 639 /** Gets the encoder's configured bandpass or the decoder's last bandpass. 665 640 * @see OPUS_SET_BANDWIDTH ··· 675 650 * @hideinitializer */ 676 651 #define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) 677 652 653 + /** Gets the sampling rate the encoder or decoder was initialized with. 654 + * This simply returns the <code>Fs</code> value passed to opus_encoder_init() 655 + * or opus_decoder_init(). 656 + * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. 657 + * @hideinitializer 658 + */ 659 + #define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) 660 + 678 661 /**@}*/ 679 662 680 663 /** @defgroup opus_decoderctls Decoder related CTLs ··· 698 681 * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units. 699 682 * @hideinitializer */ 700 683 #define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) 684 + 685 + /** Gets the duration (in samples) of the last packet successfully decoded or concealed. 686 + * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). 687 + * @hideinitializer */ 688 + #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) 689 + 690 + /** Gets the pitch of the last decoded frame, if available. 691 + * This can be used for any post-processing algorithm requiring the use of pitch, 692 + * e.g. time stretching/shortening. If the last frame was not voiced, or if the 693 + * pitch was not coded in the frame, then zero is returned. 694 + * 695 + * This CTL is only implemented for decoder instances. 696 + * 697 + * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) 698 + * 699 + * @hideinitializer */ 700 + #define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) 701 701 702 702 /**@}*/ 703 703

+2 -5

lib/rbcodec/codecs/libopus/opus_private.h

··· 86 86 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); 87 87 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); 88 88 89 - int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, 90 - int bitrate, opus_val16 tonality, float *mem, int buffering, 91 - downmix_func downmix); 92 - 93 89 int encode_size(int size, unsigned char *data); 94 90 95 91 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); ··· 104 100 105 101 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, 106 102 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, 107 - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); 103 + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, 104 + int analysis_channels, downmix_func downmix, int float_api); 108 105 109 106 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, 110 107 opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,

+15 -6

lib/rbcodec/codecs/libopus/silk/CNG.c

··· 34 34 35 35 /* Generates excitation for CNG LPC synthesis */ 36 36 static OPUS_INLINE void silk_CNG_exc( 37 - opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ 37 + opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ 38 38 opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ 39 39 opus_int32 Gain_Q16, /* I Gain to apply */ 40 40 opus_int length, /* I Length */ ··· 55 55 idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); 56 56 silk_assert( idx >= 0 ); 57 57 silk_assert( idx <= CNG_BUF_MASK_MAX ); 58 - residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); 58 + exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); 59 59 } 60 60 *rand_seed = seed; 61 61 } ··· 85 85 ) 86 86 { 87 87 opus_int i, subfr; 88 - opus_int32 sum_Q6, max_Gain_Q16; 88 + opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; 89 89 opus_int16 A_Q12[ MAX_LPC_ORDER ]; 90 90 silk_CNG_struct *psCNG = &psDec->sCNG; 91 91 SAVE_STACK; ··· 125 125 /* Add CNG when packet is lost or during DTX */ 126 126 if( psDec->lossCnt ) { 127 127 VARDECL( opus_int32, CNG_sig_Q10 ); 128 - 129 128 ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); 130 129 131 130 /* Generate CNG excitation */ 132 - silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed ); 131 + gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); 132 + if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { 133 + gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); 134 + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); 135 + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); 136 + } else { 137 + gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); 138 + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); 139 + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); 140 + } 141 + silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); 133 142 134 143 /* Convert CNG NLSF to filter representation */ 135 144 silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); ··· 162 171 /* Update states */ 163 172 CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); 164 173 165 - frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); 174 + frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); 166 175 } 167 176 silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); 168 177 } else {

+36 -17

lib/rbcodec/codecs/libopus/silk/PLC.c

··· 165 165 psPLC->nb_subfr = psDec->nb_subfr; 166 166 } 167 167 168 + static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, 169 + const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) 170 + { 171 + int i, k; 172 + VARDECL( opus_int16, exc_buf ); 173 + opus_int16 *exc_buf_ptr; 174 + SAVE_STACK; 175 + ALLOC( exc_buf, 2*subfr_length, opus_int16 ); 176 + /* Find random noise component */ 177 + /* Scale previous excitation signal */ 178 + exc_buf_ptr = exc_buf; 179 + for( k = 0; k < 2; k++ ) { 180 + for( i = 0; i < subfr_length; i++ ) { 181 + exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( 182 + silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); 183 + } 184 + exc_buf_ptr += subfr_length; 185 + } 186 + /* Find the subframe with lowest energy of the last two and use that as random noise generator */ 187 + silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); 188 + silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); 189 + RESTORE_STACK; 190 + } 191 + 168 192 static OPUS_INLINE void silk_PLC_conceal( 169 193 silk_decoder_state *psDec, /* I/O Decoder state */ 170 194 silk_decoder_control *psDecCtrl, /* I/O Decoder control */ ··· 177 201 opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; 178 202 opus_int32 LPC_pred_Q10, LTP_pred_Q12; 179 203 opus_int16 rand_scale_Q14; 180 - opus_int16 *B_Q14, *exc_buf_ptr; 204 + opus_int16 *B_Q14; 181 205 opus_int32 *sLPC_Q14_ptr; 182 - VARDECL( opus_int16, exc_buf ); 183 206 opus_int16 A_Q12[ MAX_LPC_ORDER ]; 207 + #ifdef SMALL_FOOTPRINT 208 + opus_int16 *sLTP; 209 + #else 184 210 VARDECL( opus_int16, sLTP ); 211 + #endif 185 212 VARDECL( opus_int32, sLTP_Q14 ); 186 213 silk_PLC_struct *psPLC = &psDec->sPLC; 187 214 opus_int32 prevGain_Q10[2]; 188 215 SAVE_STACK; 189 216 190 - ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); 217 + ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 218 + #ifdef SMALL_FOOTPRINT 219 + /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */ 220 + sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; 221 + #else 191 222 ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); 192 - ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 223 + #endif 193 224 194 225 prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); 195 226 prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); ··· 198 229 silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); 199 230 } 200 231 201 - /* Find random noise component */ 202 - /* Scale previous excitation signal */ 203 - exc_buf_ptr = exc_buf; 204 - for( k = 0; k < 2; k++ ) { 205 - for( i = 0; i < psPLC->subfr_length; i++ ) { 206 - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( 207 - silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) ); 208 - } 209 - exc_buf_ptr += psPLC->subfr_length; 210 - } 211 - /* Find the subframe with lowest energy of the last two and use that as random noise generator */ 212 - silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); 213 - silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); 232 + silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); 214 233 215 234 if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { 216 235 /* First sub-frame has lowest energy */

+5

lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h

··· 587 587 #include "arm/SigProc_FIX_armv5e.h" 588 588 #endif 589 589 590 + #if defined(MIPSr1_ASM) 591 + #include "mips/sigproc_fix_mipsr1.h" 592 + #endif 593 + 594 + 590 595 #ifdef __cplusplus 591 596 } 592 597 #endif

+2 -2

lib/rbcodec/codecs/libopus/silk/code_signs.c

··· 76 76 /* Decodes signs of excitation */ 77 77 void silk_decode_signs( 78 78 ec_dec *psRangeDec, /* I/O Compressor data structure */ 79 - opus_int pulses[], /* I/O pulse signal */ 79 + opus_int16 pulses[], /* I/O pulse signal */ 80 80 opus_int length, /* I length of input */ 81 81 const opus_int signalType, /* I Signal type */ 82 82 const opus_int quantOffsetType, /* I Quantization offset type */ ··· 85 85 { 86 86 opus_int i, j, p; 87 87 opus_uint8 icdf[ 2 ]; 88 - opus_int *q_ptr; 88 + opus_int16 *q_ptr; 89 89 const opus_uint8 *icdf_ptr; 90 90 91 91 icdf[ 1 ] = 0;

+29 -8

lib/rbcodec/codecs/libopus/silk/dec_API.c

··· 31 31 #include "API.h" 32 32 #include "main.h" 33 33 #include "stack_alloc.h" 34 + #include "os_support.h" 34 35 35 36 /************************/ 36 37 /* Decoder Super Struct */ ··· 90 91 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 91 92 opus_int32 nSamplesOutDec, LBRR_symbol; 92 93 opus_int16 *samplesOut1_tmp[ 2 ]; 93 - VARDECL( opus_int16, samplesOut1_tmp_storage ); 94 + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); 95 + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); 94 96 VARDECL( opus_int16, samplesOut2_tmp ); 95 97 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 96 98 opus_int16 *resample_out_ptr; ··· 98 100 silk_decoder_state *channel_state = psDec->channel_state; 99 101 opus_int has_side; 100 102 opus_int stereo_to_mono; 103 + int delay_stack_alloc; 101 104 SAVE_STACK; 102 105 103 106 silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); ··· 196 199 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 197 200 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 198 201 if( channel_state[ n ].LBRR_flags[ i ] ) { 199 - opus_int pulses[ MAX_FRAME_LENGTH ]; 202 + opus_int16 pulses[ MAX_FRAME_LENGTH ]; 200 203 opus_int condCoding; 201 204 202 205 if( decControl->nChannelsInternal == 2 && n == 0 ) { ··· 251 254 psDec->channel_state[ 1 ].first_frame_after_reset = 1; 252 255 } 253 256 254 - ALLOC( samplesOut1_tmp_storage, 255 - decControl->nChannelsInternal*( 256 - channel_state[ 0 ].frame_length + 2 ), 257 + /* Check if the temp buffer fits into the output PCM buffer. If it fits, 258 + we can delay allocating the temp buffer until after the SILK peak stack 259 + usage. We need to use a < and not a <= because of the two extra samples. */ 260 + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal 261 + < decControl->API_sampleRate*decControl->nChannelsAPI; 262 + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE 263 + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), 257 264 opus_int16 ); 258 - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; 259 - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage 260 - + channel_state[ 0 ].frame_length + 2; 265 + if ( delay_stack_alloc ) 266 + { 267 + samplesOut1_tmp[ 0 ] = samplesOut; 268 + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; 269 + } else { 270 + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; 271 + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; 272 + } 261 273 262 274 if( lostFlag == FLAG_DECODE_NORMAL ) { 263 275 has_side = !decode_only_middle; ··· 312 324 resample_out_ptr = samplesOut; 313 325 } 314 326 327 + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc 328 + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) 329 + : ALLOC_NONE, 330 + opus_int16 ); 331 + if ( delay_stack_alloc ) { 332 + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); 333 + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; 334 + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; 335 + } 315 336 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 316 337 317 338 /* Resample decoded signal to API_sampleRate */

+3 -4

lib/rbcodec/codecs/libopus/silk/decode_core.c

··· 39 39 silk_decoder_state *psDec, /* I/O Decoder state */ 40 40 silk_decoder_control *psDecCtrl, /* I Decoder control */ 41 41 opus_int16 xq[], /* O Decoded speech */ 42 - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 42 + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 43 43 ) 44 44 { 45 45 opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; ··· 49 49 opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; 50 50 opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; 51 51 VARDECL( opus_int32, res_Q14 ); 52 - /* VARDECL( opus_int32, sLPC_Q14 ); */ 52 + VARDECL( opus_int32, sLPC_Q14 ); 53 53 SAVE_STACK; 54 54 55 55 silk_assert( psDec->prev_gain_Q16 != 0 ); ··· 57 57 ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); 58 58 ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); 59 59 ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); 60 - /* ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); */ 61 - opus_int32 sLPC_Q14[psDec->subfr_length + MAX_LPC_ORDER]; /* worst case is 80 + 16 */ 60 + ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); 62 61 63 62 offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; 64 63

+8 -8

lib/rbcodec/codecs/libopus/silk/decode_frame.c

··· 47 47 { 48 48 VARDECL( silk_decoder_control, psDecCtrl ); 49 49 opus_int L, mv_len, ret = 0; 50 - VARDECL( opus_int, pulses ); 51 50 SAVE_STACK; 52 51 53 52 L = psDec->frame_length; 54 53 ALLOC( psDecCtrl, 1, silk_decoder_control ); 55 - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & 56 - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int ); 57 54 psDecCtrl->LTP_scale_Q14 = 0; 58 55 59 56 /* Safety checks */ ··· 62 59 if( lostFlag == FLAG_DECODE_NORMAL || 63 60 ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) 64 61 { 62 + VARDECL( opus_int16, pulses ); 63 + ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & 64 + ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); 65 65 /*********************************************/ 66 66 /* Decode quantization indices of side info */ 67 67 /*********************************************/ ··· 107 107 silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); 108 108 silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); 109 109 110 - /****************************************************************/ 111 - /* Ensure smooth connection of extrapolated and good frames */ 112 - /****************************************************************/ 113 - silk_PLC_glue_frames( psDec, pOut, L ); 114 - 115 110 /************************************************/ 116 111 /* Comfort noise generation / estimation */ 117 112 /************************************************/ 118 113 silk_CNG( psDec, psDecCtrl, pOut, L ); 114 + 115 + /****************************************************************/ 116 + /* Ensure smooth connection of extrapolated and good frames */ 117 + /****************************************************************/ 118 + silk_PLC_glue_frames( psDec, pOut, L ); 119 119 120 120 /* Update some decoder state variables */ 121 121 psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];

+3 -3

lib/rbcodec/codecs/libopus/silk/decode_pulses.c

··· 36 36 /*********************************************/ 37 37 void silk_decode_pulses( 38 38 ec_dec *psRangeDec, /* I/O Compressor data structure */ 39 - opus_int pulses[], /* O Excitation signal */ 39 + opus_int16 pulses[], /* O Excitation signal */ 40 40 const opus_int signalType, /* I Sigtype */ 41 41 const opus_int quantOffsetType, /* I quantOffsetType */ 42 42 const opus_int frame_length /* I Frame length */ ··· 44 44 { 45 45 opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; 46 46 opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; 47 - opus_int *pulses_ptr; 47 + opus_int16 *pulses_ptr; 48 48 const opus_uint8 *cdf_ptr; 49 49 50 50 /*********************/ ··· 84 84 if( sum_pulses[ i ] > 0 ) { 85 85 silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); 86 86 } else { 87 - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) ); 87 + silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); 88 88 } 89 89 } 90 90

+8 -1

lib/rbcodec/codecs/libopus/silk/macros.h

··· 79 79 (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ 80 80 ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) 81 81 82 + #if defined(MIPSr1_ASM) 83 + #include "mips/macros_mipsr1.h" 84 + #endif 85 + 82 86 #include "ecintrin.h" 83 - 87 + #ifndef OVERRIDE_silk_CLZ16 84 88 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) 85 89 { 86 90 return 32 - EC_ILOG(in16<<16|0x8000); 87 91 } 92 + #endif 88 93 94 + #ifndef OVERRIDE_silk_CLZ32 89 95 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) 90 96 { 91 97 return in32 ? 32 - EC_ILOG(in32) : 32; 92 98 } 99 + #endif 93 100 94 101 /* Row based */ 95 102 #define matrix_ptr(Matrix_base_adr, row, column, N) \

+4 -4

lib/rbcodec/codecs/libopus/silk/main.h

··· 116 116 /* Decodes signs of excitation */ 117 117 void silk_decode_signs( 118 118 ec_dec *psRangeDec, /* I/O Compressor data structure */ 119 - opus_int pulses[], /* I/O pulse signal */ 119 + opus_int16 pulses[], /* I/O pulse signal */ 120 120 opus_int length, /* I length of input */ 121 121 const opus_int signalType, /* I Signal type */ 122 122 const opus_int quantOffsetType, /* I Quantization offset type */ ··· 161 161 162 162 /* Shell decoder, operates on one shell code frame of 16 pulses */ 163 163 void silk_shell_decoder( 164 - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ 164 + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ 165 165 ec_dec *psRangeDec, /* I/O Compressor data structure */ 166 166 const opus_int pulses4 /* I number of pulses per pulse-subframe */ 167 167 ); ··· 397 397 silk_decoder_state *psDec, /* I/O Decoder state */ 398 398 silk_decoder_control *psDecCtrl, /* I Decoder control */ 399 399 opus_int16 xq[], /* O Decoded speech */ 400 - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 400 + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ 401 401 ); 402 402 403 403 /* Decode quantization indices of excitation (Shell coding) */ 404 404 void silk_decode_pulses( 405 405 ec_dec *psRangeDec, /* I/O Compressor data structure */ 406 - opus_int pulses[], /* O Excitation signal */ 406 + opus_int16 pulses[], /* O Excitation signal */ 407 407 const opus_int signalType, /* I Sigtype */ 408 408 const opus_int quantOffsetType, /* I quantOffsetType */ 409 409 const opus_int frame_length /* I Frame length */

+4 -7

lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c

··· 72 72 silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; 73 73 opus_int32 nSamplesIn; 74 74 opus_int32 max_index_Q16, index_increment_Q16; 75 - /* VARDECL( opus_int16, buf ); 76 - SAVE_STACK; */ 75 + VARDECL( opus_int16, buf ); 76 + SAVE_STACK; 77 77 78 - /* ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); */ 79 - 80 - /* worst case = 2*16*10+8 = 328 * 2 = 656bytes */ 81 - opus_int16 buf[2 * S->batchSize + RESAMPLER_ORDER_FIR_12]; 78 + ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); 82 79 83 80 /* Copy buffered samples to start of buffer */ 84 81 silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); ··· 106 103 107 104 /* Copy last part of filtered signal to the state for the next call */ 108 105 silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); 109 - /* RESTORE_STACK; */ 106 + RESTORE_STACK; 110 107 }

+4 -4

lib/rbcodec/codecs/libopus/silk/shell_coder.c

··· 60 60 #endif 61 61 62 62 static OPUS_INLINE void decode_split( 63 - opus_int *p_child1, /* O pulse amplitude of first child subframe */ 64 - opus_int *p_child2, /* O pulse amplitude of second child subframe */ 63 + opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ 64 + opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ 65 65 ec_dec *psRangeDec, /* I/O Compressor data structure */ 66 66 const opus_int p, /* I pulse amplitude of current subframe */ 67 67 const opus_uint8 *shell_table /* I table of shell cdfs */ ··· 121 121 122 122 /* Shell decoder, operates on one shell code frame of 16 pulses */ 123 123 void silk_shell_decoder( 124 - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ 124 + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ 125 125 ec_dec *psRangeDec, /* I/O Compressor data structure */ 126 126 const opus_int pulses4 /* I number of pulses per pulse-subframe */ 127 127 ) 128 128 { 129 - opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; 129 + opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; 130 130 131 131 /* this function operates on one shell code frame of 16 pulses */ 132 132 silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );

+1

lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c

··· 53 53 /* Scale down */ 54 54 nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); 55 55 shft = 2; 56 + i+=2; 56 57 break; 57 58 } 58 59 }

-2

lib/rbcodec/codecs/opus.c

··· 337 337 param = ci->id3->elapsed; 338 338 strtoffset = ci->id3->offset; 339 339 340 - global_stack = 0; 341 - 342 340 #if defined(CPU_COLDFIRE) 343 341 /* EMAC rounding is disabled because of MULT16_32_Q15, which will be 344 342 inaccurate with rounding in its current incarnation */

Configure Feed

Configure Feed