at 15.09-beta 14 kB view raw
1diff -Naur a52dec.old/include/a52.h a52dec.new/include/a52.h 2--- a52dec.old/include/a52.h 2012-07-16 14:24:14.000000000 +0200 3+++ a52dec.new/include/a52.h 2012-07-16 14:31:37.000000000 +0200 4@@ -42,6 +42,11 @@ 5 } expbap_t; 6 7 typedef struct { 8+ sample_t real; 9+ sample_t imag; 10+} complex_t; 11+ 12+typedef struct { 13 uint8_t fscod; /* sample rate */ 14 uint8_t halfrate; /* halfrate factor */ 15 uint8_t acmod; /* coded channels */ 16@@ -94,6 +99,20 @@ 17 18 sample_t * samples; 19 int downmixed; 20+ 21+ /* Root values for IFFT */ 22+ sample_t * roots16; // size 3 23+ sample_t * roots32; // size 7 24+ sample_t * roots64; // size 15 25+ sample_t * roots128; // size 31 26+ 27+ /* Twiddle factors for IMDCT */ 28+ complex_t * pre1; // size 128 29+ complex_t * post1; // size 64 30+ complex_t * pre2; // size 64 31+ complex_t * post2; // size 32 32+ 33+ sample_t * a52_imdct_window; // size 256 34 } a52_state_t; 35 36 #define A52_CHANNEL 0 37diff -Naur a52dec.old/liba52/a52_internal.h a52dec.new/liba52/a52_internal.h 38--- a52dec.old/liba52/a52_internal.h 2012-07-16 14:24:14.000000000 +0200 39+++ a52dec.new/liba52/a52_internal.h 2012-07-16 14:28:33.000000000 +0200 40@@ -49,6 +49,6 @@ 41 sample_t clev, sample_t slev); 42 void a52_upmix (sample_t * samples, int acmod, int output); 43 44-void a52_imdct_init (uint32_t mm_accel); 45-void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); 46-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); 47+void a52_imdct_init (a52_state_t * state, uint32_t mm_accel); 48+void a52_imdct_256 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); 49+void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); 50diff -Naur a52dec.old/liba52/imdct.c a52dec.new/liba52/imdct.c 51--- a52dec.old/liba52/imdct.c 2012-07-16 14:24:14.000000000 +0200 52+++ a52dec.new/liba52/imdct.c 2012-07-16 14:33:00.000000000 +0200 53@@ -40,11 +40,6 @@ 54 #include "a52_internal.h" 55 #include "mm_accel.h" 56 57-typedef struct complex_s { 58- sample_t real; 59- sample_t imag; 60-} complex_t; 61- 62 static uint8_t fftorder[] = { 63 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, 64 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, 65@@ -56,22 +51,8 @@ 66 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 67 }; 68 69-/* Root values for IFFT */ 70-static sample_t roots16[3]; 71-static sample_t roots32[7]; 72-static sample_t roots64[15]; 73-static sample_t roots128[31]; 74- 75-/* Twiddle factors for IMDCT */ 76-static complex_t pre1[128]; 77-static complex_t post1[64]; 78-static complex_t pre2[64]; 79-static complex_t post2[32]; 80- 81-static sample_t a52_imdct_window[256]; 82- 83-static void (* ifft128) (complex_t * buf); 84-static void (* ifft64) (complex_t * buf); 85+static void (* ifft128) (a52_state_t * state, complex_t * buf); 86+static void (* ifft64) (a52_state_t * state, complex_t * buf); 87 88 static inline void ifft2 (complex_t * buf) 89 { 90@@ -167,7 +148,7 @@ 91 a1.imag += tmp4; \ 92 } while (0) 93 94-static inline void ifft8 (complex_t * buf) 95+static inline void ifft8 (a52_state_t * state, complex_t * buf) 96 { 97 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 98 99@@ -175,7 +156,7 @@ 100 ifft2 (buf + 4); 101 ifft2 (buf + 6); 102 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); 103- BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); 104+ BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], state->roots16[1]); 105 } 106 107 static void ifft_pass (complex_t * buf, sample_t * weight, int n) 108@@ -205,66 +186,66 @@ 109 } while (--i); 110 } 111 112-static void ifft16 (complex_t * buf) 113+static void ifft16 (a52_state_t * state, complex_t * buf) 114 { 115- ifft8 (buf); 116+ ifft8 (state, buf); 117 ifft4 (buf + 8); 118 ifft4 (buf + 12); 119- ifft_pass (buf, roots16 - 4, 4); 120+ ifft_pass (buf, state->roots16 - 4, 4); 121 } 122 123-static void ifft32 (complex_t * buf) 124+static void ifft32 (a52_state_t * state, complex_t * buf) 125 { 126- ifft16 (buf); 127- ifft8 (buf + 16); 128- ifft8 (buf + 24); 129- ifft_pass (buf, roots32 - 8, 8); 130+ ifft16 (state, buf); 131+ ifft8 (state, buf + 16); 132+ ifft8 (state, buf + 24); 133+ ifft_pass (buf, state->roots32 - 8, 8); 134 } 135 136-static void ifft64_c (complex_t * buf) 137+static void ifft64_c (a52_state_t * state, complex_t * buf) 138 { 139- ifft32 (buf); 140- ifft16 (buf + 32); 141- ifft16 (buf + 48); 142- ifft_pass (buf, roots64 - 16, 16); 143+ ifft32 (state, buf); 144+ ifft16 (state, buf + 32); 145+ ifft16 (state, buf + 48); 146+ ifft_pass (buf, state->roots64 - 16, 16); 147 } 148 149-static void ifft128_c (complex_t * buf) 150+static void ifft128_c (a52_state_t * state, complex_t * buf) 151 { 152- ifft32 (buf); 153- ifft16 (buf + 32); 154- ifft16 (buf + 48); 155- ifft_pass (buf, roots64 - 16, 16); 156+ ifft32 (state, buf); 157+ ifft16 (state, buf + 32); 158+ ifft16 (state, buf + 48); 159+ ifft_pass (buf, state->roots64 - 16, 16); 160 161- ifft32 (buf + 64); 162- ifft32 (buf + 96); 163- ifft_pass (buf, roots128 - 32, 32); 164+ ifft32 (state, buf + 64); 165+ ifft32 (state, buf + 96); 166+ ifft_pass (buf, state->roots128 - 32, 32); 167 } 168 169-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) 170+void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) 171 { 172 int i, k; 173 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; 174- const sample_t * window = a52_imdct_window; 175+ const sample_t * window = state->a52_imdct_window; 176 complex_t buf[128]; 177 178 for (i = 0; i < 128; i++) { 179 k = fftorder[i]; 180- t_r = pre1[i].real; 181- t_i = pre1[i].imag; 182+ t_r = state->pre1[i].real; 183+ t_i = state->pre1[i].imag; 184 185 buf[i].real = t_i * data[255-k] + t_r * data[k]; 186 buf[i].imag = t_r * data[255-k] - t_i * data[k]; 187 } 188 189- ifft128 (buf); 190+ ifft128 (state, buf); 191 192 /* Post IFFT complex multiply plus IFFT complex conjugate*/ 193 /* Window and convert to real valued signal */ 194 for (i = 0; i < 64; i++) { 195 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ 196- t_r = post1[i].real; 197- t_i = post1[i].imag; 198+ t_r = state->post1[i].real; 199+ t_i = state->post1[i].imag; 200 201 a_r = t_r * buf[i].real + t_i * buf[i].imag; 202 a_i = t_i * buf[i].real - t_r * buf[i].imag; 203@@ -285,18 +266,18 @@ 204 } 205 } 206 207-void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) 208+void a52_imdct_256(a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) 209 { 210 int i, k; 211 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; 212- const sample_t * window = a52_imdct_window; 213+ const sample_t * window = state->a52_imdct_window; 214 complex_t buf1[64], buf2[64]; 215 216 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ 217 for (i = 0; i < 64; i++) { 218 k = fftorder[i]; 219- t_r = pre2[i].real; 220- t_i = pre2[i].imag; 221+ t_r = state->pre2[i].real; 222+ t_i = state->pre2[i].imag; 223 224 buf1[i].real = t_i * data[254-k] + t_r * data[k]; 225 buf1[i].imag = t_r * data[254-k] - t_i * data[k]; 226@@ -305,15 +286,15 @@ 227 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1]; 228 } 229 230- ifft64 (buf1); 231- ifft64 (buf2); 232+ ifft64 (state, buf1); 233+ ifft64 (state, buf2); 234 235 /* Post IFFT complex multiply */ 236 /* Window and convert to real valued signal */ 237 for (i = 0; i < 32; i++) { 238 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ 239- t_r = post2[i].real; 240- t_i = post2[i].imag; 241+ t_r = state->post2[i].real; 242+ t_i = state->post2[i].imag; 243 244 a_r = t_r * buf1[i].real + t_i * buf1[i].imag; 245 a_i = t_i * buf1[i].real - t_r * buf1[i].imag; 246@@ -362,7 +343,7 @@ 247 return bessel; 248 } 249 250-void a52_imdct_init (uint32_t mm_accel) 251+void a52_imdct_init (a52_state_t * state, uint32_t mm_accel) 252 { 253 int i, k; 254 double sum; 255@@ -371,50 +352,50 @@ 256 sum = 0; 257 for (i = 0; i < 256; i++) { 258 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); 259- a52_imdct_window[i] = sum; 260+ state->a52_imdct_window[i] = sum; 261 } 262 sum++; 263 for (i = 0; i < 256; i++) 264- a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum); 265+ state->a52_imdct_window[i] = sqrt (state->a52_imdct_window[i] / sum); 266 267 for (i = 0; i < 3; i++) 268- roots16[i] = cos ((M_PI / 8) * (i + 1)); 269+ state->roots16[i] = cos ((M_PI / 8) * (i + 1)); 270 271 for (i = 0; i < 7; i++) 272- roots32[i] = cos ((M_PI / 16) * (i + 1)); 273+ state->roots32[i] = cos ((M_PI / 16) * (i + 1)); 274 275 for (i = 0; i < 15; i++) 276- roots64[i] = cos ((M_PI / 32) * (i + 1)); 277+ state->roots64[i] = cos ((M_PI / 32) * (i + 1)); 278 279 for (i = 0; i < 31; i++) 280- roots128[i] = cos ((M_PI / 64) * (i + 1)); 281+ state->roots128[i] = cos ((M_PI / 64) * (i + 1)); 282 283 for (i = 0; i < 64; i++) { 284 k = fftorder[i] / 2 + 64; 285- pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); 286- pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); 287+ state->pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); 288+ state->pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); 289 } 290 291 for (i = 64; i < 128; i++) { 292 k = fftorder[i] / 2 + 64; 293- pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); 294- pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); 295+ state->pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); 296+ state->pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); 297 } 298 299 for (i = 0; i < 64; i++) { 300- post1[i].real = cos ((M_PI / 256) * (i + 0.5)); 301- post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); 302+ state->post1[i].real = cos ((M_PI / 256) * (i + 0.5)); 303+ state->post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); 304 } 305 306 for (i = 0; i < 64; i++) { 307 k = fftorder[i] / 4; 308- pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); 309- pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); 310+ state->pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); 311+ state->pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); 312 } 313 314 for (i = 0; i < 32; i++) { 315- post2[i].real = cos ((M_PI / 128) * (i + 0.5)); 316- post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); 317+ state->post2[i].real = cos ((M_PI / 128) * (i + 0.5)); 318+ state->post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); 319 } 320 321 #ifdef LIBA52_DJBFFT 322diff -Naur a52dec.old/liba52/parse.c a52dec.new/liba52/parse.c 323--- a52dec.old/liba52/parse.c 2012-07-16 14:24:14.000000000 +0200 324+++ a52dec.new/liba52/parse.c 2012-07-16 14:33:00.000000000 +0200 325@@ -56,16 +56,53 @@ 326 a52_state_t * state; 327 int i; 328 329- state = malloc (sizeof (a52_state_t)); 330+ state = calloc (1, sizeof (a52_state_t)); 331 if (state == NULL) 332 return NULL; 333 334 state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); 335 if (state->samples == NULL) { 336- free (state); 337- return NULL; 338+ goto fail; 339 } 340 341+ /* Root values for IFFT */ 342+ state->roots16 = memalign (16, 3 * sizeof (sample_t)); 343+ if (state->roots16 == NULL) 344+ goto fail; 345+ 346+ state->roots32 = memalign (16, 7 * sizeof (sample_t)); 347+ if (state->roots32 == NULL) 348+ goto fail; 349+ 350+ state->roots64 = memalign (16, 15 * sizeof (sample_t)); 351+ if (state->roots64 == NULL) 352+ goto fail; 353+ 354+ state->roots128 = memalign (16, 31 * sizeof (sample_t)); 355+ if (state->roots128 == NULL) 356+ goto fail; 357+ 358+ /* Twiddle factors for IMDCT */ 359+ state->pre1 = memalign (16, 128 * sizeof (complex_t)); 360+ if (state->pre1 == NULL) 361+ goto fail; 362+ 363+ state->post1 = memalign (16, 64 * sizeof (complex_t)); 364+ if (state->post1 == NULL) 365+ goto fail; 366+ 367+ state->pre2 = memalign (16, 64 * sizeof (complex_t)); 368+ if (state->pre2 == NULL) 369+ goto fail; 370+ 371+ state->post2 = memalign (16, 32 * sizeof (complex_t)); 372+ if (state->post2 == NULL) 373+ goto fail; 374+ 375+ state->a52_imdct_window = memalign (16, 256 * sizeof (sample_t)); 376+ if (state->a52_imdct_window == NULL) 377+ goto fail; 378+ 379 for (i = 0; i < 256 * 12; i++) 380 state->samples[i] = 0; 381 382@@ -73,9 +110,27 @@ 383 384 state->lfsr_state = 1; 385 386- a52_imdct_init (mm_accel); 387+ a52_imdct_init (state, mm_accel); 388 389 return state; 390+ 391+fail: 392+ if ( state ) 393+ { 394+ free (state->a52_imdct_window); 395+ free (state->post2); 396+ free (state->pre2); 397+ free (state->post1); 398+ free (state->pre1); 399+ free (state->roots128); 400+ free (state->roots64); 401+ free (state->roots32); 402+ free (state->roots16); 403+ free (state->samples); 404+ free (state); 405+ } 406+ return NULL; 407+ 408 } 409 410 sample_t * a52_samples (a52_state_t * state) 411@@ -825,7 +880,7 @@ 412 state->dynrng, 0, 7); 413 for (i = 7; i < 256; i++) 414 (samples-256)[i] = 0; 415- a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias); 416+ a52_imdct_512 (state, samples - 256, samples + 1536 - 256, state->bias); 417 } else { 418 /* just skip the LFE coefficients */ 419 coeff_get (state, samples + 1280, &state->lfe_expbap, &quantizer, 420@@ -854,10 +909,10 @@ 421 422 if (coeff[i]) { 423 if (blksw[i]) 424- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, 425+ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, 426 bias); 427 else 428- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, 429+ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, 430 bias); 431 } else { 432 int j; 433@@ -883,11 +938,11 @@ 434 435 if (blksw[0]) 436 for (i = 0; i < nfchans; i++) 437- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, 438+ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, 439 state->bias); 440 else 441 for (i = 0; i < nfchans; i++) 442- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, 443+ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, 444 state->bias); 445 } 446 447@@ -896,6 +951,15 @@ 448 449 void a52_free (a52_state_t * state) 450 { 451+ free (state->a52_imdct_window); 452+ free (state->post2); 453+ free (state->pre2); 454+ free (state->post1); 455+ free (state->pre1); 456+ free (state->roots128); 457+ free (state->roots64); 458+ free (state->roots32); 459+ free (state->roots16); 460 free (state->samples); 461 free (state); 462 }