1diff -Naur a52dec.old/include/a52.h a52dec.new/include/a52.h
2--- a52dec.old/include/a52.h 2012-07-16 14:24:14.000000000 +0200
3+++ a52dec.new/include/a52.h 2012-07-16 14:31:37.000000000 +0200
4@@ -42,6 +42,11 @@
5 } expbap_t;
6
7 typedef struct {
8+ sample_t real;
9+ sample_t imag;
10+} complex_t;
11+
12+typedef struct {
13 uint8_t fscod; /* sample rate */
14 uint8_t halfrate; /* halfrate factor */
15 uint8_t acmod; /* coded channels */
16@@ -94,6 +99,20 @@
17
18 sample_t * samples;
19 int downmixed;
20+
21+ /* Root values for IFFT */
22+ sample_t * roots16; /* 3 elements */
23+ sample_t * roots32; /* 7 elements */
24+ sample_t * roots64; /* 15 elements */
25+ sample_t * roots128; /* 31 elements */
26+
27+ /* Twiddle factors for IMDCT */
28+ complex_t * pre1; /* 128 elements */
29+ complex_t * post1; /* 64 elements */
30+ complex_t * pre2; /* 64 elements */
31+ complex_t * post2; /* 32 elements */
32+
33+ sample_t * a52_imdct_window; /* 256 elements */
34 } a52_state_t;
35
36 #define A52_CHANNEL 0
37diff -Naur a52dec.old/liba52/a52_internal.h a52dec.new/liba52/a52_internal.h
38--- a52dec.old/liba52/a52_internal.h 2012-07-16 14:24:14.000000000 +0200
39+++ a52dec.new/liba52/a52_internal.h 2012-07-16 14:28:33.000000000 +0200
40@@ -49,6 +49,6 @@
41 sample_t clev, sample_t slev);
42 void a52_upmix (sample_t * samples, int acmod, int output);
43
44-void a52_imdct_init (uint32_t mm_accel);
45-void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
46-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
47+void a52_imdct_init (a52_state_t * state, uint32_t mm_accel);
48+void a52_imdct_256 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias);
49+void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias);
50diff -Naur a52dec.old/liba52/imdct.c a52dec.new/liba52/imdct.c
51--- a52dec.old/liba52/imdct.c 2012-07-16 14:24:14.000000000 +0200
52+++ a52dec.new/liba52/imdct.c 2012-07-16 14:33:00.000000000 +0200
53@@ -40,11 +40,6 @@
54 #include "a52_internal.h"
55 #include "mm_accel.h"
56
57-typedef struct complex_s {
58- sample_t real;
59- sample_t imag;
60-} complex_t;
61-
62 static uint8_t fftorder[] = {
63 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
64 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
65@@ -56,22 +51,8 @@
66 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
67 };
68
69-/* Root values for IFFT */
70-static sample_t roots16[3];
71-static sample_t roots32[7];
72-static sample_t roots64[15];
73-static sample_t roots128[31];
74-
75-/* Twiddle factors for IMDCT */
76-static complex_t pre1[128];
77-static complex_t post1[64];
78-static complex_t pre2[64];
79-static complex_t post2[32];
80-
81-static sample_t a52_imdct_window[256];
82-
83-static void (* ifft128) (complex_t * buf);
84-static void (* ifft64) (complex_t * buf);
85+static void (* ifft128) (a52_state_t * state, complex_t * buf);
86+static void (* ifft64) (a52_state_t * state, complex_t * buf);
87
88 static inline void ifft2 (complex_t * buf)
89 {
90@@ -167,7 +148,7 @@
91 a1.imag += tmp4; \
92 } while (0)
93
94-static inline void ifft8 (complex_t * buf)
95+static inline void ifft8 (a52_state_t * state, complex_t * buf)
96 {
97 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
98
99@@ -175,7 +156,7 @@
100 ifft2 (buf + 4);
101 ifft2 (buf + 6);
102 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
103- BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
104+ BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], state->roots16[1]);
105 }
106
107 static void ifft_pass (complex_t * buf, sample_t * weight, int n)
108@@ -205,66 +186,66 @@
109 } while (--i);
110 }
111
112-static void ifft16 (complex_t * buf)
113+static void ifft16 (a52_state_t * state, complex_t * buf)
114 {
115- ifft8 (buf);
116+ ifft8 (state, buf);
117 ifft4 (buf + 8);
118 ifft4 (buf + 12);
119- ifft_pass (buf, roots16 - 4, 4);
120+ ifft_pass (buf, state->roots16 - 4, 4);
121 }
122
123-static void ifft32 (complex_t * buf)
124+static void ifft32 (a52_state_t * state, complex_t * buf)
125 {
126- ifft16 (buf);
127- ifft8 (buf + 16);
128- ifft8 (buf + 24);
129- ifft_pass (buf, roots32 - 8, 8);
130+ ifft16 (state, buf);
131+ ifft8 (state, buf + 16);
132+ ifft8 (state, buf + 24);
133+ ifft_pass (buf, state->roots32 - 8, 8);
134 }
135
136-static void ifft64_c (complex_t * buf)
137+static void ifft64_c (a52_state_t * state, complex_t * buf)
138 {
139- ifft32 (buf);
140- ifft16 (buf + 32);
141- ifft16 (buf + 48);
142- ifft_pass (buf, roots64 - 16, 16);
143+ ifft32 (state, buf);
144+ ifft16 (state, buf + 32);
145+ ifft16 (state, buf + 48);
146+ ifft_pass (buf, state->roots64 - 16, 16);
147 }
148
149-static void ifft128_c (complex_t * buf)
150+static void ifft128_c (a52_state_t * state, complex_t * buf)
151 {
152- ifft32 (buf);
153- ifft16 (buf + 32);
154- ifft16 (buf + 48);
155- ifft_pass (buf, roots64 - 16, 16);
156+ ifft32 (state, buf);
157+ ifft16 (state, buf + 32);
158+ ifft16 (state, buf + 48);
159+ ifft_pass (buf, state->roots64 - 16, 16);
160
161- ifft32 (buf + 64);
162- ifft32 (buf + 96);
163- ifft_pass (buf, roots128 - 32, 32);
164+ ifft32 (state, buf + 64);
165+ ifft32 (state, buf + 96);
166+ ifft_pass (buf, state->roots128 - 32, 32);
167 }
168
169-void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
170+void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias)
171 {
172 int i, k;
173 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
174- const sample_t * window = a52_imdct_window;
175+ const sample_t * window = state->a52_imdct_window;
176 complex_t buf[128];
177
178 for (i = 0; i < 128; i++) {
179 k = fftorder[i];
180- t_r = pre1[i].real;
181- t_i = pre1[i].imag;
182+ t_r = state->pre1[i].real;
183+ t_i = state->pre1[i].imag;
184
185 buf[i].real = t_i * data[255-k] + t_r * data[k];
186 buf[i].imag = t_r * data[255-k] - t_i * data[k];
187 }
188
189- ifft128 (buf);
190+ ifft128 (state, buf);
191
192 /* Post IFFT complex multiply plus IFFT complex conjugate*/
193 /* Window and convert to real valued signal */
194 for (i = 0; i < 64; i++) {
195 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
196- t_r = post1[i].real;
197- t_i = post1[i].imag;
198+ t_r = state->post1[i].real;
199+ t_i = state->post1[i].imag;
200
201 a_r = t_r * buf[i].real + t_i * buf[i].imag;
202 a_i = t_i * buf[i].real - t_r * buf[i].imag;
203@@ -285,18 +266,18 @@
204 }
205 }
206
207-void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
208+void a52_imdct_256(a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias)
209 {
210 int i, k;
211 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
212- const sample_t * window = a52_imdct_window;
213+ const sample_t * window = state->a52_imdct_window;
214 complex_t buf1[64], buf2[64];
215
216 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
217 for (i = 0; i < 64; i++) {
218 k = fftorder[i];
219- t_r = pre2[i].real;
220- t_i = pre2[i].imag;
221+ t_r = state->pre2[i].real;
222+ t_i = state->pre2[i].imag;
223
224 buf1[i].real = t_i * data[254-k] + t_r * data[k];
225 buf1[i].imag = t_r * data[254-k] - t_i * data[k];
226@@ -305,15 +286,15 @@
227 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1];
228 }
229
230- ifft64 (buf1);
231- ifft64 (buf2);
232+ ifft64 (state, buf1);
233+ ifft64 (state, buf2);
234
235 /* Post IFFT complex multiply */
236 /* Window and convert to real valued signal */
237 for (i = 0; i < 32; i++) {
238 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
239- t_r = post2[i].real;
240- t_i = post2[i].imag;
241+ t_r = state->post2[i].real;
242+ t_i = state->post2[i].imag;
243
244 a_r = t_r * buf1[i].real + t_i * buf1[i].imag;
245 a_i = t_i * buf1[i].real - t_r * buf1[i].imag;
246@@ -362,7 +343,7 @@
247 return bessel;
248 }
249
250-void a52_imdct_init (uint32_t mm_accel)
251+void a52_imdct_init (a52_state_t * state, uint32_t mm_accel)
252 {
253 int i, k;
254 double sum;
255@@ -371,50 +352,50 @@
256 sum = 0;
257 for (i = 0; i < 256; i++) {
258 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
259- a52_imdct_window[i] = sum;
260+ state->a52_imdct_window[i] = sum;
261 }
262 sum++;
263 for (i = 0; i < 256; i++)
264- a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum);
265+ state->a52_imdct_window[i] = sqrt (state->a52_imdct_window[i] / sum);
266
267 for (i = 0; i < 3; i++)
268- roots16[i] = cos ((M_PI / 8) * (i + 1));
269+ state->roots16[i] = cos ((M_PI / 8) * (i + 1));
270
271 for (i = 0; i < 7; i++)
272- roots32[i] = cos ((M_PI / 16) * (i + 1));
273+ state->roots32[i] = cos ((M_PI / 16) * (i + 1));
274
275 for (i = 0; i < 15; i++)
276- roots64[i] = cos ((M_PI / 32) * (i + 1));
277+ state->roots64[i] = cos ((M_PI / 32) * (i + 1));
278
279 for (i = 0; i < 31; i++)
280- roots128[i] = cos ((M_PI / 64) * (i + 1));
281+ state->roots128[i] = cos ((M_PI / 64) * (i + 1));
282
283 for (i = 0; i < 64; i++) {
284 k = fftorder[i] / 2 + 64;
285- pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
286- pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
287+ state->pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
288+ state->pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
289 }
290
291 for (i = 64; i < 128; i++) {
292 k = fftorder[i] / 2 + 64;
293- pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
294- pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
295+ state->pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
296+ state->pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
297 }
298
299 for (i = 0; i < 64; i++) {
300- post1[i].real = cos ((M_PI / 256) * (i + 0.5));
301- post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
302+ state->post1[i].real = cos ((M_PI / 256) * (i + 0.5));
303+ state->post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
304 }
305
306 for (i = 0; i < 64; i++) {
307 k = fftorder[i] / 4;
308- pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
309- pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
310+ state->pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
311+ state->pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
312 }
313
314 for (i = 0; i < 32; i++) {
315- post2[i].real = cos ((M_PI / 128) * (i + 0.5));
316- post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
317+ state->post2[i].real = cos ((M_PI / 128) * (i + 0.5));
318+ state->post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
319 }
320
321 #ifdef LIBA52_DJBFFT
322diff -Naur a52dec.old/liba52/parse.c a52dec.new/liba52/parse.c
323--- a52dec.old/liba52/parse.c 2012-07-16 14:24:14.000000000 +0200
324+++ a52dec.new/liba52/parse.c 2012-07-16 14:33:00.000000000 +0200
325@@ -56,16 +56,53 @@
326 a52_state_t * state;
327 int i;
328
329- state = malloc (sizeof (a52_state_t));
330+ state = calloc (1, sizeof (a52_state_t));
331 if (state == NULL)
332 return NULL;
333
334 state->samples = memalign (16, 256 * 12 * sizeof (sample_t));
335 if (state->samples == NULL) {
336- free (state);
337- return NULL;
338+ goto fail;
339 }
340
341+ /* Root values for IFFT */
342+ state->roots16 = memalign (16, 3 * sizeof (sample_t));
343+ if (state->roots16 == NULL)
344+ goto fail;
345+
346+ state->roots32 = memalign (16, 7 * sizeof (sample_t));
347+ if (state->roots32 == NULL)
348+ goto fail;
349+
350+ state->roots64 = memalign (16, 15 * sizeof (sample_t));
351+ if (state->roots64 == NULL)
352+ goto fail;
353+
354+ state->roots128 = memalign (16, 31 * sizeof (sample_t));
355+ if (state->roots128 == NULL)
356+ goto fail;
357+
358+ /* Twiddle factors for IMDCT */
359+ state->pre1 = memalign (16, 128 * sizeof (complex_t));
360+ if (state->pre1 == NULL)
361+ goto fail;
362+
363+ state->post1 = memalign (16, 64 * sizeof (complex_t));
364+ if (state->post1 == NULL)
365+ goto fail;
366+
367+ state->pre2 = memalign (16, 64 * sizeof (complex_t));
368+ if (state->pre2 == NULL)
369+ goto fail;
370+
371+ state->post2 = memalign (16, 32 * sizeof (complex_t));
372+ if (state->post2 == NULL)
373+ goto fail;
374+
375+ state->a52_imdct_window = memalign (16, 256 * sizeof (sample_t));
376+ if (state->a52_imdct_window == NULL)
377+ goto fail;
378+
379 for (i = 0; i < 256 * 12; i++)
380 state->samples[i] = 0;
381
382@@ -73,9 +110,27 @@
383
384 state->lfsr_state = 1;
385
386- a52_imdct_init (mm_accel);
387+ a52_imdct_init (state, mm_accel);
388
389 return state;
390+
391+fail:
392+ if ( state )
393+ {
394+ free (state->a52_imdct_window);
395+ free (state->post2);
396+ free (state->pre2);
397+ free (state->post1);
398+ free (state->pre1);
399+ free (state->roots128);
400+ free (state->roots64);
401+ free (state->roots32);
402+ free (state->roots16);
403+ free (state->samples);
404+ free (state);
405+ }
406+ return NULL;
407+
408 }
409
410 sample_t * a52_samples (a52_state_t * state)
411@@ -825,7 +880,7 @@
412 state->dynrng, 0, 7);
413 for (i = 7; i < 256; i++)
414 (samples-256)[i] = 0;
415- a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias);
416+ a52_imdct_512 (state, samples - 256, samples + 1536 - 256, state->bias);
417 } else {
418 /* just skip the LFE coefficients */
419 coeff_get (state, samples + 1280, &state->lfe_expbap, &quantizer,
420@@ -854,10 +909,10 @@
421
422 if (coeff[i]) {
423 if (blksw[i])
424- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
425+ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i,
426 bias);
427 else
428- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
429+ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i,
430 bias);
431 } else {
432 int j;
433@@ -883,11 +938,11 @@
434
435 if (blksw[0])
436 for (i = 0; i < nfchans; i++)
437- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
438+ a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i,
439 state->bias);
440 else
441 for (i = 0; i < nfchans; i++)
442- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
443+ a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i,
444 state->bias);
445 }
446
447@@ -896,6 +951,15 @@
448
449 void a52_free (a52_state_t * state)
450 {
451+ free (state->a52_imdct_window);
452+ free (state->post2);
453+ free (state->pre2);
454+ free (state->post1);
455+ free (state->pre1);
456+ free (state->roots128);
457+ free (state->roots64);
458+ free (state->roots32);
459+ free (state->roots16);
460 free (state->samples);
461 free (state);
462 }