Brought up to date
[swftools.git] / lib / bladeenc / encode.c
1 /*
2                         (c) Copyright 1998-2001 - Tord Jansson
3                         =======================================
4
5                 This file is part of the BladeEnc MP3 Encoder, based on
6                 ISO's reference code for MPEG Layer 3 compression, and might
7                 contain smaller or larger sections that are directly taken
8                 from ISO's reference code.
9
10                 All changes to the ISO reference code herein are either
11                 copyrighted by Tord Jansson (tord.jansson@swipnet.se)
12                 or sublicensed to Tord Jansson by a third party.
13
14         BladeEnc is free software; you can redistribute this file
15         and/or modify it under the terms of the GNU Lesser General Public
16         License as published by the Free Software Foundation; either
17         version 2.1 of the License, or (at your option) any later version.
18
19
20
21         ------------    Changes    ------------
22
23         2000-11-06  Andre Piotrowski
24
25         -       speed up: complete new, much faster functions 'create_ana_filter()', 'windowFilterSubband()'
26         -                 'WIND_SB_CHANGE_LEVEL' 3 requires a new 'enwindow[]' (see 'tables.c')
27
28         2000-11-22      ap
29
30         -       bug fix: initWindowFilterSubband() -- Dividing the enwindow-entries is allowed to be done once only!
31
32         2000-12-11  ap
33
34         -       speed up: WIND_SB_CHANGE_LEVEL 4
35
36         2001-01-12  ap
37
38         -       bug fix: fixed some typo relevant in case of ORG_BUFFERS == 1
39 */
40
41 #include "common.h"
42 #include "encoder.h"
43
44
45
46
47
48 /*  ========================================================================================  */
49 /*      rebuffer_audio                                                                        */
50 /*  ========================================================================================  */
51
52 #if ORG_BUFFERS
53
54 void rebuffer_audio
55 (
56         short                                   buffer[2][1152],
57         short                                   *insamp,
58         unsigned int                    samples_read,
59         int                                             stereo
60 )
61 {
62         unsigned int                    j;
63
64         if (stereo == 2)
65         { 
66                 for (j = 0;  j < samples_read/2;  j++)
67                 {
68                         buffer[0][j] = insamp[2*j];
69                         buffer[1][j] = insamp[2*j+1];
70                 }
71         }
72         else
73         {               
74                 for (j = 0;  j < samples_read;  j++)
75                 {
76                         buffer[0][j] = insamp[j];
77                         buffer[1][j] = 0;
78                 }
79         }
80
81         for( ;  j < 1152;  j++)
82         {
83                 buffer[0][j] = 0;
84                 buffer[1][j] = 0;
85         }
86
87         return;
88 }
89
90 #else
91
92 #define         FLUSH                                   1152
93 #define         DELAY                                    768
94
95 void rebuffer_audio
96 (
97         const short                             *insamp,
98         FLOAT                                   buffer[2][2048],
99         int                                             *buffer_idx,
100         unsigned int                    samples_read,
101         int                                             stereo
102 )
103 {
104         int                                             idx, med, fin;
105
106         /* flush the last two read granules */
107         *buffer_idx = (*buffer_idx + FLUSH) & 2047;
108         idx = (*buffer_idx + DELAY) & 2047;
109         fin = (idx + FLUSH) & 2047;
110
111         if (stereo == 2)
112         {
113                 med = (idx + samples_read/2) & 2047;
114                 if (idx >= med)
115                 {
116                         while (idx < 2048)
117                         {
118                                 buffer[0][idx] = *insamp++;
119                                 buffer[1][idx] = *insamp++;
120                                 idx++;
121                         }
122                         idx = 0;
123                 }
124                 while (idx < med)
125                 {
126                         buffer[0][idx] = *insamp++;
127                         buffer[1][idx] = *insamp++;
128                         idx++;
129                 }
130         }
131         else
132         {               
133                 med = (idx + samples_read) & 2047;
134                 if (idx >= med)
135                 {
136                         while (idx < 2048)
137                         {
138                                 buffer[0][idx] = *insamp++;
139                                 buffer[1][idx] = 0;
140                                 idx++;
141                         }
142                         idx = 0;
143                 }
144                 while (idx < med)
145                 {
146                         buffer[0][idx] = *insamp++;
147                         buffer[1][idx] = 0;
148                         idx++;
149                 }
150         }
151
152         if (idx != fin)
153         {
154                 if (idx > fin)
155                 {
156                         while (idx < 2048)
157                         {
158                                 buffer[0][idx] = 0;
159                                 buffer[1][idx] = 0;
160                                 idx++;
161                         }
162                         idx = 0;
163                 }
164                 while (idx < fin)
165                 {
166                         buffer[0][idx] = 0;
167                         buffer[1][idx] = 0;
168                         idx++;
169                 }
170         }
171 }
172
173 #endif
174
175
176
177
178
179 #if ORG_BUFFERS
180 #define WIND_SB_CHANGE_LEVEL    3
181 #else
182 #define WIND_SB_CHANGE_LEVEL    4
183 #endif
184
185
186
187 #if WIND_SB_CHANGE_LEVEL==4
188
189         typedef double                  filter_matrix[SBLIMIT/4][32];
190
191         static  filter_matrix   m;
192
193         /*  ========================================================================================  */
194         /*      create_ana_filter                                                                     */
195         /*  ========================================================================================  */
196         /*
197                 Calculates the analysis filterbank coefficients and rounds to the 9th decimal place
198                 accuracy of the filterbank tables in the ISO document.
199                 The coefficients are stored in #new_filter#
200
201
202                 Originally was computed
203
204                         filter[i][j]  :=  cos (PI64 * ((2*i+1) * (16-j)))
205
206                 for i = 0..SBLIMIT-1 and j = 0..63 .
207
208
209                 But for j = 0..15 is 16-j = -(16-(32-j)) which implies
210
211                 (1)             filter[i][j]  =  filter[i][32-j] .
212
213                 (We know that filter[i][16] = cos(0) = 1 , but that is not so interesting.)
214
215                 Furthermore, for j = 33..48 we get
216
217                                    filter[i][96-j]
218                                 =  cos (PI64 * (2*i+1) * (j-80))
219                                 =  cos (PI * (2*i+1) + PI64 * (2*i+1) * (16-j))
220                 (2)             =  cos (PI * (2*i+1)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (2*i+1)) = 0
221                                 =  -cos (PI64 * (2*i+1) * (16-j))
222                                 =  -filter[i][j] ,
223
224                 especially is filter[i][48] = 0 .
225
226
227                 On the other hand there is
228
229                                    filter[31-i][j]
230                                 =  cos (PI64 * (2*(31-i)+1) * (16-j))
231                                 =  cos (PI64 * (64-(2*i+1)) * (16-j))
232                                 =  cos (PI * (16-j) - PI64 * (2*i+1) * (16-j))
233                                 =  cos (PI * (16-j)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (16-j)) = 0
234                                 =  (-1)^^j * cos (PI64 * (2*i+1) * (16-j))
235                                 =  (-1)^^j * filter[i][j] .
236
237
238                 There is a third somewhat more complication "symmetry":
239
240                                    filter[15-i][j]
241                                 =  cos (PI64 * (2*(15-i)+1) * (16-j))
242                                 =  cos (PI64 * (32-(2*i+1)) * (16-j))
243                                 =  cos (PI/2 * (16-j) - PI64 * (2*i+1) * (16-j))
244                                 =  cos (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (16-j)) + sin (PI/2 * (16-j)) * sin (PI64 * (2*i+1) * (16-j))
245                                 =  cos (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (16-j)) + sin (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (-1)^^i * (16-(j+32)))
246
247                                 =  (-1)^^(  j  /2    ) * filter[i][j   ]   for even j
248                                 =  (-1)^^((j+1)/2 + i) * filter[i][j+32]   for odd  j  with  j <  32
249                                 =  (-1)^^((j-1)/2 + i) * filter[i][j-32]   for odd  j  with  j >= 32
250
251
252                 For these reasons we create a filter of the eighth size only and set
253
254                         new_filter[i][j]  :=  filter[i][j]       for i = 0..SBLIMIT/4-1 and j =  0..16
255                         new_filter[i][j]  :=  filter[i][j+16]    for i = 0..SBLIMIT/4-1 and j = 17..31
256         */
257
258         static  void create_ana_filter (double new_filter[SBLIMIT/4][32])
259         {
260                 register int                    i, j;
261                 double                                  t;
262
263                 for (i = 0;  i < SBLIMIT/4;  i++)
264                 {
265                         for (j = 0;  j <= 16;  j++)
266                         {
267                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * (16-j)));
268                                 if (t >= 0)
269                                         modf (t + 0.5, &t);
270                                 else
271                                         modf (t - 0.5, &t);
272                                 new_filter[i][j] = t * 1e-9;
273                         }
274                         for (j = 17;  j < 32;  j++)
275                         {
276                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * j));   /* (16-(j+16)) */
277                                 if (t >= 0)
278                                         modf (t + 0.5, &t);
279                                 else
280                                         modf (t - 0.5, &t);
281                                 new_filter[i][j] = t * 1e-9;
282                         }
283                 }
284         }
285
286         /*  ========================================================================================  */
287         /*      windowFilterSubband                                                                   */
288         /*  ========================================================================================  */
289         /*
290                 Calculates the analysis filter bank coefficients
291
292                 The windowed samples #z# is filtered by the digital filter matrix #m# to produce the
293                 subband samples #s#. This done by first selectively picking out values from the windowed
294                 samples, and then multiplying them by the filter matrix, producing 32 subband samples.
295         */
296
297         void  windowFilterSubband
298         (
299                 const FLOAT                             *buffer,
300                 int                                             buffer_idx,
301                 double                                  s[SBLIMIT]
302         )
303         {
304                 int                                             i, k;
305
306                 double                                  t, u, v, w;
307
308                 double                                  z[32];
309                 double                                  *p;
310                 double                                  *pEnw;
311
312                 pEnw = enwindow;
313
314                 for (k = 0;  k <= 16;  k++)
315                 {
316                         t =  buffer[(buffer_idx+511) & 2047] * *pEnw++;
317                         t += buffer[(buffer_idx+447) & 2047] * *pEnw++;
318                         t += buffer[(buffer_idx+383) & 2047] * *pEnw++;
319                         t += buffer[(buffer_idx+319) & 2047] * *pEnw++;
320                         t += buffer[(buffer_idx+255) & 2047] * *pEnw++;
321                         t += buffer[(buffer_idx+191) & 2047] * *pEnw++;
322                         t += buffer[(buffer_idx+127) & 2047] * *pEnw++;
323                         t += buffer[(buffer_idx+ 63) & 2047] * *pEnw++;
324                         z[k] = t;
325
326                         buffer_idx--;
327                 }                       
328
329                 for (k = 15;  k >= 0;  k--)
330                 {
331                         t =  buffer[(buffer_idx+511) & 2047] * *pEnw++;
332                         t += buffer[(buffer_idx+447) & 2047] * *pEnw++;
333                         t += buffer[(buffer_idx+383) & 2047] * *pEnw++;
334                         t += buffer[(buffer_idx+319) & 2047] * *pEnw++;
335                         t += buffer[(buffer_idx+255) & 2047] * *pEnw++;
336                         t += buffer[(buffer_idx+191) & 2047] * *pEnw++;
337                         t += buffer[(buffer_idx+127) & 2047] * *pEnw++;
338                         t += buffer[(buffer_idx+ 63) & 2047] * *pEnw++;
339                         z[k] += t;
340
341                         buffer_idx--;
342                 }                       
343
344                 for (k = 17;  k < 32;  k++)
345                 {
346                         t =  buffer[(buffer_idx+511) & 2047] * *pEnw++;
347                         t += buffer[(buffer_idx+447) & 2047] * *pEnw++;
348                         t += buffer[(buffer_idx+383) & 2047] * *pEnw++;
349                         t += buffer[(buffer_idx+319) & 2047] * *pEnw++;
350                         t += buffer[(buffer_idx+255) & 2047] * *pEnw++;
351                         t += buffer[(buffer_idx+191) & 2047] * *pEnw++;
352                         t += buffer[(buffer_idx+127) & 2047] * *pEnw++;
353                         t += buffer[(buffer_idx+ 63) & 2047] * *pEnw++;
354                         z[k] = t;
355
356                         buffer_idx--;
357                 }                       
358
359                 /* normally y[48] was computed like the other entries, */
360                 /* but this entry is not needed because m[i][48] = 0.  */
361                 buffer_idx--; /* But we have to increase the pointers. */
362                 pEnw += 8;
363
364                 for (k = 31;  k > 16;  k--)
365                 {
366                         t =  buffer[(buffer_idx+511) & 2047] * *pEnw++;
367                         t += buffer[(buffer_idx+447) & 2047] * *pEnw++;
368                         t += buffer[(buffer_idx+383) & 2047] * *pEnw++;
369                         t += buffer[(buffer_idx+319) & 2047] * *pEnw++;
370                         t += buffer[(buffer_idx+255) & 2047] * *pEnw++;
371                         t += buffer[(buffer_idx+191) & 2047] * *pEnw++;
372                         t += buffer[(buffer_idx+127) & 2047] * *pEnw++;
373                         t += buffer[(buffer_idx+ 63) & 2047] * *pEnw++;
374                         z[k] -= t;
375
376                         buffer_idx--;
377                 }                       
378
379                 for (i = 0;  i < SBLIMIT/4;  i++)
380                 {
381                         p = m[i];
382
383                         t = u = v = w = 0.0;
384                         for (k = 0;  k < 16;  k += 4)
385                         {
386                                 t += p[k   ] * z[k  ];
387                                 v += p[k+ 2] * z[k+2];
388                                 u += p[k+ 1] * z[k+1] + p[k+ 3] * z[k+3];
389                                 w -= p[k+17] * z[k+1] - p[k+19] * z[k+3];
390                         }
391                         for ( ;  k < 32;  k += 4)
392                         {
393                                 t += p[k   ] * z[k  ];
394                                 v += p[k+ 2] * z[k+2];
395                                 u += p[k+ 1] * z[k+1] + p[k+ 3] * z[k+3];
396                                 w += p[k-15] * z[k+1] - p[k-13] * z[k+3];
397                         }
398
399                         s[          i] = (t + v) + u;
400                         s[SBLIMIT-1-i] = (t + v) - u;
401                         if (i & 1)
402                         {
403                                 s[SBLIMIT/2-1-i] = (t - v) - w;
404                                 s[SBLIMIT/2  +i] = (t - v) + w;
405                         }
406                         else
407                         {
408                                 s[SBLIMIT/2-1-i] = (t - v) + w;
409                                 s[SBLIMIT/2  +i] = (t - v) - w;
410                         }
411                 }        
412         }
413
414 #elif WIND_SB_CHANGE_LEVEL==3
415
416         #define imax                    (SBLIMIT/4)
417         typedef double                  filter_matrix[imax][32];
418
419         static  int                             half[2];
420         static  int                             off[2];
421         static  double                  x[2][512];
422         static  filter_matrix   m;
423
424         /*  ========================================================================================  */
425         /*      create_ana_filter                                                                     */
426         /*  ========================================================================================  */
427         /*
428                 Calculates the analysis filterbank coefficients and rounds to the 9th decimal place
429                 accuracy of the filterbank tables in the ISO document.
430                 The coefficients are stored in #new_filter#
431
432
433                 Originally was computed
434
435                         filter[i][j]  :=  cos (PI64 * ((2*i+1) * (16-j)))
436
437                 for i = 0..SBLIMIT-1 and j = 0..63 .
438
439
440                 But for j = 0..15 is 16-j = -(16-(32-j)) which implies
441
442                 (1)             filter[i][j]  =  filter[i][32-j] .
443
444                 (We know that filter[i][16] = cos(0) = 1 , but that is not so interesting.)
445
446                 Furthermore, for j = 33..48 we get
447
448                                    filter[i][96-j]
449                                 =  cos (PI64 * (2*i+1) * (j-80))
450                                 =  cos (PI * (2*i+1) + PI64 * (2*i+1) * (16-j))
451                 (2)             =  cos (PI * (2*i+1)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (2*i+1)) = 0
452                                 =  -cos (PI64 * (2*i+1) * (16-j))
453                                 =  -filter[i][j] ,
454
455                 especially is filter[i][48] = 0 .
456
457
458                 On the other hand there is
459
460                                    filter[31-i][j]
461                                 =  cos (PI64 * (2*(31-i)+1) * (16-j))
462                                 =  cos (PI64 * (64-(2*i+1)) * (16-j))
463                                 =  cos (PI * (16-j) - PI64 * (2*i+1) * (16-j))
464                                 =  cos (PI * (16-j)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (16-j)) = 0
465                                 =  (-1)^^j * cos (PI64 * (2*i+1) * (16-j))
466                                 =  (-1)^^j * filter[i][j] .
467
468
469                 There is a third somewhat more complication "symmetry":
470
471                                    filter[15-i][j]
472                                 =  cos (PI64 * (2*(15-i)+1) * (16-j))
473                                 =  cos (PI64 * (32-(2*i+1)) * (16-j))
474                                 =  cos (PI/2 * (16-j) - PI64 * (2*i+1) * (16-j))
475                                 =  cos (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (16-j)) + sin (PI/2 * (16-j)) * sin (PI64 * (2*i+1) * (16-j))
476                                 =  cos (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (16-j)) + sin (PI/2 * (16-j)) * cos (PI64 * (2*i+1) * (-1)^^i * (16-(j+32)))
477
478                                 =  (-1)^^(  j  /2    ) * filter[i][j   ]   for even j
479                                 =  (-1)^^((j+1)/2 + i) * filter[i][j+32]   for odd  j  with  j <  32
480                                 =  (-1)^^((j-1)/2 + i) * filter[i][j-32]   for odd  j  with  j >= 32
481
482
483                 For these reasons we create a filter of the eighth size only and set
484
485                         new_filter[i][j]  :=  filter[i][j]       for i = 0..SBLIMIT/4-1 and j =  0..16
486                         new_filter[i][j]  :=  filter[i][j+16]    for i = 0..SBLIMIT/4-1 and j = 17..31
487         */
488
489         static  void create_ana_filter (double new_filter[imax][32])
490         {
491                 register int                    i, j;
492                 double                                  t;
493
494                 for (i = 0;  i < imax;  i++)
495                 {
496                         for (j = 0;  j <= 16;  j++)
497                         {
498                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * (16-j)));
499                                 if (t >= 0)
500                                         modf (t + 0.5, &t);
501                                 else
502                                         modf (t - 0.5, &t);
503                                 new_filter[i][j] = t * 1e-9;
504                         }
505                         for (j = 17;  j < 32;  j++)
506                         {
507                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * j));   /* (16-(j+16)) */
508                                 if (t >= 0)
509                                         modf (t + 0.5, &t);
510                                 else
511                                         modf (t - 0.5, &t);
512                                 new_filter[i][j] = t * 1e-9;
513                         }
514                 }
515         }
516
517         /*  ========================================================================================  */
518         /*      windowFilterSubband                                                                   */
519         /*  ========================================================================================  */
520         /*
521                 Calculates the analysis filter bank coefficients
522
523                 The windowed samples #z# is filtered by the digital filter matrix #m# to produce the
524                 subband samples #s#. This done by first selectively picking out values from the windowed
525                 samples, and then multiplying them by the filter matrix, producing 32 subband samples.
526         */
527
528         void  windowFilterSubband
529         (
530                 short                                   *pBuffer,
531                 int                                             ch,
532                 double                                  s[SBLIMIT]
533         )
534         {
535                 int                                             i, k;
536                 int                                             a;
537
538                 double                                  t, u, v, w;
539
540                 double                                  z[32];
541                 double                                  *pm;
542                 double                                  *px;
543                 double                                  *pEnw;
544
545
546                 px = x[ch] + half[ch];
547                 a = off[ch];
548
549                 /* replace 32 oldest samples with 32 new samples */
550
551                 for (i = 0;  i < 32;  i++)
552                         px[a + (31-i)*8] = (double) pBuffer[i]/*/SCALE*/;
553
554
555                 pEnw = enwindow;
556
557                 for (k = 0;  k <= 16;  k++)
558                 {
559                         t =  px[(a+0) & 7] * *pEnw++;
560                         t += px[(a+1) & 7] * *pEnw++;
561                         t += px[(a+2) & 7] * *pEnw++;
562                         t += px[(a+3) & 7] * *pEnw++;
563                         t += px[(a+4) & 7] * *pEnw++;
564                         t += px[(a+5) & 7] * *pEnw++;
565                         t += px[(a+6) & 7] * *pEnw++;
566                         t += px[(a+7) & 7] * *pEnw++;
567                         z[k] = t;
568
569                         px += 8;
570                 }                       
571
572                 for (k = 15;  k > 0;  k--)
573                 {
574                         t =  px[(a+0) & 7] * *pEnw++;
575                         t += px[(a+1) & 7] * *pEnw++;
576                         t += px[(a+2) & 7] * *pEnw++;
577                         t += px[(a+3) & 7] * *pEnw++;
578                         t += px[(a+4) & 7] * *pEnw++;
579                         t += px[(a+5) & 7] * *pEnw++;
580                         t += px[(a+6) & 7] * *pEnw++;
581                         t += px[(a+7) & 7] * *pEnw++;
582                         z[k] += t;
583
584                         px += 8;
585                 }                       
586
587                 half[ch] ^= 256;   /* toggling 0 or 256 */
588                 if (half[ch])
589                 {
590                         off[ch] = (a + 7) & 7;
591                 }
592                 else
593                 {
594                         px = x[ch];
595                         a = (a + 1) & 7;
596                 }
597
598                 /* k = 0; */
599                 {
600                         t =  px[(a+0) & 7] * *pEnw++;
601                         t += px[(a+1) & 7] * *pEnw++;
602                         t += px[(a+2) & 7] * *pEnw++;
603                         t += px[(a+3) & 7] * *pEnw++;
604                         t += px[(a+4) & 7] * *pEnw++;
605                         t += px[(a+5) & 7] * *pEnw++;
606                         t += px[(a+6) & 7] * *pEnw++;
607                         t += px[(a+7) & 7] * *pEnw++;
608                         z[k] += t;
609
610                         px += 8;
611                 }                       
612
613                 for (k = 17;  k < 32;  k++)
614                 {
615                         t =  px[(a+0) & 7] * *pEnw++;
616                         t += px[(a+1) & 7] * *pEnw++;
617                         t += px[(a+2) & 7] * *pEnw++;
618                         t += px[(a+3) & 7] * *pEnw++;
619                         t += px[(a+4) & 7] * *pEnw++;
620                         t += px[(a+5) & 7] * *pEnw++;
621                         t += px[(a+6) & 7] * *pEnw++;
622                         t += px[(a+7) & 7] * *pEnw++;
623                         z[k] = t;
624
625                         px += 8;
626                 }                       
627
628                 /* normally y[48] was computed like the other entries, */
629                 /* but this entry is not needed because m[i][48] = 0.  */
630                 px += 8;      /* But we have to increase the pointers. */
631                 pEnw += 8;
632
633                 for (k = 31;  k > 16;  k--)
634                 {
635                         t =  px[(a+0) & 7] * *pEnw++;
636                         t += px[(a+1) & 7] * *pEnw++;
637                         t += px[(a+2) & 7] * *pEnw++;
638                         t += px[(a+3) & 7] * *pEnw++;
639                         t += px[(a+4) & 7] * *pEnw++;
640                         t += px[(a+5) & 7] * *pEnw++;
641                         t += px[(a+6) & 7] * *pEnw++;
642                         t += px[(a+7) & 7] * *pEnw++;
643                         z[k] -= t;
644
645                         px += 8;
646                 }                       
647
648                 for (i = 0;  i < imax;  i++)
649                 {
650                         pm = m[i];
651
652                         t = u = v = w = 0.0;
653                         for (k = 0;  k < 16;  k += 4)
654                         {
655                                 t += pm[k   ] * z[k  ];
656                                 v += pm[k+ 2] * z[k+2];
657                                 u += pm[k+ 1] * z[k+1] + pm[k+ 3] * z[k+3];
658                                 w -= pm[k+17] * z[k+1] - pm[k+19] * z[k+3];
659                         }
660                         for (   ;  k < 32;  k += 4)
661                         {
662                                 t += pm[k   ] * z[k  ];
663                                 v += pm[k+ 2] * z[k+2];
664                                 u += pm[k+ 1] * z[k+1] + pm[k+ 3] * z[k+3];
665                                 w += pm[k-15] * z[k+1] - pm[k-13] * z[k+3];
666                         }
667
668                         s[          i] = (t + v) + u;
669                         s[SBLIMIT-1-i] = (t + v) - u;
670                         if (i & 1)
671                         {
672                                 s[SBLIMIT/2-1-i] = (t - v) - w;
673                                 s[SBLIMIT/2  +i] = (t - v) + w;
674                         }
675                         else
676                         {
677                                 s[SBLIMIT/2-1-i] = (t - v) + w;
678                                 s[SBLIMIT/2  +i] = (t - v) - w;
679                         }
680                 }        
681         }
682
683 #elif WIND_SB_CHANGE_LEVEL==2
684
685         typedef double                  filter_matrix[SBLIMIT/2][32];
686
687         static  int                             off[2];
688         static  int                             half[2];
689         static  double                  x[2][512];
690         static  filter_matrix   m;
691
692         /*  ========================================================================================  */
693         /*      create_ana_filter                                                                     */
694         /*  ========================================================================================  */
695         /*
696                 Calculates the analysis filterbank coefficients and rounds to the 9th decimal place
697                 accuracy of the filterbank tables in the ISO document.
698                 The coefficients are stored in #new_filter#
699
700
701                 Originally was computed
702
703                         filter[i][j]  :=  cos (PI64 * ((2*i+1) * (16-j)))
704
705                 for i = 0..SBLIMIT-1 and j = 0..63 .
706
707
708                 But for j = 0..15 is 16-j = -(16-(32-j)) which implies
709
710                 (1)             filter[i][j]  =  filter[i][32-j] .
711
712                 (We know that filter[i][16] = cos(0) = 1 , but that is not so interesting.)
713
714                 Furthermore, for j = 33..48 we get
715
716                                    filter[i][96-j]
717                                 =  cos (PI64 * (2*i+1) * (j-80))
718                                 =  cos (PI * (2*i+1) + PI64 * (2*i+1) * (16-j))
719                 (2)             =  cos (PI * (2*i+1)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (2*i+1)) = 0
720                                 =  -cos (PI64 * (2*i+1) * (16-j))
721                                 =  -filter[i][j] ,
722
723                 especially is filter[i][48] = 0 .
724
725
726                 On the other hand there is
727
728                                    filter[31-i][j]
729                                 =  cos (PI64 * (2*(31-i)+1) * (16-j))
730                                 =  cos (PI64 * (64-(2*i+1)) * (16-j))
731                                 =  cos (PI * (16-j) - PI64 * (2*i+1) * (16-j))
732                                 =  cos (PI * (16-j)) * cos (PI64 * (2*i+1) * (16-j))   // sin (PI * (16-j)) = 0
733                                 =  (-1)^^j * cos (PI64 * (2*i+1) * (16-j))
734                                 =  (-1)^^j * filter[i][j] .
735
736
737                 For these reasons we create a filter of the quarter size only and set
738
739                         new_filter[i][j]  :=  filter[i][j]       for i = 0..SBLIMIT/2-1 and j =  0..16
740                         new_filter[i][j]  :=  filter[i][j+16]    for i = 0..SBLIMIT/2-1 and j = 17..31
741         */
742
743         static  void create_ana_filter (double new_filter[SBLIMIT/2][32])
744         {
745                 register int                    i, j;
746                 double                                  t;
747
748                 for (i = 0;  i < SBLIMIT/2;  i++)
749                 {
750                         for (j = 0;  j < =16;  j++)
751                         {
752                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * (16-j)));
753                                 if (t >= 0)
754                                         modf (t + 0.5, &t);
755                                 else
756                                         modf (t - 0.5, &t);
757                                 new_filter[i][j] = t * 1e-9;
758                         }
759                         for (j = 17;  j < 32;  j++)
760                         {
761                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * j));   /* (16-(j+16)) */
762                                 if (t >= 0)
763                                         modf (t + 0.5, &t);
764                                 else
765                                         modf (t - 0.5, &t);
766                                 new_filter[i][j] = t * 1e-9;
767                         }
768                 }
769         }
770
771         /*  ========================================================================================  */
772         /*      windowFilterSubband                                                                   */
773         /*  ========================================================================================  */
774         /*
775                 Calculates the analysis filter bank coefficients
776
777                 The windowed samples #z# is filtered by the digital filter matrix #m# to produce the
778                 subband samples #s#. This done by first selectively picking out values from the windowed
779                 samples, and then multiplying them by the filter matrix, producing 32 subband samples.
780         */
781
782         void  windowFilterSubband
783         (
784                 short                                   *pBuffer,
785                 int                                             ch,
786                 double                                  s[SBLIMIT]
787         )
788         {
789                 int                                             i, k;
790                 int                                             a;
791
792                 double                                  t, u;
793
794                 double                                  z[32];   /* y[64]; */
795                 double                                  *pm;
796                 double                                  *px;
797                 double                                  *pEnw;
798
799
800                 px = x[ch] + half[ch];
801                 a = off[ch];
802
803                 /* replace 32 oldest samples with 32 new samples */
804
805                 for (i = 0;  i < 32;  i++)
806                         px[a + (31-i)*8] = (double) pBuffer[i] /*/SCALE*/;
807
808
809                 /*
810                         for k = 0..15 we compute
811
812                                 z[k]  :=  y[k] + y[32-k]
813
814                         and we set
815
816                                 z[16|  :=  y[16] .
817                 */
818
819                 pEnw = enwindow;
820
821                 for (k = 0;  k <= 16;  k++)   /* for (j = 0;  j < =16;  j++) */
822                 {
823                         t =  px[(a+0) & 7] * pEnw[64*0];
824                         t += px[(a+1) & 7] * pEnw[64*1];
825                         t += px[(a+2) & 7] * pEnw[64*2];
826                         t += px[(a+3) & 7] * pEnw[64*3];
827                         t += px[(a+4) & 7] * pEnw[64*4];
828                         t += px[(a+5) & 7] * pEnw[64*5];
829                         t += px[(a+6) & 7] * pEnw[64*6];
830                         t += px[(a+7) & 7] * pEnw[64*7];
831                         z[k] = t;   /* y[j] = t; */
832
833                         px += 8;
834                         pEnw++;
835                 }                       
836
837                 for (k = 15;  k > 0;  k--)   /* for (j = 17;  j < 32;  j++) */
838                 {
839                         t =  px[(a+0) & 7] * pEnw[64*0];
840                         t += px[(a+1) & 7] * pEnw[64*1];
841                         t += px[(a+2) & 7] * pEnw[64*2];
842                         t += px[(a+3) & 7] * pEnw[64*3];
843                         t += px[(a+4) & 7] * pEnw[64*4];
844                         t += px[(a+5) & 7] * pEnw[64*5];
845                         t += px[(a+6) & 7] * pEnw[64*6];
846                         t += px[(a+7) & 7] * pEnw[64*7];
847                         z[k] += t;   /* y[j] = t; */
848
849                         px += 8;
850                         pEnw++;
851                 }                       
852
853                 half[ch] ^= 256;   /* toggling 0 or 256 */
854                 if (half[ch])
855                 {
856                         off[ch] = (a + 7) & 7;   /*offset is modulo (HAN_SIZE-1)*/
857                 }
858                 else
859                 {
860                         px = x[ch];
861                         a = (a + 1) & 7;
862                 }
863
864                 /* k = 0; not needed, actual value of k is 0 */   /* j = 32; */
865                 {
866                         t =  px[(a+0) & 7] * pEnw[64*0];
867                         t += px[(a+1) & 7] * pEnw[64*1];
868                         t += px[(a+2) & 7] * pEnw[64*2];
869                         t += px[(a+3) & 7] * pEnw[64*3];
870                         t += px[(a+4) & 7] * pEnw[64*4];
871                         t += px[(a+5) & 7] * pEnw[64*5];
872                         t += px[(a+6) & 7] * pEnw[64*6];
873                         t += px[(a+7) & 7] * pEnw[64*7];
874                         z[k] += t;   /* y[j] = t; */
875
876                         px += 8;
877                         pEnw++;
878                 }                       
879
880                 /*
881                         for k = 17..31 we compute
882
883                                 z[k]  :=  y[k+16] - y[80-k]
884                 */
885
886                 for (k = 17;  k < 32;  k++)   /* for (j = 33;  j < 47;  j++) */
887                 {
888                         t =  px[(a+0) & 7] * pEnw[64*0];
889                         t += px[(a+1) & 7] * pEnw[64*1];
890                         t += px[(a+2) & 7] * pEnw[64*2];
891                         t += px[(a+3) & 7] * pEnw[64*3];
892                         t += px[(a+4) & 7] * pEnw[64*4];
893                         t += px[(a+5) & 7] * pEnw[64*5];
894                         t += px[(a+6) & 7] * pEnw[64*6];
895                         t += px[(a+7) & 7] * pEnw[64*7];
896                         z[k] = t;   /* y[j] = t; */
897
898                         px += 8;
899                         pEnw++;
900                 }                       
901
902                 /* normally y[48] was computed like the other entries, */
903                 /* but this entry is not needed because m[i][48] = 0.  */
904                 px += 8;      /* But we have to increase the pointers. */
905                 pEnw++;
906
907                 for (k = 31;  k > 16;  k--)   /* for (j = 49;  j < 64;  j++) */
908                 {
909                         t =  px[(a+0) & 7] * pEnw[64*0];
910                         t += px[(a+1) & 7] * pEnw[64*1];
911                         t += px[(a+2) & 7] * pEnw[64*2];
912                         t += px[(a+3) & 7] * pEnw[64*3];
913                         t += px[(a+4) & 7] * pEnw[64*4];
914                         t += px[(a+5) & 7] * pEnw[64*5];
915                         t += px[(a+6) & 7] * pEnw[64*6];
916                         t += px[(a+7) & 7] * pEnw[64*7];
917                         z[k] -= t;   /* y[j] = t; */
918
919                         px += 8;
920                         pEnw++;
921                 }                       
922
923                 pm = m[0];                           /* pm = m[0];                      */
924                 for (i = 0;  i < SBLIMIT/2;  i++)    /* for (i = 0;  i < SBLIMIT;  i++) */
925                 {
926
927                         t = u = 0.0;                     /*    t = 0.0;                     */
928                         for (k = 0;  k < 32; )           /*    for (j = 0;  j < 64;  j++)   */
929                         {
930                                 t += *pm++ * z[k++];         /*       t += *pm++ * y[j];        */
931                                 u += *pm++ * z[k++];
932                         }
933
934                         s[          i] = t + u;          /*    s[i] = t;                    */
935                         s[SBLIMIT-1-i] = t - u;
936                 }        
937         }
938
939 #elif WIND_SB_CHANGE_LEVEL==1
940
941         typedef double                  filter_matrix[SBLIMIT][64];
942
943         static  int                             off[2];
944         static  int                             half[2];
945         static  double                  x[2][512];
946         static  filter_matrix   m;
947
948         /*  ========================================================================================  */
949         /*      create_ana_filter                                                                     */
950         /*  ========================================================================================  */
951         /*
952                 Calculates the analysis filterbank coefficients and rounds to the 9th decimal place
953                 accuracy of the filterbank tables in the ISO document.
954                 The coefficients are stored in #filter#
955         */
956
957         static  void create_ana_filter (filter_matrix filter)
958         {
959                 register int                    i, j;
960                 double                                  t;
961
962                 for (i = 0;  i < SBLIMIT;  i++)
963                 {
964                         for (j = 0;  j < 64;  j++)
965                         {
966                                 t = 1e9 * cos (PI64 * (double) ((2*i+1) * (16-j)));
967                                 if (t >= 0)
968                                         modf (t + 0.5, &t);
969                                 else
970                                         modf (t - 0.5, &t);
971                                 filter[i][j] = t * 1e-9;
972                         }
973                 }
974         }
975
976         /*  ========================================================================================  */
977         /*      windowFilterSubband                                                                   */
978         /*  ========================================================================================  */
979         /*
980                 Calculates the analysis filter bank coefficients
981
982                 The windowed samples #y# is filtered by the digital filter matrix #m# to produce the
983                 subband samples #s#. This done by first selectively picking out values from the windowed
984                 samples, and then multiplying them by the filter matrix, producing 32 subband samples.
985         */
986
987         void  windowFilterSubband
988         (
989                 short                                   *pBuffer,
990                 int                                             ch,
991                 double                                  s[SBLIMIT]
992         )
993         {
994                 int                                             i, j;
995                 int                                             a;
996
997                 double                                  t;
998
999                 double                                  y[64];
1000                 double                                  *pm;
1001                 double                                  *px;
1002                 double                                  *pEnw;
1003
1004
1005                 px = x[ch] + half[ch];
1006                 a = off[ch];
1007
1008                 /* replace 32 oldest samples with 32 new samples */
1009
1010                 for (i = 0;  i < 32;  i++)
1011                         px[a + (31-i)*8] = (double) pBuffer[i] /*/SCALE*/;
1012
1013
1014                 pEnw = enwindow;
1015
1016                 for (j = 0;  j < 32;  j++)
1017                 {
1018                         t =  px[(a+0) & 7] * pEnw[64*0];
1019                         t += px[(a+1) & 7] * pEnw[64*1];
1020                         t += px[(a+2) & 7] * pEnw[64*2];
1021                         t += px[(a+3) & 7] * pEnw[64*3];
1022                         t += px[(a+4) & 7] * pEnw[64*4];
1023                         t += px[(a+5) & 7] * pEnw[64*5];
1024                         t += px[(a+6) & 7] * pEnw[64*6];
1025                         t += px[(a+7) & 7] * pEnw[64*7];
1026                         y[j] = t;
1027
1028                         px += 8;
1029                         pEnw++;
1030                 }                       
1031
1032                 half[ch] ^= 256;   /* toggling 0 or 256 */
1033                 if (half[ch])
1034                 {
1035                         off[ch] = (a + 7) & 7;   /*offset is modulo (HAN_SIZE-1)*/
1036                 }
1037                 else
1038                 {
1039                         px = x[ch];
1040                         a = (a + 1) & 7;
1041                 }
1042
1043                 for (j = 32;  j < 64;  j++)
1044                 {
1045                         t =  px[(a+0) & 7] * pEnw[64*0];
1046                         t += px[(a+1) & 7] * pEnw[64*1];
1047                         t += px[(a+2) & 7] * pEnw[64*2];
1048                         t += px[(a+3) & 7] * pEnw[64*3];
1049                         t += px[(a+4) & 7] * pEnw[64*4];
1050                         t += px[(a+5) & 7] * pEnw[64*5];
1051                         t += px[(a+6) & 7] * pEnw[64*6];
1052                         t += px[(a+7) & 7] * pEnw[64*7];
1053                         y[j] = t;
1054
1055                         px += 8;
1056                         pEnw++;
1057                 }                       
1058
1059                 pm = m[0];
1060                 for (i = 0;  i < SBLIMIT;  i++)
1061                 {
1062                         t = 0.0;
1063                         for (j = 0;  j < 64;  j++)
1064                                 t += *pm++ * y[j];
1065
1066                         s[i] = t;
1067                 }        
1068         }
1069
1070 #endif
1071
1072
1073
1074
1075
1076 /*  ========================================================================================  */
1077 /*      initWindowFilterSubband                                                               */
1078 /*  ========================================================================================  */
1079
1080 void initWindowFilterSubband (void)
1081 {
1082         static  int                             initialized = 0;
1083
1084         int                                             i;
1085
1086         if (!initialized)
1087         {
1088                 create_ana_filter (m);
1089
1090                 for (i = 0;  i < 512;  i++)
1091                         enwindow[i] /= SCALE;
1092
1093                 initialized = 1;
1094         }
1095
1096 #if ORG_BUFFERS
1097         half[0] = half[1] = 0;
1098         off[0] = off[1] = 0;
1099         memset (x, 0, sizeof(x));
1100 #endif
1101 }
1102
1103
1104
1105
1106