lib/lame/quantize_pvt.c

   1 /*
   2  *      quantize_pvt source file
   3  *
   4  *      Copyright (c) 1999 Takehiro TOMINAGA
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, write to the
  18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19  * Boston, MA 02111-1307, USA.
  20  */
  21
  22 /* $Id: quantize_pvt.c,v 1.1 2002/04/28 17:30:26 kramm Exp $ */
  23 #include "config_static.h"
  24
  25 #include <assert.h>
  26 #include "util.h"
  27 #include "lame-analysis.h"
  28 #include "tables.h"
  29 #include "reservoir.h"
  30 #include "quantize_pvt.h"
  31
  32 #ifdef WITH_DMALLOC
  33 #include <dmalloc.h>
  34 #endif
  35
  36
  37 #define NSATHSCALE 100 // Assuming dynamic range=96dB, this value should be 92
  38
  39 const char  slen1_tab [16] = { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 };
  40 const char  slen2_tab [16] = { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 };
  41
  42
  43 /*
  44   The following table is used to implement the scalefactor
  45   partitioning for MPEG2 as described in section
  46   2.4.3.2 of the IS. The indexing corresponds to the
  47   way the tables are presented in the IS:
  48
  49   [table_number][row_in_table][column of nr_of_sfb]
  50 */
  51 const int  nr_of_sfb_block [6] [3] [4] =
  52 {
  53   {
  54     {6, 5, 5, 5},
  55     {9, 9, 9, 9},
  56     {6, 9, 9, 9}
  57   },
  58   {
  59     {6, 5, 7, 3},
  60     {9, 9, 12, 6},
  61     {6, 9, 12, 6}
  62   },
  63   {
  64     {11, 10, 0, 0},
  65     {18, 18, 0, 0},
  66     {15,18,0,0}
  67   },
  68   {
  69     {7, 7, 7, 0},
  70     {12, 12, 12, 0},
  71     {6, 15, 12, 0}
  72   },
  73   {
  74     {6, 6, 6, 3},
  75     {12, 9, 9, 6},
  76     {6, 12, 9, 6}
  77   },
  78   {
  79     {8, 8, 5, 0},
  80     {15,12,9,0},
  81     {6,18,9,0}
  82   }
  83 };
  84
  85
  86 /* Table B.6: layer3 preemphasis */
  87 const char  pretab [SBMAX_l] =
  88 {
  89     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  90     1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 0
  91 };
  92
  93 /*
  94   Here are MPEG1 Table B.8 and MPEG2 Table B.1
  95   -- Layer III scalefactor bands.
  96   Index into this using a method such as:
  97     idx  = fr_ps->header->sampling_frequency
  98            + (fr_ps->header->version * 3)
  99 */
 100
 101
 102 const scalefac_struct sfBandIndex[9] =
 103 {
 104   { /* Table B.2.b: 22.05 kHz */
 105     {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
 106     {0,4,8,12,18,24,32,42,56,74,100,132,174,192}
 107   },
 108   { /* Table B.2.c: 24 kHz */                 /* docs: 332. mpg123(broken): 330 */
 109     {0,6,12,18,24,30,36,44,54,66,80,96,114,136,162,194,232,278, 332, 394,464,540,576},
 110     {0,4,8,12,18,26,36,48,62,80,104,136,180,192}
 111   },
 112   { /* Table B.2.a: 16 kHz */
 113     {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
 114     {0,4,8,12,18,26,36,48,62,80,104,134,174,192}
 115   },
 116   { /* Table B.8.b: 44.1 kHz */
 117     {0,4,8,12,16,20,24,30,36,44,52,62,74,90,110,134,162,196,238,288,342,418,576},
 118     {0,4,8,12,16,22,30,40,52,66,84,106,136,192}
 119   },
 120   { /* Table B.8.c: 48 kHz */
 121     {0,4,8,12,16,20,24,30,36,42,50,60,72,88,106,128,156,190,230,276,330,384,576},
 122     {0,4,8,12,16,22,28,38,50,64,80,100,126,192}
 123   },
 124   { /* Table B.8.a: 32 kHz */
 125     {0,4,8,12,16,20,24,30,36,44,54,66,82,102,126,156,194,240,296,364,448,550,576},
 126     {0,4,8,12,16,22,30,42,58,78,104,138,180,192}
 127   },
 128   { /* MPEG-2.5 11.025 kHz */
 129     {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
 130     {0/3,12/3,24/3,36/3,54/3,78/3,108/3,144/3,186/3,240/3,312/3,402/3,522/3,576/3}
 131   },
 132   { /* MPEG-2.5 12 kHz */
 133     {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
 134     {0/3,12/3,24/3,36/3,54/3,78/3,108/3,144/3,186/3,240/3,312/3,402/3,522/3,576/3}
 135   },
 136   { /* MPEG-2.5 8 kHz */
 137     {0,12,24,36,48,60,72,88,108,132,160,192,232,280,336,400,476,566,568,570,572,574,576},
 138     {0/3,24/3,48/3,72/3,108/3,156/3,216/3,288/3,372/3,480/3,486/3,492/3,498/3,576/3}
 139   }
 140 };
 141
 142
 143
 144 FLOAT8 pow20[Q_MAX];
 145 FLOAT8 ipow20[Q_MAX];
 146 FLOAT8 iipow20[Q_MAX];
 147 FLOAT8 *iipow20_;
 148 FLOAT8 pow43[PRECALC_SIZE];
 149 /* initialized in first call to iteration_init */
 150 FLOAT8 adj43asm[PRECALC_SIZE];
 151 FLOAT8 adj43[PRECALC_SIZE];
 152
 153 /************************************************************************/
 154 /*  initialization for iteration_loop */
 155 /************************************************************************/
 156 void
 157 iteration_init( lame_global_flags *gfp)
 158 {
 159   lame_internal_flags *gfc=gfp->internal_flags;
 160   III_side_info_t * const l3_side = &gfc->l3_side;
 161   int i;
 162
 163   if ( gfc->iteration_init_init==0 ) {
 164     gfc->iteration_init_init=1;
 165
 166     l3_side->main_data_begin = 0;
 167     compute_ath(gfp,gfc->ATH->l,gfc->ATH->s);
 168
 169     pow43[0] = 0.0;
 170     for(i=1;i<PRECALC_SIZE;i++)
 171         pow43[i] = pow((FLOAT8)i, 4.0/3.0);
 172
 173     adj43asm[0] = 0.0;
 174     for (i = 1; i < PRECALC_SIZE; i++)
 175       adj43asm[i] = i - 0.5 - pow(0.5 * (pow43[i - 1] + pow43[i]),0.75);
 176     for (i = 0; i < PRECALC_SIZE-1; i++)
 177         adj43[i] = (i + 1) - pow(0.5 * (pow43[i] + pow43[i + 1]), 0.75);
 178     adj43[i] = 0.5;
 179     iipow20_ = &iipow20[210];
 180     for (i = 0; i < Q_MAX; i++) {
 181         iipow20[i] = pow(2.0, (double)(i - 210) * 0.1875);
 182         ipow20[i] = pow(2.0, (double)(i - 210) * -0.1875);
 183         pow20[i] = pow(2.0, (double)(i - 210) * 0.25);
 184     }
 185     huffman_init(gfc);
 186   }
 187 }
 188
 189
 190
 191
 192
 193 /*
 194 compute the ATH for each scalefactor band
 195 cd range:  0..96db
 196
 197 Input:  3.3kHz signal  32767 amplitude  (3.3kHz is where ATH is smallest = -5db)
 198 longblocks:  sfb=12   en0/bw=-11db    max_en0 = 1.3db
 199 shortblocks: sfb=5           -9db              0db
 200
 201 Input:  1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 (repeated)
 202 longblocks:  amp=1      sfb=12   en0/bw=-103 db      max_en0 = -92db
 203             amp=32767   sfb=12           -12 db                 -1.4db
 204
 205 Input:  1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 (repeated)
 206 shortblocks: amp=1      sfb=5   en0/bw= -99                    -86
 207             amp=32767   sfb=5           -9  db                  4db
 208
 209
 210 MAX energy of largest wave at 3.3kHz = 1db
 211 AVE energy of largest wave at 3.3kHz = -11db
 212 Let's take AVE:  -11db = maximum signal in sfb=12.
 213 Dynamic range of CD: 96db.  Therefor energy of smallest audible wave
 214 in sfb=12  = -11  - 96 = -107db = ATH at 3.3kHz.
 215
 216 ATH formula for this wave: -5db.  To adjust to LAME scaling, we need
 217 ATH = ATH_formula  - 103  (db)
 218 ATH = ATH * 2.5e-10      (ener)
 219
 220 */
 221
 222 FLOAT8 ATHmdct( lame_global_flags *gfp, FLOAT8 f )
 223 {
 224     lame_internal_flags *gfc = gfp->internal_flags;
 225     FLOAT8 ath;
 226
 227     ath = ATHformula( f , gfp );
 228
 229     if (gfc->nsPsy.use) {
 230         ath -= NSATHSCALE;
 231     } else {
 232         ath -= 114;
 233     }
 234
 235     /*  modify the MDCT scaling for the ATH  */
 236     ath -= gfp->ATHlower;
 237
 238     /* convert to energy */
 239     ath = pow( 10.0, ath/10.0 );
 240     return ath;
 241 }
 242
 243
 244 void compute_ath( lame_global_flags *gfp, FLOAT8 ATH_l[], FLOAT8 ATH_s[] )
 245 {
 246     lame_internal_flags *gfc = gfp->internal_flags;
 247     int sfb, i, start, end;
 248     FLOAT8 ATH_f;
 249     FLOAT8 samp_freq = gfp->out_samplerate;
 250
 251     for (sfb = 0; sfb < SBMAX_l; sfb++) {
 252         start = gfc->scalefac_band.l[ sfb ];
 253         end   = gfc->scalefac_band.l[ sfb+1 ];
 254         ATH_l[sfb]=FLOAT8_MAX;
 255         for (i = start ; i < end; i++) {
 256             FLOAT8 freq = i*samp_freq/(2*576);
 257             ATH_f = ATHmdct( gfp, freq );  /* freq in kHz */
 258             ATH_l[sfb] = Min( ATH_l[sfb], ATH_f );
 259         }
 260     }
 261
 262     for (sfb = 0; sfb < SBMAX_s; sfb++){
 263         start = gfc->scalefac_band.s[ sfb ];
 264         end   = gfc->scalefac_band.s[ sfb+1 ];
 265         ATH_s[sfb] = FLOAT8_MAX;
 266         for (i = start ; i < end; i++) {
 267             FLOAT8 freq = i*samp_freq/(2*192);
 268             ATH_f = ATHmdct( gfp, freq );    /* freq in kHz */
 269             ATH_s[sfb] = Min( ATH_s[sfb], ATH_f );
 270         }
 271     }
 272
 273     /*  no-ATH mode:
 274      *  reduce ATH to -200 dB
 275      */
 276
 277     if (gfp->noATH) {
 278         for (sfb = 0; sfb < SBMAX_l; sfb++) {
 279             ATH_l[sfb] = 1E-37;
 280         }
 281         for (sfb = 0; sfb < SBMAX_s; sfb++) {
 282             ATH_s[sfb] = 1E-37;
 283         }
 284     }
 285
 286     /*  work in progress, don't rely on it too much
 287      */
 288     gfc->ATH-> floor = 10. * log10( ATHmdct( gfp, -1. ) );
 289
 290     /*
 291     {   FLOAT8 g=10000, t=1e30, x;
 292         for ( f = 100; f < 10000; f++ ) {
 293             x = ATHmdct( gfp, f );
 294             if ( t > x ) t = x, g = f;
 295         }
 296         printf("min=%g\n", g);
 297     }*/
 298 }
 299
 300
 301
 302
 303
 304 /* convert from L/R <-> Mid/Side, src == dst allowed */
 305 void ms_convert(FLOAT8 dst[2][576], FLOAT8 src[2][576])
 306 {
 307     FLOAT8 l;
 308     FLOAT8 r;
 309     int i;
 310     for (i = 0; i < 576; ++i) {
 311         l = src[0][i];
 312         r = src[1][i];
 313         dst[0][i] = (l+r) * (FLOAT8)(SQRT2*0.5);
 314         dst[1][i] = (l-r) * (FLOAT8)(SQRT2*0.5);
 315     }
 316 }
 317
 318
 319
 320 /************************************************************************
 321  * allocate bits among 2 channels based on PE
 322  * mt 6/99
 323  * bugfixes rh 8/01: often allocated more than the allowed 4095 bits
 324  ************************************************************************/
 325 int on_pe( lame_global_flags *gfp, FLOAT8 pe[][2], III_side_info_t *l3_side,
 326            int targ_bits[2], int mean_bits, int gr )
 327 {
 328     lame_internal_flags * gfc = gfp->internal_flags;
 329     gr_info *   cod_info;
 330     int     extra_bits, tbits, bits;
 331     int     add_bits[2];
 332     int     max_bits;  /* maximum allowed bits for this granule */
 333     int     ch;
 334
 335     /* allocate targ_bits for granule */
 336     ResvMaxBits( gfp, mean_bits, &tbits, &extra_bits );
 337     max_bits = tbits + extra_bits;
 338     if (max_bits > MAX_BITS) /* hard limit per granule */
 339         max_bits = MAX_BITS;
 340
 341     for ( bits = 0, ch = 0; ch < gfc->channels_out; ++ch ) {
 342         /******************************************************************
 343          * allocate bits for each channel
 344          ******************************************************************/
 345         cod_info = &l3_side->gr[gr].ch[ch].tt;
 346
 347         targ_bits[ch] = Min( MAX_BITS, tbits/gfc->channels_out );
 348
 349         if (gfc->nsPsy.use) {
 350             add_bits[ch] = targ_bits[ch] * pe[gr][ch] / 700.0 - targ_bits[ch];
 351         }
 352         else {
 353             add_bits[ch] = (pe[gr][ch]-750) / 1.4;
 354             /* short blocks us a little extra, no matter what the pe */
 355             if ( cod_info->block_type == SHORT_TYPE ) {
 356                 if (add_bits[ch] < mean_bits/4)
 357                     add_bits[ch] = mean_bits/4;
 358             }
 359         }
 360
 361         /* at most increase bits by 1.5*average */
 362         if (add_bits[ch] > mean_bits*3/4)
 363             add_bits[ch] = mean_bits*3/4;
 364         if (add_bits[ch] < 0)
 365             add_bits[ch] = 0;
 366
 367         if (add_bits[ch]+targ_bits[ch] > MAX_BITS)
 368             add_bits[ch] = Max( 0, MAX_BITS-targ_bits[ch] );
 369
 370         bits += add_bits[ch];
 371     }
 372     if (bits > extra_bits) {
 373         for ( ch = 0; ch < gfc->channels_out; ++ch ) {
 374             add_bits[ch] = extra_bits * add_bits[ch] / bits;
 375         }
 376     }
 377
 378     for ( ch = 0; ch < gfc->channels_out; ++ch ) {
 379         targ_bits[ch] += add_bits[ch];
 380         extra_bits    -= add_bits[ch];
 381         assert( targ_bits[ch] <= MAX_BITS );
 382     }
 383     assert( max_bits <= MAX_BITS );
 384     return max_bits;
 385 }
 386
 387
 388
 389
 390 void reduce_side(int targ_bits[2],FLOAT8 ms_ener_ratio,int mean_bits,int max_bits)
 391 {
 392   int move_bits;
 393   FLOAT fac;
 394
 395
 396   /*  ms_ener_ratio = 0:  allocate 66/33  mid/side  fac=.33
 397    *  ms_ener_ratio =.5:  allocate 50/50 mid/side   fac= 0 */
 398   /* 75/25 split is fac=.5 */
 399   /* float fac = .50*(.5-ms_ener_ratio[gr])/.5;*/
 400   fac = .33*(.5-ms_ener_ratio)/.5;
 401   if (fac<0) fac=0;
 402   if (fac>.5) fac=.5;
 403
 404     /* number of bits to move from side channel to mid channel */
 405     /*    move_bits = fac*targ_bits[1];  */
 406     move_bits = fac*.5*(targ_bits[0]+targ_bits[1]);
 407
 408     if (move_bits > MAX_BITS - targ_bits[0]) {
 409         move_bits = MAX_BITS - targ_bits[0];
 410     }
 411     if (move_bits<0) move_bits=0;
 412
 413     if (targ_bits[1] >= 125) {
 414       /* dont reduce side channel below 125 bits */
 415       if (targ_bits[1]-move_bits > 125) {
 416
 417         /* if mid channel already has 2x more than average, dont bother */
 418         /* mean_bits = bits per granule (for both channels) */
 419         if (targ_bits[0] < mean_bits)
 420           targ_bits[0] += move_bits;
 421         targ_bits[1] -= move_bits;
 422       } else {
 423         targ_bits[0] += targ_bits[1] - 125;
 424         targ_bits[1] = 125;
 425       }
 426     }
 427
 428     move_bits=targ_bits[0]+targ_bits[1];
 429     if (move_bits > max_bits) {
 430       targ_bits[0]=(max_bits*targ_bits[0])/move_bits;
 431       targ_bits[1]=(max_bits*targ_bits[1])/move_bits;
 432     }
 433     assert (targ_bits[0] <= MAX_BITS);
 434     assert (targ_bits[1] <= MAX_BITS);
 435 }
 436
 437
 438 /**
 439  *  Robert Hegemann 2001-04-27:
 440  *  this adjusts the ATH, keeping the original noise floor
 441  *  affects the higher frequencies more than the lower ones
 442  */
 443
 444 FLOAT8 athAdjust( FLOAT8 a, FLOAT8 x, FLOAT8 athFloor )
 445 {
 446     /*  work in progress
 447      */
 448     FLOAT8 const o = 90.30873362;
 449     FLOAT8 const p = 94.82444863;
 450     FLOAT8 u = 10. * log10(x);
 451     FLOAT8 v = a*a;
 452     FLOAT8 w = 0.0;
 453     u -= athFloor;                                  // undo scaling
 454     if ( v > 1E-20 ) w = 1. + 10. * log10(v) / o;
 455     if ( w < 0  )    w = 0.;
 456     u *= w;
 457     u += athFloor + o-p;                            // redo scaling
 458
 459     return pow( 10., 0.1*u );
 460 }
 461
 462
 463 /*************************************************************************/
 464 /*            calc_xmin                                                  */
 465 /*************************************************************************/
 466
 467 /*
 468   Calculate the allowed distortion for each scalefactor band,
 469   as determined by the psychoacoustic model.
 470   xmin(sb) = ratio(sb) * en(sb) / bw(sb)
 471
 472   returns number of sfb's with energy > ATH
 473 */
 474 int calc_xmin(
 475         lame_global_flags *gfp,
 476         const FLOAT8                xr [576],
 477         const III_psy_ratio * const ratio,
 478         const gr_info       * const cod_info,
 479               III_psy_xmin  * const l3_xmin )
 480 {
 481     lame_internal_flags *gfc = gfp->internal_flags;
 482     int sfb,j,start, end, bw,l, b, ath_over=0;
 483     FLOAT8 en0, xmin, ener, tmpATH;
 484     ATH_t * ATH = gfc->ATH;
 485
 486     if (cod_info->block_type == SHORT_TYPE) {
 487
 488         for ( j = 0, sfb = 0; sfb < SBMAX_s; sfb++ ) {
 489             if ( gfp->VBR == vbr_rh || gfp->VBR == vbr_mtrh )
 490                 tmpATH = athAdjust( ATH->adjust, ATH->s[sfb], ATH->floor );
 491             else
 492                 tmpATH = ATH->adjust * ATH->s[sfb];
 493             start = gfc->scalefac_band.s[ sfb ];
 494             end   = gfc->scalefac_band.s[ sfb + 1 ];
 495             bw = end - start;
 496
 497             for ( b = 0; b < 3; b++ ) {
 498                 for (en0 = 0.0, l = start; l < end; l++) {
 499                     ener = xr[j++];
 500                     ener = ener * ener;
 501                     en0 += ener;
 502                 }
 503                 en0 /= bw;
 504
 505                 if (gfp->ATHonly || gfp->ATHshort) {
 506                     xmin = tmpATH;
 507                 }
 508                 else {
 509                     xmin = ratio->en.s[sfb][b];
 510                     if (xmin > 0.0)
 511                         xmin = en0 * ratio->thm.s[sfb][b] * gfc->masking_lower / xmin;
 512                     if (xmin < tmpATH)
 513                         xmin = tmpATH;
 514                 }
 515                 xmin *= bw;
 516
 517                 if (gfc->nsPsy.use) {
 518                     if      (sfb <=  5) xmin *= gfc->nsPsy.bass;
 519                     else if (sfb <= 10) xmin *= gfc->nsPsy.alto;
 520                     else                xmin *= gfc->nsPsy.treble;
 521                     if ((gfp->VBR == vbr_off || gfp->VBR == vbr_abr) && gfp->quality <= 1)
 522                         xmin *= 0.001;
 523                 }
 524                 l3_xmin->s[sfb][b] = xmin;
 525                 if (en0 > tmpATH) ath_over++;
 526             }   /* b */
 527         }   /* sfb */
 528
 529         if (gfp->useTemporal) {
 530             for (sfb = 0; sfb < SBMAX_s; sfb++ ) {
 531                 for ( b = 1; b < 3; b++ ) {
 532                     xmin = l3_xmin->s[sfb][b] * (1.0 - gfc->decay)
 533                          + l3_xmin->s[sfb][b-1] * gfc->decay;
 534                     if (l3_xmin->s[sfb][b] < xmin)
 535                         l3_xmin->s[sfb][b] = xmin;
 536                 }
 537             }
 538         }
 539
 540     }   /* end of short block case */
 541     else {
 542
 543         for ( sfb = 0; sfb < SBMAX_l; sfb++ ){
 544             if ( gfp->VBR == vbr_rh || gfp->VBR == vbr_mtrh )
 545                 tmpATH = athAdjust( ATH->adjust, ATH->l[sfb], ATH->floor );
 546             else
 547                 tmpATH = ATH->adjust * ATH->l[sfb];
 548             start = gfc->scalefac_band.l[ sfb ];
 549             end   = gfc->scalefac_band.l[ sfb+1 ];
 550             bw = end - start;
 551
 552             for (en0 = 0.0, l = start; l < end; l++ ) {
 553                 ener = xr[l] * xr[l];
 554                 en0 += ener;
 555             }
 556             /* why is it different from short blocks <?> */
 557             if ( !gfc->nsPsy.use ) en0 /= bw;
 558
 559             if (gfp->ATHonly) {
 560                 xmin = tmpATH;
 561             }
 562             else {
 563                 xmin = ratio->en.l[sfb];
 564                 if (xmin > 0.0)
 565                     xmin = en0 * ratio->thm.l[sfb] * gfc->masking_lower / xmin;
 566                 if (xmin < tmpATH)
 567                     xmin = tmpATH;
 568             }
 569             /* why is it different from short blocks <?> */
 570             if ( !gfc->nsPsy.use ) {
 571                 xmin *= bw;
 572             }
 573             else {
 574                 if      (sfb <=  6) xmin *= gfc->nsPsy.bass;
 575                 else if (sfb <= 13) xmin *= gfc->nsPsy.alto;
 576                 else if (sfb <= 20) xmin *= gfc->nsPsy.treble;
 577                 else                xmin *= gfc->nsPsy.sfb21;
 578                 if ((gfp->VBR == vbr_off || gfp->VBR == vbr_abr) && gfp->quality <= 1)
 579                     xmin *= 0.001;
 580             }
 581             l3_xmin->l[sfb] = xmin;
 582             if (en0 > tmpATH) ath_over++;
 583         }   /* sfb */
 584
 585     }   /* end of long block case */
 586
 587     return ath_over;
 588 }
 589
 590
 591
 592
 593
 594
 595
 596 /*************************************************************************/
 597 /*            calc_noise                                                 */
 598 /*************************************************************************/
 599
 600 // -oo dB  =>  -1.00
 601 // - 6 dB  =>  -0.97
 602 // - 3 dB  =>  -0.80
 603 // - 2 dB  =>  -0.64
 604 // - 1 dB  =>  -0.38
 605 //   0 dB  =>   0.00
 606 // + 1 dB  =>  +0.49
 607 // + 2 dB  =>  +1.06
 608 // + 3 dB  =>  +1.68
 609 // + 6 dB  =>  +3.69
 610 // +10 dB  =>  +6.45
 611
 612 double penalties ( double noise )
 613 {
 614     return log ( 0.368 + 0.632 * noise * noise * noise );
 615 }
 616
 617 /*  mt 5/99:  Function: Improved calc_noise for a single channel   */
 618
 619 int  calc_noise(
 620         const lame_internal_flags           * const gfc,
 621         const FLOAT8                    xr [576],
 622         const int                       ix [576],
 623         const gr_info           * const cod_info,
 624         const III_psy_xmin      * const l3_xmin,
 625         const III_scalefac_t    * const scalefac,
 626               III_psy_xmin      * xfsf,
 627               calc_noise_result * const res )
 628 {
 629   int sfb,start, end, j,l, i, over=0;
 630   FLOAT8 sum;
 631
 632   int count=0;
 633   FLOAT8 noise,noise_db;
 634   FLOAT8 over_noise_db = 0;
 635   FLOAT8 tot_noise_db  = 0;     /*    0 dB relative to masking */
 636   FLOAT8 max_noise  = 1E-20; /* -200 dB relative to masking */
 637   double klemm_noise = 1E-37;
 638
 639   if (cod_info->block_type == SHORT_TYPE) {
 640     int max_index = gfc->sfb21_extra ? SBMAX_s : SBPSY_s;
 641
 642     for ( j=0, sfb = 0; sfb < max_index; sfb++ ) {
 643          start = gfc->scalefac_band.s[ sfb ];
 644          end   = gfc->scalefac_band.s[ sfb+1 ];
 645          for ( i = 0; i < 3; i++ ) {
 646             FLOAT8 step;
 647             int s;
 648
 649             s = (scalefac->s[sfb][i] << (cod_info->scalefac_scale + 1))
 650                 + cod_info->subblock_gain[i] * 8;
 651             s = cod_info->global_gain - s;
 652
 653             assert(s<Q_MAX);
 654             assert(s>=0);
 655             step = POW20(s);
 656             sum = 0.0;
 657             l = start;
 658             do {
 659               FLOAT8 temp;
 660               temp = pow43[ix[j]];
 661               temp *= step;
 662               temp -= fabs(xr[j]);
 663               ++j;
 664               sum += temp * temp;
 665               l++;
 666             } while (l < end);
 667             noise = xfsf->s[sfb][i]  = sum / l3_xmin->s[sfb][i];
 668
 669             max_noise    = Max(max_noise,noise);
 670             klemm_noise += penalties (noise);
 671
 672             noise_db=10*log10(Max(noise,1E-20));
 673             tot_noise_db += noise_db;
 674
 675             if (noise > 1) {
 676                 over++;
 677                 over_noise_db += noise_db;
 678             }
 679             count++;
 680         }
 681     }
 682   }else{ /* cod_info->block_type == SHORT_TYPE */
 683     int max_index = gfc->sfb21_extra ? SBMAX_l : SBPSY_l;
 684
 685     for ( sfb = 0; sfb < max_index; sfb++ ) {
 686         FLOAT8 step;
 687         int s = scalefac->l[sfb];
 688
 689         if (cod_info->preflag)
 690             s += pretab[sfb];
 691
 692         s = cod_info->global_gain - (s << (cod_info->scalefac_scale + 1));
 693         assert(s<Q_MAX);
 694         assert(s>=0);
 695         step = POW20(s);
 696
 697         start = gfc->scalefac_band.l[ sfb ];
 698         end   = gfc->scalefac_band.l[ sfb+1 ];
 699
 700         for ( sum = 0.0, l = start; l < end; l++ ) {
 701           FLOAT8 temp;
 702           temp = fabs(xr[l]) - pow43[ix[l]] * step;
 703           sum += temp * temp;
 704         }
 705         noise = xfsf->l[sfb] = sum / l3_xmin->l[sfb];
 706         max_noise=Max(max_noise,noise);
 707         klemm_noise += penalties (noise);
 708
 709         noise_db=10*log10(Max(noise,1E-20));
 710         /* multiplying here is adding in dB, but can overflow */
 711         //tot_noise *= Max(noise, 1E-20);
 712         tot_noise_db += noise_db;
 713
 714         if (noise > 1) {
 715           over++;
 716           /* multiplying here is adding in dB -but can overflow */
 717           //over_noise *= noise;
 718           over_noise_db += noise_db;
 719         }
 720
 721         count++;
 722     }
 723   } /* cod_info->block_type == SHORT_TYPE */
 724
 725   /* normalization at this point by "count" is not necessary, since
 726    * the values are only used to compare with previous values */
 727   res->over_count = over;
 728
 729   /* convert to db. DO NOT CHANGE THESE */
 730   /* tot_noise = is really the average over each sfb of:
 731      [noise(db) - allowed_noise(db)]
 732
 733      and over_noise is the same average, only over only the
 734      bands with noise > allowed_noise.
 735
 736   */
 737
 738   //res->tot_noise   = 10.*log10(Max(1e-20,tot_noise ));
 739   //res->over_noise  = 10.*log10(Max(1e+00,over_noise));
 740   res->tot_noise   = tot_noise_db;
 741   res->over_noise  = over_noise_db;
 742   res->max_noise   = 10.*log10(Max(1e-20,max_noise ));
 743   res->klemm_noise = 10.*log10(Max(1e-20,klemm_noise));
 744
 745   return over;
 746 }
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761 /************************************************************************
 762  *
 763  *  set_pinfo()
 764  *
 765  *  updates plotting data
 766  *
 767  *  Mark Taylor 2000-??-??
 768  *
 769  *  Robert Hegemann: moved noise/distortion calc into it
 770  *
 771  ************************************************************************/
 772
 773 static
 774 void set_pinfo (
 775         lame_global_flags *gfp,
 776         const gr_info        * const cod_info,
 777         const III_psy_ratio  * const ratio,
 778         const III_scalefac_t * const scalefac,
 779         const FLOAT8                 xr[576],
 780         const int                    l3_enc[576],
 781         const int                    gr,
 782         const int                    ch )
 783 {
 784     lame_internal_flags *gfc=gfp->internal_flags;
 785     int sfb;
 786     int j,i,l,start,end,bw;
 787     FLOAT8 en0,en1;
 788     FLOAT ifqstep = ( cod_info->scalefac_scale == 0 ) ? .5 : 1.0;
 789
 790
 791     III_psy_xmin l3_xmin;
 792     calc_noise_result noise;
 793     III_psy_xmin xfsf;
 794
 795     calc_xmin (gfp,xr, ratio, cod_info, &l3_xmin);
 796
 797     calc_noise (gfc, xr, l3_enc, cod_info, &l3_xmin, scalefac, &xfsf, &noise);
 798
 799     if (cod_info->block_type == SHORT_TYPE) {
 800         for (j=0, sfb = 0; sfb < SBMAX_s; sfb++ )  {
 801             start = gfc->scalefac_band.s[ sfb ];
 802             end   = gfc->scalefac_band.s[ sfb + 1 ];
 803             bw = end - start;
 804             for ( i = 0; i < 3; i++ ) {
 805                 for ( en0 = 0.0, l = start; l < end; l++ ) {
 806                     en0 += xr[j] * xr[j];
 807                     ++j;
 808                 }
 809                 en0=Max(en0/bw,1e-20);
 810
 811
 812 #if 0
 813 {
 814     double tot1,tot2;
 815     if (sfb<SBMAX_s-1) {
 816         if (sfb==0) {
 817             tot1=0;
 818             tot2=0;
 819         }
 820         tot1 += en0;
 821         tot2 += ratio->en.s[sfb][i];
 822
 823         DEBUGF("%i %i sfb=%i mdct=%f fft=%f  fft-mdct=%f db \n",
 824                 gr,ch,sfb,
 825                 10*log10(Max(1e-25,ratio->en.s[sfb][i])),
 826                 10*log10(Max(1e-25,en0)),
 827                 10*log10((Max(1e-25,en0)/Max(1e-25,ratio->en.s[sfb][i]))));
 828
 829         if (sfb==SBMAX_s-2) {
 830             DEBUGF("%i %i toti %f %f ratio=%f db \n",gr,ch,
 831                     10*log10(Max(1e-25,tot2)),
 832                     10*log(Max(1e-25,tot1)),
 833                     10*log10(Max(1e-25,tot1)/(Max(1e-25,tot2))));
 834
 835         }
 836     }
 837     /*
 838         masking: multiplied by en0/fft_energy
 839         average seems to be about -135db.
 840      */
 841 }
 842 #endif
 843
 844
 845                 /* convert to MDCT units */
 846                 en1=1e15;  /* scaling so it shows up on FFT plot */
 847                 gfc->pinfo->xfsf_s[gr][ch][3*sfb+i]
 848                     = en1*xfsf.s[sfb][i]*l3_xmin.s[sfb][i]/bw;
 849                 gfc->pinfo->en_s[gr][ch][3*sfb+i] = en1*en0;
 850
 851                 if (ratio->en.s[sfb][i]>0)
 852                     en0 = en0/ratio->en.s[sfb][i];
 853                 else
 854                     en0=0;
 855                 if (gfp->ATHonly || gfp->ATHshort)
 856                     en0=0;
 857
 858                 gfc->pinfo->thr_s[gr][ch][3*sfb+i] =
 859                         en1*Max(en0*ratio->thm.s[sfb][i],gfc->ATH->s[sfb]);
 860
 861
 862                 /* there is no scalefactor bands >= SBPSY_s */
 863                 if (sfb < SBPSY_s) {
 864                     gfc->pinfo->LAMEsfb_s[gr][ch][3*sfb+i]=
 865                                             -ifqstep*scalefac->s[sfb][i];
 866                 } else {
 867                     gfc->pinfo->LAMEsfb_s[gr][ch][3*sfb+i]=0;
 868                 }
 869                 gfc->pinfo->LAMEsfb_s[gr][ch][3*sfb+i] -=
 870                                              2*cod_info->subblock_gain[i];
 871             }
 872         }
 873     } else {
 874         for ( sfb = 0; sfb < SBMAX_l; sfb++ )   {
 875             start = gfc->scalefac_band.l[ sfb ];
 876             end   = gfc->scalefac_band.l[ sfb+1 ];
 877             bw = end - start;
 878             for ( en0 = 0.0, l = start; l < end; l++ )
 879                 en0 += xr[l] * xr[l];
 880             if (!gfc->nsPsy.use) en0/=bw;
 881       /*
 882     DEBUGF("diff  = %f \n",10*log10(Max(ratio[gr][ch].en.l[sfb],1e-20))
 883                             -(10*log10(en0)+150));
 884        */
 885
 886 #if 0
 887  {
 888     double tot1,tot2;
 889     if (sfb==0) {
 890         tot1=0;
 891         tot2=0;
 892     }
 893     tot1 += en0;
 894     tot2 += ratio->en.l[sfb];
 895
 896
 897     DEBUGF("%i %i sfb=%i mdct=%f fft=%f  fft-mdct=%f db \n",
 898             gr,ch,sfb,
 899             10*log10(Max(1e-25,ratio->en.l[sfb])),
 900             10*log10(Max(1e-25,en0)),
 901             10*log10((Max(1e-25,en0)/Max(1e-25,ratio->en.l[sfb]))));
 902
 903     if (sfb==SBMAX_l-1) {
 904         DEBUGF("%i %i toti %f %f ratio=%f db \n",
 905             gr,ch,
 906             10*log10(Max(1e-25,tot2)),
 907             10*log(Max(1e-25,tot1)),
 908             10*log10(Max(1e-25,tot1)/(Max(1e-25,tot2))));
 909     }
 910     /*
 911         masking: multiplied by en0/fft_energy
 912         average seems to be about -147db.
 913      */
 914 }
 915 #endif
 916
 917
 918             /* convert to MDCT units */
 919             en1=1e15;  /* scaling so it shows up on FFT plot */
 920             gfc->pinfo->xfsf[gr][ch][sfb] =  en1*xfsf.l[sfb]*l3_xmin.l[sfb]/bw;
 921             gfc->pinfo->en[gr][ch][sfb] = en1*en0;
 922             if (ratio->en.l[sfb]>0)
 923                 en0 = en0/ratio->en.l[sfb];
 924             else
 925                 en0=0;
 926             if (gfp->ATHonly)
 927                 en0=0;
 928             gfc->pinfo->thr[gr][ch][sfb] =
 929                              en1*Max(en0*ratio->thm.l[sfb],gfc->ATH->l[sfb]);
 930
 931             /* there is no scalefactor bands >= SBPSY_l */
 932             if (sfb<SBPSY_l) {
 933                 if (scalefac->l[sfb]<0)  /* scfsi! */
 934                     gfc->pinfo->LAMEsfb[gr][ch][sfb] =
 935                                             gfc->pinfo->LAMEsfb[0][ch][sfb];
 936                 else
 937                     gfc->pinfo->LAMEsfb[gr][ch][sfb] = -ifqstep*scalefac->l[sfb];
 938             }else{
 939                 gfc->pinfo->LAMEsfb[gr][ch][sfb] = 0;
 940             }
 941
 942             if (cod_info->preflag && sfb>=11)
 943                 gfc->pinfo->LAMEsfb[gr][ch][sfb] -= ifqstep*pretab[sfb];
 944         } /* for sfb */
 945     } /* block type long */
 946     gfc->pinfo->LAMEqss     [gr][ch] = cod_info->global_gain;
 947     gfc->pinfo->LAMEmainbits[gr][ch] = cod_info->part2_3_length;
 948     gfc->pinfo->LAMEsfbits  [gr][ch] = cod_info->part2_length;
 949
 950     gfc->pinfo->over      [gr][ch] = noise.over_count;
 951     gfc->pinfo->max_noise [gr][ch] = noise.max_noise;
 952     gfc->pinfo->over_noise[gr][ch] = noise.over_noise;
 953     gfc->pinfo->tot_noise [gr][ch] = noise.tot_noise;
 954 }
 955
 956
 957 /************************************************************************
 958  *
 959  *  set_frame_pinfo()
 960  *
 961  *  updates plotting data for a whole frame
 962  *
 963  *  Robert Hegemann 2000-10-21
 964  *
 965  ************************************************************************/
 966
 967 void set_frame_pinfo(
 968         lame_global_flags *gfp,
 969         FLOAT8          xr       [2][2][576],
 970         III_psy_ratio   ratio    [2][2],
 971         int             l3_enc   [2][2][576],
 972         III_scalefac_t  scalefac [2][2] )
 973 {
 974     lame_internal_flags *gfc=gfp->internal_flags;
 975     unsigned int          sfb;
 976         int                   ch;
 977         int                   gr;
 978     int                   act_l3enc[576];
 979     III_scalefac_t        act_scalefac [2];
 980     int scsfi[2] = {0,0};
 981
 982
 983     gfc->masking_lower = 1.0;
 984
 985     /* reconstruct the scalefactors in case SCSFI was used
 986      */
 987     for (ch = 0; ch < gfc->channels_out; ch ++) {
 988         for (sfb = 0; sfb < SBMAX_l; sfb ++) {
 989             if (scalefac[1][ch].l[sfb] == -1) {/* scfsi */
 990                 act_scalefac[ch].l[sfb] = scalefac[0][ch].l[sfb];
 991                 scsfi[ch] = 1;
 992             } else {
 993                 act_scalefac[ch].l[sfb] = scalefac[1][ch].l[sfb];
 994             }
 995         }
 996     }
 997
 998     /* for every granule and channel patch l3_enc and set info
 999      */
1000     for (gr = 0; gr < gfc->mode_gr; gr ++) {
1001         for (ch = 0; ch < gfc->channels_out; ch ++) {
1002             int i;
1003             gr_info *cod_info = &gfc->l3_side.gr[gr].ch[ch].tt;
1004
1005             /* revert back the sign of l3enc */
1006             for ( i = 0; i < 576; i++) {
1007                 if (xr[gr][ch][i] < 0)
1008                     act_l3enc[i] = -l3_enc[gr][ch][i];
1009                 else
1010                     act_l3enc[i] = +l3_enc[gr][ch][i];
1011             }
1012             if (gr == 1 && scsfi[ch])
1013                 set_pinfo (gfp, cod_info, &ratio[gr][ch], &act_scalefac[ch],
1014                         xr[gr][ch], act_l3enc, gr, ch);
1015             else
1016                 set_pinfo (gfp, cod_info, &ratio[gr][ch], &scalefac[gr][ch],
1017                         xr[gr][ch], act_l3enc, gr, ch);
1018         } /* for ch */
1019     }    /* for gr */
1020 }
1021
1022
1023
1024
1025 /*********************************************************************
1026  * nonlinear quantization of xr
1027  * More accurate formula than the ISO formula.  Takes into account
1028  * the fact that we are quantizing xr -> ix, but we want ix^4/3 to be
1029  * as close as possible to x^4/3.  (taking the nearest int would mean
1030  * ix is as close as possible to xr, which is different.)
1031  * From Segher Boessenkool <segher@eastsite.nl>  11/1999
1032  * ASM optimization from
1033  *    Mathew Hendry <scampi@dial.pipex.com> 11/1999
1034  *    Acy Stapp <AStapp@austin.rr.com> 11/1999
1035  *    Takehiro Tominaga <tominaga@isoternet.org> 11/1999
1036  * 9/00: ASM code removed in favor of IEEE754 hack.  If you need
1037  * the ASM code, check CVS circa Aug 2000.
1038  *********************************************************************/
1039
1040
1041 #ifdef TAKEHIRO_IEEE754_HACK
1042
1043 typedef union {
1044     float f;
1045     int i;
1046 } fi_union;
1047
1048 #define MAGIC_FLOAT (65536*(128))
1049 #define MAGIC_INT 0x4b000000
1050
1051 void quantize_xrpow(const FLOAT8 *xp, int *pi, FLOAT8 istep)
1052 {
1053     /* quantize on xr^(3/4) instead of xr */
1054     int j;
1055     fi_union *fi;
1056
1057     fi = (fi_union *)pi;
1058     for (j = 576 / 4 - 1; j >= 0; --j) {
1059         double x0 = istep * xp[0];
1060         double x1 = istep * xp[1];
1061         double x2 = istep * xp[2];
1062         double x3 = istep * xp[3];
1063
1064         x0 += MAGIC_FLOAT; fi[0].f = x0;
1065         x1 += MAGIC_FLOAT; fi[1].f = x1;
1066         x2 += MAGIC_FLOAT; fi[2].f = x2;
1067         x3 += MAGIC_FLOAT; fi[3].f = x3;
1068
1069         fi[0].f = x0 + (adj43asm - MAGIC_INT)[fi[0].i];
1070         fi[1].f = x1 + (adj43asm - MAGIC_INT)[fi[1].i];
1071         fi[2].f = x2 + (adj43asm - MAGIC_INT)[fi[2].i];
1072         fi[3].f = x3 + (adj43asm - MAGIC_INT)[fi[3].i];
1073
1074         fi[0].i -= MAGIC_INT;
1075         fi[1].i -= MAGIC_INT;
1076         fi[2].i -= MAGIC_INT;
1077         fi[3].i -= MAGIC_INT;
1078         fi += 4;
1079         xp += 4;
1080     }
1081 }
1082
1083 #  define ROUNDFAC -0.0946
1084 void quantize_xrpow_ISO(const FLOAT8 *xp, int *pi, FLOAT8 istep)
1085 {
1086     /* quantize on xr^(3/4) instead of xr */
1087     int j;
1088     fi_union *fi;
1089
1090     fi = (fi_union *)pi;
1091     for (j=576/4 - 1;j>=0;j--) {
1092         fi[0].f = istep * xp[0] + (ROUNDFAC + MAGIC_FLOAT);
1093         fi[1].f = istep * xp[1] + (ROUNDFAC + MAGIC_FLOAT);
1094         fi[2].f = istep * xp[2] + (ROUNDFAC + MAGIC_FLOAT);
1095         fi[3].f = istep * xp[3] + (ROUNDFAC + MAGIC_FLOAT);
1096
1097         fi[0].i -= MAGIC_INT;
1098         fi[1].i -= MAGIC_INT;
1099         fi[2].i -= MAGIC_INT;
1100         fi[3].i -= MAGIC_INT;
1101         fi+=4;
1102         xp+=4;
1103     }
1104 }
1105
1106 #else
1107
1108 /*********************************************************************
1109  * XRPOW_FTOI is a macro to convert floats to ints.
1110  * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
1111  *                                         ROUNDFAC= -0.0946
1112  *
1113  * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
1114  *                                   ROUNDFAC=0.4054
1115  *
1116  * Note: using floor() or (int) is extermely slow. On machines where
1117  * the TAKEHIRO_IEEE754_HACK code above does not work, it is worthwile
1118  * to write some ASM for XRPOW_FTOI().
1119  *********************************************************************/
1120 #define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
1121 #define QUANTFAC(rx)  adj43[rx]
1122 #define ROUNDFAC 0.4054
1123
1124
1125 void quantize_xrpow(const FLOAT8 *xr, int *ix, FLOAT8 istep) {
1126     /* quantize on xr^(3/4) instead of xr */
1127     /* from Wilfried.Behne@t-online.de.  Reported to be 2x faster than
1128        the above code (when not using ASM) on PowerPC */
1129     int j;
1130
1131     for ( j = 576/8; j > 0; --j) {
1132         FLOAT8  x1, x2, x3, x4, x5, x6, x7, x8;
1133         int     rx1, rx2, rx3, rx4, rx5, rx6, rx7, rx8;
1134         x1 = *xr++ * istep;
1135         x2 = *xr++ * istep;
1136         XRPOW_FTOI(x1, rx1);
1137         x3 = *xr++ * istep;
1138         XRPOW_FTOI(x2, rx2);
1139         x4 = *xr++ * istep;
1140         XRPOW_FTOI(x3, rx3);
1141         x5 = *xr++ * istep;
1142         XRPOW_FTOI(x4, rx4);
1143         x6 = *xr++ * istep;
1144         XRPOW_FTOI(x5, rx5);
1145         x7 = *xr++ * istep;
1146         XRPOW_FTOI(x6, rx6);
1147         x8 = *xr++ * istep;
1148         XRPOW_FTOI(x7, rx7);
1149         x1 += QUANTFAC(rx1);
1150         XRPOW_FTOI(x8, rx8);
1151         x2 += QUANTFAC(rx2);
1152         XRPOW_FTOI(x1,*ix++);
1153         x3 += QUANTFAC(rx3);
1154         XRPOW_FTOI(x2,*ix++);
1155         x4 += QUANTFAC(rx4);
1156         XRPOW_FTOI(x3,*ix++);
1157         x5 += QUANTFAC(rx5);
1158         XRPOW_FTOI(x4,*ix++);
1159         x6 += QUANTFAC(rx6);
1160         XRPOW_FTOI(x5,*ix++);
1161         x7 += QUANTFAC(rx7);
1162         XRPOW_FTOI(x6,*ix++);
1163         x8 += QUANTFAC(rx8);
1164         XRPOW_FTOI(x7,*ix++);
1165         XRPOW_FTOI(x8,*ix++);
1166     }
1167 }
1168
1169
1170
1171
1172
1173
1174 void quantize_xrpow_ISO( const FLOAT8 *xr, int *ix, FLOAT8 istep )
1175 {
1176     /* quantize on xr^(3/4) instead of xr */
1177     const FLOAT8 compareval0 = (1.0 - 0.4054)/istep;
1178     int j;
1179     /* depending on architecture, it may be worth calculating a few more
1180        compareval's.
1181
1182        eg.  compareval1 = (2.0 - 0.4054/istep);
1183        .. and then after the first compare do this ...
1184        if compareval1>*xr then ix = 1;
1185
1186        On a pentium166, it's only worth doing the one compare (as done here),
1187        as the second compare becomes more expensive than just calculating
1188        the value. Architectures with slow FP operations may want to add some
1189        more comparevals. try it and send your diffs statistically speaking
1190
1191        73% of all xr*istep values give ix=0
1192        16% will give 1
1193        4%  will give 2
1194     */
1195     for (j=576;j>0;j--) {
1196         if (compareval0 > *xr) {
1197             *(ix++) = 0;
1198             xr++;
1199         } else {
1200             /*    *(ix++) = (int)( istep*(*(xr++))  + 0.4054); */
1201             XRPOW_FTOI(  istep*(*(xr++))  + ROUNDFAC , *(ix++) );
1202         }
1203     }
1204 }
1205
1206 #endif