lib/lame/util.h

   1 /*
   2  *      lame utility library include file
   3  *
   4  *      Copyright (c) 1999 Albert L Faber
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, write to the
  18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19  * Boston, MA 02111-1307, USA.
  20  */
  21
  22 #ifndef LAME_UTIL_H
  23 #define LAME_UTIL_H
  24
  25 /***********************************************************************
  26 *
  27 *  Global Include Files
  28 *
  29 ***********************************************************************/
  30 #include "machine.h"
  31 #include "encoder.h"
  32 #include "lame.h"
  33 #include "lame_global_flags.h"
  34 #include "lame-analysis.h"
  35 #include "id3tag.h"
  36
  37 /***********************************************************************
  38 *
  39 *  Global Definitions
  40 *
  41 ***********************************************************************/
  42
  43 #ifndef FALSE   /* boolean constants: only defined when no earlier header provided them */
  44 #define         FALSE                   0
  45 #endif
  46
  47 #ifndef TRUE
  48 #define         TRUE                    (!FALSE)
  49 #endif
  50
  51 #ifdef UINT_MAX   /* only true if UINT_MAX is already visible at this point of the translation unit */
  52 # define         MAX_U_32_NUM            UINT_MAX
  53 #else
  54 # define         MAX_U_32_NUM            0xFFFFFFFF
  55 #endif           /* NOTE(review): UINT_MAX equals 0xFFFFFFFF only where unsigned int is 32 bits wide */
  56
  57 #ifndef PI        /* math constants: reuse the M_* macros when they are defined, else literal fallbacks */
  58 # ifdef M_PI
  59 #  define       PI                      M_PI
  60 # else
  61 #  define       PI                      3.14159265358979323846
  62 # endif
  63 #endif
  64
  65
  66 #ifdef M_LN2
  67 # define        LOG2                    M_LN2
  68 #else
  69 # define        LOG2                    0.69314718055994530942  /* natural logarithm of 2 (ln 2), not a base-2 log */
  70 #endif
  71
  72 #ifdef M_LN10
  73 # define        LOG10                   M_LN10
  74 #else
  75 # define        LOG10                   2.30258509299404568402  /* natural logarithm of 10 (ln 10) */
  76 #endif
  77
  78
  79 #ifdef M_SQRT2
  80 # define        SQRT2                   M_SQRT2
  81 #else
  82 # define        SQRT2                   1.41421356237309504880  /* square root of 2 */
  83 #endif
  84
  85
  86 #define         HAN_SIZE                512
  87 #define         CRC16_POLYNOMIAL        0x8005
  88 #define MAX_BITS 4095
  89
  90 /* "bit_stream.h" Definitions */
  91 #define         BUFFER_SIZE     LAME_MAXMP3BUFFER  /* alias; LAME_MAXMP3BUFFER is not defined in this header */
  92
  93 #define         Min(A, B)       ((A) < (B) ? (A) : (B))  /* macro: the selected argument is evaluated twice, */
  94 #define         Max(A, B)       ((A) > (B) ? (A) : (B))  /* so do not pass expressions with side effects     */
  95
  96
  97
  98
  99
 100 /***********************************************************************
 101 *
 102 *  Global Type Definitions
 103 *
 104 ***********************************************************************/
 105
 106
 107
 108 /* "bit_stream.h" Type Definitions */
 109 /* Bit-stream buffer plus its position counters; struct lame_internal_flags embeds one as member `bs`. */
 110 typedef struct  bit_stream_struc {
 111     unsigned char *buf;         /* bit stream buffer */
 112     int         buf_size;       /* size of buffer (in number of bytes) */
 113     int         totbit;         /* bit counter of bit stream */
 114     int         buf_byte_idx;   /* index (an int, not a C pointer) of the top byte in buffer */
 115     int         buf_bit_idx;    /* index (an int, not a C pointer) of the top bit of the top byte in buffer */
 116
 117     /* format of file in rd mode (BINARY/ASCII)
          NOTE(review): no member follows this comment; it appears to describe a field that is no longer here */
 118 } Bit_stream_struc;
 119
 120 #include "l3side.h"
 121
 122
 123 /* variables used for --nspsytune; struct lame_internal_flags holds one instance as member `nsPsy` */
 124 typedef struct {
 125   int   use; /* indicates the use of exp_nspsytune */
 126   int   safejoint; /* safe joint stereo mode */
 127   FLOAT last_en_subshort[4][9];      /* NOTE(review): the leading dimension 4 in the arrays below */
 128   FLOAT last_attack_intensity[4][9]; /* presumably selects the channel (L, R, M, S) - confirm     */
 129   FLOAT last_thm[4][SBMAX_s][3];
 130   int   last_attacks[4][3];
 131   FLOAT pe_l[4],pe_s[4];
 132   FLOAT pefirbuf[19];
 133   FLOAT bass,alto,treble,sfb21;
 134 } nsPsy_t;
 135
 136 /* tuning variables used for --alt-preset; struct lame_internal_flags holds one instance as member `presetTune` */
 137 typedef struct {
 138
 139   // indicates the use of alt-preset
 140   int     use;
 141
 142   // short block tuning
 143   int     attackthre;
 144   int     attackthre_s;
 145
 146   // adjustment to joint stereo
 147   FLOAT8  ms_maskadjust;
 148
 149   // adjustments to quantization selection
 150   int     quantcomp_current;          // current quant_compare mode we are using
 151   FLOAT8  quantcomp_adjust_rh_tot;    // adjustments for tot_noise with vbr-old
 152   FLOAT8  quantcomp_adjust_rh_max;    // adjustments for max_noise with vbr-old
 153   FLOAT8  quantcomp_adjust_mtrh;      // adjustments for calc_scalefac "c" with vbr-mtrh
 154   int     quantcomp_type_s;           // quantization comparison to switch to on non-normal blocks
 155   int     quantcomp_alt_type;          // third quantization comparison to use for special cases
 156                                        // such as high athadjust values, or long blocks, etc
 157
 158   // tunings reliant upon athadjust
 159   FLOAT8  athadjust_max_val;           // maximum value of athadjust before limit is applied
 160   FLOAT8  athadjust_switch_level;      // level of athadjust at which to apply tunings
 161                                        // x <= 0 == never switch, x >= 1 == always switch
 162   FLOAT8  athadjust_msfix;             // msfix adjustment based on athadjust
 163   int     athadjust_safe_noiseshaping; // if 0, noise shaping 2 will not be used no matter what
 164                                        // the noise shaping type would normally be set to
 165   FLOAT8  athadjust_safe_noiseshaping_thre; // value which max_pow_alt must be greater than
 166                                             // for noise shaping 2 to be used "safely"
 167   FLOAT8  athadjust_safe_athaasensitivity; // used for second determination if it is safe to switch
 168                                               // to noise shaping 2
 169 } presetTune_t;
 170
 171 typedef struct  /* frame-collection state; struct lame_internal_flags keeps one as `VBR_seek_table` (used for the Xing VBR header) */
 172 {
 173     int sum;    // what we have seen so far
 174     int seen;   // how many frames we have seen in this chunk
 175     int want;   // how many frames we want to collect into one chunk
 176     int pos;    // actual position in our bag
 177     int size;   // size of our bag
 178     int *bag;   // pointer to our bag
 179 } VBR_seek_info_t;
 180
 181
 182 /**
 183  *  ATH related stuff. If something new that is ATH related has to be added,
 184  *  please plug it into the ATH_t struct here.
     *  struct lame_internal_flags refers to one ATH_t through its pointer member `ATH`.
 185  */
 186 typedef struct
 187 {
 188     int     use_adjust;     // method for the auto adjustment
 189     FLOAT8  adjust;         // lowering based on peak volume, 1 = no lowering
 190     FLOAT8  adjust_limit;   // limit for dynamic ATH adjust
 191     FLOAT8  decay;          // determined to lower x dB each second
 192     FLOAT8  floor;          // lowest ATH value
 193     FLOAT8  l[SBMAX_l];     // ATH for sfbs in long blocks
 194     FLOAT8  s[SBMAX_s];     // ATH for sfbs in short blocks
 195     FLOAT8  cb[CBANDS];     // ATH for convolution bands
 196 } ATH_t;
 197
 198 /**
 199  *  VBR related stuff.
     *  struct lame_internal_flags refers to one VBR_t through its pointer member `VBR`.
 200  */
 201 typedef struct
 202 {
 203     int     quality;
 204     FLOAT8  mask_adjust;    // the dbQ stuff
 205     int     smooth;         // 0=no, 1=peaks, 2=+-4
 206     int     bitpressure;    // strategy for bitpressure situations
 207     FLOAT8  scratch[192];   // NOTE(review): purpose and the size 192 are not documented here
 208 } VBR_t;
 209
 210 /**
 211  *  PSY Model related stuff.
     *  struct lame_internal_flags refers to one PSY_t through its pointer member `PSY`.
 212  */
 213 typedef struct
 214 {
 215     int     tonalityPatch;      // temporarily needed by VBR
 216     FLOAT   cwlimit;
 217     FLOAT8  prvTonRed[CBANDS];
 218 } PSY_t;
 219
 220
 221 /* Guest structure, only temporarily here.
        Enumerates coding formats; it is the type of the `coding` member of struct lame_internal_flags. */
 222
 223 typedef enum {
 224     coding_MPEG_Layer_1 = 1,
 225     coding_MPEG_Layer_2 = 2,
 226     coding_MPEG_Layer_3 = 3,
 227     coding_MPEG_AAC     = 4,
 228     coding_Ogg_Vorbis   = 5,
 229     coding_MPEG_plus    = 6
 230 } coding_t;
 231
 232 #define MAX_CHANNELS  2   /* dimension of the per-channel arrays in resample_t (and of mfbuf under KLEMM_44) */
 233
 234 typedef struct {   /* resampler context; lame_internal_flags points to two of these (resample_in, resample_out) */
 235     unsigned long  Class_ID;        /* Class ID to recognize a resample_t
 236                                        object */
 237     FLOAT8   sample_freq_in;  /* Input sample frequency in Hz */
 238     FLOAT8   sample_freq_out; /* requested Output sample frequency in Hz */
 239     FLOAT8   lowpass_freq;    /* lowpass frequency, this is the -6 dB
 240                                        point */
 241     int            scale_in;        /* the resampling is actually done by the ratio
 242                                        of scale_out to scale_in (see scale_out) */
 243     int            scale_out;       /* NOTE(review): original comment was cut off; presumably the output frequency is
 244                                          sample_freq_in * scale_out / scale_in - confirm */
 245     int            taps;            /* number of taps for every FIR resample
 246                                        filter */
 247
 248     sample_t**     fir;             /* the FIR resample filters; NOTE(review): original comment was cut off,
 249                                          presumably indexed as fir [scale_out] [taps] - confirm */
 250     void*          firfree;         /* start address of the allocated memory for
 251                                        fir (original comment ended mid-sentence) */
 252     unsigned char* src_step;
 253     sample_t*      in_old       [MAX_CHANNELS];
 254     //    uint64_t       sample_count [MAX_CHANNELS];
 255     unsigned       fir_stepper  [MAX_CHANNELS];
 256     int            inp_stepper  [MAX_CHANNELS];
 257
 258 } resample_t;
 259
 260
 261 struct lame_internal_flags {
 262   /* Internal encoder state. The `gfp` member below points to a lame_global_flags object. */
 263   /********************************************************************
 264    * internal variables NOT set by calling program, and should not be *
 265    * modified by the calling program                                  *
 266    ********************************************************************/
 267
 268   /*
 269    * Some remarks to the Class_ID field:
 270    * The Class ID is an Identifier for a pointer to this struct.
 271    * It is very unlikely that a pointer to lame_global_flags has the same 32 bits
 272    * in its structure (large and other special properties, for instance prime).
 273    *
 274    * To test that the structure is right and initialized, use:
 275    *     if ( gfc -> Class_ID == LAME_ID ) ...
 276    * Other remark:
 277    *     If you set a flag to 0 for uninit data and 1 for init data, the right test
 278    *     should be "if (flag == 1)" and NOT "if (flag)". Unintended modification
 279    *     of this element will be otherwise misinterpreted as an init.
 280    */
 281   /* LAME_ID is a preprocessor macro: although written inside the struct it is not scoped to it. */
 282   #define  LAME_ID   0xFFF88E3B
 283   unsigned long Class_ID;
 284
 285   struct {
 286     void (*msgf)  (const char *format, va_list ap);
 287     void (*debugf)(const char *format, va_list ap);
 288     void (*errorf)(const char *format, va_list ap);
 289   } report;
 290
 291   int lame_encode_frame_init;
 292   int iteration_init_init;
 293   int fill_buffer_resample_init;
 294   int psymodel_init;
 295
 296   int padding;                  /* padding for the current frame? */
 297   int mode_gr;                    /* granules per frame */
 298   int          channels_in;     /* number of channels in the input data stream (PCM or decoded PCM) */
 299   int          channels_out;  /* number of channels in the output data stream (not used for decoding) */
 300   resample_t*  resample_in;   /* context for coding (PCM=>MP3) resampling */
 301   resample_t*  resample_out;    /* context for decoding (MP3=>PCM) resampling */
 302   FLOAT8  samplefreq_in;
 303   FLOAT8  samplefreq_out;
 304   uint16_t nMusicCRC;
 305
 306 #ifndef  MFSIZE
 307 # define MFSIZE  ( 3*1152 + ENCDELAY - MDCTDELAY )
 308 #endif
 309 #ifdef  KLEMM_44
 310   sample_t*    mfbuf [MAX_CHANNELS];
 311 #else
 312   sample_t     mfbuf [2] [MFSIZE];
 313 #endif
 314   size_t       frame_size;    /* size of one frame in samples per channel */
 315   lame_global_flags* gfp;     /* needed as long as the frame encoding functions must access gfp (all needed information can be added to gfc) */
 316   coding_t     coding;        /* MPEG Layer 1/2/3, Ogg Vorbis, MPEG AAC, ... */
 317   unsigned long frame_count;  /* Number of frames coded, 2^32 > 3 years */
 318   int          mf_samples_to_encode;
 319   int          mf_size;
 320   FLOAT8       ampl;      /* amplification at the end of the current chunk (1. = 0 dB) */
 321   FLOAT8       last_ampl;         /* amplification at the end of the last chunk    (1. = 0 dB) */
 322   int VBR_min_bitrate;            /* min bitrate index */
 323   int VBR_max_bitrate;            /* max bitrate index */
 324   FLOAT resample_ratio;           /* input_samp_rate/output_samp_rate */
 325   int bitrate_index;
 326   int samplerate_index;
 327   int mode_ext;
 328
 329
 330   /* lowpass and highpass filter control */
 331   FLOAT8 lowpass1,lowpass2;        /* normalized frequency bounds of passband */
 332   FLOAT8 highpass1,highpass2;      /* normalized frequency bounds of passband */
 333
 334   /* polyphase filter (filter_type=0)  */
 335   int lowpass_band;          /* zero bands >= lowpass_band in the polyphase filterbank */
 336   int highpass_band;         /* zero bands <= highpass_band */
 337   int lowpass_start_band;    /* amplify bands between start */
 338   int lowpass_end_band;      /* and end for lowpass */
 339   int highpass_start_band;   /* amplify bands between start */
 340   int highpass_end_band;     /* and end for highpass */
 341
 342
 343   int filter_type;          /* 0=polyphase filter, 1= FIR filter 2=MDCT filter(bad)*/
 344   int quantization;         /* 0 = ISO formula,  1=best amplitude */
 345   int noise_shaping;        /* 0 = none
 346                                1 = ISO AAC model
 347                                2 = allow scalefac_select=1
 348                              */
 349
 350   int noise_shaping_amp;    /*  0 = ISO model: amplify all distorted bands
 351                                 1 = amplify within 50% of max (on db scale)
 352                                 2 = amplify only most distorted band
 353                                 3 = amplify only most distorted band and
 354                                     use pseudo half step
 355                              */
 356
 357   int psymodel;             /* 1 = gpsycho. 0 = none */
 358   int noise_shaping_stop;   /* 0 = stop at over=0, all scalefacs amplified or
 359                                    a scalefac has reached max value
 360                                1 = stop when all scalefacs amplified or
 361                                    a scalefac has reached max value
 362                                2 = stop when all scalefacs amplified
 363                             */
 364
 365   int use_best_huffman;     /* 0 = no.  1=outside loop  2=inside loop(slow) */
 366
 367
 368
 369
 370   /* variables used by lame.c */
 371   Bit_stream_struc   bs;
 372   III_side_info_t l3_side;
 373   FLOAT8 ms_ratio[2];
 374   /* used for padding */
 375   int frac_SpF;
 376   int slot_lag;
 377
 378
 379   /* optional ID3 tags, used in id3tag.c  */
 380   struct id3tag_spec tag_spec;
 381
 382
 383   /* variables used by quantize.c */
 384   int OldValue[2];
 385   int CurrentStep;
 386   FLOAT8 decay;
 387   FLOAT8 masking_lower;
 388
 389   char bv_scf[576];
 390
 391   int sfb21_extra; /* will be set in lame_init_params */
 392
 393   int is_mpeg1; /* 1 for MPEG-1, 0 for MPEG-2(.5) */
 394
 395 #ifndef KLEMM_44
 396   /* variables used by util.c */
 397   /* BPC = maximum number of filter convolution windows to precompute */
 398 #define BPC 320
 399   sample_t *inbuf_old [2];
 400   sample_t *blackfilt [2*BPC+1];
 401   FLOAT8 itime[2];
 402 #endif
 403   int sideinfo_len;
 404
 405   /* variables for newmdct.c */
 406   FLOAT8 sb_sample[2][2][18][SBLIMIT];
 407   FLOAT8 amp_lowpass[32];
 408   FLOAT8 amp_highpass[32];
 409
 410   /* variables for bitstream.c */
 411   /* mpeg1: buffer=511 bytes  smallest frame: 96-38(sideinfo)=58
 412    * max number of frames in reservoir:  8
 413    * mpeg2: buffer=255 bytes.  smallest frame: 24-23bytes=1
 414    * with VBR, if you are encoding all silence, it is possible to
 415    * have 8kbs/24khz frames with 1byte of data each, which means we need
 416    * to buffer up to 255 headers! */
 417   /* also, max_header_buf has to be a power of two */
 418 #define MAX_HEADER_BUF 256
 419 #define MAX_HEADER_LEN 40 /* max size of header is 38 */
 420   struct {
 421     int write_timing;
 422     int ptr;
 423     char buf[MAX_HEADER_LEN];
 424   } header[MAX_HEADER_BUF];
 425
 426   int h_ptr;
 427   int w_ptr;
 428   int ancillary_flag;
 429
 430
 431   /* variables for reservoir.c */
 432   int ResvSize; /* in bits */
 433   int ResvMax;  /* in bits */
 434
 435
 436   scalefac_struct scalefac_band;
 437
 438   /* DATA FROM PSYMODEL.C */
 439 /* The static variables "r", "phi_sav", "new", "old" and "oldest" have    */
 440 /* to be remembered for the unpredictability measure.  For "r" and        */
 441 /* "phi_sav", the first index from the left is the channel select and     */
 442 /* the second index is the "age" of the data.                             */
 443   FLOAT8        minval[CBANDS];
 444   FLOAT8        nb_1[4][CBANDS], nb_2[4][CBANDS];
 445   FLOAT8        nb_s1[4][CBANDS], nb_s2[4][CBANDS];
 446   FLOAT8  *s3_ss;
 447   FLOAT8  *s3_ll;
 448
 449   III_psy_xmin thm[4];
 450   III_psy_xmin en[4];
 451
 452   /* unpredictability calculation
 453    */
 454   int cw_upper_index;
 455   int cw_lower_index;
 456   FLOAT ax_sav[4][2][HBLKSIZE];
 457   FLOAT bx_sav[4][2][HBLKSIZE];
 458   FLOAT rx_sav[4][2][HBLKSIZE];
 459   FLOAT cw[HBLKSIZE];
 460
 461   /* fft and energy calculation    */
 462   FLOAT wsamp_L[2][BLKSIZE];
 463   FLOAT energy[HBLKSIZE];
 464   FLOAT wsamp_S[2][3][BLKSIZE_s];
 465   FLOAT energy_s[3][HBLKSIZE_s];
 466   FLOAT tot_ener[4];
 467
 468
 469   /* loudness calculation (for adaptive threshold of hearing) */
 470   FLOAT loudness_sq[2][2];  /* loudness^2 approx. per granule and channel */
 471   FLOAT loudness_sq_save[2];/* account for granule delay of L3psycho_anal */
 472
 473   /* factor for tuning the (sample power) point below which adaptive threshold
 474      of hearing adjustment occurs
 475    */
 476   FLOAT athaa_sensitivity_p;
 477
 478
 479   /* fft.c    */
 480   FLOAT window[BLKSIZE], window_s[BLKSIZE_s/2];
 481
 482
 483   /* Scale Factor Bands    */
 484   III_scalefac_t pseudohalf;
 485   FLOAT8        w1_l[SBMAX_l], w2_l[SBMAX_l];
 486   FLOAT8        w1_s[SBMAX_s], w2_s[SBMAX_s];
 487   FLOAT8 mld_l[SBMAX_l],mld_s[SBMAX_s];
 488   int   bu_l[SBMAX_l],bo_l[SBMAX_l] ;
 489   int   bu_s[SBMAX_s],bo_s[SBMAX_s] ;
 490   int   npart_l,npart_s;
 491   int   npart_l_orig,npart_s_orig;
 492
 493   int   s3ind[CBANDS][2];
 494   int   s3ind_s[CBANDS][2];
 495   FLOAT8 SNR_s[CBANDS];
 496
 497   int   numlines_s[CBANDS];
 498   int   numlines_l[CBANDS];
 499
 500
 501   /* frame analyzer    */
 502   FLOAT energy_save[4][HBLKSIZE];
 503   FLOAT8 pe_save[4];
 504   FLOAT8 ers_save[4];
 505
 506   /* simple statistics */
 507   int   bitrate_stereoMode_Hist [16] [4+1];
 508
 509   /* ratios  */
 510   FLOAT8 pe[4];
 511   FLOAT8 ms_ratio_s_old,ms_ratio_l_old;
 512   FLOAT8 ms_ener_ratio_old;
 513
 514   /* block type */
 515   int   blocktype_old[2];
 516
 517   /* used by the frame analyzer */
 518   plotting_data *pinfo;
 519
 520   /* CPU features */
 521   struct {
 522     unsigned int  i387      : 1; /* FPU is a normal Intel CPU */
 523     unsigned int  MMX       : 1; /* Pentium MMX, Pentium II...IV, K6, K6-2,
 524                                     K6-III, Athlon */
 525     unsigned int  AMD_3DNow : 1; /* K6-2, K6-III, Athlon      */
 526     unsigned int  SIMD      : 1; /* Pentium III, Pentium 4    */
 527     unsigned int  SIMD2     : 1; /* Pentium 4, K8             */
 528   } CPU_features;
 529
 530   /* functions to replace with CPU feature optimized versions in takehiro.c */
 531   int (*choose_table)(const int *ix, const int *end, int *s);
 532
 533   void (*fft_fht)(FLOAT *, int);
 534
 535   nsPsy_t nsPsy;  /* variables used for --nspsytune */
 536   presetTune_t presetTune;  /* variables used for --alt-preset */
 537
 538   unsigned crcvalue;
 539
 540   VBR_seek_info_t VBR_seek_table; // used for Xing VBR header
 541
 542   ATH_t *ATH;   // all ATH related stuff
 543   VBR_t *VBR;
 544   PSY_t *PSY;
 545
 546   int nogap_total;
 547   int nogap_current;
 548 };
 549
 550
 551
 552
 553
 554 /***********************************************************************
 555 *
 556 *  Global Function Prototype Declarations
 557 *
 558 ***********************************************************************/
 559 void                  freegfc(lame_internal_flags * const gfc);
 560 extern int            BitrateIndex(int, int,int);
 561 extern int            FindNearestBitrate(int,int,int);
 562 extern int            map2MP3Frequency(int freq);
 563 extern int            SmpFrqIndex(int, int* const);
 564 extern FLOAT8         ATHformula(FLOAT8 f,lame_global_flags *gfp);
 565 extern FLOAT8         freq2bark(FLOAT8 freq);
 566 extern FLOAT8         freq2cbw(FLOAT8 freq);
 567 extern void freorder(int scalefac_band[],FLOAT8 ix_orig[576]);  /* the 576 is informational only: the parameter is a FLOAT8 pointer */
 568 void disable_FPE(void);
 569 /* `extern` is optional on function declarations; the mix of styles in this list has no effect on linkage. */
 570 extern void
 571 getframebits( const lame_global_flags *gfp, int *bitsPerFrame, int *mean_bits);
 572 /* In fill_buffer the array-typed parameters (mfbuf[2], in_buffer[2]) are adjusted to sample_t **; the [2] is not enforced. */
 573 void fill_buffer(lame_global_flags *gfp,
 574                  sample_t *mfbuf[2],
 575                  sample_t *in_buffer[2],
 576                  int nsamples, int *n_in, int *n_out);
 577
 578 int  fill_buffer_resample (
 579         lame_global_flags *gfp,
 580         sample_t*  outbuf,
 581         int        desired_len,
 582         sample_t*  inbuf,
 583         int        len,
 584         int*       num_used,
 585         int        channels );
 586
 587 /* The has_* names mirror the CPU_features bit-fields of struct lame_internal_flags. NOTE(review): return convention is not visible here - presumably non-zero means present; confirm. */
 588 extern int  has_i387  ( void );
 589 extern int  has_MMX   ( void );
 590 extern int  has_3DNow ( void );
 591 extern int  has_SIMD  ( void );
 592 extern int  has_SIMD2 ( void );
 593
 594 extern void updateStats (lame_internal_flags * const gfc);
 595
 596
 597
 598 /***********************************************************************
 599 *
 600 *  Macros about Message Printing and Exit
 601 *
 602 ***********************************************************************/
 603 extern void lame_errorf(const lame_internal_flags *gfc, const char *, ...);
 604 extern void lame_debugf(const lame_internal_flags *gfc, const char *, ...);
 605 extern void lame_msgf  (const lame_internal_flags *gfc, const char *, ...);
 606 #define DEBUGF  lame_debugf
 607 #define ERRORF  lame_errorf
 608 #define MSGF    lame_msgf
 609 /* The three functions are variadic; DEBUGF/ERRORF/MSGF are plain aliases for them. NOTE(review): the unnamed const char * is presumably a printf-style format, and output presumably goes through the gfc->report callbacks - confirm in the definitions. */
 610
 611 int select_kth_int(int b[], int N, int k);  /* NOTE(review): presumably selects the k-th element (by rank) of b[0..N-1]; b is non-const, so it may be reordered - confirm in the definition */
 612
 613
 614
 615 #endif /* LAME_UTIL_H */