X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Flame%2Ffft.c;fp=lib%2Flame%2Ffft.c;h=f0d7aca7311c7e07c07072706df7ce9a7ec6c68d;hb=698acf324aaa52147b1486646f6549ffd95804da;hp=0000000000000000000000000000000000000000;hpb=f8d07c79494e8536e682da73cee2057740a0e4db;p=swftools.git diff --git a/lib/lame/fft.c b/lib/lame/fft.c new file mode 100644 index 0000000..f0d7aca --- /dev/null +++ b/lib/lame/fft.c @@ -0,0 +1,309 @@ +/* +** FFT and FHT routines +** Copyright 1988, 1993; Ron Mayer +** +** fht(fz,n); +** Does a hartley transform of "n" points in the array "fz". +** +** NOTE: This routine uses at least 2 patented algorithms, and may be +** under the restrictions of a bunch of different organizations. +** Although I wrote it completely myself; it is kind of a derivative +** of a routine I once authored and released under the GPL, so it +** may fall under the free software foundation's restrictions; +** it was worked on as a Stanford Univ project, so they claim +** some rights to it; it was further optimized at work here, so +** I think this company claims parts of it. The patents are +** held by R. Bracewell (the FHT algorithm) and O. Buneman (the +** trig generator), both at Stanford Univ. +** If it were up to me, I'd say go do whatever you want with it; +** but it would be polite to give credit to the following people +** if you use this anywhere: +** Euler - probable inventor of the fourier transform. +** Gauss - probable inventor of the FFT. +** Hartley - probable inventor of the hartley transform. +** Buneman - for a really cool trig generator +** Mayer(me) - for authoring this particular version and +** including all the optimizations in one package. +** Thanks, +** Ron Mayer; mayer@acuson.com +** and added some optimization by +** Mather - idea of using lookup table +** Takehiro - some dirty hack for speed up +*/ + +/* $Id: fft.c,v 1.1 2002/04/28 17:30:18 kramm Exp $ */ + +#include "config_static.h" + +#include +#include "util.h" +#include "fft.h" + + + + +#ifdef WITH_DMALLOC +#include +#endif + +#define TRI_SIZE (5-1) /* 1024 = 4**5 */ + +static const FLOAT costab[TRI_SIZE*2] = { + 9.238795325112867e-01, 3.826834323650898e-01, + 9.951847266721969e-01, 9.801714032956060e-02, + 9.996988186962042e-01, 2.454122852291229e-02, + 9.999811752826011e-01, 6.135884649154475e-03 +}; + +static void fht(FLOAT *fz, int n) +{ + const FLOAT *tri = costab; + int k4; + FLOAT *fi, *fn, *gi; + + n <<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */ + fn = fz + n; + k4 = 4; + do { + FLOAT s1, c1; + int i, k1, k2, k3, kx; + kx = k4 >> 1; + k1 = k4; + k2 = k4 << 1; + k3 = k2 + k1; + k4 = k2 << 1; + fi = fz; + gi = fi + kx; + do { + FLOAT f0,f1,f2,f3; + f1 = fi[0] - fi[k1]; + f0 = fi[0] + fi[k1]; + f3 = fi[k2] - fi[k3]; + f2 = fi[k2] + fi[k3]; + fi[k2] = f0 - f2; + fi[0 ] = f0 + f2; + fi[k3] = f1 - f3; + fi[k1] = f1 + f3; + f1 = gi[0] - gi[k1]; + f0 = gi[0] + gi[k1]; + f3 = SQRT2 * gi[k3]; + f2 = SQRT2 * gi[k2]; + gi[k2] = f0 - f2; + gi[0 ] = f0 + f2; + gi[k3] = f1 - f3; + gi[k1] = f1 + f3; + gi += k4; + fi += k4; + } while (fiwindow_s[0]; + int i; + int j; + int b; + + for (b = 0; b < 3; b++) { + FLOAT *x = &x_real[b][BLKSIZE_s / 2]; + short k = (576 / 3) * (b + 1); + j = BLKSIZE_s / 8 - 1; + do { + FLOAT f0,f1,f2,f3, w; + + i = rv_tbl[j << 2]; + + f0 = ms00(ch01); w = ms10(ch01); f1 = f0 - w; f0 = f0 + w; + f2 = ms20(ch01); w = ms30(ch01); f3 = f2 - w; f2 = f2 + w; + + x -= 4; + x[0] = f0 + f2; + x[2] = f0 - f2; + x[1] = f1 + f3; + x[3] = f1 - f3; + + f0 = ms01(ch01); w = ms11(ch01); f1 = f0 - w; f0 = f0 + w; + f2 = ms21(ch01); w = ms31(ch01); f3 = f2 - w; f2 = f2 + w; + + x[BLKSIZE_s / 2 + 0] = f0 + f2; + x[BLKSIZE_s / 2 + 2] = f0 - f2; + x[BLKSIZE_s / 2 + 1] = f1 + f3; + x[BLKSIZE_s / 2 + 3] = f1 - f3; + } while (--j >= 0); + + gfc->fft_fht(x, BLKSIZE_s/2); + /* BLKSIZE_s/2 because of 3DNow! ASM routine */ + } +} + +void fft_long(lame_internal_flags * const gfc, + FLOAT x[BLKSIZE], int chn, const sample_t *buffer[2] ) +{ + const FLOAT* window = (const FLOAT *)&gfc->window[0]; + int i; + int jj = BLKSIZE / 8 - 1; + x += BLKSIZE / 2; + + do { + FLOAT f0,f1,f2,f3, w; + + i = rv_tbl[jj]; + f0 = ml00(ch01); w = ml10(ch01); f1 = f0 - w; f0 = f0 + w; + f2 = ml20(ch01); w = ml30(ch01); f3 = f2 - w; f2 = f2 + w; + + x -= 4; + x[0] = f0 + f2; + x[2] = f0 - f2; + x[1] = f1 + f3; + x[3] = f1 - f3; + + f0 = ml01(ch01); w = ml11(ch01); f1 = f0 - w; f0 = f0 + w; + f2 = ml21(ch01); w = ml31(ch01); f3 = f2 - w; f2 = f2 + w; + + x[BLKSIZE / 2 + 0] = f0 + f2; + x[BLKSIZE / 2 + 2] = f0 - f2; + x[BLKSIZE / 2 + 1] = f1 + f3; + x[BLKSIZE / 2 + 3] = f1 - f3; + } while (--jj >= 0); + + gfc->fft_fht(x, BLKSIZE/2); + /* BLKSIZE/2 because of 3DNow! ASM routine */ +} + + +void init_fft(lame_internal_flags * const gfc) +{ + FLOAT *window = &gfc->window[0]; + FLOAT *window_s = &gfc->window_s[0]; + int i; + +#if 0 + if (gfc->nsPsy.use) { + for (i = 0; i < BLKSIZE ; i++) + /* blackman window */ + window[i] = 0.42-0.5*cos(2*PI*i/(BLKSIZE-1))+0.08*cos(4*PI*i/(BLKSIZE-1)); + } else { + /* + * calculate HANN window coefficients + */ + for (i = 0; i < BLKSIZE ; i++) + window[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE)); + } +#endif + + // The type of window used here will make no real difference, but + // in the interest of merging nspsytune stuff - switch to blackman window + for (i = 0; i < BLKSIZE ; i++) + /* blackman window */ + window[i] = 0.42-0.5*cos(2*PI*(i+.5)/BLKSIZE)+ + 0.08*cos(4*PI*(i+.5)/BLKSIZE); + + for (i = 0; i < BLKSIZE_s/2 ; i++) + window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s)); + +#ifdef HAVE_NASM + if (gfc->CPU_features.AMD_3DNow) { + extern void fht_3DN(FLOAT *fz, int n); + gfc->fft_fht = fht_3DN; + } else +#endif +#ifdef USE_FFTSSE + if (gfc->CPU_features.SIMD) { + extern void fht_SSE(FLOAT *fz, int n); + gfc->fft_fht = fht_SSE; + } else +#endif +#ifdef USE_FFTFPU + if (gfc->CPU_features.i387) { + extern void fht_FPU(FLOAT *fz, int n); + gfc->fft_fht = fht_FPU; + } else +#endif + gfc->fft_fht = fht; +}