//**************************************************************************
//*                     This file is part of the                           *
//*                      Mpxplay - audio player.                           *
//*                  The source code of Mpxplay is                         *
//*        (C) copyright 1998-2005 by PDSoft (Attila Padar)                *
//*                    http://mpxplay.cjb.net                              *
//*                  email: mpxplay@freemail.hu                            *
//**************************************************************************
//*  This program is distributed in the hope that it will be useful,       *
//*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
//*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
//*  Please contact with the author (with me) if you want to use           *
//*  or modify this source.                                                *
//**************************************************************************
//function: software tone (low and treble only)

#include "au_mixer.h"
#include "newfunc\newfunc.h"
#include "mix_func.h"

#define TONE_OWN_MDCT 1

#include "dec_ogg\os_types.h"
#include "dec_ogg\codec.h"
#ifndef TONE_OWN_MDCT
 #include "dec_ogg\mdct.h"
#endif

#define TONE_ASM 1 // always

#define TONE_EQ_BANDS  10

#define TONE_MDCTBANDS   32
#define TONE_MDCTBLOCKS  32
#define TONE_BLOCKSIZE   (TONE_MDCTBANDS*TONE_MDCTBLOCKS) // =1024

#define EQ_BLOCK_SIZE (TONE_MDCTBLOCKS/2)

#define TONE_SYNCFLAG_STOP  0 // eq stopped
#define TONE_SYNCFLAG_BEGIN 1 // synchronize at start
#define TONE_SYNCFLAG_END   2 // synchronize at stop
#define TONE_SYNCFLAG_RUN   3 // eq running

#ifndef M_PI
 #define M_PI  3.14159265358979323846264338327950288
#endif

#ifdef TONE_OWN_MDCT
// Lookup tables and workspace for one MDCT size (filled by tone_mdct_init,
// released by tone_mdct_clear).
typedef struct {
 int n;       // transform block size passed to tone_mdct_init
 int log2n;   // rounded log2(n), used to derive the number of butterfly stages
 float  *trig;   // trig table, allocated as n+n/4 floats
 int    *bitrev; // bit-reverse table, allocated as n/4 ints

 float scale;            // forward-transform output scale (set to 4/n)
 float *forward_buffer;  // n-float scratch buffer used by tone_mdct_forward
}mdct_lookup;
#endif

extern void ogg_calculate_analiser_bands(ogg_double_t *pcm,unsigned int currbs,unsigned int lastch);

static void mixer_tone_hq(struct audio_info *aui);
static float *calculate_window(int left);
static void apply_window(float *dp,float *window,unsigned long blocksize);
static void calculate_eqgain(struct audio_info *aui);

static void EQSet(int data[TONE_EQ_BANDS]);
static void perform_EQ(float data[TONE_MDCTBANDS][EQ_BLOCK_SIZE],unsigned int ch);

#ifdef TONE_OWN_MDCT
static void tone_mdct_backward(mdct_lookup *init, float *in, float *out);
static void tone_mdct_forward(mdct_lookup *init, float *in, float *out);
static void tone_mdct_init(mdct_lookup *lookup,unsigned int n);
static void tone_mdct_clear(mdct_lookup *l);
#else
 #define tone_mdct_backward(a,b,c) oggdec_mdct_backward(a,b,c)
 #define tone_mdct_forward(a,b,c)  oggdec_mdct_forward(a,b,c)
 #define tone_mdct_init(a,b)       oggdec_mdct_init(a,b)
 #define tone_mdct_clear(a)        oggdec_mdct_clear(a)
#endif

one_mixerfunc_info MIXER_FUNCINFO_tone_bass;
one_mixerfunc_info MIXER_FUNCINFO_tone_treble;

#ifndef LAYER3_EQ
static float *window;
static float pcm_buffer[PCM_MAX_CHANNELS][PCM_BUFFER_SIZE/2];
static float workbuf[TONE_BLOCKSIZE];
static float mdct_delaybuf[PCM_MAX_CHANNELS][TONE_BLOCKSIZE/2];

static mdct_lookup mdctl;
#endif
static unsigned int pcm_size,sync_flag;
//static float TONE_SCALE=(MIXER_SCALE_MAX+1);

// Mixer lifecycle hook of the software tone filter.
//  aui      : audio state; only used to recompute the EQ gains on START
//  inittype : MIXER_INITTYPE_INIT  - allocate the window and the MDCT tables (once)
//             MIXER_INITTYPE_START - recompute the EQ gains, then do a RESET
//             MIXER_INITTYPE_RESET - restart the block synchronisation
//             MIXER_INITTYPE_CLOSE - release the window and the MDCT tables
// Other inittype values are ignored.
static void mixer_tone_init(struct audio_info *aui,int inittype)
{
 switch(inittype){
  case MIXER_INITTYPE_INIT:
#ifndef LAYER3_EQ
        if(!window){ // a NULL window marks "not initialised yet"
         window=calculate_window(TONE_BLOCKSIZE/2);
         tone_mdct_init(&mdctl,TONE_BLOCKSIZE);
        #ifndef TONE_OWN_MDCT
         mdctl.forward_buffer=malloc(TONE_BLOCKSIZE*sizeof(float));
        #endif
        }
#endif
        break;
  case MIXER_INITTYPE_START:
        calculate_eqgain(aui); // ??? here
        // fall through: a start also resets the synchronisation
  case MIXER_INITTYPE_RESET:
        if(sync_flag==TONE_SYNCFLAG_END) // to avoid on-off-on
         sync_flag=TONE_SYNCFLAG_RUN;
        else{
         sync_flag=TONE_SYNCFLAG_BEGIN;
         pcm_size=0;
        }
        //calculate_eqgain(aui);   //
        break;
  case MIXER_INITTYPE_CLOSE:
#ifndef LAYER3_EQ
        if(window){
         free(window);
         // Clear the pointer: INIT tests it to decide whether to (re)build the
         // tables, so a dangling value would skip re-initialisation after a
         // close and leave the filter running on freed memory.
         window=NULL;
        }
        tone_mdct_clear(&mdctl);
#endif
        break;
 }
}

// Integer-sample entry point of the tone filter: converts the samples in
// aui->pcm_sample to float, runs the float filter (mixer_tone_hq) and, if
// that produced any output, converts the result back to 16-bit integers.
static void mixer_tone_lq(struct audio_info *aui)
{
 cv_n_bits_to_float(aui->pcm_sample,aui->samplenum,2,MIXER_SCALE_BITS);

 mixer_tone_hq(aui);

 // mixer_tone_hq may leave no output while it is still collecting a full block
 if(!aui->samplenum)
  return;

 cv_float16_to_int16(aui->pcm_sample,
                     aui->samplenum,
                     aui->mvp->frp0->infile_infos->audio_decoder_infos->infobits&ADI_FLAG_FPUROUND_CHOP);
}

// Float tone filter (also the back end of mixer_tone_lq).
//
// Without LAYER3_EQ: the interleaved input in aui->pcm_sample is appended,
// de-interleaved, to the per-channel pcm_buffer. While at least TONE_BLOCKSIZE
// samples are buffered, each channel is processed in blocks of TONE_BLOCKSIZE
// with a hop of TONE_BLOCKSIZE/2:
//   window -> forward MDCT -> perform_EQ -> backward MDCT -> window
// and the first half of the result is overlap-added with the second half of
// the previous block (mdct_delaybuf) and written back interleaved.
// On return aui->samplenum is the number of interleaved samples written
// (0 if no full block was available).
//
// With LAYER3_EQ only the sync_flag state is advanced here.
static void mixer_tone_hq(struct audio_info *aui)
{
#ifndef LAYER3_EQ
 unsigned int channels=aui->chan_song,samplenum=aui->samplenum/channels;
 float *pcms=(float *)aui->pcm_sample;
 unsigned int ch,i,b;
#endif

#ifdef LAYER3_EQ
 if(sync_flag==TONE_SYNCFLAG_END){
  sync_flag=TONE_SYNCFLAG_STOP;
  MIXER_checkfunc_setflags("MIX_TONE_BASS");
 }else{
  if(sync_flag==TONE_SYNCFLAG_BEGIN)
   sync_flag=TONE_SYNCFLAG_RUN;
 }

#else

 // Stop requested: emit the still-buffered (unfiltered) samples in front of
 // the current input, then switch the filter off.
 if(sync_flag==TONE_SYNCFLAG_END){
  if(pcm_size){
   // make room at the start of the output for the buffered samples
   // NOTE(review): presumably pds_qmemcpyr is an overlap-safe backward copy; its count unit is not visible here - confirm
   pds_qmemcpyr(pcms+pcm_size*channels,pcms,aui->samplenum);
   for(ch=0;ch<channels;ch++)
    for(i=0;i<pcm_size;i++)
     pcms[i*channels+ch]=pcm_buffer[ch][i];// * TONE_SCALE;
   aui->samplenum+=pcm_size*channels;
  }
  sync_flag=TONE_SYNCFLAG_STOP;
  MIXER_checkfunc_setflags("MIX_TONE_BASS");
  return;
 }

 // append the new input, de-interleaved, behind the samples already buffered
 // NOTE(review): no bound check against the pcm_buffer row size here - confirm callers limit samplenum
 for(ch=0;ch<channels;ch++)
  for(i=0;i<samplenum;i++)
   pcm_buffer[ch][pcm_size+i]=pcms[i*channels+ch];// / TONE_SCALE;

 pcm_size+=samplenum;

 // b = number of samples (per channel) consumed from pcm_buffer so far
 b=0;

 while(pcm_size>=TONE_BLOCKSIZE){
  for(ch=0;ch<channels;ch++){
   //datain
   pds_memcpy(workbuf,&pcm_buffer[ch][b],TONE_BLOCKSIZE*sizeof(float));

   //forward
   apply_window(workbuf,window,TONE_BLOCKSIZE);
   tone_mdct_forward(&mdctl,workbuf,workbuf);

   //modify
   perform_EQ(workbuf,ch);
   //ogg_calculate_analiser_bands(workbuf,TONE_BLOCKSIZE,(ch==(channels-1)));

   //back
   tone_mdct_backward(&mdctl,workbuf,workbuf);
   apply_window(workbuf,window,TONE_BLOCKSIZE);

   //dataout
   // overlap-add with the previous block; skipped for the very first call
   // (BEGIN) because mdct_delaybuf holds no valid previous block yet
   if(sync_flag!=TONE_SYNCFLAG_BEGIN){
    for(i=0;i<(TONE_BLOCKSIZE/2);i++){
     pcms[0]=(workbuf[i]+mdct_delaybuf[ch][i]);// * TONE_SCALE;
     pcms+=channels;
    }
    pcms-=(TONE_BLOCKSIZE/2)*channels;
   }
   // step to the next channel's slot inside the interleaved frame
   pcms++;

   // keep the second half for the overlap-add of the next block
   pds_memcpy(mdct_delaybuf[ch],workbuf+(TONE_BLOCKSIZE/2),(TONE_BLOCKSIZE/2)*sizeof(float));
  }
  // back to channel 0, then advance the output by one hop
  pcms-=channels;
  pcms+=(TONE_BLOCKSIZE/2)*channels;
  b+=(TONE_BLOCKSIZE/2);
  pcm_size-=(TONE_BLOCKSIZE/2);
 }

 // First processed call: output the buffered input unchanged for the consumed
 // range (nothing was overlap-added above), then enter normal operation.
 if(sync_flag==TONE_SYNCFLAG_BEGIN){
  if(b){
   pcms=(float *)aui->pcm_sample;
   for(ch=0;ch<channels;ch++)
    for(i=0;i<b;i++)
     pcms[i*channels+ch]=pcm_buffer[ch][i];//*TONE_SCALE;
   sync_flag=TONE_SYNCFLAG_RUN;
  }
 }
 aui->samplenum=b*channels;
 // move the unconsumed samples to the start of each channel buffer
 if(b){
  for(ch=0;ch<channels;ch++)
   pds_memcpy(&pcm_buffer[ch][0],&pcm_buffer[ch][b],pcm_size*sizeof(float));
 }
#endif
}

// Tells the mixer whether the software tone filter has to run.
// Returns 0 when the card does the tone control in hardware.
// Returns 1 when bass or treble is off its centre value, or - with both
// centred - once more while the filter is running/ending (sync_flag is then
// set to TONE_SYNCFLAG_END so that mixer_tone_hq can do its post processing).
// Otherwise marks the filter as stopped and returns 0.
static int mixer_tone_checkvar_bass(struct audio_info *aui)
{
 if(!(aui->card_infobits&AUINFOS_CARDINFOBIT_HWTONE)){
  if(aui->card_mixer_values[AU_MIXCHAN_BASS]!=MIXER_FUNCINFO_tone_bass.var_center)
   return 1;
  if(aui->card_mixer_values[AU_MIXCHAN_TREBLE]!=MIXER_FUNCINFO_tone_treble.var_center)
   return 1;

  // both controls are centred: let a running filter finish first
  switch(sync_flag){
   case TONE_SYNCFLAG_RUN:
   case TONE_SYNCFLAG_END:
    sync_flag=TONE_SYNCFLAG_END;
    return 1;
  }
  sync_flag=TONE_SYNCFLAG_STOP;
 }

 return 0;
}

// Returns 1 if the software treble setting is off its centre value,
// 0 if it is centred or the card does the tone control in hardware.
static int mixer_tone_checkvar_treble(struct audio_info *aui)
{
 if(!(aui->card_infobits&AUINFOS_CARDINFOBIT_HWTONE)){
  if(aui->card_mixer_values[AU_MIXCHAN_TREBLE]!=MIXER_FUNCINFO_tone_treble.var_center)
   return 1;
 }
 return 0;
}

// Computes the new value of a tone control.
//  infop     : limits of the control (var_min, var_max, var_center, var_step)
//  currvalue : current value of the control
//  setmode   : MIXER_SETMODE_RELATIVE - step by modvalue*var_step, snapping to
//                                       the centre when the step crosses it
//              MIXER_SETMODE_ABSOLUTE - use modvalue directly
//              MIXER_SETMODE_RESET    - return to the centre
//              any other value        - keep currvalue
//  modvalue  : step count (relative) or target value (absolute)
// Returns the new value, clamped to [var_min, var_max].
static int tone_setvar(one_mixerfunc_info *infop,int currvalue,unsigned int setmode,int modvalue)
{
 // start from the current value so that an unknown setmode does not
 // return an uninitialised variable
 int newvalue=currvalue;
 switch(setmode){
  case MIXER_SETMODE_RELATIVE:newvalue=currvalue+modvalue*infop->var_step;
                              // do not jump over the centre position
                              if((currvalue<infop->var_center && newvalue>infop->var_center) || (currvalue>infop->var_center && newvalue<infop->var_center))
                               newvalue=infop->var_center;
                              break;
  case MIXER_SETMODE_ABSOLUTE:newvalue=modvalue;break;
  case MIXER_SETMODE_RESET   :newvalue=infop->var_center;break;
  default:break;
 }
 if(newvalue<infop->var_min)
  newvalue=infop->var_min;
 else
  if(newvalue>infop->var_max)
   newvalue=infop->var_max;

 return newvalue;
}

// Changes the bass setting.
// With hardware tone control the new value is stored and sent to the card.
// With the software filter the change is ignored while the filter is
// synchronising (BEGIN/END); otherwise the value is stored and the EQ gains
// are recalculated.
static void mixer_tone_setvar_bass(struct audio_info *aui,unsigned int setmode,int value)
{
 if(!(aui->card_infobits&AUINFOS_CARDINFOBIT_HWTONE)){
  switch(sync_flag){
   case TONE_SYNCFLAG_BEGIN:
   case TONE_SYNCFLAG_END:
    return;
  }
  aui->card_mixer_values[AU_MIXCHAN_BASS]=
   tone_setvar(&MIXER_FUNCINFO_tone_bass,aui->card_mixer_values[AU_MIXCHAN_BASS],setmode,value);
  calculate_eqgain(aui);
 }else{
  aui->card_mixer_values[AU_MIXCHAN_BASS]=
   tone_setvar(&MIXER_FUNCINFO_tone_bass,aui->card_mixer_values[AU_MIXCHAN_BASS],setmode,value);
  AU_setmixer_one(aui,AU_MIXCHAN_BASS,MIXER_SETMODE_ABSOLUTE,aui->card_mixer_values[AU_MIXCHAN_BASS]);
 }
}

// Changes the treble setting.
// With hardware tone control the new value is stored and sent to the card.
// With the software filter the change is ignored while the filter is
// synchronising (BEGIN/END); otherwise the value is stored and the EQ gains
// are recalculated.
static void mixer_tone_setvar_treble(struct audio_info *aui,unsigned int setmode,int value)
{
 if(!(aui->card_infobits&AUINFOS_CARDINFOBIT_HWTONE)){
  switch(sync_flag){
   case TONE_SYNCFLAG_BEGIN:
   case TONE_SYNCFLAG_END:
    return;
  }
  aui->card_mixer_values[AU_MIXCHAN_TREBLE]=
   tone_setvar(&MIXER_FUNCINFO_tone_treble,aui->card_mixer_values[AU_MIXCHAN_TREBLE],setmode,value);
  calculate_eqgain(aui);
 }else{
  aui->card_mixer_values[AU_MIXCHAN_TREBLE]=
   tone_setvar(&MIXER_FUNCINFO_tone_treble,aui->card_mixer_values[AU_MIXCHAN_TREBLE],setmode,value);
  AU_setmixer_one(aui,AU_MIXCHAN_TREBLE,MIXER_SETMODE_ABSOLUTE,aui->card_mixer_values[AU_MIXCHAN_TREBLE]);
 }
}

// Mixer function descriptor of the bass control. It carries the processing
// callbacks of the whole software tone filter (init, lq, hq); the treble
// descriptor only has checkvar/setvar.
// NOTE(review): the initialiser is positional and the struct definition is not
// visible here; the four numbers are presumably var_min,var_max,var_center,var_step
// (the fields read by tone_setvar) - confirm against one_mixerfunc_info.
one_mixerfunc_info MIXER_FUNCINFO_tone_bass={
 // this name is also passed to MIXER_checkfunc_setflags() by mixer_tone_hq
 "MIX_TONE_BASS",
 "mxtb",
 NULL,
 MIXER_INFOBIT_PARALLEL_DEPENDENCY, // loudness
 0,100,50,3,
 &mixer_tone_init,
 &mixer_tone_lq,
 &mixer_tone_hq,
 &mixer_tone_checkvar_bass,
 &mixer_tone_setvar_bass
};

// Mixer function descriptor of the treble control. It has no init/processing
// callbacks of its own (NULL); the filter itself is registered through the
// bass descriptor.
// NOTE(review): the initialiser is positional and the struct definition is not
// visible here; the four numbers are presumably var_min,var_max,var_center,var_step
// (the fields read by tone_setvar) - confirm against one_mixerfunc_info.
one_mixerfunc_info MIXER_FUNCINFO_tone_treble={
 "MIX_TONE_TREBLE",
 "mxtt",
 NULL,
 MIXER_INFOBIT_PARALLEL_DEPENDENCY, // loudness
 0,100,50,3,
 NULL,
 NULL,
 NULL,
 &mixer_tone_checkvar_treble,
 &mixer_tone_setvar_treble
};

//-------------------------------------------------------------------
#define MIXER_TONE_LOUDNESS_DEFAULT_BASS   75
#define MIXER_TONE_LOUDNESS_DEFAULT_TREBLE 80

static int loudness_save_bass=-1,loudness_save_treble=-1;

// Loudness toggle (only MIXER_SETMODE_RELATIVE is handled; value is unused).
// If bass and treble are both centred, the previously saved pair (or the
// MIXER_TONE_LOUDNESS_DEFAULT_* values when nothing was saved yet) is restored;
// otherwise the current pair is saved and both controls are centred.
// The result is sent to the card (hardware tone) or fed into the EQ gains.
static void mixer_tone_setvar_loudness(struct audio_info *aui,unsigned int setmode,int value)
{
 int tone_is_flat;

 if(setmode!=MIXER_SETMODE_RELATIVE)
  return;

 tone_is_flat=(aui->card_mixer_values[AU_MIXCHAN_BASS]==MIXER_FUNCINFO_tone_bass.var_center)
           && (aui->card_mixer_values[AU_MIXCHAN_TREBLE]==MIXER_FUNCINFO_tone_treble.var_center);

 if(!tone_is_flat){
  // switch loudness off: remember the current setting, then centre both
  loudness_save_bass=aui->card_mixer_values[AU_MIXCHAN_BASS];
  aui->card_mixer_values[AU_MIXCHAN_BASS]=MIXER_FUNCINFO_tone_bass.var_center;
  loudness_save_treble=aui->card_mixer_values[AU_MIXCHAN_TREBLE];
  aui->card_mixer_values[AU_MIXCHAN_TREBLE]=MIXER_FUNCINFO_tone_treble.var_center;
 }else{
  // switch loudness on: a negative saved value means "never saved"
  if(loudness_save_bass<0)
   loudness_save_bass=MIXER_TONE_LOUDNESS_DEFAULT_BASS;
  aui->card_mixer_values[AU_MIXCHAN_BASS]=loudness_save_bass;
  if(loudness_save_treble<0)
   loudness_save_treble=MIXER_TONE_LOUDNESS_DEFAULT_TREBLE;
  aui->card_mixer_values[AU_MIXCHAN_TREBLE]=loudness_save_treble;
 }

 if(!(aui->card_infobits&AUINFOS_CARDINFOBIT_HWTONE)){
  calculate_eqgain(aui);
  return;
 }
 AU_setmixer_one(aui,AU_MIXCHAN_BASS,MIXER_SETMODE_ABSOLUTE,aui->card_mixer_values[AU_MIXCHAN_BASS]);
 AU_setmixer_one(aui,AU_MIXCHAN_TREBLE,MIXER_SETMODE_ABSOLUTE,aui->card_mixer_values[AU_MIXCHAN_TREBLE]);
}

// Mixer function descriptor of the loudness switch. Only a setvar callback is
// provided; it toggles the bass/treble pair (see mixer_tone_setvar_loudness).
// NOTE(review): the initialiser is positional and the struct definition is not
// visible here; the four numbers are presumably var_min,var_max,var_center,var_step
// - confirm against one_mixerfunc_info.
one_mixerfunc_info MIXER_FUNCINFO_tone_loudness={
 "MIX_TONE_LOUDNESS",
 "mxtl",
 NULL,
 MIXER_INFOBIT_SWITCH,
 0,1,0,0,
 NULL,
 NULL,
 NULL,
 NULL,
 &mixer_tone_setvar_loudness
};

//-------------------------------------------------------------------
#ifndef LAYER3_EQ

// Element-wise in-place multiply: a[i] *= b[i] for i = 0 .. len-1.
// Works in chunks of 32 elements followed by the remaining tail.
static void tone_fmul_block(float *a,float *b,unsigned int len)
{
 unsigned int chunks=len>>5; // number of complete 32-element chunks
 unsigned int tail=len&31;   // elements left after the last complete chunk
 unsigned int k;

 for(;chunks;chunks--){
  for(k=0;k<32;k++)
   a[k]*=b[k];
  a+=32;
  b+=32;
 }

 for(k=0;k<tail;k++)
  a[k]*=b[k];
}

// Element-wise in-place multiply with the second operand read backwards:
// a[i] *= b[-i] for i = 0 .. len-1 (b points at the LAST element to use).
// Works in chunks of 32 elements followed by the remaining tail.
static void tone_fmul_block_brev(float *a,float *b,unsigned int len)
{
 unsigned int chunks=len>>5; // number of complete 32-element chunks
 unsigned int tail=len&31;   // elements left after the last complete chunk
 unsigned int k;

 for(;chunks;chunks--){
  for(k=0;k<32;k++)
   a[k]*=*(b-k);
  a+=32;
  b-=32;
 }

 for(k=0;k<tail;k++)
  a[k]*=*(b-k);
}

static float *calculate_window(int size)
{
 int i;
 float *ret=malloc(size*sizeof(*ret));

 if(ret){
  for(i=0;i<size;i++){
   double x=((float)i+0.5F)/(float)size*(M_PI/2.0F);
   x=sin(x);
   x*=x;
   x*=(M_PI/2.0F);
   x=sin(x);
   ret[i]=x;
  }
 }
 return(ret);
}

// Applies the symmetric window to one block in place: the first half of dp is
// multiplied by window[0 .. n-1], the second half by the same table read
// backwards (window[n-1 .. 0]), where n = blocksize/2.
static void apply_window(float *dp,float *window,unsigned long blocksize)
{
 const unsigned long half_len=blocksize>>1;
 float *second_half=dp+half_len;
 float *window_last=window+half_len-1;

 tone_fmul_block(dp,window,half_len);
 tone_fmul_block_brev(second_half,window_last,half_len);
}

#endif

//----------------------------------------------------------------------
//from dec_ogg\mdct.c (using asm routines)
#ifndef LAYER3_EQ
#ifdef TONE_OWN_MDCT

static float cPI1_8=.92387953251128675613F;
static float cPI2_8=.70710678118654752441F;
static float cPI3_8=.38268343236508977175F;
static float half=0.5f;

void mb16_0(void);
void mb16_1(void);
void mb8_1(void);
void mb8_2(void);

// 16-point butterfly, in place on x[0..15] (the asm addresses byte offsets
// 0..60 from eax). Implemented as four Watcom "#pragma aux" inline fragments:
//  mb16_0, mb16_1 : the 16-point stage; mb16_0 pushes cPI2_8 on the x87 stack
//                   and leaves it there, mb16_1 uses it and pops it with its
//                   final operand-less "fmul" - the two must stay paired and
//                   in this order
//  mb8_1, mb8_2   : 8-point stages on x[0..7] and x[8..15]
// NOTE(review): the fragments have no "parm" clause and read x straight from
// eax - presumably this relies on the Watcom register calling convention
// (first argument in eax) and on eax being untouched before the calls; confirm.
static void mdct_butterfly_16(float *x)
{
#pragma aux mb16_0=\
 "fld  dword ptr cPI2_8"\
"fld  dword ptr  4[eax]"\
 "fld  dword ptr   [eax]"\
 "fld  dword ptr 36[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  4[eax]"\
 "fld  dword ptr 32[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr   [eax]"\
 "fstp dword ptr 32[eax]"\
 "fstp dword ptr 36[eax]"\
"fld  st(1)"\
 "fadd st,st(1)"\
 "fmul st,st(3)"\
 "fstp dword ptr   [eax]"\
 "fsub"\
 "fmul st,st(1)"\
 "fstp dword ptr  4[eax]"\
"fld  dword ptr 12[eax]"\
 "fld  dword ptr 40[eax]"\
 "fld  dword ptr 44[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 12[eax]"\
 "fld  dword ptr  8[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 40[eax]"\
 "fstp dword ptr 40[eax]"\
 "fstp dword ptr 44[eax]"\
 "fstp dword ptr 12[eax]"\
 "fstp dword ptr  8[eax]"\
 modify[];

#pragma aux mb16_1=\
"fld  dword ptr 48[eax]"\
 "fld  dword ptr 52[eax]"\
 "fld  dword ptr 16[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 48[eax]"\
 "fld  dword ptr 20[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 52[eax]"\
 "fstp dword ptr 52[eax]"\
 "fstp dword ptr 48[eax]"\
"fld  st(1)"\
 "fadd st,st(1)"\
 "fmul st,st(3)"\
 "fstp dword ptr 20[eax]"\
 "fsub"\
 "fmul"\
 "fstp dword ptr 16[eax]"\
"fld  dword ptr 56[eax]"\
 "fld  dword ptr 60[eax]"\
 "fld  dword ptr 24[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 56[eax]"\
 "fld  dword ptr 28[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 60[eax]"\
 "fstp dword ptr 60[eax]"\
 "fstp dword ptr 56[eax]"\
 "fstp dword ptr 28[eax]"\
 "fstp dword ptr 24[eax]"\
 modify[];
 mb16_0();
 mb16_1();

#pragma aux mb8_1=\
 "fld  dword ptr 16[eax]"\
 "fld  dword ptr 24[eax]"\
 "fld  dword ptr  8[eax]"\
 "fsub st(1),st"\
 "fadd dword ptr 24[eax]"\
 "fld  dword ptr   [eax]"\
 "fsub st(3),st"\
 "fadd dword ptr 16[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 24[eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr 16[eax]"\
 "fld  dword ptr 20[eax]"\
 "fsub dword ptr  4[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr   [eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr  8[eax]"\
 "fld  dword ptr 28[eax]"\
 "fld  dword ptr 12[eax]"\
 "fsub st(1),st"\
 "fadd dword ptr 28[eax]"\
 "fld  dword ptr 20[eax]"\
 "fadd dword ptr  4[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 28[eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr 20[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 12[eax]"\
 "fsubrp st(1),st"\
 "fstp dword ptr  4[eax]"\
 modify[];

#pragma aux mb8_2=\
 "fld  dword ptr 48[eax]"\
 "fld  dword ptr 56[eax]"\
 "fld  dword ptr 40[eax]"\
 "fsub st(1),st"\
 "fadd dword ptr 56[eax]"\
 "fld  dword ptr 32[eax]"\
 "fsub st(3),st"\
 "fadd dword ptr 48[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 56[eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr 48[eax]"\
 "fld  dword ptr 52[eax]"\
 "fsub dword ptr 36[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 32[eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr 40[eax]"\
 "fld  dword ptr 60[eax]"\
 "fld  dword ptr 44[eax]"\
 "fsub st(1),st"\
 "fadd dword ptr 60[eax]"\
 "fld  dword ptr 52[eax]"\
 "fadd dword ptr 36[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 60[eax]"\
 "fsubp st(1),st"\
 "fstp dword ptr 52[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fstp dword ptr 44[eax]"\
 "fsubrp st(1),st"\
 "fstp dword ptr 36[eax]"\
 modify[];

 mb8_1();
 mb8_2();
}

void mb32_0(void);
void mb32_1(void);
void mb32_2(void);
void mb32_3(void);

// 32-point butterfly, in place on x[0..31] (the asm addresses byte offsets
// 0..124 from eax), followed by a 16-point butterfly on each half.
// Implemented as four Watcom "#pragma aux" inline fragments:
//  mb32_0 : pairs that need no trig factor
//  mb32_1 : pairs scaled by cPI2_8 (pushed at its start, popped by its final "fmul")
//  mb32_2 : pushes cPI1_8 and cPI3_8 on the x87 stack and leaves them there
//  mb32_3 : uses the two constants left by mb32_2 and pops them at its end
//           ("fmulp st(2),st" twice) - mb32_2/mb32_3 must stay paired and in order
// NOTE(review): the fragments have no "parm" clause and read x straight from
// eax - presumably this relies on the Watcom register calling convention
// (first argument in eax) and on eax being untouched before the calls; confirm.
static void mdct_butterfly_32(float *x)
{
#pragma aux mb32_0=\
 "fld  dword ptr 120[eax]"\
 "fld  dword ptr 124[eax]"\
 "fld  dword ptr  56[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 120[eax]"\
 "fld  dword ptr  60[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 124[eax]"\
 "fstp dword ptr 124[eax]"\
 "fstp dword ptr 120[eax]"\
 "fstp dword ptr  60[eax]"\
 "fstp dword ptr  56[eax]"\
  "fld  dword ptr  88[eax]"\
 "fld  dword ptr  28[eax]"\
 "fld  dword ptr  24[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  88[eax]"\
 "fld  dword ptr  92[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  28[eax]"\
 "fstp dword ptr  92[eax]"\
 "fstp dword ptr  88[eax]"\
 "fstp dword ptr  24[eax]"\
 "fstp dword ptr  28[eax]"\
 modify[];
 mb32_0();

#pragma aux mb32_1=\
 "fld  dword ptr cPI2_8"\
 "fld  dword ptr 104[eax]"\
 "fld  dword ptr 108[eax]"\
 "fld  dword ptr  40[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 104[eax]"\
 "fld  dword ptr  44[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 108[eax]"\
 "fstp dword ptr 108[eax]"\
 "fstp dword ptr 104[eax]"\
 "fld st(1)"\
 "fsub st,st(1)"\
 "fmul st,st(3)"\
 "fstp dword ptr  40[eax]"\
 "fadd"\
 "fmul st,st(1)"\
 "fstp dword ptr  44[eax]"\
 "fld  dword ptr   8[eax]"\
 "fld  dword ptr  12[eax]"\
 "fld  dword ptr  72[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr   8[eax]"\
 "fld  dword ptr  76[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  12[eax]"\
 "fstp dword ptr  76[eax]"\
 "fstp dword ptr  72[eax]"\
 "fld  st(1)"\
 "fadd st,st(1)"\
 "fmul st,st(3)"\
 "fstp dword ptr   8[eax]"\
 "fsubr"\
 "fmul"\
 "fstp dword ptr  12[eax]"\
 modify[];
 mb32_1();

#pragma aux mb32_2=\
 "fld  dword ptr cPI1_8"\
 "fld  dword ptr cPI3_8"\
 "fld  dword ptr 112[eax]"\
 "fld  dword ptr 116[eax]"\
 "fld  dword ptr  48[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 112[eax]"\
 "fld  dword ptr  52[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 116[eax]"\
 "fstp dword ptr 116[eax]"\
 "fstp dword ptr 112[eax]"\
 "fld  st(1)"\
 "fmul st,st(4)"\
 "fld  st(1)"\
 "fmul st,st(4)"\
 "fsub"\
 "fstp dword ptr  48[eax]"\
 "fmul st,st(3)"\
 "fxch"\
 "fmul st,st(2)"\
 "fadd"\
 "fstp dword ptr  52[eax]"\
"fld  dword ptr  96[eax]"\
 "fld  dword ptr 100[eax]"\
 "fld  dword ptr  32[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  96[eax]"\
 "fld  dword ptr  36[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr 100[eax]"\
 "fstp dword ptr 100[eax]"\
 "fstp dword ptr  96[eax]"\
 "fld  st(1)"\
 "fmul st,st(3)"\
 "fld  st(1)"\
 "fmul st,st(5)"\
 "fsub"\
 "fstp dword ptr  32[eax]"\
 "fmul st,st(2)"\
 "fxch"\
 "fmul st,st(3)"\
 "fadd"\
 "fstp dword ptr  36[eax]"\
 modify[];
 mb32_2();

#pragma aux mb32_3=\
 "fld  dword ptr  16[eax]"\
 "fld  dword ptr  20[eax]"\
 "fld  dword ptr  80[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  16[eax]"\
 "fld  dword ptr  84[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr  20[eax]"\
 "fstp dword ptr  84[eax]"\
 "fstp dword ptr  80[eax]"\
 "fld  st"\
 "fmul st,st(3)"\
 "fld  st(2)"\
 "fmul st,st(5)"\
 "fsub"\
 "fstp dword ptr  20[eax]"\
 "fmul st,st(3)"\
 "fxch"\
 "fmul st,st(2)"\
 "fadd"\
 "fstp dword ptr  16[eax]"\
"fld  dword ptr    [eax]"\
 "fld  dword ptr   4[eax]"\
 "fld  dword ptr  64[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr    [eax]"\
 "fld  dword ptr  68[eax]"\
 "fsub st(2),st"\
 "fadd dword ptr   4[eax]"\
 "fstp dword ptr  68[eax]"\
 "fstp dword ptr  64[eax]"\
 "fld  st"\
 "fmul st,st(4)"\
 "fld  st(2)"\
 "fmul st,st(4)"\
 "fsub"\
 "fstp dword ptr   4[eax]"\
 "fmulp st(2),st"\
 "fmulp st(2),st"\
 "fadd"\
 "fstp dword ptr    [eax]"\
 modify[];
 mb32_3();

 mdct_butterfly_16(x);
 mdct_butterfly_16(x+16);
}

void mbf_asm(void);

// First (widest) butterfly stage over x[0..points-1], in place.
// The asm walks two pointers downwards in steps of 8 floats: edi from
// x+points and edx from x+points/2. For each pair the sum is stored in the
// upper half and the difference, rotated by a trig pair from T, in the lower
// half. T (eax) advances by 16 floats (64 bytes) per loop pass and the four
// pairs of one pass use the trig entries at float offsets 0,4,8,12 of that group.
// The loop runs points/16 times (ebx = points*2 >> 5).
// NOTE(review): the fragment has no "parm" clause; it presumably relies on the
// Watcom register calling convention delivering T in eax, x in edx and points
// in ebx - confirm. points must be a non-zero multiple of 16 for the
// dec/jnz loop to terminate as intended.
static void mdct_butterfly_first(float *T,float *x,int points)
{
#pragma aux mbf_asm=\
 "mov edi,ebx"\
 "shl edi,2"\
 "add edi,edx"\
 "shl ebx,1"\
 "add edx,ebx"\
 "shr ebx,5"\
 "mbfback1:"\
  "sub edi,32"\
  "sub edx,32"\
  "fld  dword ptr 24[edi]"\
  "fld  dword ptr 28[edi]"\
  "fld  dword ptr 24[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 24[edi]"\
  "fld  dword ptr 28[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 28[edi]"\
  "fstp dword ptr 28[edi]"\
  "fstp dword ptr 24[edi]"\
  "fld  dword ptr  4[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr   [eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr  4[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr 24[edx]"\
  "fsubr"\
  "fstp dword ptr 28[edx]"\
 "fld  dword ptr 16[edi]"\
  "fld  dword ptr 20[edi]"\
  "fld  dword ptr 16[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 16[edi]"\
  "fld  dword ptr 20[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 20[edi]"\
  "fstp dword ptr 20[edi]"\
  "fstp dword ptr 16[edi]"\
  "fld  dword ptr 20[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr 16[eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr 20[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr 16[edx]"\
  "fsubr"\
  "fstp dword ptr 20[edx]"\
 "fld  dword ptr  8[edi]"\
  "fld  dword ptr 12[edi]"\
  "fld  dword ptr  8[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr  8[edi]"\
  "fld  dword ptr 12[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 12[edi]"\
  "fstp dword ptr 12[edi]"\
  "fstp dword ptr  8[edi]"\
  "fld  dword ptr 36[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr 32[eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr 36[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr  8[edx]"\
  "fsubr"\
  "fstp dword ptr 12[edx]"\
 "fld  dword ptr   [edi]"\
  "fld  dword ptr  4[edi]"\
  "fld  dword ptr   [edx]"\
  "fsub st(2),st"\
  "fadd dword ptr   [edi]"\
  "fld  dword ptr  4[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr  4[edi]"\
  "fstp dword ptr  4[edi]"\
  "fstp dword ptr   [edi]"\
  "fld  dword ptr 52[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr 48[eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr 52[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr   [edx]"\
  "fsubr"\
  "add eax,64"\
  "dec ebx"\
  "fstp dword ptr  4[edx]"\
 "jnz mbfback1"\
 modify [eax ebx edx edi];
 mbf_asm();
}

void mbg_asm(void);

// Generic butterfly stage over x[0..points-1], in place.
// Same structure as mdct_butterfly_first (edi walks down from x+points, edx
// from x+points/2, 8 floats per loop pass, points/16 passes), but the trig
// pointer T (eax) is advanced by trigint floats (ecx = trigint*4 bytes) after
// every pair instead of using fixed offsets.
// NOTE(review): the fragment has no "parm" clause; it presumably relies on the
// Watcom register calling convention delivering T in eax, x in edx, points in
// ebx and trigint in ecx - confirm. points must be a non-zero multiple of 16
// for the dec/jnz loop to terminate as intended.
static void mdct_butterfly_generic(float *T,float *x,int points,int trigint)
{
#pragma aux mbg_asm=\
 "mov edi,ebx"\
 "shl edi,2"\
 "add edi,edx"\
 "shl ebx,1"\
 "add edx,ebx"\
 "shr ebx,5"\
 "shl ecx,2"\
 "mbgback1:"\
  "sub edi,32"\
  "sub edx,32"\
  "fld  dword ptr 24[edi]"\
  "fld  dword ptr 28[edi]"\
  "fld  dword ptr 24[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 24[edi]"\
  "fld  dword ptr 28[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 28[edi]"\
  "fstp dword ptr 28[edi]"\
  "fstp dword ptr 24[edi]"\
  "fld  dword ptr  4[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr   [eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr  4[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr 24[edx]"\
  "fsubr"\
  "add eax,ecx"\
  "fstp dword ptr 28[edx]"\
 "fld  dword ptr 16[edi]"\
  "fld  dword ptr 20[edi]"\
  "fld  dword ptr 16[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 16[edi]"\
  "fld  dword ptr 20[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 20[edi]"\
  "fstp dword ptr 20[edi]"\
  "fstp dword ptr 16[edi]"\
  "fld  dword ptr  4[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr   [eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr  4[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr 16[edx]"\
  "fsubr"\
  "add eax,ecx"\
  "fstp dword ptr 20[edx]"\
 "fld  dword ptr  8[edi]"\
  "fld  dword ptr 12[edi]"\
  "fld  dword ptr  8[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr  8[edi]"\
  "fld  dword ptr 12[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr 12[edi]"\
  "fstp dword ptr 12[edi]"\
  "fstp dword ptr  8[edi]"\
  "fld  dword ptr  4[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr   [eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr  4[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr  8[edx]"\
  "fsubr"\
  "add eax,ecx"\
  "fstp dword ptr 12[edx]"\
 "fld  dword ptr   [edi]"\
  "fld  dword ptr  4[edi]"\
  "fld  dword ptr   [edx]"\
  "fsub st(2),st"\
  "fadd dword ptr   [edi]"\
  "fld  dword ptr  4[edx]"\
  "fsub st(2),st"\
  "fadd dword ptr  4[edi]"\
  "fstp dword ptr  4[edi]"\
  "fstp dword ptr   [edi]"\
  "fld  dword ptr  4[eax]"\
  "fmul st,st(1)"\
  "fld  dword ptr   [eax]"\
  "fmul st(2),st"\
  "fmul st,st(3)"\
  "fadd"\
  "fld  dword ptr  4[eax]"\
  "fmulp st(3),st"\
  "fstp dword ptr   [edx]"\
  "fsubr"\
  "add eax,ecx"\
  "dec ebx"\
  "fstp dword ptr  4[edx]"\
 "jnz mbgback1"\
 modify [eax ebx ecx edx edi];
 mbg_asm();
}

void mbr_asm(void);

// Bit-reverse / post-twiddle step of the MDCT.
// Reads pairs from the upper half of x (base x+n/2, kept in x_save) at the
// byte offsets stored in the bit[] table, combines them with the trig entries
// starting at T+n and the constant 0.5, and writes the results into the lower
// half of x from both ends: eax moves up from x, edx moves down from x+n/2,
// 4 floats each per loop pass, until the two pointers meet.
// bit[] must hold BYTE offsets (tone_mdct_init pre-shifts them by 2 under TONE_ASM).
// NOTE(review): the fragment has no "parm" clause; it presumably relies on the
// Watcom register calling convention delivering x in eax, n in edx, T in ebx
// and bit in ecx - confirm.
static void mdct_bitreverse(float *x,int n,float *T,int *bit)
{
 // x+n/2, stored by the asm so the base can be reloaded into edi/esi each pass
 float *x_save;

#pragma aux mbr_asm=\
 "shl edx,2"\
 "add ebx,edx"\
 "shr edx,1"\
 "add edx,eax"\
 "mov dword ptr x_save,edx"\
 "fld dword ptr half"\
 "mbrback1:"\
  "mov edi,dword ptr x_save"\
  "mov esi,edi"\
  "add edi,dword ptr  [ecx]"\
  "add esi,dword ptr 4[ecx]"\
  "fld  dword ptr  4[edi]"\
  "fsub dword ptr  4[esi]"\
  "fld  dword ptr   [edi]"\
  "fadd dword ptr   [esi]"\
  "fld  dword ptr   [ebx]"\
  "fmul st,st(1)"\
  "fld  dword ptr  4[ebx]"\
  "fmul st,st(3)"\
  "fadd"\
  "fxch st(2)"\
  "fmul dword ptr   [ebx]"\
  "fld  dword ptr  4[ebx]"\
  "fmulp st(2),st"\
  "sub edx,16"\
  "fsub"\
  "fld  dword ptr   [edi]"\
  "fsub dword ptr   [esi]"\
  "fmul st,st(3)"\
  "fld  dword ptr  4[edi]"\
  "fadd dword ptr  4[esi]"\
  "fmul st,st(4)"\
  "fld  st(3)"\
  "fadd st,st(1)"\
  "fstp dword ptr   [eax]"\
  "fsubrp st(3),st"\
  "fld  st"\
  "fadd st,st(2)"\
  "mov edi,dword ptr x_save"\
  "mov esi,edi"\
  "fstp dword ptr  4[eax]"\
  "fsub"\
  "add edi,dword ptr  8[ecx]"\
  "add esi,dword ptr 12[ecx]"\
  "fstp dword ptr 12[edx]"\
  "fstp dword ptr  8[edx]"\
 "fld  dword ptr  4[edi]"\
  "fsub dword ptr  4[esi]"\
  "fld  dword ptr   [edi]"\
  "fadd dword ptr   [esi]"\
  "fld  dword ptr  8[ebx]"\
  "fmul st,st(1)"\
  "fld  dword ptr 12[ebx]"\
  "fmul st,st(3)"\
  "fadd"\
  "fxch st(2)"\
  "fmul dword ptr  8[ebx]"\
  "fld  dword ptr 12[ebx]"\
  "fmulp st(2),st"\
  "fsub"\
  "fld  dword ptr   [edi]"\
  "fsub dword ptr   [esi]"\
  "fmul st,st(3)"\
  "fld  dword ptr  4[edi]"\
  "fadd dword ptr  4[esi]"\
  "fmul st,st(4)"\
  "fld  st(3)"\
  "fadd st,st(1)"\
  "fstp dword ptr  8[eax]"\
  "fsubrp st(3),st"\
  "fld  st"\
  "fadd st,st(2)"\
  "add ebx,16"\
  "add ecx,16"\
  "fstp dword ptr 12[eax]"\
  "fsub"\
  "add eax,16"\
  "cmp eax,edx"\
  "fstp dword ptr  4[edx]"\
  "fstp dword ptr   [edx]"\
 "jb mbrback1"\
 "fstp st"\
 modify [eax ebx ecx edx edi esi];
 mbr_asm();
}

// Runs all butterfly stages over x[0..points-1]:
//  - the first (widest) stage, if log2n-6 is positive,
//  - then generic stages on 2,4,8,... sub-blocks of points>>level samples,
//    with a trig stride of 4<<level, while further stages remain,
//  - finally a 32-point butterfly on every group of 32 samples.
static void mdct_butterflies(mdct_lookup *init,float *x,int points)
{
 float *trig=init->trig;
 int remaining=init->log2n-5;
 int level,blk;

 remaining--;
 if(remaining>0)
  mdct_butterfly_first(trig,x,points);

 level=1;
 while(--remaining>0){
  for(blk=0;blk<(1<<level);blk++)
   mdct_butterfly_generic(trig,x+(points>>level)*blk,points>>level,4<<level);
  level++;
 }

 for(blk=0;blk<points;blk+=32)
  mdct_butterfly_32(x+blk);
}

// Inverse MDCT (per the section comment, adapted from dec_ogg\mdct.c).
//  init : tables built by tone_mdct_init (n, trig, bitrev)
//  in   : n/2 input coefficients
//  out  : receives n output samples
// mixer_tone_hq calls this with in == out, so the exact read/write order of
// the loops below matters; do not reorder them.
static void tone_mdct_backward(mdct_lookup *init, float *in, float *out)
{
 int n=init->n;
 int n2=n>>1;
 int n4=n>>2;

 // pre-rotation, part 1: odd-indexed inputs, read from the top of in
 // downwards, written below out+n2+n4 downwards
 float *iX = in+n2-7;
 float *oX = out+n2+n4;
 float *T  = init->trig+n4;

 do{
  oX         -= 4;
  oX[0]       = (-iX[2] * T[3] - iX[0]  * T[2]);
  oX[1]       =  (iX[0] * T[3] - iX[2]  * T[2]);
  oX[2]       = (-iX[6] * T[1] - iX[4]  * T[0]);
  oX[3]       =  (iX[4] * T[1] - iX[6]  * T[0]);
  iX         -= 8;
  T          += 4;
 }while(iX>=in);

 // pre-rotation, part 2: even-indexed inputs, written from out+n2+n4 upwards
 iX            = in+n2-8;
 oX            = out+n2+n4;
 T             = init->trig+n4;

 do{
  T          -= 4;
  oX[0]       =   (iX[4] * T[3] + iX[6] * T[2]);
  oX[1]       =   (iX[4] * T[2] - iX[6] * T[3]);
  oX[2]       =   (iX[0] * T[1] + iX[2] * T[0]);
  oX[3]       =   (iX[0] * T[0] - iX[2] * T[1]);
  iX         -= 8;
  oX         += 4;
 }while(iX>=in);

 // butterflies on the upper half, then bit-reverse into the lower half
 mdct_butterflies(init,out+n2,n2);
 mdct_bitreverse(out,init->n,init->trig,init->bitrev);

 {
  // post-rotation: lower half -> two runs growing outwards from out+n2+n4
  float *oX1=out+n2+n4;
  float *oX2=out+n2+n4;
  float *iX =out;
  T             =init->trig+n2;

  do{
   oX1-=4;

   oX1[3]  =   (iX[0] * T[1] - iX[1] * T[0]);
   oX2[0]  = - (iX[0] * T[0] + iX[1] * T[1]);

   oX1[2]  =   (iX[2] * T[3] - iX[3] * T[2]);
   oX2[1]  = - (iX[2] * T[2] + iX[3] * T[3]);

   oX1[1]  =   (iX[4] * T[5] - iX[5] * T[4]);
   oX2[2]  = - (iX[4] * T[4] + iX[5] * T[5]);

   oX1[0]  =   (iX[6] * T[7] - iX[7] * T[6]);
   oX2[3]  = - (iX[6] * T[6] + iX[7] * T[7]);

   oX2+=4;
   iX    +=   8;
   T     +=   8;
  }while(iX<oX1);

  // unfold: fill out[0..n2) from out[n2..n2+n4), mirrored around out+n4,
  // with the upper copy negated
  iX=out+n2+n4;
  oX1=out+n4;
  oX2=oX1;

  do{
   oX1-=4;
   iX-=4;

   oX2[0] = -(oX1[3] = iX[3]);
   oX2[1] = -(oX1[2] = iX[2]);
   oX2[2] = -(oX1[1] = iX[1]);
   oX2[3] = -(oX1[0] = iX[0]);

   oX2+=4;
  }while(oX2<iX);

  // unfold: fill out[n2..n2+n4) with out[n2+n4..n) in reversed order
  iX=out+n2+n4;
  oX1=out+n2+n4;
  oX2=out+n2;

  do{
   oX1-=4;
   oX1[0]= iX[3];
   oX1[1]= iX[2];
   oX1[2]= iX[1];
   oX1[3]= iX[0];
   iX+=4;
  }while(oX1>oX2);
 }
}

// Forward MDCT (per the section comment, adapted from dec_ogg\mdct.c).
//  init : tables built by tone_mdct_init (n, trig, bitrev, scale, forward_buffer)
//  in   : n input samples
//  out  : receives n/2 coefficients, each multiplied by init->scale
// All intermediate data lives in init->forward_buffer (n floats), so out is
// only written after in has been fully read; mixer_tone_hq relies on this and
// passes in == out.
static void tone_mdct_forward(mdct_lookup *init, float *in, float *out)
{
  int n=init->n;
  int n2=n>>1;
  int n4=n>>2;
  int n8=n>>3;
  float *w=init->forward_buffer;
  float *w2=w+n2;  // upper half of the workspace: folded + rotated input

  float  r0;
  float  r1;
  float *x0=in+n2+n4;
  float *x1=x0+1;
  float *T=init->trig+n2;

  int i=0;

  // fold + pre-rotate, section 1: x0 walks down from in+n2+n4, x1 up from it
  for(i=0;i<n8;i+=2){
    x0 -=4;
    T-=2;
    r0= x0[2] + x1[0];
    r1= x0[0] + x1[2];
    w2[i]=   (r1*T[1] + r0*T[0]);
    w2[i+1]= (r1*T[0] - r0*T[1]);
    x1 +=4;
  }

  // section 2: x1 restarts at the beginning of the input
  x1=in+1;

  for(;i<n2-n8;i+=2){
    T-=2;
    x0 -=4;
    r0= x0[2] - x1[0];
    r1= x0[0] - x1[2];
    w2[i]=   (r1*T[1] + r0*T[0]);
    w2[i+1]= (r1*T[0] - r0*T[1]);
    x1 +=4;
  }

  // section 3: x0 restarts at the end of the input
  x0=in+n;

  for(;i<n2;i+=2){
    T-=2;
    x0 -=4;
    r0= -x0[2] - x1[0];
    r1= -x0[0] - x1[2];
    w2[i]=   (r1*T[1] + r0*T[0]);
    w2[i+1]= (r1*T[0] - r0*T[1]);
    x1 +=4;
  }


  // butterflies on the upper half, then bit-reverse into the lower half of w
  mdct_butterflies(init,w+n2,n2);
  mdct_bitreverse(w,init->n,init->trig,init->bitrev);

  // post-rotate and scale: out is filled from both ends towards the middle
  T=init->trig+n2;
  x0=out+n2;

  for(i=0;i<n4;i++){
    x0--;
    out[i] =((w[0]*T[0]+w[1]*T[1])*init->scale);
    x0[0]  =((w[0]*T[1]-w[1]*T[0])*init->scale);
    w+=2;
    T+=2;
  }
}

// Builds the lookup tables for an MDCT of block size n.
//  lookup : receives n, log2n, the trig table (n+n/4 floats), the bit-reverse
//           table (n/4 ints), the forward scale (4/n) and an n-float work
//           buffer for tone_mdct_forward; release with tone_mdct_clear
//  n      : block size (TONE_BLOCKSIZE in this file)
// If any allocation fails nothing is kept: the partial allocations are freed
// and all fields of lookup are zeroed / NULL, so tone_mdct_clear stays safe.
static void tone_mdct_init(mdct_lookup *lookup,unsigned int n)
{
 int   *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
 float *T=_ogg_malloc(sizeof(*T)*(n+n/4));
 float *fwdbuf=_ogg_malloc(n*sizeof(float));

 int i;
 int n1=n;
 int n2=n>>1;
 int log2n;
 // full double precision (the former float-suffixed literal was only accurate to ~7 digits)
 double pi=3.14159265358979323846;
 double mpin1=pi/n1,mpin2=pi/(n1<<1);

 // never write the tables through a NULL pointer
 if(!bitrev || !T || !fwdbuf){
  if(bitrev) _ogg_free(bitrev);
  if(T) _ogg_free(T);
  if(fwdbuf) _ogg_free(fwdbuf);
  lookup->n=0;
  lookup->log2n=0;
  lookup->trig=NULL;
  lookup->bitrev=NULL;
  lookup->scale=0.0f;
  lookup->forward_buffer=NULL;
  return;
 }

 log2n=lookup->log2n=floor(log((float)n1)/log(2.0F)+0.5f);

 lookup->n=n;
 lookup->trig=T;
 lookup->bitrev=bitrev;

 // trig table, part 1: two interleaved cos/sin sequences (T[0..n2) and T[n2..n))
 for(i=0;i<n2;i+=2){
  T[i]     =( cos(mpin1*(i<<1)) );
  T[i+1]   =(-sin(mpin1*(i<<1)) );
  T[n2+i]  =( cos(mpin2*(i+1) ) );
  T[n2+i+1]=( sin(mpin2*(i+1) ) );
 }

 // trig table, part 2: half-amplitude entries read by mdct_bitreverse (T[n..n+n/4))
 for(i=0;i<(n1>>2);i+=2){
  T[n+i]  =( cos(mpin1*((i<<1)+2))*0.5F);
  T[n+i+1]=(-sin(mpin1*((i<<1)+2))*0.5F);
 }

 // bit-reverse table: two entries per index
 {
  int mask=(1<<(log2n-1))-1,j;
  int msb=1<<(log2n-2);
  for(i=0;i<(n1>>3);i++){
   int acc=0;
   for(j=0;msb>>j;j++)
    if((msb>>j)&i)
     acc|=1<<j;
#ifdef TONE_ASM
   // stored as byte offsets: the asm adds them to a pointer directly
   bitrev[i*2]=(((~acc)&mask)-1)<<2;  // to avoid shl
   bitrev[i*2+1]=acc<<2;
#else
   bitrev[i*2]=((~acc)&mask)-1;
   bitrev[i*2+1]=acc;
#endif
  }
 }

 lookup->scale=(4.f/n);
 lookup->forward_buffer=fwdbuf;
}

static void tone_mdct_clear(mdct_lookup *l)
{
 if(l){
  if(l->trig) _ogg_free(l->trig);
  if(l->bitrev) _ogg_free(l->bitrev);
  if(l->forward_buffer) _ogg_free(l->forward_buffer);
  pds_memset(l,0,sizeof(*l));
 }
}

#endif // ifdef TONE_OWN_MDCT

#endif // ifndef LAYER3_EQ

// Translates the BASS and TREBLE mixer values of the audio device into the
// band settings expected by EQSet() (50 = neutral) and applies them:
//  - the two lowest bands take the bass value unchanged,
//  - the three highest bands take the treble value at 1/4, 1/2 and full
//    deviation from 50,
//  - every band in between stays neutral.
static void calculate_eqgain(struct audio_info *aui)
{
 int bands[TONE_EQ_BANDS];
 int b;

 // treble side, strongest band first
 bands[TONE_EQ_BANDS-1]=aui->card_mixer_values[AU_MIXCHAN_TREBLE];
 bands[TONE_EQ_BANDS-2]=(aui->card_mixer_values[AU_MIXCHAN_TREBLE]-50)/2+50;
 bands[TONE_EQ_BANDS-3]=(aui->card_mixer_values[AU_MIXCHAN_TREBLE]-50)/4+50;

 // untouched middle bands
 for(b=TONE_EQ_BANDS-4;b>=2;b--)
  bands[b]=50;

 // bass side
 bands[1]=bands[0]=aui->card_mixer_values[AU_MIXCHAN_BASS];
 //bands[1]=(aui->card_mixer_values[AU_MIXCHAN_BASS]-50)/2+50;

 EQSet(bands);
}


// MDCT bands [0,FIR_BANDS) are described by FIR coefficients (EQ_Filter),
// the remaining bands by one scalar gain each (EQ_gain).
#define FIR_BANDS 8
// EQ_Filter keeps taps 0..FIR_DELAY per band (one side of the filter).
#define FIR_DELAY 6
// Tap count of the full two-sided filter; EQSet() uses it for its window.
#define EQ_TAP      (FIR_DELAY*2+1)

// EQSet() scales (setting-50) by EQdB/50 before converting with 10^(x/20),
// so a setting of 0 or 100 corresponds to -EQdB or +EQdB.
#define EQdB             30
// Divisor used to convert eq_freqs[] into positions of the gain curve
// (presumably the top frequency in Hz for 44.1 kHz material -- confirm).
#define EQ_FREQ_RANGE    22050
// Gain curve points per MDCT band, and the length of the whole curve.
#define EQSET_BLOCKSIZE  16
#define EQSET_RESOLUTION (TONE_MDCTBANDS*EQSET_BLOCKSIZE)

// Frequency of each equaliser band and, filled in by EQSet(), its position
// on the gain curve.
static long eq_freqs[TONE_EQ_BANDS]={44,87,173,345,690,1379,2757,5513,11025,16538};
static long eq_setpos[TONE_EQ_BANDS];

// Results of EQSet(); not static, so they are visible outside this file.
float EQ_gain[TONE_MDCTBANDS-FIR_BANDS];
float EQ_Filter[FIR_BANDS][FIR_DELAY+1];
//long eq_fir_delays[TONE_MDCTBANDS]={36,11,11,5,5,5,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};

// Work areas of EQSet(): the gain curve, the per-MDCT-band sums of the
// curve, and the per-equaliser-band settings in dB.
static float set[EQSET_RESOLUTION];
static float mid[TONE_MDCTBANDS],power[TONE_EQ_BANDS];

// Rebuilds the equaliser tables from one setting per eq_freqs[] band.
//  data[]: band settings; 50 is neutral, (data-50) is scaled by EQdB/50 to dB.
// Writes:
//  EQ_gain[]    one linear gain for each MDCT band FIR_BANDS..TONE_MDCTBANDS-1
//  EQ_Filter[][] windowed cosine-series coefficients for MDCT bands 0..FIR_BANDS-1
// and uses eq_setpos[], power[], set[] and mid[] as work areas.
static void EQSet(int data[TONE_EQ_BANDS])
{
 int i,n,k;

 // settings -> dB, band frequencies -> positions on the gain curve
 for(i=0;i<TONE_EQ_BANDS;i++){
  power[i] = (float)((data[i]-50)*EQdB)/50.f;
  eq_setpos[i]=eq_freqs[i]*EQSET_RESOLUTION/EQ_FREQ_RANGE;
 }

 // Gain curve in dB: flat up to the first band, linear interpolation between
 // neighbouring bands, flat after the last band. Every write is bounded by
 // EQSET_RESOLUTION so that an edited eq_freqs[] table cannot overrun set[].
 n=0;
 while(n<eq_setpos[0] && n<EQSET_RESOLUTION)
  set[n++]=power[0];
 for(i=1;i<TONE_EQ_BANDS;i++){
  int bw=eq_setpos[i]-eq_setpos[i-1];
  if(bw==1){
   if(n<EQSET_RESOLUTION)
    set[n++]=power[i];
  }else{
   for(k=0;(k<bw) && (n<EQSET_RESOLUTION);k++)
    set[n++]=(power[i-1]*(bw-k)+power[i]*k)/(float)bw;
  }
 }
 for( ;n<EQSET_RESOLUTION;n++)
  set[n]=power[TONE_EQ_BANDS-1];

 // dB -> linear
 for(k=0;k<EQSET_RESOLUTION;k++)
  set[k]=(float)(pow(10,set[k]/20.0f));

 // Upper MDCT bands: square root of the mean curve value inside the band.
 pds_memset(mid,0,TONE_MDCTBANDS*sizeof(float));
 for(k=FIR_BANDS*EQSET_BLOCKSIZE; k<EQSET_RESOLUTION; k++)
  mid[k/EQSET_BLOCKSIZE] += set[k];

 for(n=FIR_BANDS; n<TONE_MDCTBANDS; n++)
  EQ_gain[n-FIR_BANDS] = (sqrt(mid[n]/(float)EQSET_BLOCKSIZE));

 // Lower MDCT bands: cosine series of the band's curve segment, shaped by a
 // cos^2 window over EQ_TAP.
 for(i=0; i<FIR_BANDS; i++){
  // first curve point of this band (was a hard-coded i<<4, i.e. 16 points per band)
  const int base=i*EQSET_BLOCKSIZE;
  for(n=0; n<=FIR_DELAY; n++){
   double xn=0.0,win;
   for(k=0; k<EQSET_BLOCKSIZE; k++){
    // odd bands read their segment back to front
    int idx = (i&1)? (base+(EQSET_BLOCKSIZE-1)-k):(base+k);
    xn+= (sqrt(set[idx])*cos(n*(k+0.5)*M_PI/EQSET_BLOCKSIZE));
   }
   xn=xn/(double)EQSET_BLOCKSIZE;
   //win=cos(n*M_PI/2.0/(double)(FIR_DELAY+1));
   win=cos(n*M_PI/(double)(EQ_TAP));
   win*=win;
   EQ_Filter[i][n]=xn*win;
  }
 }
}

#ifndef LAYER3_EQ

// Enabling TONE_MODIFY_BASS compiles the FIR path for the lowest FIR_BANDS
// bands into perform_EQ(); the author left it switched off:
//#define TONE_MODIFY_BASS 1 // this doesn't work...

// Samples per band carried over from one perform_EQ() call to the next.
#define FIR_LEN          (FIR_DELAY*3+2)
// Carried-over samples plus one new block.
#define EQ_MDCTSAVE_SIZE (FIR_LEN+EQ_BLOCK_SIZE)
// Number of perform_EQ() calls that pass before the FIR output is used.
#define EQ_SYNC_DELAY    6                 // bass turn-on delay

#ifdef TONE_MODIFY_BASS
// Per channel and band sample history, and the call counter compared with
// EQ_SYNC_DELAY (one counter shared by all channels).
static float mdct_save[PCM_MAX_CHANNELS][FIR_BANDS][EQ_MDCTSAVE_SIZE];
static unsigned int sync_counter;
#endif

static void perform_EQ(float data[TONE_MDCTBANDS][EQ_BLOCK_SIZE],unsigned int ch)
{
 int i,k;
#ifdef TONE_MODIFY_BASS
 int n;
 float *eqfi,*mdct;

 if(sync_counter<EQ_SYNC_DELAY)
  sync_counter++;

 eqfi=&EQ_Filter[0][0];
 mdct=&mdct_save[ch][0][0];

 for(i=0; i<FIR_BANDS; i++,eqfi+=FIR_DELAY+1,mdct+=EQ_MDCTSAVE_SIZE){
  for(k=0; k<EQ_BLOCK_SIZE; k++)
   mdct[FIR_LEN+k] = data[i][k];

  if(sync_counter>=EQ_SYNC_DELAY){
   float *mk=&mdct[FIR_DELAY];
   for(k=0;k<EQ_BLOCK_SIZE;k++,mk++){
    float *e=eqfi,*mnp=mk,*mnn=mk;
    float outdata=mnp[0]*e[0];
    n=FIR_DELAY;
    do{
     outdata += ( *(++mnp) + *(--mnn)) * *(++e);
    }while(--n);
    data[i][k]=outdata;
   }
  }

  for(n=0;n<FIR_LEN;n++)
   mdct[n]=mdct[EQ_BLOCK_SIZE+n];
 }
#endif

 for(i=FIR_BANDS; i<TONE_MDCTBANDS; i++){
  const float gain=EQ_gain[i-FIR_BANDS];
  if(gain!=1.0f){
   for(k=0; k<EQ_BLOCK_SIZE; k++)
    data[i][k] *= gain;
  }
 }
}

#endif
