// Lemur OLAP library (c) 2003 National Research Council of Canada by Daniel Lemire, and Owen Kaser
 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef MULTINORMALIZATIONS_H
#define MULTINORMALIZATIONS_H

#include "normalizationscommon.h"
#include "normalizations.h"
#include "slicesortingnormalization.h"
/*
 * Here the idea is to slice the cube in half and 
 * have *two* different normalizations.
 *
 * Why do this? Because FrequencySort seems to be
 * impossible to improve, so I want to extend the working
 * model. Instead of thinking about a cube having only
 * one normalization, that is d arrays, I think we can
 * go to a model where we have 2*d arrays as a normalization.
 * Of course, this won't scale forever so I can't repeat
 * this trick too often, however, the actual cost of slicing
 * the cube in half once is only (d-1) n where n is the number
 * of attribute values.
 *
 * Codewise, for now, I'm putting everything in the same file. If
 * successful, I'll split the files.
 */

/**************************
 * DualNormalization is meant 
 * to replace the vector<vector<int> > we've been using elsewhere
 * with a coupled vector<vector<int> >. Obviously, there are more
 * general settings (more than just a couple), but that's only
 * meant as a test.
 */

class DualNormalization {
  public:
    DualNormalization(const norm_type&  half1, const norm_type & half2, const int dimension,
        const int CutPoint) : mHalf1(half1), mHalf2(half2), mDimension(dimension), mCutPoint(CutPoint) {}

    // this is the paranoid version
    inline vector<int> permute(const vector<int> & indices) const {
      vector<int> permuted = uncheckedPermute(indices);
      for(uint k = 0; k < permuted.size(); ++k) assert(permuted[k] != UNMAPPED);
      return permuted;
    }
    
    inline vector<int> uncheckedPermute(const vector<int> & indices) const {
      if(indices[mDimension] < mCutPoint) {
        return PermutationUtil::permute(indices, mHalf1);
      } 
      return PermutationUtil::permute(indices, mHalf2); 

    }
    
    enum{UNMAPPED = -1};// we use this to warn us of bad normalizations		
    
    inline int getDimension() const {return mDimension;}
    inline int getCutPoint() const {return mCutPoint;}
    
  protected:
    norm_type mHalf1;
    norm_type mHalf2;
    int mDimension;
    int mCutPoint;

};

/*
 * DualHOLAPUtil is meant to be HOLAPUtil plus the methods specific to the
 * DualNormalization class above. It should eventually be merged into HOLAPUtil.
 */

template <class DT, class LDT>
class DualHOLAPUtil {
  public:
    // given a cube, some chunk geometry and a normalization, compute the HOLAP cost
    static uint64 cost(DataCube<DT,LDT> & DC, const vector<int>& ChunkShape, \
                       const DualNormalization & Normalization, const bool verbose = false);
    static void computeChunkDensityInfo(DataCube<DT,LDT> & DC,
                                        const vector<int>& ChunkShape,
                                        const DualNormalization & Normalization,
                                        vector<int>& SizeOfChunks,
                                        vector<int>& NumberOfAllocated );
 
};

//////////////////////////////////////////
//Implementation of DualHOLAPUtil follows
//////////////////////////////////////////

// I got this method by copying and pasting from HOLAPUtil, I only need
// to replace 
//      PermutationUtil::permute(Indices,Normalization); 
// by
//                 vector<int> NewValue = Normalization.permute(Indices); 
// I could probably fix this in some clever OOP way. TODO.
template <class DT, class LDT>
void DualHOLAPUtil<DT,LDT>::
computeChunkDensityInfo(DataCube<DT,LDT> & DC,
                        const vector<int>& ChunkShape,
                        const DualNormalization& Normalization,
                        vector<int>& SizeOfChunks, // output
                        vector<int>& NumberOfAllocated // output
                       ) {
    vector<int> Shape = DC.getShape();
    for(uint dim = 0; dim < Shape.size() ; ++dim) assert(Shape[dim] >= ChunkShape[dim]);// sanity check
    //
    const int NumberOfChunks = HOLAPUtil<DT,LDT>::computeNumberOfChunks(DC,ChunkShape);
    NumberOfAllocated.resize(NumberOfChunks,0);
    SizeOfChunks.resize(NumberOfChunks,0);
    vector<int> Indices(Shape.size(), 0);
    vector<int> Start(Indices);
    vector<int> ChunkBase = HOLAPUtil<DT,LDT>::precomputeChunkBase(Shape,ChunkShape);
    //
    do {
        int ChunkIndex = HOLAPUtil<DT,LDT>::getChunkIndexFast(Indices, ChunkShape, ChunkBase);
        ++SizeOfChunks[ChunkIndex];
        vector<int> NewValue = Normalization.permute(Indices); //PermutationUtil::permute(Indices,Normalization);
        if(DC.get(NewValue) != 0) ++NumberOfAllocated[ChunkIndex];
    }
    while(MathUtil::increment( Indices, Start, Shape));
}

// basically same as HOLAPUtil, except that it takes DualNormalization as an input (should
// be a template?)
template <class _DT, class _LDT>
uint64 DualHOLAPUtil<_DT,_LDT>::
cost(DataCube<_DT, _LDT> & DC,
     const vector<int>& ChunkShape,
     const DualNormalization& Normalization, const bool verbose) {

    vector<int> Shape = DC.getShape();
    for(uint dim = 0; dim < Shape.size() ; ++dim) assert(Shape[dim] >= ChunkShape[dim]);// sanity check
    //
    vector<int> SizeOfChunks;
    vector<int> NumberOfAllocated;

    computeChunkDensityInfo(DC,ChunkShape,Normalization,SizeOfChunks,NumberOfAllocated);
    return HOLAPUtil<_DT,_LDT>::cost(SizeOfChunks,NumberOfAllocated,Shape.size(),verbose);
}

////////////////////////////////////////

/*********************
 * As we learned, FrequencySort is good enough.
 * I need to run FrequencySort twice, once over
 * the "left" subcube and once over the "right"
 * subcube. To do this, I introduce a new FrequencySort
 * that just sorts over a subrange
 */



template <class _DT, class _LDT, class _CT = LessComparator<pair<double,int> > >
class SubCubeFrequencySort : public SliceSort<_DT,_LDT,_CT> {
public:
    SubCubeFrequencySort(const vector<int>& start, const vector<int>& end) : mStart(start), mEnd(end) {
      assert(mStart.size() == mEnd.size());
      /*cout << " Frequency Sort on subcube " <<endl;
      for(uint k = 0; k < mStart.size() ; ++k) cout << mStart[k]<< " "; cout << endl;
      for(uint k = 0; k < mEnd.size() ; ++k) cout << mEnd[k]<< " "; cout << endl; */
    }
    virtual SubCubeFrequencySort* clone() const { return new SubCubeFrequencySort(*this);}
    virtual ~SubCubeFrequencySort() {}
    virtual const string getTextName(void) const { return "Subcube Frequency Sorting ("+ _CT::getDescription() +")";}
    virtual norm_type computeNormalFrom(DataCube<_DT,_LDT>& DC, 
        const norm_type & initial);
    
   virtual deque<pair<double,int> > sortedFrequencyHistogram(DataCube<_DT,_LDT>& DC, const norm_type & initial,					const uint dimension); 
protected:
    virtual map<int,_LDT> partialFrequencyHistogram(DataCube<_DT,_LDT>& DC, const norm_type & initial, 
        const uint dimension);
    vector<int> mStart, mEnd;
};


template <class _DT, class _LDT, class _CT>
deque<pair<double,int> > SubCubeFrequencySort<_DT,_LDT,_CT>::sortedFrequencyHistogram(DataCube<_DT,_LDT>& DC,
    const norm_type & initial,  const uint dimension) {
        map<int,_LDT> freq = partialFrequencyHistogram(DC, initial,   dimension);
        // everything else should be cheap
        deque<pair<double,int> > freqindexpairs;
        for(int index =  mStart[dimension]; index <  mEnd[dimension]; ++index) {
            freqindexpairs.push_back(pair<_LDT,int>(freq[index], index));
        }
        sort(freqindexpairs.begin(), freqindexpairs.end(), mComparator);
        return freqindexpairs;
}
 

 
template <class _DT, class _LDT,  class _CT>
map<int,_LDT> SubCubeFrequencySort<_DT,_LDT,_CT>::
partialFrequencyHistogram(DataCube<_DT,_LDT>& DC, const norm_type & initial , const uint dimension) {
    assert(mStart.size() == mEnd.size());
    assert(mStart.size() == DC.getShape().size());
    assert(dimension < mStart.size());
    // this can be relatively expensive
    vector<int> shape = DC.getShape();
    map<int, _LDT> answer;
    for(int value = mStart[dimension] ; value < mEnd[dimension]; ++value) {
        vector<int> Bounds(mEnd);
        vector<int> Start(mStart);
        Start[dimension] = value;
        Bounds[dimension] = value + 1;
        vector<int> indices(Start);
        do {
            if(DC.get(PermutationUtil::permute( indices , initial)) != 0) ++answer[value];
        }	while(MathUtil::increment( indices, Start, Bounds));
    }
    return answer;
}

template <class _DT, class _LDT,  class _CT>
norm_type SubCubeFrequencySort<_DT,_LDT,_CT>::
computeNormalFrom( DataCube<_DT,_LDT>& DC, const norm_type & initial) { 
    norm_type answer;
    vector<int> shape = DC.getShape();
    assert(shape.size() == mEnd.size());
    assert(shape.size() == mStart.size()); 
    for(uint dim = 0; dim < shape.size(); ++dim ) {
        const deque<pair<double,int> > & freqindexpairs = sortedFrequencyHistogram(DC,initial,dim);
        vector<int> normalization(shape[dim],DualNormalization::UNMAPPED);
        int index = mStart[dim];
        for(typename deque<pair<double,int> >::const_iterator i = freqindexpairs.begin(); 
            i != freqindexpairs.end(); ++i, ++index) {
            normalization[index] = initial[dim][i->second]; //i->second;
        }
        answer.push_back(normalization);
    }
    return answer;
}
 

/////////////////////////////////////////////
/*
 * This is where the algorithmic work happens.
 * I believe that the key point here is that
 * we should split the cube at the right spot
 * and possibly pre-normalize it. Other than that,
 * we've already checked that frequency sort on
 * each half should be optimal enough so there
 * isn't much more to do with it.
 */

template <class _DT, class _LDT>
class DualNormalizationScheme {
public:
    DualNormalizationScheme() {}
    virtual DualNormalizationScheme* clone() const { return new DualNormalizationScheme(*this);}
    virtual ~DualNormalizationScheme() {}
    virtual const string getTextName(void) const {
        return "dual normalization" ;}


    virtual DualNormalization computeNormalFrom( DataCube<_DT,_LDT>& DC,
            const DualNormalization& initial){
        //cout << " Volume = " << DC.getVolume() << endl;
        // for now, this ignores the previous normalization
        const vector<int> shape = DC.getShape();
        // what we do next is obviously suboptimal
        // if you want to hack this class, hack it here!!! TODO
        //
        // We cut accross the longest dimension
        // 
        int dimension = 0;
        for(uint k = 0; k < shape.size() ; ++ k) if(shape[k] > shape[dimension]) dimension = k;
        //cout << " chosen cutoff = "<< cutoff << endl; 
        // 
        // Next, we "pre-normalize the cube
        // 
        FrequencySort<_DT,_LDT> fs;
        deque<pair<_LDT,int> > freq = fs.sortedFrequencyHistogram(DC, dimension);  
        _LDT sum = 0;
        for(deque<pair<int64,int> >::const_iterator it = freq.begin(); it != freq.end(); ++it)
            sum += it->first;
        vector<int> normalization(shape[dimension],0);
        int index = 0;
        _LDT sumsofar = 0; int cutoff = 0;
        for(typename deque<pair<_LDT,int> >::const_iterator i = freq.begin(); i != freq.end(); ++i, ++index) {
            normalization[index] = i->second;
            sumsofar += i->first; 
            if(2 * sumsofar < sum) ++cutoff;
        }
// 				int cutoff = shape[dimension]/2 ;
        if(cutoff % 2 == 1) {
         if (cutoff  < shape[dimension]) ++cutoff;
         else if(cutoff > 0) --cutoff;
        }
        if(cutoff > shape[dimension]) cutoff = shape[dimension];
 
        norm_type init = /*fs.computeNormal(DC);*/PermutationUtil::identity(shape);
        init[dimension] = normalization;
        //
        // Ok, init contains the prenormalization
        //
        // preparing the  SubCubeFrequencySort
        //
        vector<int> begin(shape.size(),0);
        vector<int> end(shape);
        end[dimension] = cutoff ;
        assert(begin.size() == end.size());
        assert(begin.size() == shape.size());
        //
        SubCubeFrequencySort<_DT,_LDT>	left(begin,end);
        begin[dimension] = cutoff;
        end = shape;
        SubCubeFrequencySort<_DT,_LDT>	right(begin,end);
        //
        // Everything is ready, we now do the computation 
        //
        norm_type half1 = left.computeNormalFrom(DC,init);
        norm_type half2 = right.computeNormalFrom(DC,init); 
        // Done!
        return DualNormalization(half1, half2, dimension, cutoff);
    }

    virtual DualNormalization computeNormal( DataCube<_DT,_LDT>& DC) {
        vector<int> shape = DC.getShape();
        norm_type Id = PermutationUtil::identity(shape);
        return computeNormalFrom( DC, DualNormalization(Id,Id,0,0));
    }
    
    
};


#endif
