/*****************************************************************
 * author Daniel Lemire 
 * March 4th 2003 
 * (c) NRC
 *
 * Based on code by OFK, 23 Feb 2003  
  ************************************************************
 *
 *															
 * Updated (March 21st 2003) : New efficiency measure. In fact, the "holap" function
 *				is no longer measuring "efficiency" but rather cost per non-zero component.
 *
 * New description by Daniel Lemire
 *
 * I rewrote almost entirely Owen Kaser's code using C++, dropping
 * the #define and generalizing somewhat the code. My version is probably 10
 * times slower, but I'm a mathematician so I don't worry about such things.
 * I use STL everywhere. That's probably a very bad idea, but it allows
 * me to be much more productive since data structures is what
 * I find the hardest to program and debug. Again, I'm not a computer scientist.
 *
 * One other important thing I added is that while I compute all permutations,
 * I only use a very small subset chosen randomly. It appears to be good enough
 * to give me a quick overview. Then what good does it do to write it in C/C++
 * if you are not going to do an exhaustive search? That's a good question but
 * I think that computing all possible permutations might be quite long in 
 * other languages so maybe it wasn't a total waste of time to do it low-level.
 *
 * Owen's  idea is very enticing. It is motivated by the following observations:
 * 
 *      1)  Normalization is not always useful. In fact, I found
 *          that in many cases it can be easily shown to be
 *          useless: no gain is possible. (Update March 23rd 2003:
 *          I think that this is mostly false. I was misled by a
 *          software bug in my Python script.)
 *
 *      2)  A somewhat interesting measure of how useful
 *          normalization is can be defined by the question "Am I
 *          certain that normalization will reduce the max.
 *          entropy to a new bound?" For example, if the max
 *          entropy is M, do I know for a fact that normalization
 *          will always lower the entropy to M-epsilon so that the
 *          new max entropy is M-epsilon? The answer is no. There
 *          can be arrays of max entropy (M) that cannot be normalized
 *          in any useful manner. (Update March 23rd 2003: this and
 *          many things below no longer make sense to me.)
 *
 * This doesn't match my intuition. My intuition is that Normalization
 * is useful (somewhat) whenever m != 1 and m != n, that is, when I 
 * consider chunks in the cube that are neither of size 1 nor of full
 * size (all of the cube). In both these cases, the impact of normalization
 * is null. Interestingly enough, that's exactly what happens when I consider
 * the Kaser measure which I call "kernel radius". I say that an array
 * is in the kernel if it is perfect (100% efficiency in storage). Then, we
 * look at how far away normalization can take us from the kernel. A
 * powerful setting for normalization should match a large radius away 
 * from the kernel.
 *
 * The whole motivating factor for this work is to lay a solid foundation
 * for heuristics. I couldn't find papers that address these issues so I think
 * that doing this easy work might actually give us an edge over the competition.
 * (Update March 23rd 2003: This is still my motivation to this day.)
 *
 * 
 * To compile this, you can do g++ -o KernelRadius KernelRadius.cpp
 * It also works if you turn optimization on (-O2) not that it speeds things
 * up by much if any.
 * 
 ************************************************************
 * Old description by Owen Kaser:
 * brute force attempt to compute entropy after all
 * row/col permutations of  01010101
 *                          10101010
 *                          01010101
 *                          10101010
 *                          01010101
 *                          10101010
 *                          01010101
 *                          10101010
 *                          01010101
 *                          10101010
 * using entropy of enumerative encoding
 *
 * I have only done elementary sanity checks on it, so
 * results are suspect.  Also, this code is rather special
 * purpose and cannot easily be modified.  The problem size
 * is at the upper limit of what can sensibly be attacked
 * by brute force on a fast PC.
 */

 /**
 *  This program is free software; you can
 *  redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation (version 2). This
 *  program is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details. You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */


#include "OwenCommon.h"
/*
* This is really ugly code. In any case, it is pointless because
* I have a simple proof that the kernel radius is always maximal.
* Just fill densely one part of the cube along one axis and then
* spread it out evenly by renormalizing only that one dimension.
* So, in effect, the code below is now obsolete.
*/
int kernelRadius() {
   const short N = 8;// just make sure it is a power of 2
   deque<deque<short> > PermutationSet;
   deque<short> StartPermut;
   // this next line takes about a second or less with N=8.
   permutations(N,StartPermut,PermutationSet);
   // convenient because easy to modify (hard-coded, but hackable)
   const bool _array[N][N] = {  {1,1,1,1,0,0,0,0},
                                {1,1,1,1,0,0,0,0},
                                {1,1,1,1,0,0,0,0},
                                {1,1,1,1,0,0,0,0},
                                {0,0,0,0,1,1,1,1},
                                {0,0,0,0,1,1,1,1},
                                {0,0,0,0,1,1,1,1},
                                {0,0,0,0,1,1,1,1}};
   if(false) const bool _array[N][N] = {{1,1,0,0,1,1,0,0},
                        {1,1,0,0,1,1,0,0},
                        {0,0,1,1,0,0,1,1},
                        {0,0,1,1,0,0,1,1},
                        {1,1,0,0,1,1,0,0},
                        {1,1,0,0,1,1,0,0},
                        {0,0,1,1,0,0,1,1},
                        {0,0,1,1,0,0,1,1}};
   /*
   * Next I'm going to copy this over into STL containers for convenience and
   * also, to make the code a bit more robust. Not that STL is very robust.
   */
   vector<vector<bool> > stlarray(N, vector<bool>(N));
   for(int k = 0; k < N; ++k) for(int l = 0; l < N; ++l) stlarray[k][l] = _array[k][l];
   const vector<vector<bool> >& array = stlarray;// this will make sure that I don't change stlarray by accident
   // the buffer is just a temporary storage component where I copy over the normalized data cube
   vector<vector<bool> > buffer(N, vector<bool>(N));
   // let the user see what you are working on
   print(array);
   // next two lines are obvious, just leave them alone unless you know where you are going
   const bool TestHOLAP = true;
   const bool TestEntropy = false;
   // next we want to check that _array was indeed in the kernel (efficiency of 1.0)
   for(short m = 2; m < N; m*=2) {
     // you expect one of those to fail depending on the choice of matrix array
     if(TestEntropy) if(entropy(array,m) != 0.0f) {
       cout <<"[warning] something is wrong with entropy for m = "<< m << " "<< entropy(array,m)<<endl;
       cout << "Want to continue?" <<endl;
       char c;
       cin >> c;
       if ((c == 'N') || (c== 'n')) return 0;
     }
     if(TestHOLAP) if(holap(array,m) != 1.0f) {
       cout <<"[warning] something is wrong with holap for m = "<< m << " " << holap(array,m)<<endl;
       cout << "Want to continue?" <<endl;
       char c;
       cin >> c;
       if ((c == 'N') || (c== 'n')) return 0;
     }
   }
   // "m" can take more than one value beside 1 and n, just sort them out
   short PossibleValuesForM = 0;
   for(short m = 2; m < N; m*=2) PossibleValuesForM++;
   // these containers are use the record worse cases
   vector<float> LargestEntropy(PossibleValuesForM, 1.0f);
   vector<float> SmallestHOLAP(PossibleValuesForM, 1.0f);
   vector<vector<vector<bool> > > WorseHOLAP(PossibleValuesForM), WorseEntropy(PossibleValuesForM);
   float CurrentEntropy = 0.0f, CurrentHOLAP = 0.0f;
   /*
   * Now, here we do something strange, but needed, we simply downsample PermutationSet so that
   * the computation becomes doable
   * 
   */
   int sampling = 500;// that's the number of Permutations we want to try out, chosen empirically
   deque<deque<short> > NewPermutationSet;
   for(int i = 0 ; i < sampling; ++i) {
      deque<short> & permut = PermutationSet[ (int)(((double) rand() * PermutationSet.size()) /RAND_MAX) ];
      NewPermutationSet.push_back( permut);
   }
   PermutationSet = NewPermutationSet;// yes, I just discard my whole bunch of permutations painfully computed
   /*
   * End of my sampling, next follow expensive code
   */
   cout << "Please be patient, this could take several minutes. "<<endl;
   const int Size = PermutationSet.size();
   for(deque<deque<short> >::iterator rows = PermutationSet.begin(); rows != PermutationSet.end(); rows++) {
     for(deque<deque<short> >::iterator cols = PermutationSet.begin(); cols != PermutationSet.end(); cols++) {
       normalize(array,buffer,*rows,*cols);// normalize the buffer      
       for(short m = 2, index = 0; m < N; m*=2, ++index) {
         if(TestEntropy) CurrentEntropy = entropy(buffer,m);
         if(TestHOLAP) CurrentHOLAP = holap(buffer,m);
         if((TestEntropy) && (LargestEntropy[index] < CurrentEntropy)) {
             LargestEntropy[index] = CurrentEntropy;
             WorseEntropy[index] = buffer;
         }
         if((TestHOLAP) && (SmallestHOLAP[index] > CurrentHOLAP)) {
             SmallestHOLAP[index] = CurrentHOLAP;
             WorseHOLAP[index] = buffer;
         }
       }
     }
   }
   // end of expensive stuff, follows the results
   for(short m = 2, index = 0; m < N; m*=2, ++index) {
         cout << " Result for m = "<< m << endl;
         if(TestEntropy) {
           cout << " The Entropy kernel radius is at least " << LargestEntropy[index] << endl;
           print(WorseEntropy[index]);
         }
         if(TestHOLAP) {
           cout << " The HOLAP kernel radius is at least " << (1.0f - SmallestHOLAP[index]) << endl;
           print(WorseHOLAP[index]);// we give the example
         }
   }
   return 0;

}
/**
 * End of really ugly code
 */



/**
 * Program entry point: prints the copyright banner, seeds rand() from the
 * current time, runs kernelRadius() and reports completion.
 * The value returned by kernelRadius() is not used; the exit status is always 0.
 */
int main(void) {
   // banner first, in case my boss looks at this
   std::cout << "(C) NRC and Owen Kaser";
   std::cout << std::endl;

   // a different seed on each run, so each run samples different permutations
   const unsigned seed = static_cast<unsigned>(time(NULL));
   srand(seed);

   kernelRadius();

   // congratulation, you can write code that makes it to the end
   std::cout << " Done.";
   std::cout << std::endl;
   return 0;
}


