#if 0
  Schritt 1: Erzeuge die Teile in allen moeglichen Rotationen

  Interne Darstellung zur Rotation:
  4 x 4 x 4 Pixel in einem u64, Bitpositionen wie folgt (63=MSB):
    Bottom (z=0)                              Top (z=3)
  y 51 50 49 48   35 34 33 32   19 18 17 16   03 02 01 00
  ^ 55 54 53 52   39 38 37 36   23 22 21 20   07 06 05 04
  | 59 58 57 56   43 42 41 40   27 26 25 24   11 10 09 08
  | 63 62 61 60   47 46 45 44   31 30 29 28   15 14 13 12
    --> x

  Schritt 2: Erzeuge alle Teile in allen moeglichen Positionen im Zielquader

  Interne Darstellung des Quaders:
  5 x 4 x 3 Pixel in einem u64, Bitpositionen wie folgt (63=MSB):
    Bottom (z=0)                      Top (z=2)
  y 54 42 30 18 06   53 41 29 17 05   52 40 28 16 04
  ^ 57 45 33 21 09   56 44 32 20 08   55 43 31 19 07
  | 60 48 36 24 12   59 47 35 23 11   58 46 34 22 10
  | 63 51 39 27 15   62 50 38 26 14   61 49 37 25 13
    --> x
#endif

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long u64; // unsigned bitmap type; the masks in this file use bits 0..63

//------------------------- piece-methods ---------------------------

// Prefix that show_quader() prints at the start of every output row.
// Empty by default, so cuboids are printed flush left.
char *indent="";

// Print a 4x4x4 piece bitmap to stdout as four text rows.
// Each row shows the four z-layers side by side (bottom layer first),
// separated by a blank; the row with the highest y is printed first.
// A set cell is shown as 'X', an empty one as '.'. A blank line follows.
void show_piece(u64 this) {
  int row, layer, col;

  for (row = 3; row >= 0; row--) {
    for (layer = 0; layer < 4; layer++) {
      // bit index of the leftmost cell (x=0) of this row in this layer
      int first = 63 - 16 * layer - 4 * row;
      for (col = 0; col < 4; col++) {
        putchar(((this >> (first - col)) & 1ull) ? 'X' : '.');
      }
      putchar(' ');
    }
    putchar('\n');
  }
  putchar('\n');
} //show_piece

// Shift a 4x4x4 piece bitmap as far as possible to the left (x), to the
// front (y) and down (z), so that equal shapes get equal bitmaps.
//   this: in/out, the piece bitmap to normalise in place.
// An empty bitmap (0) is left unchanged; without this guard the shift
// loops below could never find a set bit and would spin forever.
void normalise_piece(u64 *this) { // move piece max. left, front, down
  u64 p = *this;

  if (p == 0ull) {
    return;
  }
  while ((p & 0x8888888888888888ull) == 0ull) { p <<= 1; }  // left: column x=0 empty
  while ((p & 0xf000f000f000f000ull) == 0ull) { p <<= 4; }  // front: row y=0 empty
  while ((p & 0xffff000000000000ull) == 0ull) { p <<= 16; } // down: layer z=0 empty
  *this = p;
} //normalise_piece

// Rotate a 4x4x4 piece bitmap by a quarter turn about the z axis, in place.
// Every 16-bit layer is permuted independently: the cell stored at bit
// 4*row + col of a layer moves to bit 4*col + (3 - row) of the same layer.
void rotate_piece_z(u64 *this) {
  const u64 src = *this;
  u64 dst = 0ull;
  int layer, row, col;

  for (layer = 0; layer < 64; layer += 16) {
    for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
        if ((src >> (layer + 4 * row + col)) & 1ull) {
          dst |= 1ull << (layer + 4 * col + (3 - row));
        }
      }
    }
  }
  *this = dst;
} // rotate_piece_z

// Rotate a 4x4x4 piece bitmap by a quarter turn about the x axis, in place.
// Whole 4-bit x-rows (nibbles) are moved: the nibble with index 4*z + y
// (index 0 = least significant nibble) goes to index 4*(3 - y) + z.
void rotate_piece_x(u64 *this) {
  const u64 src = *this;
  u64 dst = 0ull;
  int z, y;

  for (z = 0; z < 4; z++) {
    for (y = 0; y < 4; y++) {
      u64 xrow = (src >> (4 * (4 * z + y))) & 0xfull;
      dst |= xrow << (4 * (4 * (3 - y) + z));
    }
  }
  *this = dst;
} // rotate_piece_x

//------------------------------------------------------

// rotations[p] collects the distinct normalised orientations of piece p;
// end_put_piece() closes the list with a 0 entry. putidx is the number of
// entries stored so far for the piece that is currently being filled in.
u64 rotations[12][25]; int putidx;

// Begin collecting orientations for a new piece: reset the shared write index.
void start_put_piece(void) {
  putidx=0;
} //start_put_piece

// Record one orientation of a piece unless it is already known.
//   piece: row of the rotations table to append to.
//   this:  piece bitmap; it is normalised first so that equal shapes compare equal.
// The entry is appended at putidx, which is then advanced.
void put_piece(int piece, u64 this) {
  u64 *known = rotations[piece];
  int seen = 0;

  normalise_piece(&this);
  while (seen < putidx) {
    if (known[seen] == this) {
      return; // duplicate orientation, nothing to store
    }
    seen++;
  }
  known[putidx++] = this;
} //put_piece

// Close the orientation list of `piece` by writing a 0 sentinel at the
// current write index.
void end_put_piece(int piece) {
  rotations[piece][putidx]=0ull;
//printf("piece %2i has %2i distinct orientations\n", piece, putidx);
} //end_put_piece

//------------------------- quader-methods ---------------------------

// Print a 5x4x3 cuboid bitmap to stdout as four text rows.
// Each row starts with the global `indent` prefix and shows the three
// z-layers side by side (bottom layer first), five cells per layer,
// separated by a blank; the row with the highest y is printed first.
// A set cell is shown as 'X', an empty one as '.'. A blank line follows.
void show_quader(u64 this) {
  int row, layer, col;

  for (row = 3; row >= 0; row--) {
    printf("%s", indent);
    for (layer = 0; layer < 3; layer++) {
      for (col = 0; col < 5; col++) {
        // cell (col, row, layer) is stored at bit 63 - 12*col - 3*row - layer
        int bit = 63 - 12 * col - 3 * row - layer;
        putchar(((this >> bit) & 1ull) ? 'X' : '.');
      }
      putchar(' ');
    }
    putchar('\n');
  }
  putchar('\n');
} //show_quader

// Convert a piece from the 4x4x4 bitmap layout into the 5x4x3 cuboid layout.
//   quader: out, overwritten with the converted bitmap.
//   piece:  4x4x4 bitmap; only its three lowest z-layers (bits 63..16) are read.
// The cell at (col, row, layer) ends up at cuboid bit 63 - 12*col - 3*row - layer,
// i.e. the piece sits in the left/front/bottom corner of the cuboid.
void tfr_piece(u64 *quader, u64 piece) {
  u64 out = 0ull;
  int n;

  for (n = 0; n < 48; n++) { // n counts piece cells starting at the MSB
    if ((piece >> (63 - n)) & 1ull) {
      int layer = n / 16;
      int row = (n % 16) / 4;
      int col = n % 4;
      out |= 0x8000000000000000ull >> (12 * col + 3 * row + layer);
    }
  }
  *quader = out;
} //tfr_piece

// Mirror a cuboid bitmap along x, in place: the five 12-bit x-slabs
// (bits 63..52, 51..40, 39..28, 27..16, 15..4) are written back in
// reverse order. Bits 3..0 are cleared.
void mirror_x(u64 *quader) {
  const u64 in = *quader;
  u64 out = 0ull;
  int col;

  for (col = 0; col < 5; col++) {
    u64 slab = (in >> (52 - 12 * col)) & 0xfffull;
    out |= slab << (4 + 12 * col);
  }
  *quader = out;
} //mirror_x

// Mirror a cuboid bitmap along y, in place: inside each of the five
// 12-bit x-slabs the four 3-bit y-groups are written back in reverse
// order. Bits 3..0 are cleared.
void mirror_y(u64 *quader) {
  const u64 in = *quader;
  u64 out = 0ull;
  int slab, grp;

  for (slab = 0; slab < 5; slab++) {
    int base = 4 + 12 * slab; // lowest bit of this x-slab
    for (grp = 0; grp < 4; grp++) {
      u64 trio = (in >> (base + 3 * grp)) & 0x7ull;
      out |= trio << (base + 3 * (3 - grp));
    }
  }
  *quader = out;
} //mirror_y

// Mirror a cuboid bitmap along z, in place: in each of the twenty 3-bit
// groups covering bits 63..4 the highest and lowest bit are exchanged
// while the middle bit stays. Bits 3..0 are cleared.
void mirror_z(u64 *quader) {
  const u64 in = *quader;
  u64 out = 0ull;
  int low;

  for (low = 4; low < 64; low += 3) {
    u64 trio = (in >> low) & 0x7ull;
    u64 flipped = ((trio & 0x1ull) << 2) | (trio & 0x2ull) | (trio >> 2);
    out |= flipped << low;
  }
  *quader = out;
} //mirror_z

void normalise_quader(u64 *quader) { // select specific version of all rotated versions
  u64 temp, result;
  temp=*quader; result=temp;
  mirror_z(&temp);
  mirror_y(&temp); if (result>temp) result=temp;
  mirror_z(&temp);
  mirror_x(&temp); if (result>temp) result=temp;
  mirror_z(&temp);
  mirror_y(&temp); if (result>temp) result=temp;
  *quader=result;
} //normalise_quader

// Try to move the placement one cell up (z+1) inside the cuboid.
// Returns 1 and shifts *quader by one bit if no cell lies in the top
// layer; returns 0 and leaves *quader untouched otherwise.
int move_piece_up(u64 *quader) {
  const u64 top_layer = 0x2492492492492490ull;
  int movable = ((*quader & top_layer) == 0ull);

  if (movable) {
    *quader >>= 1;
  }
  return movable;
} //move_piece_up

// Try to move the placement one cell back (y+1) inside the cuboid.
// Returns 1 and shifts *quader by three bits if no cell lies in the
// rearmost row; returns 0 and leaves *quader untouched otherwise.
int move_piece_back(u64 *quader) {
  const u64 back_row = 0x0070070070070070ull;
  int movable = ((*quader & back_row) == 0ull);

  if (movable) {
    *quader >>= 3;
  }
  return movable;
} //move_piece_back

// Try to move the placement one cell to the right (x+1) inside the cuboid.
// Returns 1 and shifts *quader by twelve bits if no cell lies in the
// rightmost x-slab; returns 0 and leaves *quader untouched otherwise.
int move_piece_right(u64 *quader) {
  const u64 right_slab = 0x000000000000fff0ull;
  int movable = ((*quader & right_slab) == 0ull);

  if (movable) {
    *quader >>= 12;
  }
  return movable;
} //move_piece_right

//----------------------------------------------------

// Lists: every list holds complete cuboids (60 bits in a u64) and is
// closed by a single 0 entry. All lists are stored in the array locations.

#define maxloc 190000
u64 locations[maxloc]; int locfree=0; // locfree: index of the next unused slot

// Append one entry (a cuboid bitmap, or 0 as list terminator) to locations.
// If the array is already full, a message is written to stderr and the
// program exits with a failure status.
void put_quader(u64 quader) {
  if (locfree>=maxloc) {
    fprintf(stderr, "Out of locations ...\n");
    exit(EXIT_FAILURE); // declared in <stdlib.h>
  } //if
  locations[locfree++]=quader;
} //put_quader

//------------------------------------------------------

// Indices 0..11: the lists of every piece in every rotation at every
// possible position inside the target cuboid
// Other indices: merged lists
// lists[p] is the index in locations[] where the list of piece p starts.
int lists[20], sizes[20]; // sizes holds the list lengths

// lsblist[p][b]: index in locations[] where the sub-list of piece p starts
// that contains the placements whose lowest set bit (above bit 3) is b.
int lsblist[12][64];

//------------------------------------------------------

// Program entry: builds all piece orientations and placements, removes
// symmetric duplicates for one piece, splits the placement lists by lowest
// occupied cell and then counts all ways to fill the cuboid completely
// with the 12 pieces. Prints list statistics and the number of solutions.
int main(void) {
  // The 12 pieces as 4x4x4 bitmaps (layout described at the top of the file).
  u64 pieces[12]={0xc8c0000000000000ull, // pieces in order given by c't
                  0xc460000000000000ull,
                  0xc444000000000000ull,
                  0x88c8000000000000ull,
                  0xc800880000000000ull,
                  0x44e0000000000000ull,
                  0xcc40000000000000ull,
                  0xc620000000000000ull,
                  0xc400800000000000ull,
                  0xc640000000000000ull,
                  0x4e40000000000000ull,
                  0xc8c8000000000000ull};
  int i,j,x,z,piece,bitnum;
  u64 curr, mask, qz, qy, qx;
  // pnums[0..lvl] holds the numbers of the pieces not yet placed at search level lvl.
  int pnums[12]={0,1,2,3,4,5,6,7,8,9,10,11};
  // Per search level n: pn = piece number, in = index into pnums, bn/mn = number
  // and mask of the cell to fill, jn = index into locations, tn = chosen placement.
  int p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11;
  int     i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11;
  int b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11;
  int j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11;
  u64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;
  u64 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11;
  u64 curr_quader, results;

// Nested function definition (a GNU C extension, not ISO C): prints the twelve
// placements t11..t0 followed by a separator line. No call to it is visible in
// this function. The trailing backslashes merely splice the lines together.
void show_solution (void) {
  show_quader(t11); show_quader(t10); show_quader(t9); show_quader(t8); \
  show_quader(t7); show_quader(t6); show_quader(t5); show_quader(t4); \
  show_quader(t3); show_quader(t2); show_quader(t1); show_quader(t0); \
  printf("---------------\n"); \
} //show_solution

//printf("===================================\n");

  // Step 1: pieces --> rotations
  // bring every piece into all 24 orientations, dropping duplicates
  for (piece=0; piece<12; piece++) {
    curr=pieces[piece]; start_put_piece();
    for (z=0; z<3; z++) {
      for (x=0; x<3; x++) {
        rotate_piece_x(&curr);
        put_piece(piece, curr);
      }
      rotate_piece_z(&curr);
      put_piece(piece, curr);
    } //for

    rotate_piece_z(&curr);
    rotate_piece_x(&curr);
    rotate_piece_x(&curr);

    for (z=0; z<3; z++) {
      for (x=0; x<3; x++) {
        rotate_piece_x(&curr);
        put_piece(piece, curr);
      }
      rotate_piece_z(&curr);
      put_piece(piece, curr);
    } //for
    end_put_piece(piece);
  } //for

//printf("===================================\n");

  // Step 2: rotations --> locations
  // place every rotated piece at every position inside the target cuboid
  for (piece=0; piece<12; piece++) {
    lists[piece]=locfree;
    for (i=0; (curr=rotations[piece][i]); i++) {
      if (0 == (curr & 0x000000000000ffffull)) { // leave out rotations with height 4
        tfr_piece(&qz, curr);
        // enumerate all translations: innermost loop moves right, then back, then up
        do {
          qy=qz;
          do {
            qx=qy;
            do {
              put_quader(qx);
            } while (move_piece_right(&qx));
          } while (move_piece_back(&qy));
        } while (move_piece_up(&qz));
      } //if
    } //for
    sizes[piece]=locfree-lists[piece];
  //printf("piece %2i can take %3i positions/orientations in the cuboid\n", piece, sizes[piece]);
    put_quader(0ull); // list terminator
  } //for
//printf("===================================\n");

  // Step 3: eliminate symmetries
  // for exactly one piece, rebuild its placement list at the end of
  // locations[], keeping only one normalised representative per placement
  piece=2;
    for (i=lists[piece], lists[piece]=locfree; (curr=locations[i]); i++) {
      normalise_quader(&curr);
      // linear search in the new list for an identical entry
      for (j=lists[piece]; (j<locfree) && (curr!=locations[j]); j++);
      if (j==locfree) put_quader(curr);
    } //for
    sizes[piece]=locfree-lists[piece];
  //printf("piece %2i can take %3i positions/orientations in the cuboid\n", piece, sizes[piece]);
    put_quader(0ull); // list terminator
//printf("===================================\n");

  // show results so far
  for (piece=0; piece<12; piece++) printf("%3i ",       piece );
  printf("/ mem usage:\n");
  for (piece=0; piece<12; piece++) printf("%3i ", sizes[piece]);
  printf("/ %i free=%i%%\n\n", locfree, (maxloc-locfree)*100/maxloc);
//printf("===================================\n");

  // Step 4: split the lists into smaller lists - divided by the lowest
  // set bit (silly algorithm - but building one list after the other
  // keeps the memory management simple)
  for (piece=0; piece<12; piece++) {
    j=0;
    for (mask=0x10ull, bitnum=4; mask; mask<<=1, bitnum++) {
      lsblist[piece][bitnum]=locfree;
      for (i=lists[piece]; (curr=locations[i]); i++) {
        if ((curr & mask)                                     // this bit must be set and ...
        &&  ((curr & ((mask-1)&0xfffffffffffffff0ull))==0)) { // all lower bits must be clear
          put_quader(curr);
        } //if
      } //for
      i=locfree-lsblist[piece][bitnum]; j+=i; // i: length of this sub-list, j: running total
      printf("%c", i+'0'); // sub-list length printed as a single character offset from '0'
      put_quader(0ull);
    } //for
    printf(" %i\n", j);
  } //for
  printf("%i free=%i%%\n\n", locfree, (maxloc-locfree)*100/maxloc);

//-----------------------------------------------------------------------

#if 0
  // for debugging: dump the split lists
  piece=3;
  for (mask=0x10ull, bitnum=4; mask; mask<<=1, bitnum++) {
    printf("==> %i %16.16llx\n", bitnum, mask);
    indent="";
    show_quader(mask);
    indent="\t";
    for (j=lsblist[piece][bitnum]; (curr=locations[j]); j++) {
      printf("\t%i\n", j);
      show_quader(curr);
    } //for
    indent="";
    printf("-----------------\n");
  } //for
exit(0);
#endif

//-----------------------------------------------------------------------
// And from here on the macros for the search
//-----------------------------------------------------------------------

// forpiece/endforpiece: loop over the pieces in pnums[0..lvl]. The chosen
// piece is taken out by copying pnums[lvl] into its slot, so the inner
// levels see the remaining pieces in pnums[0..lvl-1]; endforpiece restores
// the slot. forpiece opens a brace that endforpiece closes.
#define forpiece(piece, i, lvl) \
  for (i=lvl; i>=0; i--) { \
    piece=pnums[i]; pnums[i]=pnums[lvl];

#define endforpiece(piece, i) \
    pnums[i]=piece; \
  }

//-----------------------------------------------------------------------

// forlist/endforlist: loop over the placements of `piece` whose lowest set
// bit is `bitnum`; placements overlapping curr_quader are skipped. A chosen
// placement is added to curr_quader by XOR and removed again by endforlist.
// forlist opens two braces that endforlist closes.
#define forlist(piece, bitnum, j, this) \
  for (j=lsblist[piece][bitnum]; (this=locations[j]); j++) { \
    if (!(curr_quader & this)) {                      \
      curr_quader ^= this;
#define endforlist(this) \
      curr_quader ^= this; \
    }                      \
  } //for

//-----------------------------------------------------------------------

// nextbit: starting just above bitnum/mask, find the next bit that is
// still clear in curr_quader and store its number and mask.
#define nextbit(bitnum, mask, nextbitnum, nextmask) \
  nextbitnum=bitnum+1; nextmask=mask<<1; \
  while (curr_quader & nextmask) { nextbitnum++; nextmask<<=1; }

//-----------------------------------------------------------------------

  curr_quader=0ull;
  m11=0x10ull; b11=4; // next bit to fill
  results=0ull;  // solutions found

  // Twelve nested search levels (11 down to 0). Each level picks one of the
  // remaining pieces and one of its placements covering the lowest free bit.
  forpiece(p11, i11, 11)
  forlist(p11, b11, j11, t11) nextbit(b11, m11, b10, m10)
  forpiece(p10, i10, 10)
  forlist(p10, b10, j10, t10) nextbit(b10, m10, b9,  m9)
  forpiece(p9,  i9,  9)
  forlist(p9,  b9,  j9,  t9)  nextbit(b9,  m9,  b8,  m8)
  forpiece(p8,  i8,  8)
  forlist(p8,  b8,  j8,  t8)  nextbit(b8,  m8,  b7,  m7)
  forpiece(p7,  i7,  7)
  forlist(p7,  b7,  j7,  t7)  nextbit(b7,  m7,  b6,  m6)
  forpiece(p6,  i6,  6)
  forlist(p6,  b6,  j6,  t6)  nextbit(b6,  m6,  b5,  m5)
  forpiece(p5,  i5,  5)
  forlist(p5,  b5,  j5,  t5)  nextbit(b5,  m5,  b4,  m4)
  forpiece(p4,  i4,  4)
  forlist(p4,  b4,  j4,  t4)  nextbit(b4,  m4,  b3,  m3)
  forpiece(p3,  i3,  3)
  forlist(p3,  b3,  j3,  t3)  nextbit(b3,  m3,  b2,  m2)
  forpiece(p2,  i2,  2)
  forlist(p2,  b2,  j2,  t2)  nextbit(b2,  m2,  b1,  m1)
  // Levels 1 and 0 are expanded by hand: with two pieces left, p1 is
  // pnums[i1] and p0 is the other one, so pnums need not be modified.
/*forpiece(p1,  i1,  1)*/ for (i1=1; i1>=0; i1--) { p1=pnums[i1];
  forlist(p1,  b1,  j1,  t1)  nextbit(b1,  m1,  b0,  m0)
/*forpiece(p0,  i0,  0)*/ p0=pnums[1-i1];
/*forlist(p0,  b0,  j0,  t0)*/ for (j0=lsblist[p0][b0]; (t0=locations[j0]); j0++) {
				 if (!(curr_quader & t0)) {
				   curr_quader ^= t0;

  // all twelve pieces are placed without overlap: count one solution
  results++;

  //if ((results&0x00000000000000ffull)==0) { printf("%i %llu\n", p11, results); }

  // does not work when the innermost forlist/endforlist is optimised
  // sanity check: the twelve placements must cover all 60 cells (bits 4..63);
  // otherwise dump them and abort
  if (curr_quader!=0xfffffffffffffff0ull) {
    show_quader(t11); show_quader(t10); show_quader(t9); show_quader(t8);
    show_quader(t7); show_quader(t6); show_quader(t5); show_quader(t4);
    show_quader(t3); show_quader(t2); show_quader(t1); show_quader(t0);
    printf("Mist!\n"); exit(1);
  } //if

/*endforlist(t0)*/        curr_quader ^= t0; } }
/*endforpiece(p0,  i0)*/
  endforlist(t1)
/*endforpiece(p1,  i1)*/  }
  endforlist(t2)
  endforpiece(p2,  i2)
  endforlist(t3)
  endforpiece(p3,  i3)
  endforlist(t4)
  endforpiece(p4,  i4)
  endforlist(t5)
  endforpiece(p5,  i5)
  endforlist(t6)
  endforpiece(p6,  i6)
  endforlist(t7)
  endforpiece(p7,  i7)
  endforlist(t8)
  endforpiece(p8,  i8)
  endforlist(t9)
  endforpiece(p9,  i9)
  endforlist(t10)
  endforpiece(p10, i10)
  endforlist(t11)
  endforpiece(p11, i11)

  printf("Anzahl Lsungen: %llu\n", results);
  exit(0);
} // main
