
#include<stdio.h>
#include<stdlib.h>

/* Already a substantial run-time optimisation here: filling starts in the direction of dimension 3 */

/* Edge lengths of the box that has to be filled. */
#define DX  3
#define DY  4
#define DZ  5

/* Number of pieces. */
#define MSTEIN  12
#define XSTEIN  (1<<4)		/* <<4 has the effect of *XSTEIN */


/*
 * Upper bound on the placements of one piece (24 orientations times all
 * cells).  The expansion is parenthesized so that the macro stays a single
 * operand in expressions such as x / MTRANS or x % MTRANS.
 */
#define MTRANS  (24*DX*DY*DZ)
#define XMAX  (1<<10)		/* <<10 has the effect of *XMAX instead of *MTRANS*MSTEIN */
#define UFMAX 3000		/* 24*XMAX */

/* Test of XPOS when filling in x, y and z direction
   z=0           z=1
   -- 30 27 24   21
   pp 29 26 23   20
   31 28 25 22   19   testing in x direction (XPOS=31) does not gain much */

#define XPOS1 29
#define XPOS2 20
#define XMASK ((1u << XPOS1) | (1u << XPOS2)) /* 31 x, 29 y, 20 z direction */

/* ppfeld[] is indexed with the top PPMAX bits of a 32-bit word. */
#define PPMAX 10
#define PPMASK ((~0u)>>(32-PPMAX))

/*
 * Bounding box {x, y, z} of every piece, in the same order as stein[].
 * The rows are rotated in place by circy()/circz().
 */
int steind[MSTEIN][3] = {
  { 3, 2, 1 },   /* piece  1 */
  { 3, 3, 1 },   /* piece  2 */
  { 4, 2, 1 },   /* piece  3 */
  { 4, 2, 1 },   /* piece  4 */
  { 2, 2, 2 },   /* piece  5 */
  { 3, 3, 1 },   /* piece  6 */
  { 3, 2, 1 },   /* piece  7 */
  { 3, 3, 1 },   /* piece  8 */
  { 2, 2, 2 },   /* piece  9 */
  { 3, 3, 1 },   /* piece 10 */
  { 3, 3, 1 },   /* piece 11 */
  { 4, 2, 1 }    /* piece 12 */
};

/*
 * Cell maps of the pieces (non-zero = occupied).  Cells are stored with x
 * running fastest, then y, then z, inside the bounding box given by the
 * matching steind[] row; entries beyond x*y*z are padding.
 * The rows are rotated in place by circy()/circz().
 */
int stein[MSTEIN][9] = {
  /* piece 1: 3 x 2 x 1 */
  { 1,1,1,
    1,0,1,
    0,0,0 },

  /* piece 2: 3 x 3 x 1 */
  { 1,0,0,
    1,1,1,
    0,0,1 },

  /* piece 3: 4 x 2 x 1 */
  { 1,1,1,1,
    1,0,0,0,
    0 },

  /* piece 4: 4 x 2 x 1 */
  { 1,1,1,1,
    0,1,0,0,
    0 },

  /*
   * piece 5: 2 x 2 x 2
   * Further substantial run-time optimisation: this piece (5) is fixed
   * to this particular position.
   */
  { 0,1,
    0,1,
    1,1,
    0,1,
    0 },

  /* piece 6: 3 x 3 x 1 */
  { 1,1,1,
    0,1,0,
    0,1,0 },

  /* piece 7: 3 x 2 x 1 */
  { 1,1,1,
    1,1,0,
    0,0,0 },

  /* piece 8: 3 x 3 x 1 */
  { 1,0,0,
    1,1,0,
    0,1,1 },

  /* piece 9: 2 x 2 x 2 -- no y/-y symmetry !!! */
  { 1,1,
    1,0,
    0,0,
    1,0,
    0 },

  /* piece 10: 3 x 3 x 1 */
  { 1,0,0,
    1,1,1,
    0,1,0 },

  /* piece 11: 3 x 3 x 1 */
  { 0,1,0,
    1,1,1,
    0,1,0 },

  /* piece 12: 4 x 2 x 1 */
  { 1,1,1,1,
    1,0,1,0,
    0 }
};

/*
 * upos[q*XMAX + pos*XSTEIN + piece]: pointer to the LAST stored placement of
 * `piece` (1-based) whose first cell is `pos`; the search walks backwards
 * from it.  q (0..3) selects a pre-filtered list depending on bits XPOS1 and
 * XPOS2.  The elements point into uf1[], hence the unsigned element type
 * (the table was previously declared as int *, which does not match the
 * unsigned int pointers stored in and read from it).
 */
static unsigned int *upos[4*XMAX];
/*
 * spos / tpos: next free slots in the two regions of uf1[] while the tables
 * are built; tpos is reused as the count of pieces still to place during
 * the search.  result: number of solutions found.
 */
static int spos, tpos, result;
/* Placement bit fields: first word at [i], second word at [i + UFMAX]. */
static unsigned int uf1[2*2*UFMAX];
/* Singly linked list of the pieces not yet placed; kp[0] is the head. */
static short int kp[MSTEIN+1];
/* Number of placements reachable through the matching upos[] entry. */
static short int opos[4*XMAX];
/* ppfeld[v]: number of leading one bits in the PPMAX-bit value v. */
static short int ppfeld[PPMASK+1];


/*
 * Sort the placements of one piece and append them to the lookup tables
 * upos[], opos[] and uf1[].
 *
 * vb     - *ntrans placements of two words each (first word at vb[2*k],
 *          second at vb[2*k+1]); reordered in place.
 * vf     - *ntrans position keys, vf[k] belonging to placement k; reordered
 *          in place together with vb.
 * ntrans - number of placements; when it is zero or less nothing is sorted
 *          or appended.
 * st     - zero-based piece index; for piece 0 opos[] is cleared first.
 *
 * Prints a message and exits with status 1 when tpos reaches UFMAX.
 */
void tosort(unsigned int *vb, int *vf, int *ntrans, int *st) {
  int j,*v1,*v2;
  unsigned int *u1,*u2;
/*
    Bubble sort.  Primary key: *vf ascending.  Within equal *vf the pair with
    the larger (first word & XMASK) comes first, and for identical first
    words the larger second word comes first.  Placements with the XPOS bits
    clear therefore end up at the END of their group, which is where the
    backwards walk starting at upos[] begins.
    The loop bounds make the sort a no-op for *ntrans <= 1 (the former
    while(--j) form ran out of bounds for *ntrans == 0).
*/
  for(j = *ntrans - 1; j > 0; j--) {
    int i;
    v1 = vf; v2 = vf + 1;
    u1 = vb; u2 = vb + 2;
    for(i = j; i > 0; i--) {
      unsigned int u;
      if(*v1 > *v2) {
        int f;
        f = *v1; *v1 = *v2; *v2 = f;
        u = u1[0]; u1[0] = u2[0]; u2[0] = u;
        u = u1[1]; u1[1] = u2[1]; u2[1] = u;
      } else if(*v1 == *v2) {
/*
		     1..1 1..0 0..1 0..0 < uf (required)
		u1 (actual)
		0..0  X    X    X    X
		0..1       X         X   uf[0+2*UFMAX] <-- *upos[0+XMAX]
		1..0  "    "    X    X   uf[0]         <-- *upos[0]
		1..1                 X
*/
        if((u1[0] & XMASK) < (u2[0] & XMASK)) {
          u = u1[0]; u1[0] = u2[0]; u2[0] = u;
          u = u1[1]; u1[1] = u2[1]; u2[1] = u;
        } else if(u1[0] == u2[0]) {
          if(u1[1] < u2[1]) { u = u1[1]; u1[1] = u2[1]; u2[1] = u; }
        }
      }
      /* advance to the next adjacent pair of placements */
      v1++; v2++;
      u1 += 2; u2 += 2;
    }
  }
/*
    Transfer and build the index tables: tpos starts at 1
*/
  if(*st == 0) {
    int l = 4 * XMAX; while(l--) opos[l] = 0;
  }
  v1 = vf;
  u1 = vb;
  for(j = 0; j < *ntrans; j++) {
    int ti;
    if(tpos >= UFMAX) { printf("UFMAX too small\n\n"); exit(1); }
    /* table slot: position * XSTEIN + 1-based piece number */
    ti = (*v1) * XSTEIN + (*st) + 1; v1++;
    /* quadrants 0 and 2 share the main list; quadrant 2 only counts the
       placements with bit XPOS1 clear */
    upos[ti] = &uf1[tpos];
    upos[2*XMAX+ti] = &uf1[tpos];   opos[ti]++;
    if( !(*u1 & (1u<<XPOS1)) )      opos[2*XMAX + ti]++;
    /* placements with bit XPOS2 clear are copied to a second list that
       serves quadrants 1 and 3 */
    if( !(*u1 & (1u<<XPOS2)) ) {
      upos[XMAX+ti] = &uf1[spos];
      upos[3*XMAX+ti] = &uf1[spos]; opos[1*XMAX + ti]++;
      if( !(*u1 & (1u<<XPOS1)) )    opos[3*XMAX + ti]++;
      uf1[spos] = u1[0];
      uf1[spos + UFMAX] = u1[1];
      spos++;
    }
    uf1[tpos] = *u1; u1++;
    uf1[tpos + UFMAX] = *u1; u1++;
    tpos++;
  }
}

/*
    Try out the relevant free pieces

    recursive function addnext: 32 / 64 bit variant
    			        (which, however, only gains about 5%)
*/

/*
 * 32-bit variant of the recursive search: the occupied cells are tracked in
 * the single word u1.
 *
 * u1   - bit field of occupied cells; a stored placement is rejected when
 *        its first word overlaps it (*uf & u1).
 * pos1 - base index into upos[]/opos[]; recursive calls advance it in steps
 *        of 16 (pp2<<4), the stride tosort() uses via XSTEIN.
 *
 * Walks the list of unused pieces in kp[], tries every stored placement of
 * each, and either recurses with the merged bit field or, when no piece is
 * left (tpos == 0), increments result.
 */
void addnext32(unsigned int u1, int pos1) {
  short int *p1;
  int ku,is;
  /* select one of the four table quadrants from bits XPOS2 / XPOS1 of u1 */
  { int pp;
    pp = ((u1 >> (XPOS2-10)) & (1u<<10));	/* bit XPOS2 set: + XMAX */
    pp += ((u1 >> (XPOS1-11)) & (1u<<11));	/* bit XPOS1 set: + 2*XMAX */
    ku = pp + pos1;
  }
  p1 = kp;
  /* is = pieces still to place; tpos = pieces left after this level */
  is = tpos--;
  do {
    /* register */ unsigned int *uf;
    /* register */ int pp;
    short int *p0;
    int k1,k0;
    /* p0 = link pointing at the current piece k0, p1 = link of its successor */
    p0 = p1;
    k0 = *p0;
    p1 = &kp[k0];
    /* no stored placement for this piece here: try the next piece */
    if( (pp = opos[ku + k0]) == 0) continue;
    uf = upos[ku + k0];
    /* unlink piece k0 from the free list while it is placed */
    *p0 = *p1;
    k1 = k0;			/* save register k0 */
    do {
      if( ! (*uf & u1) ) {
        if(tpos) {
          /* register */ unsigned int ru1;
          /* register */ int pp2;
          ru1 = *uf | u1;
          pp2 = 1;
          /* skip the run of leading one bits, up to PPMAX bits per lookup */
          while(ru1 >> 31) {
            int ri;
            ri = ppfeld[ ru1 >> (32-PPMAX) ];
            pp2  += ri;
            ru1<<= ri;
          };
          addnext32( ru1<<1, pos1+(pp2<<4));
        } else result++;	/* that was the last piece: count it */
      }
      uf--;			/* placements are walked backwards */
    } while(--pp);
    /* put piece k0 back into the free list */
    *p0 = k1;
  } while(--is);
  tpos++;
}

/*
 * 64-bit variant of the recursive search: the occupied cells are tracked in
 * the two words u1 (tested against *uf) and u2 (tested against uf[UFMAX]).
 *
 * u1, u2 - bit field of occupied cells, u1 holding the leading 32 bits.
 * pos1   - base index into upos[]/opos[]; recursive calls advance it in
 *          steps of 16 (pp<<4), the stride tosort() uses via XSTEIN.
 *
 * Same list walk as addnext32().  It recurses into itself while the new
 * base index stays below 27*16 and hands over to addnext32() otherwise.
 * Unlike addnext32() it never tests tpos and never increments result.
 */
void addnext64(unsigned int u1, unsigned int u2, int pos1) {
  short int *p1;
  int ku,is;
/*
  Print intermediate results
  if(tpos == 10) printf(" %i \n",result);
*/
  /* select one of the four table quadrants from bits XPOS2 / XPOS1 of u1 */
  { int pp;
    pp = ((u1 >> (XPOS2-10)) & (1u<<10));	/* bit XPOS2 set: + XMAX */
    pp += ((u1 >> (XPOS1-11)) & (1u<<11));	/* bit XPOS1 set: + 2*XMAX */
    ku = pp + pos1;
  }
  p1 = kp;
  /* is = pieces still to place; tpos = pieces left after this level */
  is = tpos--;
  do {
    /* register */ unsigned int *uf;
    /* register */ int pp;
    short int *p0;
    int k1,k0;
    /* p0 = link pointing at the current piece k0, p1 = link of its successor */
    p0 = p1;
    k0 = *p0;
    p1 = &kp[k0];
    /* no stored placement for this piece here: try the next piece */
    if( (pp = opos[ku + k0]) == 0) continue;
    uf = upos[ku + k0];
    /* unlink piece k0 from the free list while it is placed */
    *p0 = *p1;
    k1 = k0;			/* save register k0 */
    do {
      if( ! (*uf & u1) ) {
        if( ! (uf[UFMAX] & u2) ) {
/*        if(tpos) { 				here too: caution! */
            /* register */ unsigned int ru1;
            int op = pp;	/* save register pp (loop counter) */
            ru1 = *uf | u1;
            pp = 1;
/*					Caution: 32 bits are only just sufficient here: */
            while(ru1 >> 31) {
              int ri;
              ri = ppfeld[ ru1 >> (32-PPMAX) ];
              pp  += ri;
              ru1<<= ri;
            };
            /* shift the 64-bit field left by pp bits in total; the top
               bits of the second word move into the first word */
            { /* register */ unsigned int ru2;
	      /* register */ int pt;
              ru2 = (uf[UFMAX]|u2);
	      pt = pos1 + (pp<<4);
	      /* NOTE(review): presumably from cell 27 on the remaining
	         cells fit into one 32-bit word -- confirm */
	      if(pt < 27 *16)
		    addnext64( (ru1<<1)|(ru2>>(32-pp)),(ru2<<pp),pt);
	      else
		    addnext32( (ru1<<1)|(ru2>>(32-pp)),pt);
            }
            pp = op;		/* restore the loop counter */
/*        } else result++; */
        }
      }
      uf--;			/* placements are walked backwards */
    } while(--pp);
    /* put piece k0 back into the free list */
    *p0 = k1;
  } while(--is);
  tpos++;
}

/*
 * Prepare the search state for nstein pieces and start the recursion.
 *
 * Builds the free-piece list kp[] (every entry links to its successor),
 * sets tpos to the number of pieces, fills ppfeld[] with the number of
 * leading one bits of every PPMAX-bit pattern and calls addnext64() on the
 * empty box.
 */
void domino(int nstein) {
  int idx;
  unsigned int pattern;

  /* kp[0] is the list head, kp[i] links piece i to piece i+1 */
  for (idx = 0; idx <= nstein; idx++) {
    kp[idx] = idx + 1;
  }

  tpos = nstein;

  for (pattern = 0u; pattern <= PPMASK; pattern++) {
    unsigned int probe = 1u << (PPMAX - 1);
    int ones = 0;

    /* count one bits from the top until the first zero bit */
    for (; pattern & probe; probe >>= 1) {
      ones++;
    }
    ppfeld[pattern] = ones;
  }

  addnext64(0u, 0u, 0);
}

/*
 * Append all translations of one orientation of a piece to vb[]/vf[].
 *
 * d      - bounding box {x, y, z} of the orientation.
 * f      - cell map, x running fastest, then y, then z (non-zero = set).
 * vb     - output: two words per translation (bit field with the leading
 *          set cell removed).
 * vf     - output: box cell index of that leading set cell.
 * ntrans - in/out: number of entries already stored; 0 also resets the list
 *          of orientations seen so far.
 *
 * Returns without storing anything when the orientation does not fit into
 * the DX x DY x DZ box or duplicates an orientation seen since the reset.
 */
void topipe(int *d, int *f, unsigned int *vb, int *vf, int *ntrans) {
  static unsigned int red[48];   /* bit fields of the orientations seen */
  static int nred;               /* number of pairs stored in red[] */
  const int nx = d[0], ny = d[1], nz = d[2];
  unsigned int bits[2];
  int src, first, count;
  int i, j, k;

  /* check the box limits */
  if (nx > DX || ny > DY || nz > DZ) {
    return;
  }

  if (*ntrans == 0) {
    nred = 0;
  }

  /* build the bit field: cell (i,j,k) is bit (k*DY + j)*DX + i, counted
     from the most significant bit of bits[0] */
  bits[0] = 0u;
  bits[1] = 0u;
  src = 0;
  for (k = 0; k < nz; k++) {
    for (j = 0; j < ny; j++) {
      for (i = 0; i < nx; i++) {
        if (f[src++]) {
          const int b = (k * DY + j) * DX + i;
          bits[b >> 5] |= 1u << (31 - (b & 31));
        }
      }
    }
  }

  /* redundancy test against the orientations already handled */
  for (i = 0; i < nred; i++) {
    if (red[2 * i] == bits[0] && red[2 * i + 1] == bits[1]) {
      return;
    }
  }
  red[2 * nred] = bits[0];
  red[2 * nred + 1] = bits[1];
  nred++;

  /* drop the leading zeros including the first 1; first = original
     position of that first 1 */
  first = 0;
  for (;;) {
    const unsigned int top = bits[0] >> 31;

    bits[0] = (bits[0] << 1) | (bits[1] >> 31);
    bits[1] <<= 1;
    if (top) {
      break;
    }
    first++;
  }

  /* store the translations: the same bit field for each, together with
     the position of its leading cell */
  count = *ntrans;
  for (k = 0; k <= DZ - nz; k++) {
    for (j = 0; j <= DY - ny; j++) {
      for (i = 0; i <= DX - nx; i++) {
        vf[count] = first + (k * DY + j) * DX + i;
        vb[2 * count] = bits[0];
        vb[2 * count + 1] = bits[1];
        count++;
      }
    }
  }
  *ntrans = count;
}

/*
 * Rotate a piece by a quarter turn about the z axis, in place.
 *
 * d - bounding box {x, y, z}; x and y are exchanged.
 * f - cell map (x fastest, then y, then z) with at most 9 cells; the first
 *     x*y*z entries are rewritten, the rest is left alone.
 *
 * New cell (i, j, k) receives old cell (j, y-1-i, k).
 */
void circz(int *d, int *f) {
  const int nx = d[0], ny = d[1], nz = d[2];
  const int layer = nx * ny;
  const int total = layer * nz;
  int tmp[9];
  int out = 0;
  int lev, col, row, i;

  d[0] = ny;
  d[1] = nx;

  for (lev = 0; lev < nz; lev++) {
    for (col = 0; col < nx; col++) {
      /* an old column, read from its last row upwards, becomes a new row */
      for (row = ny - 1; row >= 0; row--) {
        tmp[out++] = f[lev * layer + row * nx + col];
      }
    }
  }

  for (i = 0; i < total; i++) {
    f[i] = tmp[i];
  }
}

/*
 * Rotate a piece by a quarter turn about the y axis, in place.
 *
 * d - bounding box {x, y, z}; x and z are exchanged.
 * f - cell map (x fastest, then y, then z) with at most 9 cells; the first
 *     x*y*z entries are rewritten, the rest is left alone.
 *
 * New cell (i, j, k) receives old cell (k, j, z-1-i).
 */
void circy(int *d, int *f) {
  const int nx = d[0], ny = d[1], nz = d[2];
  const int layer = nx * ny;
  const int total = layer * nz;
  int tmp[9];
  int out = 0;
  int col, row, lev, i;

  d[0] = nz;
  d[2] = nx;

  for (col = 0; col < nx; col++) {
    for (row = 0; row < ny; row++) {
      /* the old z direction, read from the last layer down, becomes x */
      for (lev = nz - 1; lev >= 0; lev--) {
        tmp[out++] = f[lev * layer + row * nx + col];
      }
    }
  }

  for (i = 0; i < total; i++) {
    f[i] = tmp[i];
  }
}

/*
 * Build the lookup tables for the first nstein pieces.
 *
 * nstein - number of pieces to process (rows of steind[]/stein[]).
 * sfix   - 1-based number of the piece whose orientations are restricted.
 *
 * For every piece the orientations are generated with circy()/circz(),
 * their translations are collected with topipe() and the result is handed
 * to tosort().  One status line per piece is printed.
 */
void init(int nstein, int sfix) {
  unsigned int vb[2*MTRANS];
  int vf[MTRANS];
  int st;

  tpos = 1;
  spos = 2*UFMAX + 1;

  for (st = 0; st < nstein; st++) {
    int *dim = steind[st];
    int *cells = stein[st];
    const int fixed = (st == sfix - 1);
    /* 4 turns for a free piece, 2 for the fixed one */
    const int reps = fixed ? 2 : 4;
    int ntrans = 0;
    int ry, rz, extra;
/*
    Rotate - the rotation about x is mapped onto rotations about y and z.
    24 irreducible combinations:
    {Dy^0, Dy^1, Dy^2, Dy^3} * {Dz^0, Dz^1 * {Dy^0, Dy^1, Dy^3}, Dz^2, Dz^3}
    One piece is fixed (no matter which) - 6 combinations remain:
    {Dy^0, Dy^1} * {Dz^0, Dz^1 * {Dy^0, Dy^1}}
    (x,y,z),(x,z,y),(y,x,z),(y,z,x),(z,x,y),(z,y,x) -> (X,Y,Z)
*/
    for (ry = 0; ry < reps; ry++) {
      for (rz = 0; rz < 4; rz++) {
        if (rz < reps) {
          topipe(dim, cells, vb, vf, &ntrans);
        }
        if (rz == 1) {
          /* circy is applied four times in total here, so the piece is
             back in its Dz^1 orientation afterwards */
          for (extra = 1; extra < 3; extra++) {
            circy(dim, cells);
            if (extra < reps) {
              topipe(dim, cells, vb, vf, &ntrans);
            }
            circy(dim, cells);
          }
        }
        circz(dim, cells);
      }
      circy(dim, cells);
    }

    printf(" Stein %3i erlaubt %4i Positionen: -", st + 1, ntrans);
    if (fixed) {
      printf("fixiert-\n");
    } else {
      printf("frei-\n");
    }
/*
    Sort into the index tables upos, opos, uf1
*/
    tosort(vb, vf, &ntrans, &st);
  }
/*  printf(" Positions-Varianten: %i\n",tpos-1); */
}

/*
 * Program entry: builds the placement tables with piece 5 fixed, runs the
 * search and prints the number of solutions.
 */
int main(void)
{
  const int nstein = MSTEIN;

  fputs("\n *** c't Puzzle *** \n\n", stdout);

  /* build the initial tables */
  init(nstein, 5);

  /* build all possible chains */
  fputs("\n Berechnung aller Kombinationen ...\n", stdout);
  result = 0;
  domino(nstein);

  printf("\n Es gibt %i Lsungen. \n", result);
  return 0;
}

