/**************************************************************************\
 *
 *  This file is part of the Klimt library.
 *  Copyright (C) 2003 by IMS, Vienna University of Technology.
 *  All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  ("GPL") version 2 as published by the Free Software Foundation.
 *  See the file LICENSE.GPL at the root directory of this source
 *  distribution for additional information about the GNU GPL.
 *  For the full GPL license see
 *  <URL:http://www.gnu.org/copyleft/gpl.html>
 *
 *  For using Klimt with software that can not be combined with the
 *  GNU GPL, and for taking advantage of the additional benefits of
 *  our support services, please contact IMS about acquiring a
 *  Klimt Professional Edition License.
 *
 *  Contact: <mailto:klimt@studierstube.org>
 *  See <URL:http://www.studierstube.org/klimt>
 *  for more information.
 *
 *  Vienna University of Technology
 *  Institute for Software Technology and Interactive Systems
 *  Interactive Media Systems Group
 *  Favoritenstrasse 9-11/188/2
 *  A-1040 Vienna, Austria
 *  <URL:http://www.ims.tuwien.ac.at>.
 *
 **************************************************************************
 *
 * $Header: /cvsroot/klimt/klimt/klimt/src/RasterizerSW565/klRSW565_Scanline_Texture.h,v 1.4 2004/02/10 18:19:31 drgoldie Exp $
 *
\**************************************************************************/


//
// class klRSW565 {
//


// fills a pixel buffer with textured pixels
// all pixels are perspective correct
// (destroys previous buffer content)
//
void drawScanLine_TexturePerspective(const Edge *nLeft, klFloat nPreStep, BUF_PIXELTYPE* nDst,
									 const TexGradients &nGradients, const MipMapLevel* nTexture)
{
	unsigned int	wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*	texturePixels = nTexture->pixels;
	klFloat			dOneOverZdX = nGradients.dOneOverZdX,
					dUOverZdX =   nGradients.dUOverZdX,
					dVOverZdX =   nGradients.dVOverZdX,
					OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
					UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
					VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;

	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		while(len--) {
			klFloat Z = OneOverZ;
			Z.inverse();

			int U = (UOverZ * Z).getInt();
			int V = (VOverZ * Z).getInt();

			int texOffs = (U&wMask) + ((V&hMask) << wShift);

			OneOverZ += dOneOverZdX;
			UOverZ += dUOverZdX;
			VOverZ += dVOverZdX;

			*(nDst++) = texturePixels[texOffs];
		}
		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
	}
}


// fills a pixel buffer with textured pixels
// texture coordinates are linearly interpolated
// across spans of 16 pixels (or fewer).
// (destroys previous buffer content)
//
void drawScanLine_Texture(const Edge *nLeft, klFloat nPreStep,
						  BUF_PIXELTYPE* nDst,
						  const TexGradients &nGradients, const MipMapLevel* nTexture)
{
	const int		SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int	wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*	texturePixels = nTexture->pixels;
	klFloat			dOneOverZdX = nGradients.dOneOverZdX,
					dUOverZdX =   nGradients.dUOverZdX,
					dVOverZdX =   nGradients.dVOverZdX,
					OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
					UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
					VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;
	klFloat			dOneOverZdSpan, dUOverZdSpan, dVOverZdSpan, Z,
					u0,v0, u1,v1, du,dv,
					spenLenF, spanLenInv;
	int				w;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		if(len>0)
		{
			// calculate an exact texture coordinate
			Z = OneOverZ;	Z.inverse();
			u0 = UOverZ * Z,
			v0 = VOverZ * Z;

			if(len>=SPAN_LEN)
			{
				spenLenF.setInt(SPAN_LEN);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				dOneOverZdSpan = dOneOverZdX;	dOneOverZdSpan<<=SPAN_BITS;
				dUOverZdSpan = dUOverZdX;		dUOverZdSpan<<=SPAN_BITS;
				dVOverZdSpan = dVOverZdX;		dVOverZdSpan<<=SPAN_BITS;

				// run through SPAN_LEN-sized spans as long as we can...
				while(len>SPAN_LEN)
				{
					OneOverZ += dOneOverZdSpan;
					UOverZ += dUOverZdSpan;
					VOverZ += dVOverZdSpan;

					// calculate another exact texture coordinate (SPAN_LEN pixels further)
					Z.inverse(OneOverZ);
					u1 = UOverZ * Z;
					v1 = VOverZ * Z;

					// calculte the texcoord steps per pixel
					du = (u1-u0);	du>>=SPAN_BITS;
					dv = (v1-v0);	dv>>=SPAN_BITS;

					// now run through the span
					len -= (w = SPAN_LEN);
					while(w--)
					{
						int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
						*(nDst++) = texturePixels[texOffs];

						u0 += du;
						v0 += dv;
					}
				}
			}

			// now do the small rest (<SPAN_LEN) - also interpolated
			if(len>0)
			{
				spenLenF.setInt(len);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				OneOverZ += dOneOverZdX*spenLenF;
				UOverZ += dUOverZdX*spenLenF;
				VOverZ += dVOverZdX*spenLenF;

				// calculate another exact texture coordinate (SPAN_LEN pixels further)
				Z.inverse(OneOverZ);
				u1 = UOverZ * Z;
				v1 = VOverZ * Z;

				// calculte the texcoord steps per pixel
				du = (u1-u0)*spanLenInv;
				dv = (v1-v0)*spanLenInv;

				// now run through the final (shorter) span
				w = len;
				while(w--)
				{
					int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
					*(nDst++) = texturePixels[texOffs];

					u0 += du;
					v0 += dv;
				}
			}

		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
	}
}


// this is our special texturing function
// that also does gouraud shading.
// (no blending and no fog)
//
void drawScanLine_Texture_Shaded(const Edge *nLeft, const Edge *nRight, klFloat nPreStep,
								 const klFloat& nInvWidth, BUF_PIXELTYPE* nDst,
								 const TexGradients &nGradients, const MipMapLevel* nTexture)

{
	const int				SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	InterpolatorRGB_Linear	iRGB(nLeft, nRight, nInvWidth);
	klFloat					dOneOverZdX = nGradients.dOneOverZdX,
							dUOverZdX =   nGradients.dUOverZdX,
							dVOverZdX =   nGradients.dVOverZdX,
							OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
							UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
							VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;
	klFloat					dOneOverZdSpan, dUOverZdSpan, dVOverZdSpan, Z,
							u0,v0, u1,v1, du,dv,
							spenLenF, spanLenInv;
	int						w;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		if(len>0)
		{
			// calculate an exact texture coordinate
			Z = OneOverZ;	Z.inverse();
			u0 = UOverZ * Z,
			v0 = VOverZ * Z;

			if(len>=SPAN_LEN)
			{
				spenLenF.setInt(SPAN_LEN);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				dOneOverZdSpan = dOneOverZdX;	dOneOverZdSpan<<=SPAN_BITS;
				dUOverZdSpan = dUOverZdX;		dUOverZdSpan<<=SPAN_BITS;
				dVOverZdSpan = dVOverZdX;		dVOverZdSpan<<=SPAN_BITS;

				// run through SPAN_LEN-sized spans as long as we can...
				while(len>SPAN_LEN)
				{
					OneOverZ += dOneOverZdSpan;
					UOverZ += dUOverZdSpan;
					VOverZ += dVOverZdSpan;

					// calculate another exact texture coordinate (SPAN_LEN pixels further)
					Z.inverse(OneOverZ);
					u1 = UOverZ * Z;
					v1 = VOverZ * Z;

					// calculte the texcoord steps per pixel
					du = (u1-u0);	du>>=SPAN_BITS;
					dv = (v1-v0);	dv>>=SPAN_BITS;

					// now run through the span
					len -= (w = SPAN_LEN);
					while(w--)
					{
						int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
						register unsigned short gorCol = (unsigned short)iRGB.getRGB565();
						register unsigned short texCol = (unsigned short)texturePixels[texOffs];
						u0 += du;
						v0 += dv;
						iRGB.step();

						register unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
						*(nDst++) =	((tmp>>16)&RED_MASK) | 
										((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
										((tmp>>5)&BLUE_MASK);
					}
				}
			}

			// now do the small rest (<SPAN_LEN) - also interpolated
			if(len>0)
			{
				spenLenF.setInt(len);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				OneOverZ += dOneOverZdX*spenLenF;
				UOverZ += dUOverZdX*spenLenF;
				VOverZ += dVOverZdX*spenLenF;

				// calculate another exact texture coordinate (SPAN_LEN pixels further)
				Z.inverse(OneOverZ);
				u1 = UOverZ * Z;
				v1 = VOverZ * Z;

				// calculte the texcoord steps per pixel
				du = (u1-u0)*spanLenInv;
				dv = (v1-v0)*spanLenInv;

				// now run through the span
				w = len;

				while(w--)
				{
					int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
					register unsigned short gorCol = (unsigned short)iRGB.getRGB565();
					register unsigned short texCol = (unsigned short)texturePixels[texOffs];
					u0 += du;
					v0 += dv;
					iRGB.step();

					register unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
					*(nDst++) =	((tmp>>16)&RED_MASK) | 
								((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
								((tmp>>5)&BLUE_MASK);
				}
			}

		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
		iRGB.step(len);
		nDst += len;
	}
}



// this is our special texturing function
// that also does gouraud shading and modulates alpha.
// (no blending and no fog)
//
void drawScanLine_Texture_Shaded_Alpha(const Edge *nLeft, const Edge *nRight, klFloat nPreStep,
									   const klFloat& nInvWidth, BUF_PIXELTYPE* nDst,
									   const TexGradients &nGradients, const MipMapLevel* nTexture)

{
	const int				SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	InterpolatorRGBA_Linear	iRGBA(nLeft, nRight, nInvWidth);
	klFloat					dOneOverZdX = nGradients.dOneOverZdX,
							dUOverZdX =   nGradients.dUOverZdX,
							dVOverZdX =   nGradients.dVOverZdX,
							OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
							UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
							VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;
	klFloat					dOneOverZdSpan, dUOverZdSpan, dVOverZdSpan, Z,
							u0,v0, u1,v1, du,dv,
							spenLenF, spanLenInv;
	int						w;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		if(len>0)
		{
			// calculate an exact texture coordinate
			Z = OneOverZ;	Z.inverse();
			u0 = UOverZ * Z,
			v0 = VOverZ * Z;

			if(len>=SPAN_LEN)
			{
				spenLenF.setInt(SPAN_LEN);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				dOneOverZdSpan = dOneOverZdX;	dOneOverZdSpan<<=SPAN_BITS;
				dUOverZdSpan = dUOverZdX;		dUOverZdSpan<<=SPAN_BITS;
				dVOverZdSpan = dVOverZdX;		dVOverZdSpan<<=SPAN_BITS;

				// run through SPAN_LEN-sized spans as long as we can...
				while(len>SPAN_LEN)
				{
					OneOverZ += dOneOverZdSpan;
					UOverZ += dUOverZdSpan;
					VOverZ += dVOverZdSpan;

					// calculate another exact texture coordinate (SPAN_LEN pixels further)
					Z.inverse(OneOverZ);
					u1 = UOverZ * Z;
					v1 = VOverZ * Z;

					// calculte the texcoord steps per pixel
					du = (u1-u0);	du>>=SPAN_BITS;
					dv = (v1-v0);	dv>>=SPAN_BITS;

					// now run through the span
					len -= (w = SPAN_LEN);
					while(w--)
					{
						int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
						register unsigned int gorCol = iRGBA.getARGB8565();
						register unsigned int texCol = texturePixels[texOffs];
						u0 += du;
						v0 += dv;
						iRGBA.step();

						register unsigned int tmpRB =  (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
						register unsigned int tmpAG = ((gorCol>>19) | ((gorCol&GREEN_MASK)<<5)) * ((texCol>>19) | ((texCol&GREEN_MASK)<<5));

						*(nDst++) =	((tmpRB>>16)&RED_MASK) |										// red
									((tmpAG>>21)&GREEN_MASK) |										// green
									((tmpRB>> 5)&BLUE_MASK) |										// blue
									((tmpAG<<14)&ALPHA_MASK);										// alpha
					}
				}
			}

			// now do the small rest (<SPAN_LEN) - also interpolated
			if(len>0)
			{
				spenLenF.setInt(len);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				OneOverZ += dOneOverZdX*spenLenF;
				UOverZ += dUOverZdX*spenLenF;
				VOverZ += dVOverZdX*spenLenF;

				// calculate another exact texture coordinate (SPAN_LEN pixels further)
				Z.inverse(OneOverZ);
				u1 = UOverZ * Z;
				v1 = VOverZ * Z;

				// calculte the texcoord steps per pixel
				du = (u1-u0)*spanLenInv;
				dv = (v1-v0)*spanLenInv;

				// now run through the span
				w = len;

				while(w--)
				{
					int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
					register unsigned int gorCol = iRGBA.getARGB8565();
					register unsigned int texCol = texturePixels[texOffs];
					u0 += du;
					v0 += dv;
					iRGBA.step();

					register unsigned int tmpRB =  (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
					register unsigned int tmpAG = ((gorCol>>19) | ((gorCol&GREEN_MASK)<<5)) * ((texCol>>19) | ((texCol&GREEN_MASK)<<5));

					*(nDst++) =	((tmpRB>>16)&RED_MASK) |										// red
								((tmpAG>>21)&GREEN_MASK) |										// green
								((tmpRB>> 5)&BLUE_MASK) |										// blue
								((tmpAG<<14)&ALPHA_MASK);										// alpha
				}
			}

		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
		iRGBA.step(len);
		nDst += len;
	}
}



// fills a pixel buffer with affine textured pixels
// (destroys previous buffer content)
//
void drawScanLine_AffineTexture(const Edge *nLeft, const Edge *nRight, klFloat nInvWidth,
								BUF_PIXELTYPE* nDst, const MipMapLevel* nTexture)
{
    // const int		SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int	wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*	texturePixels = nTexture->pixels;
	klFloat			u = nLeft->U, v = nLeft->V,
					du = (nRight->U-u)*nInvWidth, dv = (nRight->V-v)*nInvWidth;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		while(len--)
		{
			int texOffs = (u.getInt()&wMask) + ((v.getInt()&hMask) << wShift);
			*(nDst++) = texturePixels[texOffs];

			u += du;
			v += dv;
		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		u += du*len;
		v += dv*len;
	}
}


// fills a pixel buffer with affine textured pixels that are modulated
// by the interpolated (gouraud) color.
//
// Parameters:
//   nLeft, nRight - scanline edges; their U/V members are the texture
//                   coordinates at both ends, both are also handed to the
//                   color interpolator
//   nInvWidth     - factor the U/V difference is multiplied by to get the
//                   per-pixel step
//   nDst          - destination buffer, written sequentially
//   nTexture      - mipmap level to sample (masks are size-1, so this
//                   presumably requires power-of-two dimensions - confirm)
//
void drawScanLine_AffineTexture_Shaded(const Edge *nLeft, const Edge *nRight, const klFloat& nInvWidth,
									   BUF_PIXELTYPE* nDst, const MipMapLevel* nTexture)
{
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	klFloat					u = nLeft->U, v = nLeft->V,
							du = (nRight->U-u)*nInvWidth, dv = (nRight->V-v)*nInvWidth;
	InterpolatorRGB_Linear	iRGB(nLeft, nRight, nInvWidth);


	for(int len,rlIdx=0;;)
	{
		// visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		while(len--)
		{
			int texOffs = (u.getInt()&wMask) + ((v.getInt()&hMask) << wShift);
			// both colors are truncated to 16 bits and then held as
			// unsigned int, so the products below are computed in unsigned
			// arithmetic (unsigned short operands would be promoted to int
			// and the red/blue product could overflow)
			const unsigned int gorCol = (unsigned short)iRGB.getRGB565();
			const unsigned int texCol = (unsigned short)texturePixels[texOffs];
			u += du;
			v += dv;
			iRGB.step();

			// red and blue are multiplied with a single multiply
			const unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
			*(nDst++) =	((tmp>>16)&RED_MASK) |
						((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
						((tmp>>5)&BLUE_MASK);
		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// hidden pixels: texture coordinates and color move on.
		// NOTE(review): nDst is not advanced here, unlike in
		// drawScanLine_Texture_Shaded - confirm which layout callers expect.
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		u += du*len;
		v += dv*len;
		iRGB.step(len);
	}
}


// fills a pixel buffer with affine textured pixels that are modulated
// (color and alpha) by the interpolated color from getARGB8565().
//
// Parameters:
//   nLeft, nRight - scanline edges; their U/V members are the texture
//                   coordinates at both ends, both are also handed to the
//                   color interpolator
//   nInvWidth     - factor the U/V difference is multiplied by to get the
//                   per-pixel step
//   nDst          - destination buffer, written sequentially
//   nTexture      - mipmap level to sample (masks are size-1, so this
//                   presumably requires power-of-two dimensions - confirm)
//
// NOTE(review): "color>>19" presumably extracts the top bits of an alpha
// value stored above the 565 color - confirm against getARGB8565() and
// ALPHA_MASK.
//
void drawScanLine_AffineTexture_Shaded_Alpha(const Edge *nLeft, const Edge *nRight, klFloat nInvWidth,
											 BUF_PIXELTYPE* nDst, const MipMapLevel* nTexture)
{
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	klFloat					u = nLeft->U, v = nLeft->V,
							du = (nRight->U-u)*nInvWidth, dv = (nRight->V-v)*nInvWidth;
	InterpolatorRGBA_Linear	iRGBA(nLeft, nRight, nInvWidth);


	for(int len,rlIdx=0;;)
	{
		// visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		while(len--)
		{
			int texOffs = (u.getInt()&wMask) + ((v.getInt()&hMask) << wShift);
			const unsigned int gorCol = iRGBA.getARGB8565();
			const unsigned int texCol = texturePixels[texOffs];
			u += du;
			v += dv;
			iRGBA.step();

			// one multiply for red+blue, one for alpha+green
			const unsigned int tmpRB =  (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
			const unsigned int tmpAG = ((gorCol>>19) | ((gorCol&GREEN_MASK)<<5)) * ((texCol>>19) | ((texCol&GREEN_MASK)<<5));
			*(nDst++) =	((tmpRB>>16)&RED_MASK) |										// red
						((tmpAG>>21)&GREEN_MASK) |										// green
						((tmpRB>> 5)&BLUE_MASK) |										// blue
						((tmpAG<<14)&ALPHA_MASK);										// alpha
		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// hidden pixels: texture coordinates and color move on.
		// NOTE(review): nDst is not advanced here, unlike in
		// drawScanLine_Texture_Shaded_Alpha - confirm which layout callers
		// expect.
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		u += du*len;
		v += dv*len;
		iRGBA.step(len);
	}
}

// this is our special texturing function that renders into video memory
// (no shading, no blending and no fog)
//
// Parameters:
//   nLeft      - left edge; supplies the start values of 1/z, u/z and v/z
//   nPreStep   - factor the x gradients are multiplied by before being
//                added to the left-edge values
//   nDst       - destination pixels; advanced over hidden runs as well
//   nGradients - per-pixel x gradients of 1/z, u/z and v/z
//   nTexture   - mipmap level to sample (masks are size-1, so this
//                presumably requires power-of-two dimensions - confirm)
//
// Exact (u,v) is computed with a reciprocal only at span boundaries;
// inside a span (u,v) is stepped linearly. Runs are taken from the member
// array scanlineRunLengths (visible run, hidden run, visible run, ...).
//
void drawScanLine_Texture_VidMem(const Edge *nLeft, klFloat nPreStep, PIXELTYPE* nDst,
								 const TexGradients &nGradients, const MipMapLevel* nTexture)
{
	const int		SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int	wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*	texturePixels = nTexture->pixels;
	klFloat			dOneOverZdX = nGradients.dOneOverZdX,
					dUOverZdX =   nGradients.dUOverZdX,
					dVOverZdX =   nGradients.dVOverZdX,
					OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
					UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
					VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;
	klFloat			dOneOverZdSpan, dUOverZdSpan, dVOverZdSpan, Z,
					u0,v0, u1,v1, du,dv,
					spenLenF, spanLenInv;
	int				w, texOffs;

// Writes one texel to nDst[offs] and steps (u0,v0); nDst itself is not moved.
// Expands to several statements and relies on the local names texOffs, u0,
// v0, du, dv, wMask, hMask, wShift, nDst and texturePixels, so those locals
// must not be renamed.
#define RASTER_PIX(offs)	texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);	\
							nDst[offs] = (unsigned short)texturePixels[texOffs];				\
							u0 += du;	v0 += dv;

	for(int len,rlIdx=0;;)
	{
		// visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		if(len>0)
		{
			// calculate an exact texture coordinate
			Z = OneOverZ;	Z.inverse();
			u0 = UOverZ * Z,
			v0 = VOverZ * Z;

			if(len>=SPAN_LEN)
			{
				// note: spenLenF/spanLenInv set here are not read before
				// the remainder code below overwrites them
				spenLenF.setInt(SPAN_LEN);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				// gradients across one full span (x gradient << SPAN_BITS)
				dOneOverZdSpan = dOneOverZdX;	dOneOverZdSpan<<=SPAN_BITS;
				dUOverZdSpan = dUOverZdX;		dUOverZdSpan<<=SPAN_BITS;
				dVOverZdSpan = dVOverZdX;		dVOverZdSpan<<=SPAN_BITS;

				// run through SPAN_LEN-sized spans as long as we can;
				// the last 1..SPAN_LEN pixels are left to the code below
				while(len>SPAN_LEN)
				{
					OneOverZ += dOneOverZdSpan;
					UOverZ += dUOverZdSpan;
					VOverZ += dVOverZdSpan;

					// calculate another exact texture coordinate (SPAN_LEN pixels further)
					Z.inverse(OneOverZ);
					u1 = UOverZ * Z;
					v1 = VOverZ * Z;

					// calculate the texcoord steps per pixel
					du = (u1-u0);	du>>=SPAN_BITS;
					dv = (v1-v0);	dv>>=SPAN_BITS;

					// w is assigned but not used for this span
					len -= (w = SPAN_LEN);

					// RASTER_SPAN16 is not defined in this part of the file.
					// NOTE(review): presumably it expands to
					// RASTER_PIX(0) ... RASTER_PIX(15), i.e. the unrolled
					// form of a SPAN_LEN-iteration per-pixel loop - confirm.
					RASTER_SPAN16
					nDst += SPAN_LEN;
				}
			}

			// now do the rest (1..SPAN_LEN pixels) - also interpolated
			if(len>0)
			{
				spenLenF.setInt(len);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				OneOverZ += dOneOverZdX*spenLenF;
				UOverZ += dUOverZdX*spenLenF;
				VOverZ += dVOverZdX*spenLenF;

				// calculate the exact texture coordinate at the end of the run
				Z.inverse(OneOverZ);
				u1 = UOverZ * Z;
				v1 = VOverZ * Z;

				// calculate the texcoord steps per pixel
				du = (u1-u0)*spanLenInv;
				dv = (v1-v0)*spanLenInv;

				// now run through the span
				w = len;

				while(w--)
				{
					texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
					*(nDst++) = (unsigned short)texturePixels[texOffs];

					u0 += du;
					v0 += dv;
				}
			}

		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels: interpolants and destination move on
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
		nDst += len;
	}

#undef RASTER_PIX
}


// fills a pixel buffer with affine textured pixels
// and renders directly into video memory
// (destroys previous buffer content)
//
void drawScanLine_AffineTexture_VidMem(const Edge *nLeft, const Edge *nRight, const klFloat& nInvWidth,
									   PIXELTYPE* nDst, const MipMapLevel* nTexture)
{
    // const int		SPAN_BITS = 4 , SPAN_LEN = 1<<SPAN_BITS;
	unsigned int	wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*	texturePixels = nTexture->pixels;
	klFloat			u = nLeft->U, v = nLeft->V,
					du = (nRight->U-u)*nInvWidth, dv = (nRight->V-v)*nInvWidth;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		while(len--)
		{
			int texOffs = (u.getInt()&wMask) + ((v.getInt()&hMask) << wShift);
			*(nDst++) = (unsigned short)texturePixels[texOffs];

			u += du;
			v += dv;
		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		u += du*len;
		v += dv*len;
		nDst += len;
	}
}


// this is our special texturing function that also
// does gouraud shading and directly renders into video memory
// (no blending and no fog)
//
void drawScanLine_Texture_Shaded_VidMem(const Edge *nLeft, const Edge *nRight, klFloat nPreStep,
									    const klFloat& nInvWidth, PIXELTYPE* nDst,
									    const TexGradients &nGradients, const MipMapLevel* nTexture)
{
	const int				SPAN_BITS = 4, SPAN_LEN = 1<<SPAN_BITS;
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	InterpolatorRGB_Linear	iRGB(nLeft, nRight, nInvWidth);
	klFloat					dOneOverZdX = nGradients.dOneOverZdX,
							dUOverZdX =   nGradients.dUOverZdX,
							dVOverZdX =   nGradients.dVOverZdX,
							OneOverZ = nLeft->OneOverZ + nPreStep * dOneOverZdX,
							UOverZ =   nLeft->UOverZ + nPreStep * dUOverZdX,
							VOverZ =   nLeft->VOverZ + nPreStep * dVOverZdX;
	klFloat					dOneOverZdSpan, dUOverZdSpan, dVOverZdSpan, Z,
							u0,v0, u1,v1, du,dv,
							spenLenF, spanLenInv;
	int						w;


	// non interpolated version
	for(int len,rlIdx=0;;)
	{
		// calculate visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		if(len>0)
		{
			// calculate an exact texture coordinate
			Z = OneOverZ;	Z.inverse();
			u0 = UOverZ * Z,
			v0 = VOverZ * Z;

			if(len>=SPAN_LEN)
			{
				spenLenF.setInt(SPAN_LEN);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				dOneOverZdSpan = dOneOverZdX;	dOneOverZdSpan<<=SPAN_BITS;
				dUOverZdSpan = dUOverZdX;		dUOverZdSpan<<=SPAN_BITS;
				dVOverZdSpan = dVOverZdX;		dVOverZdSpan<<=SPAN_BITS;

				// run through SPAN_LEN-sized spans as long as we can...
				while(len>SPAN_LEN)
				{
					OneOverZ += dOneOverZdSpan;
					UOverZ += dUOverZdSpan;
					VOverZ += dVOverZdSpan;

					// calculate another exact texture coordinate (SPAN_LEN pixels further)
					Z.inverse(OneOverZ);
					u1 = UOverZ * Z;
					v1 = VOverZ * Z;

					// calculte the texcoord steps per pixel
					du = (u1-u0);	du>>=SPAN_BITS;
					dv = (v1-v0);	dv>>=SPAN_BITS;

					// now run through the span
					len -= (w = SPAN_LEN);
					while(w--)
					{
						int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
						register unsigned short gorCol = (unsigned short)iRGB.getRGB565();
						register unsigned short texCol = (unsigned short)texturePixels[texOffs];
						u0 += du;
						v0 += dv;
						iRGB.step();

						register unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
						*(nDst++) =	(unsigned short)(((tmp>>16)&RED_MASK) | 
													 ((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
													 ((tmp>>5)&BLUE_MASK)
													);
					}
				}
			}

			// now do the small rest (<SPAN_LEN) - also interpolated
			if(len>0)
			{
				spenLenF.setInt(len);
				spanLenInv = spenLenF;
				spanLenInv.inverse();

				OneOverZ += dOneOverZdX*spenLenF;
				UOverZ += dUOverZdX*spenLenF;
				VOverZ += dVOverZdX*spenLenF;

				// calculate another exact texture coordinate (SPAN_LEN pixels further)
				Z.inverse(OneOverZ);
				u1 = UOverZ * Z;
				v1 = VOverZ * Z;

				// calculte the texcoord steps per pixel
				du = (u1-u0)*spanLenInv;
				dv = (v1-v0)*spanLenInv;

				// now run through the span
				w = len;

				while(w--)
				{
					int texOffs = (u0.getInt()&wMask) + ((v0.getInt()&hMask) << wShift);
					register unsigned short gorCol = (unsigned short)iRGB.getRGB565();
					register unsigned short texCol = (unsigned short)texturePixels[texOffs];
					u0 += du;
					v0 += dv;
					iRGB.step();

					register unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
					*(nDst++) =	(unsigned short)(((tmp>>16)&RED_MASK) | 
												 ((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
												 ((tmp>>5)&BLUE_MASK)
												);
				}
			}

		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// skip invisible pixels
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		OneOverZ += dOneOverZdX*len;
		UOverZ += dUOverZdX*len;
		VOverZ += dVOverZdX*len;
		iRGB.step(len);
		nDst += len;
	}
}


// fills a pixel buffer with affine textured pixels
// and renders directly into video memory
// (destroys previous buffer content)
//
// Parameters:
//   nLeft, nRight - scanline edges; their U/V members are the texture
//                   coordinates at both ends, both are also handed to the
//                   color interpolator
//   nInvWidth     - factor the U/V difference is multiplied by to get the
//                   per-pixel step
//   nDst          - destination pixels; advanced over hidden runs as well
//   nTexture      - mipmap level to sample (masks are size-1, so this
//                   presumably requires power-of-two dimensions - confirm)
//
// Each written pixel is the texel modulated (multiplied per channel) by
// the interpolated color from getRGB565().
//
void drawScanLine_AffineTexture_Shaded_VidMem(const Edge *nLeft, const Edge *nRight, const klFloat& nInvWidth,
											  PIXELTYPE* nDst, const MipMapLevel* nTexture)
{
	unsigned int			wShift = getShift(nTexture->width), wMask=nTexture->width-1, hMask=nTexture->height-1;
	TEX_PIXELTYPE*			texturePixels = nTexture->pixels;
	klFloat					u = nLeft->U, v = nLeft->V,
							du = (nRight->U-u)*nInvWidth, dv = (nRight->V-v)*nInvWidth;
	InterpolatorRGB_Linear	iRGB(nLeft, nRight, nInvWidth);


	for(int len,rlIdx=0;;)
	{
		// visible pixels
		//
		len = scanlineRunLengths[rlIdx++];

		while(len--)
		{
			int texOffs = (u.getInt()&wMask) + ((v.getInt()&hMask) << wShift);
			// both colors are truncated to 16 bits and then held as
			// unsigned int, so the products below are computed in unsigned
			// arithmetic (unsigned short operands would be promoted to int
			// and the red/blue product could overflow)
			const unsigned int gorCol = (unsigned short)iRGB.getRGB565();
			const unsigned int texCol = (unsigned short)texturePixels[texOffs];
			u += du;
			v += dv;
			iRGB.step();

			// red and blue are multiplied with a single multiply
			const unsigned int tmp = (gorCol&REDBLUE_MASK) * (texCol&REDBLUE_MASK);
			*(nDst++) =	(unsigned short)(((tmp>>16)&RED_MASK) |
										 ((((gorCol&GREEN_MASK) * (texCol&GREEN_MASK))>>11)&GREEN_MASK) |
										 ((tmp>>5)&BLUE_MASK)
										);
		}

		if(rlIdx>=numScanlineRunLengths)
			break;

		// hidden pixels: texture coordinates, color and destination move on
		//
		len = scanlineRunLengths[rlIdx++];
		if(rlIdx>=numScanlineRunLengths)
			break;

		u += du*len;
		v += dv*len;
		iRGB.step(len);
		nDst += len;
	}
}

//
// } class klRSW565
//
