head	1.8;
access;
symbols
	merge-1:1.1.2.7
	experimental-1:1.1.0.2;
locks; strict;
comment	@ * @;


1.8
date	99.08.02.23.28.20;	author keithw;	state Exp;
branches;
next	1.7;

1.7
date	99.08.01.16.43.05;	author miklos;	state Exp;
branches;
next	1.6;

1.6
date	99.07.20.22.40.12;	author keithw;	state Exp;
branches;
next	1.5;

1.5
date	99.07.14.12.42.30;	author brianp;	state Exp;
branches;
next	1.4;

1.4
date	99.07.14.01.09.31;	author keithw;	state Exp;
branches;
next	1.3;

1.3
date	99.07.12.15.02.02;	author keithw;	state Exp;
branches;
next	1.2;

1.2
date	99.07.12.12.05.26;	author keithw;	state Exp;
branches;
next	1.1;

1.1
date	99.05.21.21.29.29;	author keithw;	state dead;
branches
	1.1.2.1;
next	;

1.1.2.1
date	99.05.21.21.29.29;	author keithw;	state Exp;
branches;
next	1.1.2.2;

1.1.2.2
date	99.05.24.02.04.16;	author keithw;	state Exp;
branches;
next	1.1.2.3;

1.1.2.3
date	99.05.30.13.30.35;	author keithw;	state Exp;
branches;
next	1.1.2.4;

1.1.2.4
date	99.06.05.21.55.02;	author holger;	state Exp;
branches;
next	1.1.2.5;

1.1.2.5
date	99.06.06.22.35.56;	author keithw;	state Exp;
branches;
next	1.1.2.6;

1.1.2.6
date	99.06.19.15.04.15;	author keithw;	state Exp;
branches;
next	1.1.2.7;

1.1.2.7
date	99.07.05.19.42.33;	author keithw;	state Exp;
branches;
next	;


desc
@@


1.8
log
@fix for miklos' cva line bug
@
text
@
#ifdef HAVE_CONFIG_H
#include "conf.h"
#endif


#if defined(FX)

#include "fxdrv.h"
#include "vbindirect.h"


/* We don't handle texcoord-4 in the safe clip routines - maybe we should.
 */
static void fxDDRenderElements( struct vertex_buffer *VB )
{
   GLcontext *ctx = VB->ctx;
   fxMesaContext fxMesa = (fxMesaContext)ctx->DriverCtx;

   if (fxMesa->render_index != 0 ||
       ((ctx->Texture.ReallyEnabled & 0xf) && 
	(VB->TexCoordPtr[0]->size > 2)) ||
       ((ctx->Texture.ReallyEnabled & 0xf0) && 
	(VB->TexCoordPtr[1]->size > 2)) ||
	(VB->ClipPtr->size != 4)) /* Breaks clipping otherwise */
      gl_render_elts( VB );
   else 
      fxDDRenderElementsDirect( VB );
}
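
/* Note: the direct path needs full four-component clip coordinates
 * and at most two-component texture coordinates; projective texcoords
 * (size > 2) or a vertex buffer whose clip coordinates carry fewer
 * than four components take the software gl_render_elts route above.
 */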

static void fxDDCheckRenderVBIndirect( GLcontext *ctx, 
				       struct gl_pipeline_stage *d )
{   
   d->type = 0;

   if ((ctx->IndirectTriangles & DD_SW_SETUP) == 0 &&
       ctx->Driver.MultipassFunc == 0) 
   {
      d->type = PIPE_IMMEDIATE;
      d->inputs = VERT_SETUP_FULL | VERT_ELT | VERT_PRECALC_DATA;
   }      
}
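
/* The check above enables the immediate-mode direct path only when no
 * triangles require software setup (DD_SW_SETUP clear) and no driver
 * multipass callback is registered; otherwise the stage stays
 * disabled (type 0) and the generic path runs.
 */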

static void fxDDRenderVBIndirect( struct vertex_buffer *VB )
{
   GLcontext *ctx = VB->ctx;
   fxMesaContext fxMesa = (fxMesaContext)ctx->DriverCtx;
   struct vertex_buffer *cvaVB = ctx->CVA.VB;

   if (fxMesa->render_index != 0 ||
       ((ctx->Texture.ReallyEnabled & 0xf) && cvaVB->TexCoordPtr[0]->size>2) ||
       ((ctx->Texture.ReallyEnabled & 0xf0) && cvaVB->TexCoordPtr[1]->size>2) ||
       (VB->ClipPtr->size != 4)) /* Breaks clipping otherwise */
      gl_render_vb_indirect( VB );
   else
      fxDDRenderVBIndirectDirect( VB );
}




/* This sort of driver-based reconfiguration of the pipeline could be
 * used to support accelerated transformation and lighting on capable
 * hardware.
 *
 */
/* Add an FL_FLOCK style lock to a file.  Scan the inode's lock list
 * for a conflict, failing or blocking according to 'wait'; otherwise
 * insert the new lock at the head of the list.
 */
static int flock_lock_file(struct file *filp, struct file_lock *caller,
			   unsigned int wait)
{
	struct file_lock *fl;
	struct file_lock *new_fl;
	int error;

	error = -ENOLCK;
	if ((new_fl = locks_empty_lock()) == NULL)
		goto out;
	locks_init_lock(new_fl, caller);

repeat:
	error = -EBUSY;
	if ((fl = filp->f_inode->i_flock) && (fl->fl_flags & FL_POSIX))
		goto out;

	while (fl != NULL) {
		if (!flock_locks_conflict(new_fl, fl)) {
			fl = fl->fl_next;
			continue;
		}
		error = -EAGAIN;
		if (!wait)
			goto out;
		locks_insert_block(fl, new_fl);
		interruptible_sleep_on(&new_fl->fl_wait);
		locks_delete_block(fl, new_fl);
		goto repeat;
	}
	locks_insert_lock(&filp->f_inode->i_flock, new_fl);
	new_fl = NULL;
	error = 0;

out:
	if (new_fl)
		locks_free_lock(new_fl);
	return (error);
}

/* Add a POSIX style lock to a file.
 * We merge adjacent locks whenever possible. POSIX locks are sorted by owner
 * task, then by the starting address.
 *
 * Kai Petzke writes:
 * To make freeing a lock much faster, we keep a pointer to the lock before the
 * actual one. But the real gain of the new coding was, that lock_it() and
 * unlock_it() became one function.
 *
 * To all purists: Yes, I use a few goto's. Just pass on to the next function.
 */
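
/* Worked example of the rules above (illustrative values): an owner
 * holding F_RDLCK on bytes 0-99 that requests F_RDLCK on 100-199
 * passes the adjacency test (fl_end >= start - 1), so the two regions
 * collapse into one 0-199 lock.  A later F_WRLCK on 50-59 from the
 * same owner is a different type, so the 0-199 lock is split into
 * 0-49 and 60-199 around the new write lock; that split is what
 * new_fl2 below exists for.
 */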

static int posix_lock_file(struct file *filp, struct file_lock *caller,
			   unsigned int wait)
{
	struct file_lock *fl;
	struct file_lock *new_fl, *new_fl2;
	struct file_lock *left = NULL;
	struct file_lock *right = NULL;
	struct file_lock **before;
	int error;
	int added = 0;

	/*
	 * We may need two file_lock structures for this operation,
	 * so we get them in advance to avoid races.
	 */
	new_fl  = locks_empty_lock();
	new_fl2 = locks_empty_lock();
	error = -ENOLCK; /* "no luck" */
	if (!(new_fl && new_fl2))
		goto out;
 
	if (caller->fl_type != F_UNLCK) {
  repeat:
		error = -EBUSY;
		if ((fl = filp->f_inode->i_flock) && (fl->fl_flags & FL_FLOCK))
			goto out;

		while (fl != NULL) {
			if (!posix_locks_conflict(caller, fl)) {
				fl = fl->fl_next;
				continue;
			}
			error = -EAGAIN;
			if (!wait)
				goto out;
			error = -EDEADLK;
			if (posix_locks_deadlock(caller->fl_owner, fl->fl_owner))
				goto out;
			error = -ERESTARTSYS;
			if (current->signal & ~current->blocked)
				goto out;
			locks_insert_block(fl, caller);
			interruptible_sleep_on(&caller->fl_wait);
			locks_delete_block(fl, caller);
			goto repeat;
		}
	}

	/*
	 * We've allocated the new locks in advance, so there are no
	 * errors possible (and no blocking operations) from here on.
	 * 
	 * Find the first old lock with the same owner as the new lock.
	 */
	
	before = &filp->f_inode->i_flock;

	error = -EBUSY;
	if ((*before != NULL) && ((*before)->fl_flags & FL_FLOCK))
		goto out;

	/* First skip locks owned by other processes.
	 */
	while ((fl = *before) && (caller->fl_owner != fl->fl_owner)) {
		before = &fl->fl_next;
	}

	/* Process locks with this owner.
	 */
	while ((fl = *before) && (caller->fl_owner == fl->fl_owner)) {
		/* Detect adjacent or overlapping regions (if same lock type)
		 */
		if (caller->fl_type == fl->fl_type) {
			if (fl->fl_end < caller->fl_start - 1)
				goto next_lock;
			/* If the next lock in the list lies entirely beyond
			 * the new one, insert the new lock here.
			 */
			if (fl->fl_start > caller->fl_end + 1)
				break;

			/* If we come here, the new and old lock are of the
			 * same type and adjacent or overlapping.  Merge them
			 * into a single lock running from the lower of the
			 * two start addresses to the higher of the two end
			 * addresses.
			 */
			if (fl->fl_start > caller->fl_start)
				fl->fl_start = caller->fl_start;
			else
				caller->fl_start = fl->fl_start;
			if (fl->fl_end < caller->fl_end)
				fl->fl_end = caller->fl_end;
			else
				caller->fl_end = fl->fl_end;
			if (added) {
				locks_delete_lock(before, 0);
				continue;
			}
			caller = fl;
			added = 1;
		}
		else {
			/* Processing for different lock types is a bit
			 * more complex.
			 */
			if (fl->fl_end < caller->fl_start)
				goto next_lock;
			if (fl->fl_start > caller->fl_end)
				break;
			if (caller->fl_type == F_UNLCK)
				added = 1;
			if (fl->fl_start < caller->fl_start)
				left = fl;
			/* If the next lock in the list has a higher end
			 * address than the new one, insert the new one here.
			 */
			if (fl->fl_end > caller->fl_end) {
				right = fl;
				break;
			}
			if (fl->fl_start >= caller->fl_start) {
				/* The new lock completely replaces an old
				 * one (This may happen several times).
				 */
				if (added) {
					locks_delete_lock(before, 0);
					continue;
				}
				/* Replace the old lock with the new one.
				 * Wake up anybody waiting for the old one,
				 * as the change in lock type might satisfy
				 * their needs.
				 */
				locks_wake_up_blocks(fl, 0);
				fl->fl_start = caller->fl_start;
				fl->fl_end = caller->fl_end;
				fl->fl_type = caller->fl_type;
				caller = fl;
				added = 1;
			}
		}
		/* Go on to next lock.
		 */
	next_lock:
		before = &fl->fl_next;
	}

	error = 0;
	if (!added) {
		if (caller->fl_type == F_UNLCK)
			goto out;
		locks_init_lock(new_fl, caller);
		locks_insert_lock(before, new_fl);
		new_fl = NULL;
	}
	if (right) {
		if (left == right) {
			/* The new lock breaks the old one in two pieces,
			 * so we have to use the second new lock (in this
			 * case, even F_UNLCK may fail!).
			 */
			left = locks_init_lock(new_fl2, right);
			locks_insert_lock(before, left);
			new_fl2 = NULL;
		}
		right->fl_start = caller->fl_end + 1;
		locks_wake_up_blocks(right, 0);
	}
	if (left) {
		left->fl_end = caller->fl_start - 1;
		locks_wake_up_blocks(left, 0);
	}
out:
	/*
	 * Free any unused locks.  (They haven't
	 * ever been used, so we use kfree().)
	 */
	if (new_fl)
		kfree(new_fl);
	if (new_fl2)
		kfree(new_fl2);
	return error;
}
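
/* The same merge seen from userspace: a minimal sketch, not kernel
 * code and kept out of the build, using fcntl(F_SETLK).  After both
 * calls /proc/locks shows a single POSIX lock covering bytes 0-199.
 */
#if 0
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = { 0 };
	int fd = open("testfile", O_RDWR | O_CREAT, 0600);

	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 100;			/* lock bytes 0..99 */
	fcntl(fd, F_SETLK, &fl);

	fl.l_start = 100;		/* adjacent region, bytes 100..199 */
	fcntl(fd, F_SETLK, &fl);	/* merged by posix_lock_file() */

	close(fd);
	return 0;
}
#endif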

/*
 * Allocate an empty lock structure. We can use GFP_KERNEL now that
 * all allocations are done in advance.
 */
static struct file_lock *locks_empty_lock(void)
{
	return ((struct file_lock *) kmalloc(sizeof(struct file_lock),
						GFP_KERNEL));
}

/*
 * Initialize a new lock from an existing file_lock structure.
 */
static struct file_lock *locks_init_lock(struct file_lock *new,
					 struct file_lock *fl)
{
	if (new) {
		memset(new, 0, sizeof(*new));
		new->fl_owner = fl->fl_owner;
		new->fl_file = fl->fl_file;
		new->fl_flags = fl->fl_flags;
		new->fl_type = fl->fl_type;
		new->fl_start = fl->fl_start;
		new->fl_end = fl->fl_end;
	}
	return new;
}

/* Insert file lock fl into an inode's lock list at the position indicated
 * by pos. At the same time add the lock to the global file lock list.
 */
static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{
	fl->fl_nextlink = file_lock_table;
	fl->fl_prevlink = NULL;
	if (file_lock_table != NULL)
		file_lock_table->fl_prevlink = fl;
	file_lock_table = fl;
	fl->fl_next = *pos;	/* insert into file's list */
	*pos = fl;

	return;
}
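
/* Resulting topology: fl_nextlink/fl_prevlink chain every lock in the
 * system into the doubly-linked file_lock_table, used when generating
 * /proc/locks (see lock_get_status() below), while fl_next chains the
 * locks of a single file into the list hanging off its inode's
 * i_flock pointer.  A lock is always on both lists at once.
 */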

/* Delete a lock and free it.
 * First remove our lock from the active lock lists. Then call
 * locks_wake_up_blocks() to wake up processes that are blocked
 * waiting for this lock. Finally free the lock structure.
 */
static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
{
	struct file_lock *thisfl;
	struct file_lock *prevfl;
	struct file_lock *nextfl;
	
	thisfl = *thisfl_p;
	*thisfl_p = thisfl->fl_next;

	prevfl = thisfl->fl_prevlink;
	nextfl = thisfl->fl_nextlink;

	if (nextfl != NULL)
		nextfl->fl_prevlink = prevfl;

	if (prevfl != NULL)
		prevfl->fl_nextlink = nextfl;
	else
		file_lock_table = nextfl;
	
	locks_wake_up_blocks(thisfl, wait);
	locks_free_lock(thisfl);

	return;
}
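
/* Note the ordering above: the lock is first unlinked from both
 * lists, then any blocked processes are woken so they retry against
 * the updated list, and only then is the memory released.
 */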


static char *lock_get_status(struct file_lock *fl, int id, char *pfx)
{
	static char temp[129];
	char *p = temp;
	struct inode *inode;

	inode = fl->fl_file->f_inode;

	p += sprintf(p, "%d:%s ", id, pfx);
	if (fl->fl_flags & FL_POSIX) {
		p += sprintf(p, "%6s %s ",
			     (fl->fl_flags & FL_BROKEN) ? "BROKEN" :
			     (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
			     (IS_MANDLOCK(inode) &&
			      (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ?
			     "MANDATORY" : "ADVISORY ");
	}
	else {
		p += sprintf(p, "FLOCK  ADVISORY  ");
	}
	p += sprintf(p, "%s ", (fl->fl_type == F_RDLCK) ? "READ " : "WRITE");
	p += sprintf(p, "%d %s:%ld %ld %ld ",
		     fl->fl_owner ? fl->fl_owner->pid : 0,
		     kdevname(inode->i_dev), inode->i_ino, fl->fl_start,
		     fl->fl_end);
	sprintf(p, "%08lx %08lx %08lx %08lx %08lx\n",
		(long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink,
		(long)fl->fl_next, (long)fl->fl_nextblock);
	return (temp);
}
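
/* A line formatted by lock_get_status() looks like this (illustrative
 * values; the five hex words are the lock's own address followed by
 * its fl_prevlink, fl_nextlink, fl_next and fl_nextblock pointers):
 *
 *   1: POSIX  ADVISORY  WRITE 812 03:01:4711 0 199 c2a4bf60 00000000 00000000 00000000 00000000
 */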

/* Slot the fx render functions into the pipeline in place of the
 * software renderers.
 */
GLuint fxDDRegisterPipelineStages( struct gl_pipeline_stage *out,
				   const struct gl_pipeline_stage *in,
				   GLuint nr )
{
   GLuint i, o;

   for (i = o = 0 ; i < nr ; i++) {
      switch (in[i].ops) {
      case PIPE_OP_RENDER:
	 out[o] = in[i];
	 if (in[i].run == gl_render_elts)
	    out[o].run = fxDDRenderElements;
	 else if (in[i].run == gl_render_vb_indirect) {
	    out[o].check = fxDDCheckRenderVBIndirect;
	    out[o].run = fxDDRenderVBIndirect;
	 }
	 o++;
	 break;
      default:
	 out[o++] = in[i];
	 break;
      }
   }

   return o;
}

#endif
@


1.1.2.2
log
@new, experimental fast path for quake 3 precalc pipeline
@
text
@d43 7
a98 36


/* Perform global optimizations to the pipeline.  The fx driver
 * implements a single such fast path, which corresponds to the standard
 * quake3 cva pipeline.
 *
 * The downside of that stage is that it prevents sharing of vertex
 * data in multipass operation.  Hence the 'passratio' test below,
 * which has to be implemented before this code is usable.
 *
 * The other downside is that this breaks gl_print_pipeline()...  
 */
void fxDDOptimizePrecalcPipeline( GLcontext *ctx, struct gl_pipeline *pipe )
{   
   fxMesaContext fxMesa = FX_CONTEXT(ctx);

/*     return; */

   if (/*   ctx->CVA.passratio < 1.2 &&  */
       fxMesa->render_index == 0 && 
       pipe->ops == (PIPE_OP_VERT_XFORM |
		     PIPE_OP_RAST_SETUP_1 |
		     PIPE_OP_RAST_SETUP_0 |
		     PIPE_OP_RENDER))
   {
      pipe->stages[0] = fxCvaFastPath[fxMesa->setupindex & 0x7];
      pipe->stages[1] = 0;
   }
}

void fxDDOptimizeEltPipeline( GLcontext *ctx, struct gl_pipeline *pipe )
{
   (void) ctx;
   (void) pipe;
}

@


1.1.2.3
log
@Broadened FX fast path, added support for multipass multitexture.
@
text
@a92 44
#define ILLEGAL_ENABLES (ENABLE_TEXMAT0 | 	\
			 ENABLE_TEXMAT1 | 	\
			 ENABLE_TEXGEN0 |	\
			 ENABLE_TEXGEN1 |	\
			 ENABLE_USERCLIP | 	\
			 ENABLE_LIGHT | 	\
			 ENABLE_FOG)
			 

/* Better than optimizing the pipeline, we can do the whole build very
 * quickly with the aid of a new flags member.
 */
GLboolean fxDDBuildPrecalcPipeline( GLcontext *ctx )
{   
   struct gl_pipeline *pipe = &ctx->CVA.pre;
   fxMesaContext fxMesa = FX_CONTEXT(ctx);

   if (fxMesa->is_in_hardware &&
       fxMesa->render_index == 0 && 
       (ctx->Enabled & ILLEGAL_ENABLES) == 0 &&
       (ctx->Array.Summary & VERT_ELT))
   {
      pipe->stages[0] = fxCvaFastPath[fxMesa->setupindex & 0x7];
      pipe->stages[1] = 0;

      pipe->inputs = ctx->RenderFlags;
      pipe->new_inputs = ctx->RenderFlags;
      pipe->outputs = 0;
      pipe->new_outputs = 0;
      pipe->forbidden_inputs = 0;
      pipe->cva_state_change = 0;

      pipe->changed_ops = pipe->ops | pipe->stages[0]->ops;
      pipe->ops = pipe->stages[0]->ops;
      return 1;
   }

   return 0;
}





d105 1
a105 1
{
d108 3
a110 1
   if (fxMesa->is_in_hardware &&
d112 4
a115 2
       (ctx->Enabled & ILLEGAL_ENABLES) == 0 &&
       (ctx->Array.Summary & VERT_ELT))
a120 2


@


1.1.2.4
log
@added #if defined(FX)-#endif to make compilation without glide possible
@
text
@d1 1
a1 1
#if defined(FX)
a169 2

#endif
@


1.1.2.5
log
@some trial assembly, made newer code active by default
@
text
@d93 2
a94 4
#define ILLEGAL_ENABLES (TEXTURE0_3D|		\
			 TEXTURE1_3D|		\
			 ENABLE_TEXMAT0 |	\
			 ENABLE_TEXMAT1 |	\
d97 2
a98 2
			 ENABLE_USERCLIP |	\
			 ENABLE_LIGHT |		\
d113 1
a113 2
       (ctx->Array.Summary & (VERT_ELT|VERT_OBJ_23)) &&
       (ctx->Array.Summary & (VERT_OBJ_4|VERT_TEX0_4|VERT_TEX1_4)) == 0)
@


1.1.2.6
log
@Removed SGIS multitexture, added FX/X86 assm directory
@
text
@a101 2


a102 19
/* Because this is slotted in by the OptimizePipeline function, most
 * of the information here is just for gl_print_pipeline().  Only the
 * run member is required.  
 */
static struct gl_pipeline_stage fx_fast_stage = {
   "FX combined vertex transform, setup and rasterization stage",
   PIPE_OP_VERT_XFORM|PIPE_OP_RAST_SETUP_0|PIPE_OP_RAST_SETUP_1|PIPE_OP_RENDER,
   PIPE_PRECALC,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,				/* never called */
   fxDDFastPath
};

a110 2
   
/*     return 0; */
d118 2
d121 10
a130 75
#if 0
      static int maxipax, count;

      if ((ctx->Array.NewArrayState & VERT_OBJ_ANY) != 0) {
	 fprintf(stderr, "count: %d\n", count);
	 fxMesa->passes++; count = 0;

      } else {
	 pipe->stages[0] = 0;
	 return 1;
      }

      fxPrintSetupFlags( "setupindex", fxMesa->setupindex );

      if (fxMesa->setupindex & SETUP_TMU0) {
	 fxMesa->multipass++; count++;
      }

      if (fxMesa->setupindex & SETUP_TMU1) {
	 fxMesa->multipass++; count++;
      }

      if (count > maxipax) maxipax = count;

      if (fxMesa->passes && (fxMesa->passes & 0x03ff) == 0) {
	 fprintf(stderr, "nr %d avg nr virtual texunits: %f max %d\n",
		 fxMesa->passes,
		 (float)fxMesa->multipass / (float)fxMesa->passes,
		 maxipax);
	 fxMesa->passes = 0;
	 fxMesa->multipass = 0;
      }
      if (fxMesa->passes == 8) {
	 fxMesa->passes >>= 1;
	 fxMesa->multipass >>= 1;
      }
#endif
      
      if (1 || !fxMesa->multipass) {
	 pipe->stages[0] = &fx_fast_stage;
	 pipe->stages[1] = 0;

	 pipe->inputs = ctx->RenderFlags;
	 pipe->new_inputs = ctx->RenderFlags;
	 pipe->outputs = 0;
	 pipe->new_outputs = 0;
	 pipe->forbidden_inputs = 0;
	 pipe->cva_state_change = 0;

	 pipe->changed_ops = pipe->ops | pipe->stages[0]->ops;
	 pipe->ops = pipe->stages[0]->ops;

	 if (MESA_VERBOSE & VERBOSE_STATE) 
	    fprintf(stderr, "fxMesa: using fast path\n");

	 fxMesa->using_fast_path = 1;
	 
	 return 1;
      }      
      
      if (MESA_VERBOSE & VERBOSE_STATE) 
	 fprintf(stderr, "fxMesa: multipass --> revert to normal path\n");
   } 

   if (MESA_VERBOSE & VERBOSE_STATE) {
      if (fxMesa->using_fast_path)
	 fprintf(stderr, "fxMesa: falling back to full pipeline\n");
      else 
	 fprintf(stderr, "fxMesa: still using full pipeline\n");
   }


   if (fxMesa->using_fast_path) {
      fxMesa->using_fast_path = 0;
      ctx->Array.NewArrayState |= ctx->Array.Summary;
d160 1
a160 1
      pipe->stages[0] = &fx_fast_stage;
@


1.1.2.7
log
@cleaned up fxpipeline
@
text
@a124 9
int fx_frame;


#if 1
#undef MESA_VERBOSE
#define MESA_VERBOSE VERBOSE_STATE
#endif


d133 2
d138 1
a138 1
       (ctx->Array.Summary & (VERT_OBJ_23)) &&
d141 54
a194 2
      if (MESA_VERBOSE & VERBOSE_STATE) 
	 if (!fxMesa->using_fast_path)
d197 7
a203 12
      pipe->stages[0] = &fx_fast_stage;
      pipe->stages[1] = 0;
/*        pipe->inputs = ctx->RenderFlags; */
/*        pipe->new_inputs = ctx->RenderFlags; */
/*        pipe->outputs = 0; */
/*        pipe->new_outputs = 0; */
/*        pipe->forbidden_inputs = 0; */
/*        pipe->cva_state_change = 0; */
/*        pipe->changed_ops = 0;  */
      pipe->ops = pipe->stages[0]->ops;
      fxMesa->using_fast_path = 1;
      return 1;
d206 7
a212 9
   if (fxMesa->using_fast_path) 
   {
      if (MESA_VERBOSE & VERBOSE_STATE) 
 	 fprintf(stderr, "fxMesa: fall back to full pipeline %x %x %x %x %x\n",
		 fxMesa->is_in_hardware,
		 fxMesa->render_index,
		 (ctx->Enabled & ILLEGAL_ENABLES),
		 (ctx->Array.Summary & (VERT_OBJ_23)),
		 (ctx->Array.Summary & (VERT_OBJ_4|VERT_TEX0_4|VERT_TEX1_4)));
d214 1
a215 2
      ctx->CVA.VB->ClipOrMask = 0;
      ctx->CVA.VB->ClipAndMask = CLIP_ALL_BITS;
a216 1
      return 0;
a217 3
   
   if (MESA_VERBOSE & VERBOSE_STATE) 
      fprintf(stderr, "fxMesa: still using full pipeline\n");
d231 5
a235 1
 * This is now handled by the 'build' function above.
@


