/**************************************************************************
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- Tungsten Graphics Inc., Austin, Texas.
+ VMware, Inc.
All Rights Reserved.
/*
* Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
+ * Keith Whitwell <keithw@vmware.com>
*/
#include "main/glheader.h"
#include "main/imports.h"
-#include "main/light.h"
#include "main/mtypes.h"
+#include "main/light.h"
#include "main/enums.h"
+#include "main/state.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
-#include "radeon_tex.h"
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
+#include "radeon_common_context.h"
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
if (rmesa->radeon.dma.flush)
- rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
-
- rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) +
- AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__);
+ rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count, 0 );
* discrete and there are no intervening state changes. (Somewhat
* duplicates changes to DrawArrays code)
*/
-static void radeonEmitPrim( GLcontext *ctx,
+static void radeonEmitPrim( struct gl_context *ctx,
GLenum prim,
GLuint hwprim,
GLuint start,
r100ContextPtr rmesa = R100_CONTEXT( ctx );
radeonTclPrimitive( ctx, prim, hwprim );
- rcommonEnsureCmdBufSpace( &rmesa->radeon,
- AOS_BUFSZ(rmesa->radeon.tcl.aos_count) +
- rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
-
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count,
start );
radeonEmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
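+/* Primitives with fewer vertices than this may be converted to an indexed
+ * (elts) draw by the render code; radeonEnsureEmitSize() below has to cover
+ * both paths when predicting the emit size. */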
+#define MAX_CONVERSION_SIZE 40
+
/* Try & join small primitives
*/
#if 0
#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
#define EMIT_ELT(dest, offset, x) do { \
- int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 ); \
- GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 ); \
+ int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \
+ GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \
(void)rmesa; } while (0)
#else
/* External entrypoints */
/**********************************************************************/
-void radeonEmitPrimitive( GLcontext *ctx,
+void radeonEmitPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
-void radeonEmitEltPrimitive( GLcontext *ctx,
+void radeonEmitEltPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
-void radeonTclPrimitive( GLcontext *ctx,
+void radeonTclPrimitive( struct gl_context *ctx,
GLenum prim,
int hw_prim )
{
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
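+   /* presumably here so that buffers and derived state are up to date
+    * before the primitive (and the SE_CNTL value below) is emitted */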
+ radeon_prepare_render(&rmesa->radeon);
+ if (rmesa->radeon.NewGLState)
+ radeonValidateState( ctx );
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
RADEON_NEWPRIM( rmesa );
se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
- if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
+ if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
se_cntl |= RADEON_FLAT_SHADE_VTX_0;
else
se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
}
}
-/**********************************************************************/
-/* Fog blend factor computation for hw tcl */
-/* same calculation used as in t_vb_fog.c */
-/**********************************************************************/
-
-#define FOG_EXP_TABLE_SIZE 256
-#define FOG_MAX (10.0)
-#define EXP_FOG_MAX .0006595
-#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
-static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
-
-#if 1
-#define NEG_EXP( result, narg ) \
-do { \
- GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \
- GLint k = (GLint) f; \
- if (k > FOG_EXP_TABLE_SIZE-2) \
- result = (GLfloat) EXP_FOG_MAX; \
- else \
- result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \
-} while (0)
-#else
-#define NEG_EXP( result, narg ) \
-do { \
- result = exp(-narg); \
-} while (0)
-#endif
-
-
-/**
- * Initialize the exp_table[] lookup table for approximating exp().
- */
-void
-radeonInitStaticFogData( void )
-{
- GLfloat f = 0.0F;
- GLint i = 0;
- for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
- exp_table[i] = (GLfloat) exp(-f);
- }
-}
-
-
/**
- * Compute per-vertex fog blend factors from fog coordinates by
- * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
- * Fog coordinates are distances from the eye (typically between the
- * near and far clip plane distances).
- * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
- * Fog blend factors are in the range [0,1].
+ * Predict the total emit size for the next rendering operation so there is no flush in the middle of rendering.
+ * The prediction has to aim for the smallest value that still covers the worst-case scenario.
*/
-float
-radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
{
- GLfloat end = ctx->Fog.End;
- GLfloat d, temp;
- const GLfloat z = FABSF(fogcoord);
-
- switch (ctx->Fog.Mode) {
- case GL_LINEAR:
- if (ctx->Fog.Start == ctx->Fog.End)
- d = 1.0F;
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+	GLuint nr_aos = 1; /* radeonEmitArrays always emits at least one aos */
+ int i;
+	/* list of flags that allocate an aos object */
+ const GLuint flags_to_check[] = {
+ VERT_BIT_NORMAL,
+ VERT_BIT_COLOR0,
+ VERT_BIT_COLOR1,
+ VERT_BIT_FOG
+ };
+ /* predict number of aos to emit */
+ for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+ {
+ if (inputs & flags_to_check[i])
+ ++nr_aos;
+ }
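+	/* each enabled texcoord array takes an aos of its own as well */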
+ for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+ {
+ if (inputs & VERT_BIT_TEX(i))
+ ++nr_aos;
+ }
+
+ {
+	/* predict the size of the state emit */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+	/* tcl state may be changed in radeonEmitArrays, so account for it here if it is not already dirty */
+ if (!rmesa->hw.tcl.dirty)
+ state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+		/* If the primitive count is less than MAX_CONVERSION_SIZE the
+		   rendering code may decide to convert it to elts.  In that
+		   case we have to make a pessimistic prediction and use the
+		   larger of the two paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if (!VB->Primitive[i].count)
+ continue;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
else
- d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
- temp = (end - z) * d;
- return CLAMP(temp, 0.0F, 1.0F);
- break;
- case GL_EXP:
- d = ctx->Fog.Density;
- NEG_EXP( temp, d * z );
- return temp;
- break;
- case GL_EXP2:
- d = ctx->Fog.Density*ctx->Fog.Density;
- NEG_EXP( temp, d * z * z );
- return temp;
- break;
- default:
- _mesa_problem(ctx, "Bad fog mode in make_fog_coord");
- return 0;
- }
+ space_required += index + elts;
+ space_required += VB->Primitive[i].count * 3;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+  /* flush the buffer if we need more space than is left in it. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
}
/**********************************************************************/
/* TCL render.
*/
-static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
struct tnl_pipeline_stage *stage )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
GLuint i;
+ GLuint emit_end;
/* TODO: separate this from the swtnl pipeline
*/
inputs |= VERT_BIT_NORMAL;
}
- if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+ if (_mesa_need_secondary_color(ctx)) {
inputs |= VERT_BIT_COLOR1;
}
}
for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._Current) {
/* TODO: probably should not emit texture coords when texgen is enabled */
if (rmesa->TexGenNeedNormals[i]) {
inputs |= VERT_BIT_NORMAL;
}
radeonReleaseArrays( ctx, ~0 );
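+   /* remember where the emit is predicted to end so the check after
+    * rendering below can warn if the prediction was too small */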
+ emit_end = radeonEnsureEmitSize( ctx, inputs )
+ + rmesa->radeon.cmdbuf.cs->cdw;
radeonEmitArrays( ctx, inputs );
rmesa->tcl.Elts = VB->Elts;
radeonEmitPrimitive( ctx, start, start+length, prim );
}
+ if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_FALSE; /* finished the pipe */
}
*/
-static void transition_to_swtnl( GLcontext *ctx )
+static void transition_to_swtnl( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
radeonChooseVertexState( ctx );
radeonChooseRenderState( ctx );
- _mesa_validate_all_lighting_tables( ctx );
+ _tnl_validate_shine_tables( ctx );
tnl->Driver.NotifyMaterialChange =
- _mesa_validate_all_lighting_tables;
+ _tnl_validate_shine_tables;
radeonReleaseArrays( ctx, ~0 );
}
-static void transition_to_hwtnl( GLcontext *ctx )
+static void transition_to_hwtnl( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
if ( rmesa->radeon.dma.flush )
- rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+ rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
rmesa->radeon.dma.flush = NULL;
rmesa->swtcl.vertex_format = 0;
// if (rmesa->swtcl.indexed_verts.buf)
// radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- // __FUNCTION__ );
+ // __func__ );
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback\n");
}
-void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint oldfallback = rmesa->radeon.TclFallback;
if (mode) {
rmesa->radeon.TclFallback |= bit;
if (oldfallback == 0) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon begin tcl fallback %s\n",
getFallbackString( bit ));
transition_to_swtnl( ctx );
else {
rmesa->radeon.TclFallback &= ~bit;
if (oldfallback == bit) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback %s\n",
getFallbackString( bit ));
transition_to_hwtnl( ctx );