r5xx: Dump shader constants when dumping program assembly.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_swtcl.c
index ab771aba87b95ebe2570aabf95052381f7175109..8aebd9be3ea0b9f434576702c7d2609c23b280f6 100644 (file)
@@ -40,6 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "enums.h"
 #include "image.h"
 #include "imports.h"
+#include "light.h"
 #include "macros.h"
 
 #include "swrast/s_context.h"
@@ -57,12 +58,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_emit.h"
 #include "r300_mem.h"
 
-#define R300_NEWPRIM( rmesa )                  \
-  do {                                         \
-    if ( rmesa->dma.flush )                    \
-      rmesa->dma.flush( rmesa );               \
-  } while (0)
-
 static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
 
 
@@ -85,189 +80,204 @@ do {                                                                      \
 
 static void r300SetVertexFormat( GLcontext *ctx )
 {
-   r300ContextPtr rmesa = R300_CONTEXT( ctx );
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &tnl->vb;
-   DECLARE_RENDERINPUTS(index_bitset);
-   int vap_fmt_0 = 0;
-   int vap_fmt_1 = 0;
-   int vic_0 = 0, vic_1 = 0;
-   int vap_vte_cntl = 0;
-   int offset = 0;
-   int vte = 0;
-
-   DECLARE_RENDERINPUTS(render_inputs_bitset);
-
-   RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-
-   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
-
-   RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-
-   /* Important:
-    */
-   if ( VB->NdcPtr != NULL ) {
-      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-   }
-   else {
-      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-   }
-
-   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-   rmesa->swtcl.vertex_attr_count = 0;
-
-   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-    * build up a hardware vertex.
-    */
-   EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
-   vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-   vap_vte_cntl |= R300_VTX_W0_FMT;
-   vic_1 |= R300_INPUT_CNTL_POS;
-
-   offset = 4;
-
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
-     EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-     vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-     offset += 1;
-   }
-
-   rmesa->swtcl.coloroffset = offset;
-#if MESA_LITTLE_ENDIAN 
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F );
-#else
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F );
-#endif
-
-   vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
-   vic_1 |= R300_INPUT_CNTL_COLOR;
-   vic_0 |= 1;
-   offset += 4;
-
-   rmesa->swtcl.specoffset = 0;
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
-       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-
-#if MESA_LITTLE_ENDIAN 
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-        rmesa->swtcl.specoffset = offset;
-        EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3F );
-        vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-        vic_0 |= (1<<2);
-      }
-      else {
-        EMIT_PAD( 3 );
-      }
-
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-        EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F );
-        vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-      }
-      else {
-        EMIT_PAD( 1 );
-      }
-#else
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-        EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F );
-        vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-      }
-      else {
-        EMIT_PAD( 1 );
-      }
-
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-        rmesa->swtcl.specoffset = offset;
-        EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR );
-        vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-      }
-      else {
-        EMIT_PAD( 3 );
-      }
-#endif
-   }
-
-   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-      int i;
-
-      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-        if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
-           GLuint sz = VB->TexCoordPtr[i]->size;
-
-           vap_fmt_1 |= sz << (3 * i);
-           vic_1 |= 0x400 << i;
-           EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
-        }
-      }
-   }
-
-#if 0
-   if ( (rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK)
-      != R200_FOG_USE_SPEC_ALPHA ) {
-      R200_STATECHANGE( rmesa, ctx );
-      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK;
-      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
-   }
-#endif
-
-
-   R300_STATECHANGE(rmesa, vic);
-   rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = 0x1;
-   rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = vic_1;
-
-   R300_STATECHANGE(rmesa, vof);
-   rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = vap_fmt_0;
-   rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
-
-   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset)) {
-      
-      rmesa->swtcl.vertex_size =
-         _tnl_install_attrs( ctx,
-                             rmesa->swtcl.vertex_attrs, 
-                             rmesa->swtcl.vertex_attr_count,
-                             NULL, 0 );
-
-      rmesa->swtcl.vertex_size /= 4;
-      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-
-      vte = rmesa->hw.vte.cmd[1];
-      R300_STATECHANGE(rmesa, vte);
-      rmesa->hw.vte.cmd[1] = vte;
-      rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
-   }
-}
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
+       TNLcontext *tnl = TNL_CONTEXT(ctx);
+       struct vertex_buffer *VB = &tnl->vb;
+       DECLARE_RENDERINPUTS(index_bitset);
+       GLuint InputsRead = 0, OutputsWritten = 0;
+       int vap_fmt_0 = 0;
+       int vap_vte_cntl = 0;
+       int offset = 0;
+       int vte = 0;
+       GLint inputs[VERT_ATTRIB_MAX];
+       GLint tab[VERT_ATTRIB_MAX];
+       int swizzle[VERT_ATTRIB_MAX][4];
+       GLuint i, nr;
+       GLuint sz, vap_fmt_1 = 0;
+
+       DECLARE_RENDERINPUTS(render_inputs_bitset);
+       RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
+       RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+       RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
+
+       vte = rmesa->hw.vte.cmd[1];
+       vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
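+       /* Important: source positions from VB->NdcPtr (XY/Z format bits) when
+        * it is set, otherwise from VB->ClipPtr (W0 format bit). */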
+       if ( VB->NdcPtr != NULL ) {
+               VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+               vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
+       }
+       else {
+               VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+               vte |= R300_VTX_W0_FMT;
+       }
 
+       assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+       rmesa->swtcl.vertex_attr_count = 0;
+
+       /* EMIT_ATTR calls must be issued in order, as they tell t_vertex.c
+        * how to build up a hardware vertex.
+        */
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
+               sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
+               InputsRead |= 1 << VERT_ATTRIB_POS;
+               OutputsWritten |= 1 << VERT_RESULT_HPOS;
+               EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
+               offset = sz;
+       } else {
+               offset = 4;
+               EMIT_PAD(4 * sizeof(float));
+       }
 
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
-{
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+               EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+               vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+               offset += 1;
+       }
 
-   rmesa->dma.flush = NULL;
+       if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
+               sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
+               rmesa->swtcl.coloroffset = offset;
+               InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+               OutputsWritten |= 1 << VERT_RESULT_COL0;
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
+               offset += sz;
+       }
 
-   if (rmesa->dma.current.buf) {
-     struct r300_dma_region *current = &rmesa->dma.current;
-     GLuint current_offset = GET_START(current);
+       rmesa->swtcl.specoffset = 0;
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+               sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
+               rmesa->swtcl.specoffset = offset;
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
+               InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+               OutputsWritten |= 1 << VERT_RESULT_COL1;
+       }
 
-      assert (current->start + 
-             rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-             current->ptr);
+       if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+               int i;
+
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+                       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+                               sz = VB->TexCoordPtr[i]->size;
+                               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+                               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+                               EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
+                               vap_fmt_1 |= sz << (3 * i);
+                       }
+               }
+       }
 
-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i)) {
+                       inputs[i] = nr++;
+               } else {
+                       inputs[i] = -1;
+               }
+       }
+       
+       /* Fixed slots for vir0 only: POS=0, COLOR0=2, COLOR1=3, TEXn=6+n */
+       if (InputsRead & (1 << VERT_ATTRIB_POS))
+               inputs[VERT_ATTRIB_POS] = 0;
+       if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+               inputs[VERT_ATTRIB_COLOR0] = 2;
+       if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+               inputs[VERT_ATTRIB_COLOR1] = 3;
+       for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+               if (InputsRead & (1 << i))
+                       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+       
+       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i)) {
+                       tab[nr++] = i;
+               }
+       }
+       
+       for (i = 0; i < nr; i++) {
+               int ci;
+               
+               swizzle[i][0] = SWIZZLE_ZERO;
+               swizzle[i][1] = SWIZZLE_ZERO;
+               swizzle[i][2] = SWIZZLE_ZERO;
+               swizzle[i][3] = SWIZZLE_ONE;
+
+               for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
+                       swizzle[i][ci] = ci;
+               }
+       }
 
-       r300EmitVertexAOS( rmesa,
-                          rmesa->swtcl.vertex_size,
-                          current_offset);
+       R300_NEWPRIM(rmesa);
+       R300_STATECHANGE(rmesa, vir[0]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+               r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+                                  VB->AttribPtr, inputs, tab, nr);
+       R300_STATECHANGE(rmesa, vir[1]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+               r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+                                  nr);
+   
+       R300_STATECHANGE(rmesa, vic);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+   
+       R300_STATECHANGE(rmesa, vof);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+   
+       rmesa->swtcl.vertex_size =
+               _tnl_install_attrs( ctx,
+                                   rmesa->swtcl.vertex_attrs, 
+                                   rmesa->swtcl.vertex_attr_count,
+                                   NULL, 0 );
        
-       r300EmitVbufPrim( rmesa,
-                         rmesa->swtcl.hw_primitive,
-                         rmesa->swtcl.numverts);
-      }
-
-      rmesa->swtcl.numverts = 0;
-      current->start = current->ptr;
-   }
+       rmesa->swtcl.vertex_size /= 4;
+
+       RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+
+
+       R300_STATECHANGE(rmesa, vte);
+       rmesa->hw.vte.cmd[1] = vte;
+       rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
+}
+
+
+/* Flush vertices in the current dma region.
+ */
+static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
+{
+       if (RADEON_DEBUG & DEBUG_IOCTL)
+               fprintf(stderr, "%s\n", __FUNCTION__);
+       
+       rmesa->dma.flush = NULL;
+
+       if (rmesa->dma.current.buf) {
+               struct r300_dma_region *current = &rmesa->dma.current;
+               GLuint current_offset = GET_START(current);
+
+               assert (current->start + 
+                       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+                       current->ptr);
+
+               if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+
+                       r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+                       
+                       r300EmitState(rmesa);
+                       
+                       r300EmitVertexAOS( rmesa,
+                                          rmesa->swtcl.vertex_size,
+                                          current_offset);
+                       
+                       r300EmitVbufPrim( rmesa,
+                                         rmesa->swtcl.hw_primitive,
+                                         rmesa->swtcl.numverts);
+                       
+                       r300EmitCacheFlush(rmesa);
+               }
+               
+               rmesa->swtcl.numverts = 0;
+               current->start = current->ptr;
+       }
 }
 
 /* Alloc space in the current dma region.
@@ -275,30 +285,28 @@ static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
 static void *
 r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
 {
-   GLuint bytes = vsize * nverts;
-
-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-       r300RefillCurrentDmaRegion( rmesa, bytes);
+       GLuint bytes = vsize * nverts;
 
-   if (!rmesa->dma.flush) {
-      rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-      rmesa->dma.flush = flush_last_swtcl_prim;
-   }
+       if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+               r300RefillCurrentDmaRegion( rmesa, bytes);
 
-   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
-   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
-   ASSERT( rmesa->dma.current.start + 
-          rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-          rmesa->dma.current.ptr );
-
-
-   {
-      GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
-      rmesa->dma.current.ptr += bytes;
-      rmesa->swtcl.numverts += nverts;
-      return head;
-   }
+       if (!rmesa->dma.flush) {
+               rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+               rmesa->dma.flush = flush_last_swtcl_prim;
+       }
 
+       ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+       ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+       ASSERT( rmesa->dma.current.start + 
+               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+               rmesa->dma.current.ptr );
+
+       {
+               GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+               rmesa->dma.current.ptr += bytes;
+               rmesa->swtcl.numverts += nverts;
+               return head;
+       }
 }
 
 static GLuint reduced_prim[] = {
@@ -331,7 +339,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
 #define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
-#define HAVE_QUAD_STRIPS 1
+#define HAVE_QUAD_STRIPS 0
 #define HAVE_POLYGONS    1
 #define HAVE_ELTS        1
 
@@ -499,7 +507,7 @@ static void init_rast_tab( void )
 #define RENDER_QUAD( v0, v1, v2, v3 ) \
    r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
 #define INIT(x) do {                                   \
-   r300RenderPrimitive( ctx, reduced_prim[x] );                        \
+   r300RenderPrimitive( ctx, x );                      \
 } while (0)
 #undef LOCAL_VARS
 #define LOCAL_VARS                                             \
@@ -529,76 +537,57 @@ static void init_rast_tab( void )
 /**********************************************************************/
 static void r300ChooseRenderState( GLcontext *ctx )
 {
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   r300ContextPtr rmesa = R300_CONTEXT(ctx);
-   GLuint index = 0;
-   GLuint flags = ctx->_TriangleCaps;
-
-   // if (!rmesa->TclFallback || rmesa->Fallback) 
-//      return;
-
-   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
-   if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
-
-   if (index != rmesa->swtcl.RenderIndex) {
-      tnl->Driver.Render.Points = rast_tab[index].points;
-      tnl->Driver.Render.Line = rast_tab[index].line;
-      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
-      tnl->Driver.Render.Quad = rast_tab[index].quad;
-
-      if (index == 0) {
-       tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
-        tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
-        tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
-      } else {
-        tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
-        tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
-        tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-      }
-
-      rmesa->swtcl.RenderIndex = index;
-   }
+       TNLcontext *tnl = TNL_CONTEXT(ctx);
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLuint index = 0;
+       GLuint flags = ctx->_TriangleCaps;
+
+       if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
+       if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
+
+       if (index != rmesa->swtcl.RenderIndex) {
+               tnl->Driver.Render.Points = rast_tab[index].points;
+               tnl->Driver.Render.Line = rast_tab[index].line;
+               tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+               tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+               tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+               if (index == 0) {
+                       tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
+                       tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
+                       tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
+               } else {
+                       tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+                       tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+                       tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+               }
+
+               rmesa->swtcl.RenderIndex = index;
+       }
 }
 
 
 static void r300RenderStart(GLcontext *ctx)
 {
-  r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       int cmd_reserved = 0;
-       int cmd_written = 0;
-       drm_radeon_cmd_header_t *cmd = NULL;
-
+        r300ContextPtr rmesa = R300_CONTEXT( ctx );
        //      fprintf(stderr, "%s\n", __FUNCTION__);
-       
+
+       r300ChooseRenderState(ctx);     
        r300SetVertexFormat(ctx);
 
+       r300UpdateShaders(rmesa);
        r300UpdateShaderStates(rmesa);
 
-       reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+       r300EmitCacheFlush(rmesa);
        
-       reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_ZCACHE_UNKNOWN_03);
-       
-
        if (rmesa->dma.flush != 0 && 
            rmesa->dma.flush != flush_last_swtcl_prim)
-         rmesa->dma.flush( rmesa );
+               rmesa->dma.flush( rmesa );
+
 }
 
 static void r300RenderFinish(GLcontext *ctx)
 {
-        r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       int cmd_reserved = 0;
-       int cmd_written = 0;
-       drm_radeon_cmd_header_t *cmd = NULL;
-
-       reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-
-       reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_ZCACHE_UNKNOWN_03);
 }
 
 static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
@@ -616,8 +605,11 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        rmesa->swtcl.render_primitive = prim;
-       if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-         r300RasterPrimitive( ctx, prim );
+
+       if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+         return;
+
+       r300RasterPrimitive( ctx, reduced_prim[prim] );
        //      fprintf(stderr, "%s\n", __FUNCTION__);
        
 }
@@ -662,6 +654,11 @@ void r300InitSwtcl(GLcontext *ctx)
 
        _tnl_need_projected_coords( ctx, GL_FALSE );
        r300ChooseRenderState(ctx);
+
+       _mesa_validate_all_lighting_tables( ctx ); 
+
+       tnl->Driver.NotifyMaterialChange = 
+         _mesa_validate_all_lighting_tables;
 }
 
 void r300DestroySwtcl(GLcontext *ctx)
@@ -672,30 +669,12 @@ void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
 {
        int cmd_reserved = 0;
        int cmd_written = 0;
-       int vte;
-       int route0;
 
        drm_radeon_cmd_header_t *cmd = NULL;
        if (RADEON_DEBUG & DEBUG_VERTS)
          fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
                  __FUNCTION__, vertex_size, offset);
 
-       /* emit vte */
-
-       R300_STATECHANGE(rmesa, vir[0]);
-       ((drm_r300_cmd_header_t *)rmesa->hw.vir[0].cmd)->packet0.count = 1;
-       rmesa->hw.vir[0].cmd[1] = 0x22030003;
-
-       R300_STATECHANGE(rmesa, vir[1]);
-       ((drm_r300_cmd_header_t *)rmesa->hw.vir[1].cmd)->packet0.count = 1;
-       
-       route0 = (R300_INPUT_ROUTE_SELECT_X |
-                 (R300_INPUT_ROUTE_SELECT_Y << R300_INPUT_ROUTE_Y_SHIFT) |
-                 (R300_INPUT_ROUTE_SELECT_Z << R300_INPUT_ROUTE_Z_SHIFT) |
-                 (R300_INPUT_ROUTE_SELECT_W << R300_INPUT_ROUTE_W_SHIFT) |(R300_INPUT_ROUTE_ENABLE));
-
-       rmesa->hw.vir[1].cmd[1] = route0 | (route0 << 16);
-
        start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
        e32(1);
        e32(vertex_size | (vertex_size << 8));
@@ -713,8 +692,6 @@ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
        type = r300PrimitiveType(rmesa, primitive);
        num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
        
-       r300EmitState(rmesa);
-       
        start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
        e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
 }