r5xx: Dump shader constants when dumping program assembly.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_swtcl.c
index dfa02f95232ab8c667d686fadc48321c5e3a193c..8aebd9be3ea0b9f434576702c7d2609c23b280f6 100644 (file)
@@ -40,6 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "enums.h"
 #include "image.h"
 #include "imports.h"
+#include "light.h"
 #include "macros.h"
 
 #include "swrast/s_context.h"
@@ -77,291 +78,166 @@ do {                                                                      \
    rmesa->swtcl.vertex_attr_count++;                                   \
 } while (0)
 
-static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
-                                int *inputs, GLint * tab, GLuint nr)
+static void r300SetVertexFormat( GLcontext *ctx )
 {
-       GLuint i, dw;
-
-       /* type, inputs, stop bit, size */
-       for (i = 0; i + 1 < nr; i += 2) {
-               dw = (inputs[tab[i]] << 8) | 0x3;
-               dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16;
-               if (i + 2 == nr) {
-                       dw |= (R300_VAP_INPUT_ROUTE_END << 16);
-               }
-               dst[i >> 1] = dw;
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
+       TNLcontext *tnl = TNL_CONTEXT(ctx);
+       struct vertex_buffer *VB = &tnl->vb;
+       DECLARE_RENDERINPUTS(index_bitset);
+       GLuint InputsRead = 0, OutputsWritten = 0;
+       int vap_fmt_0 = 0;
+       int vap_vte_cntl = 0;
+       int offset = 0;
+       int vte = 0;
+       GLint inputs[VERT_ATTRIB_MAX];
+       GLint tab[VERT_ATTRIB_MAX];
+       int swizzle[VERT_ATTRIB_MAX][4];
+       GLuint i, nr;
+       GLuint sz, vap_fmt_1 = 0;
+
+       DECLARE_RENDERINPUTS(render_inputs_bitset);
+       RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
+       RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+       RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
+
+       vte = rmesa->hw.vte.cmd[1];
+       vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
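+       /* Important: use VB->NdcPtr as the position source when it is set,
+        * else VB->ClipPtr, and OR the matching format bits into vte. */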
+       if ( VB->NdcPtr != NULL ) {
+               VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+               vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
        }
-
-       if (nr & 1) {
-               dw = (inputs[tab[nr - 1]] << 8) | 0x3;
-               dw |= R300_VAP_INPUT_ROUTE_END;
-               dst[nr >> 1] = dw;
+       else {
+               VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+               vte |= R300_VTX_W0_FMT;
        }
 
-       return (nr + 1) >> 1;
-}
-
-static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
-{
-       return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
-           (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
-           (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
-           (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
-}
-
-static GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
-{
-       GLuint i;
-
-       for (i = 0; i + 1 < nr; i += 2) {
-               dst[i >> 1] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
-               dst[i >> 1] |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
+       assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+       rmesa->swtcl.vertex_attr_count = 0;
+
+       /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+        * build up a hardware vertex.
+        */
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
+               sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
+               InputsRead |= 1 << VERT_ATTRIB_POS;
+               OutputsWritten |= 1 << VERT_RESULT_HPOS;
+               EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
+               offset = sz;
+       } else {
+               offset = 4;
+               EMIT_PAD(4 * sizeof(float));
        }
 
-       if (nr & 1) {
-               dst[nr >> 1] = r300VAPInputRoute1Swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+               EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+               vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+               offset += 1;
        }
 
-       return (nr + 1) >> 1;
-}
+       if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
+               sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
+               rmesa->swtcl.coloroffset = offset;
+               InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+               OutputsWritten |= 1 << VERT_RESULT_COL0;
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
+               offset += sz;
+       }
 
-static GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
-{
-       /* No idea what this value means. I have seen other values written to
-        * this register... */
-       return 0x5555;
-}
+       rmesa->swtcl.specoffset = 0;
+       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+               sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
+               rmesa->swtcl.specoffset = offset;
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
+               InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+               OutputsWritten |= 1 << VERT_RESULT_COL1;
+       }
 
-static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
-{
-       r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       GLuint i, vic_1 = 0;
+       if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+               int i;
+
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+                       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+                               sz = VB->TexCoordPtr[i]->size;
+                               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+                               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+                               EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
+                               vap_fmt_1 |= sz << (3 * i);
+                       }
+               }
+       }
 
+       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i)) {
+                       inputs[i] = nr++;
+               } else {
+                       inputs[i] = -1;
+               }
+       }
+       
+       /* Fixed slot numbers for inputs[]; these apply to vir0 only */
        if (InputsRead & (1 << VERT_ATTRIB_POS))
-               vic_1 |= R300_INPUT_CNTL_POS;
-
-       if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
-               vic_1 |= R300_INPUT_CNTL_NORMAL;
-
+               inputs[VERT_ATTRIB_POS] = 0;
        if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-               vic_1 |= R300_INPUT_CNTL_COLOR;
-
-       rmesa->state.texture.tc_count = 0;
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-               if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
-                       rmesa->state.texture.tc_count++;
-                       vic_1 |= R300_INPUT_CNTL_TC0 << i;
+               inputs[VERT_ATTRIB_COLOR0] = 2;
+       if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+               inputs[VERT_ATTRIB_COLOR1] = 3;
+       for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+               if (InputsRead & (1 << i))
+                       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+       
+       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i)) {
+                       tab[nr++] = i;
                }
-
-       return vic_1;
-}
-
-static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
-{
-       GLuint ret = 0;
-
-       if (OutputsWritten & (1 << VERT_RESULT_HPOS))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_COL0))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_COL1))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-
-#if 0
-       if (OutputsWritten & (1 << VERT_RESULT_BFC0))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_BFC1))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
-#endif
-
-       if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-
-       return ret;
-}
-
-static GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
-{
-       GLuint i, ret = 0;
-
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-               if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
-                       ret |= (4 << (3 * i));
+       }
+       
+       for (i = 0; i < nr; i++) {
+               int ci;
+               
+               swizzle[i][0] = SWIZZLE_ZERO;
+               swizzle[i][1] = SWIZZLE_ZERO;
+               swizzle[i][2] = SWIZZLE_ZERO;
+               swizzle[i][3] = SWIZZLE_ONE;
+
+               for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
+                       swizzle[i][ci] = ci;
                }
        }
 
-       return ret;
-}
-
-static void r300SetVertexFormat( GLcontext *ctx )
-{
-   r300ContextPtr rmesa = R300_CONTEXT( ctx );
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &tnl->vb;
-   DECLARE_RENDERINPUTS(index_bitset);
-   GLuint InputsRead = 0, OutputsWritten = 0;
-   int vap_fmt_0 = 0;
-   int vap_vte_cntl = 0;
-   int offset = 0;
-   int vte = 0;
-   GLint inputs[VERT_ATTRIB_MAX];
-   GLint tab[VERT_ATTRIB_MAX];
-   int swizzle[VERT_ATTRIB_MAX][4];
-   GLuint i, nr;
-
-   DECLARE_RENDERINPUTS(render_inputs_bitset);
-
-   RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-
-   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
-
-   RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-
-   /* Important:
-    */
-   if ( VB->NdcPtr != NULL ) {
-      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-   }
-   else {
-      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-   }
-
-   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-   rmesa->swtcl.vertex_attr_count = 0;
-
-   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-    * build up a hardware vertex.
-    */
-   EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
-   vap_vte_cntl |= R300_VTX_W0_FMT;
-   InputsRead |= 1 << VERT_ATTRIB_POS;
-   OutputsWritten |= 1 << VERT_RESULT_HPOS;
-   offset = 4;
-
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
-     EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-     vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-     offset += 1;
-   }
-
-   rmesa->swtcl.coloroffset = offset;
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F );
-
-   InputsRead |= 1 << VERT_ATTRIB_COLOR0;
-   OutputsWritten |= 1 << VERT_RESULT_COL0;
-   offset += 4;
-
-   rmesa->swtcl.specoffset = 0;
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
-       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-
-     if (_mesa_little_endian()) {
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-        rmesa->swtcl.specoffset = offset;
-        EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F );
-        InputsRead |= 1 << VERT_ATTRIB_COLOR1;
-        OutputsWritten |= 1 << VERT_RESULT_COL1;
-      }
-
-     } else {
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-        rmesa->swtcl.specoffset = offset;
-        EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F );
-        InputsRead |= 1 << VERT_ATTRIB_COLOR1;
-        OutputsWritten |= 1 << VERT_RESULT_COL1;
-      }
-     }
-   }
-
-   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-      int i;
-
-      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-        if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
-           InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
-           OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
-           EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_4F );
-        }
-      }
-   }
-
-   for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-     if (InputsRead & (1 << i)) {
-       inputs[i] = nr++;
-     } else {
-       inputs[i] = -1;
-     }
-   }
-
-   /* Fixed, apply to vir0 only */
-   if (InputsRead & VERT_ATTRIB_POS)
-     inputs[VERT_ATTRIB_POS] = 0;
-   if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-     inputs[VERT_ATTRIB_COLOR0] = 2;
-   if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
-     inputs[VERT_ATTRIB_COLOR1] = 3;
-   for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
-     if (InputsRead & (1 << i))
-       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+       R300_NEWPRIM(rmesa);
+       R300_STATECHANGE(rmesa, vir[0]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+               r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+                                  VB->AttribPtr, inputs, tab, nr);
+       R300_STATECHANGE(rmesa, vir[1]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+               r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+                                  nr);
    
-   for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-     if (InputsRead & (1 << i)) {
-       tab[nr++] = i;
-     }
-   }
-
-   for (i = 0; i < nr; i++) {
-     int ci;
-
-     swizzle[i][0] = SWIZZLE_ZERO;
-     swizzle[i][1] = SWIZZLE_ZERO;
-     swizzle[i][2] = SWIZZLE_ZERO;
-     swizzle[i][3] = SWIZZLE_ONE;
-
-     for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
-       swizzle[i][ci] = ci;
-     }
-   }
-
-   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset)) {
-     R300_NEWPRIM(rmesa);
-     R300_STATECHANGE(rmesa, vir[0]);
-     ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-       r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-                         VB->AttribPtr, inputs, tab, nr);
-     R300_STATECHANGE(rmesa, vir[1]);
-     ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-       r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-                         nr);
-     
-     R300_STATECHANGE(rmesa, vic);
-     rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
-     rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-     
-     R300_STATECHANGE(rmesa, vof);
-     rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
-     rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
-     
-      rmesa->swtcl.vertex_size =
-         _tnl_install_attrs( ctx,
-                             rmesa->swtcl.vertex_attrs, 
-                             rmesa->swtcl.vertex_attr_count,
-                             NULL, 0 );
-
-      rmesa->swtcl.vertex_size /= 4;
-
-      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-
-      vte = rmesa->hw.vte.cmd[1];
-      R300_STATECHANGE(rmesa, vte);
-      rmesa->hw.vte.cmd[1] = vte;
-      rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
-   }
+       R300_STATECHANGE(rmesa, vic);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+   
+       R300_STATECHANGE(rmesa, vof);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+   
+       rmesa->swtcl.vertex_size =
+               _tnl_install_attrs( ctx,
+                                   rmesa->swtcl.vertex_attrs, 
+                                   rmesa->swtcl.vertex_attr_count,
+                                   NULL, 0 );
+       
+       rmesa->swtcl.vertex_size /= 4;
+
+       RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+
+
+       R300_STATECHANGE(rmesa, vte);
+       rmesa->hw.vte.cmd[1] = vte;
+       rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
 }
 
 
@@ -369,34 +245,39 @@ static void r300SetVertexFormat( GLcontext *ctx )
  */
 static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
 {
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   rmesa->dma.flush = NULL;
-
-   if (rmesa->dma.current.buf) {
-     struct r300_dma_region *current = &rmesa->dma.current;
-     GLuint current_offset = GET_START(current);
-
-      assert (current->start + 
-             rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-             current->ptr);
-
-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-
-       r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (8*sizeof(int)), __FUNCTION__);
-       r300EmitVertexAOS( rmesa,
-                          rmesa->swtcl.vertex_size,
-                          current_offset);
+       if (RADEON_DEBUG & DEBUG_IOCTL)
+               fprintf(stderr, "%s\n", __FUNCTION__);
        
-       r300EmitVbufPrim( rmesa,
-                         rmesa->swtcl.hw_primitive,
-                         rmesa->swtcl.numverts);
-      }
-
-      rmesa->swtcl.numverts = 0;
-      current->start = current->ptr;
-   }
+       rmesa->dma.flush = NULL;
+
+       if (rmesa->dma.current.buf) {
+               struct r300_dma_region *current = &rmesa->dma.current;
+               GLuint current_offset = GET_START(current);
+
+               assert (current->start + 
+                       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+                       current->ptr);
+
+               if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+
+                       r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+                       
+                       r300EmitState(rmesa);
+                       
+                       r300EmitVertexAOS( rmesa,
+                                          rmesa->swtcl.vertex_size,
+                                          current_offset);
+                       
+                       r300EmitVbufPrim( rmesa,
+                                         rmesa->swtcl.hw_primitive,
+                                         rmesa->swtcl.numverts);
+                       
+                       r300EmitCacheFlush(rmesa);
+               }
+               
+               rmesa->swtcl.numverts = 0;
+               current->start = current->ptr;
+       }
 }
 
 /* Alloc space in the current dma region.
@@ -404,30 +285,28 @@ static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
 static void *
 r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
 {
-   GLuint bytes = vsize * nverts;
-
-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-       r300RefillCurrentDmaRegion( rmesa, bytes);
-
-   if (!rmesa->dma.flush) {
-      rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-      rmesa->dma.flush = flush_last_swtcl_prim;
-   }
+       GLuint bytes = vsize * nverts;
 
-   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
-   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
-   ASSERT( rmesa->dma.current.start + 
-          rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-          rmesa->dma.current.ptr );
+       if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+               r300RefillCurrentDmaRegion( rmesa, bytes);
 
+       if (!rmesa->dma.flush) {
+               rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+               rmesa->dma.flush = flush_last_swtcl_prim;
+       }
 
-   {
-      GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
-      rmesa->dma.current.ptr += bytes;
-      rmesa->swtcl.numverts += nverts;
-      return head;
-   }
-
+       ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+       ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+       ASSERT( rmesa->dma.current.start + 
+               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+               rmesa->dma.current.ptr );
+
+       {
+               GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+               rmesa->dma.current.ptr += bytes;
+               rmesa->swtcl.numverts += nverts;
+               return head;
+       }
 }
 
 static GLuint reduced_prim[] = {
@@ -658,76 +537,57 @@ static void init_rast_tab( void )
 /**********************************************************************/
 static void r300ChooseRenderState( GLcontext *ctx )
 {
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   r300ContextPtr rmesa = R300_CONTEXT(ctx);
-   GLuint index = 0;
-   GLuint flags = ctx->_TriangleCaps;
-
-   // if (!rmesa->TclFallback || rmesa->Fallback) 
-//      return;
-
-   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
-   if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
-
-   if (index != rmesa->swtcl.RenderIndex) {
-      tnl->Driver.Render.Points = rast_tab[index].points;
-      tnl->Driver.Render.Line = rast_tab[index].line;
-      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
-      tnl->Driver.Render.Quad = rast_tab[index].quad;
-
-      if (index == 0) {
-       tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
-        tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
-        tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
-      } else {
-        tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
-        tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
-        tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-      }
-
-      rmesa->swtcl.RenderIndex = index;
-   }
+       TNLcontext *tnl = TNL_CONTEXT(ctx);
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLuint index = 0;
+       GLuint flags = ctx->_TriangleCaps;
+
+       if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
+       if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
+
+       if (index != rmesa->swtcl.RenderIndex) {
+               tnl->Driver.Render.Points = rast_tab[index].points;
+               tnl->Driver.Render.Line = rast_tab[index].line;
+               tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+               tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+               tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+               if (index == 0) {
+                       tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
+                       tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
+                       tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
+               } else {
+                       tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+                       tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+                       tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+               }
+
+               rmesa->swtcl.RenderIndex = index;
+       }
 }
 
 
 static void r300RenderStart(GLcontext *ctx)
 {
-  r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       int cmd_reserved = 0;
-       int cmd_written = 0;
-       drm_radeon_cmd_header_t *cmd = NULL;
-
+        r300ContextPtr rmesa = R300_CONTEXT( ctx );
        //      fprintf(stderr, "%s\n", __FUNCTION__);
-       
+
+       r300ChooseRenderState(ctx);     
        r300SetVertexFormat(ctx);
 
+       r300UpdateShaders(rmesa);
        r300UpdateShaderStates(rmesa);
 
-       reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-       
-       reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_ZCACHE_UNKNOWN_03);
+       r300EmitCacheFlush(rmesa);
        
        if (rmesa->dma.flush != 0 && 
            rmesa->dma.flush != flush_last_swtcl_prim)
-         rmesa->dma.flush( rmesa );
+               rmesa->dma.flush( rmesa );
 
 }
 
 static void r300RenderFinish(GLcontext *ctx)
 {
-        r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       int cmd_reserved = 0;
-       int cmd_written = 0;
-       drm_radeon_cmd_header_t *cmd = NULL;
-
-       reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
-
-       reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
-       e32(R300_RB3D_ZCACHE_UNKNOWN_03);
 }
 
 static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
@@ -745,8 +605,11 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        rmesa->swtcl.render_primitive = prim;
-       if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-         r300RasterPrimitive( ctx, reduced_prim[prim] );
+
+       if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+         return;
+
+       r300RasterPrimitive( ctx, reduced_prim[prim] );
        //      fprintf(stderr, "%s\n", __FUNCTION__);
        
 }
@@ -791,6 +654,11 @@ void r300InitSwtcl(GLcontext *ctx)
 
        _tnl_need_projected_coords( ctx, GL_FALSE );
        r300ChooseRenderState(ctx);
+
+       _mesa_validate_all_lighting_tables( ctx ); 
+
+       tnl->Driver.NotifyMaterialChange = 
+         _mesa_validate_all_lighting_tables;
 }
 
 void r300DestroySwtcl(GLcontext *ctx)
@@ -824,8 +692,6 @@ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
        type = r300PrimitiveType(rmesa, primitive);
        num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
        
-       r300EmitState(rmesa);
-       
        start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
        e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
 }