Merge commit 'origin/mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / savage / savagetris.c
index 8e9f33d27d4fd213d77672ef3dec1a6c7abbe4be..e9529d19391ff8368784d0979ccf02f0441c82a8 100644 (file)
@@ -1,4 +1,4 @@
-/* $XFree86$ */ /* -*- c-basic-offset: 3 -*- */
+/* -*- c-basic-offset: 3 -*- */
 /**************************************************************************
 
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -37,10 +37,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include <stdio.h>
 #include <math.h>
 
-#include "glheader.h"
-#include "mtypes.h"
-#include "colormac.h"
-#include "macros.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/macros.h"
 
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
@@ -56,114 +56,197 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 static void savageRasterPrimitive( GLcontext *ctx, GLuint prim );
 static void savageRenderPrimitive( GLcontext *ctx, GLenum prim );
+
+
+static GLenum reduced_prim[GL_POLYGON+1] = {
+   GL_POINTS,
+   GL_LINES,
+   GL_LINES,
+   GL_LINES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES
+};
+
  
 /***********************************************************************
  *                    Emit primitives                                  *
  ***********************************************************************/
 
-static  __inline__ GLuint * savage_send_one_vertex(savageContextPtr imesa, savageVertexPtr v, GLuint * vb, GLuint start, GLuint size)
-{ 
-    GLuint j; 
-    for (j = start ; j < size ; j++) 
-    { 
-        WRITE_CMD(vb, v->ui[j],GLuint); 
-    }
-    return vb; 
-} 
-static void __inline__ savage_draw_triangle( savageContextPtr imesa, 
-                                          savageVertexPtr v0, 
-                                          savageVertexPtr v1, 
-                                          savageVertexPtr v2 ) 
-{ 
-   GLuint vertsize = imesa->vertex_size; 
-#if SAVAGEDEBUG
-   GLuint *vb = savageDMAAlloc (imesa, 3 * vertsize + 1 + 8); 
+#if defined (USE_X86_ASM)
+#define EMIT_VERT( j, vb, vertex_size, start, v )              \
+do {   int __tmp;                                              \
+        vb += start;                                           \
+       __asm__ __volatile__( "rep ; movsl"                     \
+                        : "=%c" (j), "=D" (vb), "=S" (__tmp)   \
+                        : "0" (vertex_size-start),             \
+                          "D" ((long)vb),                      \
+                          "S" ((long)&(v)->ui[start]));        \
+} while (0)
 #else
-   GLuint *vb = savageDMAAlloc (imesa, 4 * vertsize + 1); 
+#define EMIT_VERT( j, vb, vertex_size, start, v )      \
+do {                                           \
+   for ( j = start ; j < vertex_size ; j++ )   \
+      vb[j] = (v)->ui[j];                      \
+   vb += vertex_size;                          \
+} while (0)
 #endif
 
-   imesa->DrawPrimitiveCmd &=
-       ~(SAVAGE_HW_TRIANGLE_TYPE| SAVAGE_HW_TRIANGLE_CONT);
-   WRITE_CMD(vb,SAVAGE_DRAW_PRIMITIVE(3, imesa->DrawPrimitiveCmd, 0),GLuint);
+static void INLINE savage_draw_triangle (savageContextPtr imesa,
+                                            savageVertexPtr v0,
+                                            savageVertexPtr v1,
+                                            savageVertexPtr v2) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 3*vertsize);
+   GLuint j;
+
+   EMIT_VERT (j, vb, vertsize, 0, v0);
+   EMIT_VERT (j, vb, vertsize, 0, v1);
+   EMIT_VERT (j, vb, vertsize, 0, v2);
+}
 
-   vb = savage_send_one_vertex(imesa, v0, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v1, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v2, vb, 0, vertsize);
-#if SAVAGEDEBUG
-   {
-        GLuint x0,y0,w,h;
-        x0 = (GLuint)imesa->drawX;
-        y0 = (GLuint)imesa->drawY;
-        w  = (GLuint)imesa->driDrawable->w;
-        h  = (GLuint)imesa->driDrawable->h;
-
-       (*vb) = 0x4BCC00C0;
-       vb++;
-       (*vb) = imesa->savageScreen->backOffset;
-       vb++;
-       (*vb) = imesa->savageScreen->backBitmapDesc;
-       vb++;
-       (*vb) = (y0<<16)|x0;
-       vb++;
-       (*vb) = 0x0;
-       vb++;
-       (*vb) = (h<<16)|w;
-       vb++;
+static void INLINE savage_draw_quad (savageContextPtr imesa,
+                                        savageVertexPtr v0,
+                                        savageVertexPtr v1,
+                                        savageVertexPtr v2,
+                                        savageVertexPtr v3) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 6*vertsize);
+   GLuint j;
+
+   EMIT_VERT (j, vb, vertsize, 0, v0);
+   EMIT_VERT (j, vb, vertsize, 0, v1);
+   EMIT_VERT (j, vb, vertsize, 0, v3);
+   EMIT_VERT (j, vb, vertsize, 0, v1);
+   EMIT_VERT (j, vb, vertsize, 0, v2);
+   EMIT_VERT (j, vb, vertsize, 0, v3);
+}
+
+static INLINE void savage_draw_point (savageContextPtr imesa,
+                                         savageVertexPtr tmp) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 6*vertsize);
+   const GLfloat x = tmp->v.x;
+   const GLfloat y = tmp->v.y;
+   const GLfloat sz = 0.5 * CLAMP(imesa->glCtx->Point.Size,
+                                  imesa->glCtx->Const.MinPointSize,
+                                  imesa->glCtx->Const.MaxPointSize);
+   GLuint j;
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, tmp);
+}
+
+static INLINE void savage_draw_line (savageContextPtr imesa,
+                                        savageVertexPtr v0,
+                                        savageVertexPtr v1 ) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 6*vertsize);
+   const GLfloat width = CLAMP(imesa->glCtx->Line.Width,
+                               imesa->glCtx->Const.MinLineWidth,
+                               imesa->glCtx->Const.MaxLineWidth);
+   GLfloat dx, dy, ix, iy;
+   GLuint j;
+
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+
+   ix = width * .5; iy = 0;
+   if (dx * dx > dy * dy) {
+      iy = ix; ix = 0;
    }
-#endif
-   savageDMACommit (imesa, vb);
-} 
-static __inline__ void savage_draw_point( savageContextPtr imesa, 
-                                         savageVertexPtr tmp ) 
-{ 
-   GLfloat sz = imesa->glCtx->Point._Size * .5;
-   int vertsize = imesa->vertex_size; 
-   GLuint *vb = savageDMAAlloc (imesa, 4 * vertsize + 1); 
-   const GLfloat x = tmp->v.x; 
-   const GLfloat y = tmp->v.y; 
-   
-   imesa->DrawPrimitiveCmd &=
-       ~(SAVAGE_HW_TRIANGLE_TYPE | SAVAGE_HW_TRIANGLE_CONT);   
-   imesa->DrawPrimitiveCmd |= SAVAGE_HW_TRIANGLE_FAN; 
-     
-   WRITE_CMD(vb, SAVAGE_DRAW_PRIMITIVE(4, imesa->DrawPrimitiveCmd, 0),GLuint);
-
-   WRITE_CMD(vb, x - sz, GLfloat);
-   WRITE_CMD(vb, y - sz, GLfloat);
-   vb = savage_send_one_vertex(imesa, tmp, vb, 2, vertsize);
-
-   WRITE_CMD(vb, x + sz, GLfloat);
-   WRITE_CMD(vb, y - sz, GLfloat);
-   vb = savage_send_one_vertex(imesa, tmp, vb, 2, vertsize);
-
-   WRITE_CMD(vb, x + sz, GLfloat);
-   WRITE_CMD(vb, y + sz, GLfloat);
-   vb = savage_send_one_vertex(imesa, tmp, vb, 2, vertsize);
-   WRITE_CMD(vb, x - sz, GLfloat);
-   WRITE_CMD(vb, y + sz, GLfloat);
-   vb = savage_send_one_vertex(imesa, tmp, vb, 2, vertsize);
 
-   savageDMACommit (imesa, vb);
+   *(float *)&vb[0] = v0->v.x - ix;
+   *(float *)&vb[1] = v0->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, v0);
+
+   *(float *)&vb[0] = v1->v.x + ix;
+   *(float *)&vb[1] = v1->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, v1);
+
+   *(float *)&vb[0] = v0->v.x + ix;
+   *(float *)&vb[1] = v0->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, v0);
+
+   *(float *)&vb[0] = v0->v.x - ix;
+   *(float *)&vb[1] = v0->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, v0);
+
+   *(float *)&vb[0] = v1->v.x - ix;
+   *(float *)&vb[1] = v1->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, v1);
+
+   *(float *)&vb[0] = v1->v.x + ix;
+   *(float *)&vb[1] = v1->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, v1);
 } 
-static __inline__ void savage_draw_line( savageContextPtr imesa, 
-                                      savageVertexPtr v0, 
-                                      savageVertexPtr v1 ) 
-{  
-   GLuint vertsize = imesa->vertex_size; 
-   GLuint *vb = savageDMAAlloc (imesa, 4 * vertsize + 1); 
-   GLfloat dx, dy, ix, iy; 
-   GLfloat width = imesa->glCtx->Line._Width;
-
-   imesa->DrawPrimitiveCmd &=
-       ~(SAVAGE_HW_TRIANGLE_TYPE | SAVAGE_HW_TRIANGLE_CONT);
-   imesa->DrawPrimitiveCmd |= SAVAGE_HW_TRIANGLE_FAN; 
-   WRITE_CMD(vb, SAVAGE_DRAW_PRIMITIVE(4, imesa->DrawPrimitiveCmd, 0),GLuint);
+
+/* Fallback drawing functions for the ptex hack. Code duplication
+ * (especially lines and points) isn't beautiful, but I didn't feel
+ * like inventing yet another template. :-/
+ */
+#define PTEX_VERTEX( j, tmp, vertex_size, start, v)    \
+do {                                                   \
+   GLfloat rhw = 1.0 / v->f[vertex_size];              \
+   for ( j = start ; j < vertex_size ; j++ )           \
+      tmp.f[j] = v->f[j];                              \
+   tmp.f[3] *= v->f[vertex_size];                      \
+   tmp.f[vertex_size-2] *= rhw;                                \
+   tmp.f[vertex_size-1] *= rhw;                                \
+} while (0)
+
+static void INLINE savage_ptex_tri (savageContextPtr imesa,
+                                       savageVertexPtr v0,
+                                       savageVertexPtr v1,
+                                       savageVertexPtr v2) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 3*vertsize);
+   savageVertex tmp;
+   GLuint j;
+
+   PTEX_VERTEX (j, tmp, vertsize, 0, v0); EMIT_VERT (j, vb, vertsize, 0, &tmp);
+   PTEX_VERTEX (j, tmp, vertsize, 0, v1); EMIT_VERT (j, vb, vertsize, 0, &tmp);
+   PTEX_VERTEX (j, tmp, vertsize, 0, v2); EMIT_VERT (j, vb, vertsize, 0, &tmp);
+}
+
+static INLINE void savage_ptex_line (savageContextPtr imesa,
+                                        savageVertexPtr v0,
+                                        savageVertexPtr v1 ) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 6*vertsize);
+   const GLfloat width = CLAMP(imesa->glCtx->Line.Width,
+                               imesa->glCtx->Const.MinLineWidth,
+                               imesa->glCtx->Const.MaxLineWidth);
+   GLfloat dx, dy, ix, iy;
+   savageVertex tmp0, tmp1;
+   GLuint j;
+
+   PTEX_VERTEX (j, tmp0, vertsize, 2, v0);
+   PTEX_VERTEX (j, tmp1, vertsize, 2, v1);
 
    dx = v0->v.x - v1->v.x;
    dy = v0->v.y - v1->v.y;
@@ -173,48 +256,70 @@ static __inline__ void savage_draw_line( savageContextPtr imesa,
       iy = ix; ix = 0;
    }
 
-   WRITE_CMD(vb, (v0->v.x - ix), GLfloat);
-   WRITE_CMD(vb, (v0->v.y - iy), GLfloat);
-   vb = savage_send_one_vertex(imesa, v0, vb, 2, vertsize);
+   *(float *)&vb[0] = v0->v.x - ix;
+   *(float *)&vb[1] = v0->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp0);
 
-   WRITE_CMD(vb, (v1->v.x - ix), GLfloat);
-   WRITE_CMD(vb, (v1->v.y - iy), GLfloat);     
-   vb = savage_send_one_vertex(imesa, v1, vb, 2, vertsize);
+   *(float *)&vb[0] = v1->v.x + ix;
+   *(float *)&vb[1] = v1->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp1);
 
-   WRITE_CMD(vb, (v1->v.x + ix), GLfloat);
-   WRITE_CMD(vb, (v1->v.y + iy), GLfloat);
-   vb = savage_send_one_vertex(imesa, v1, vb, 2, vertsize);
+   *(float *)&vb[0] = v0->v.x + ix;
+   *(float *)&vb[1] = v0->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp0);
 
-   WRITE_CMD(vb, (v0->v.x + ix), GLfloat);
-   WRITE_CMD(vb, (v0->v.y + iy), GLfloat);
-   vb = savage_send_one_vertex(imesa, v0, vb, 2, vertsize);
+   *(float *)&vb[0] = v0->v.x - ix;
+   *(float *)&vb[1] = v0->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp0);
 
-   savageDMACommit (imesa, vb);
-} 
-static void __inline__ savage_draw_quad( savageContextPtr imesa, 
-                                        savageVertexPtr v0, 
-                                        savageVertexPtr v1, 
-                                        savageVertexPtr v2, 
-                                        savageVertexPtr v3 ) 
-{ 
-   GLuint vertsize = imesa->vertex_size; 
-   GLuint *vb = savageDMAAlloc (imesa, 6 * vertsize + 1); 
-
-   imesa->DrawPrimitiveCmd &=
-       ~(SAVAGE_HW_TRIANGLE_TYPE | SAVAGE_HW_TRIANGLE_CONT);
-   WRITE_CMD(vb, SAVAGE_DRAW_PRIMITIVE(6, imesa->DrawPrimitiveCmd, 0),GLuint);
-   vb = savage_send_one_vertex(imesa, v0, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v1, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v3, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v1, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v2, vb, 0, vertsize);
-   vb = savage_send_one_vertex(imesa, v3, vb, 0, vertsize);
-
-   savageDMACommit (imesa, vb);
+   *(float *)&vb[0] = v1->v.x - ix;
+   *(float *)&vb[1] = v1->v.y - iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp1);
+
+   *(float *)&vb[0] = v1->v.x + ix;
+   *(float *)&vb[1] = v1->v.y + iy;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp1);
 } 
 
+static INLINE void savage_ptex_point (savageContextPtr imesa,
+                                         savageVertexPtr v0) {
+   GLuint vertsize = imesa->HwVertexSize;
+   uint32_t *vb = savageAllocVtxBuf (imesa, 6*vertsize);
+   const GLfloat x = v0->v.x;
+   const GLfloat y = v0->v.y;
+   const GLfloat sz = 0.5 * CLAMP(imesa->glCtx->Point.Size,
+                                  imesa->glCtx->Const.MinPointSize,
+                                  imesa->glCtx->Const.MaxPointSize);
+   savageVertex tmp;
+   GLuint j;
+
+   PTEX_VERTEX (j, tmp, vertsize, 2, v0);
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+
+   *(float *)&vb[0] = x + sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y + sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+
+   *(float *)&vb[0] = x - sz;
+   *(float *)&vb[1] = y - sz;
+   EMIT_VERT (j, vb, vertsize, 2, &tmp);
+}
 /***********************************************************************
  *          Macros for t_dd_tritmp.h to draw basic primitives          *
  ***********************************************************************/
@@ -265,10 +370,10 @@ do {                                              \
 
 
 static struct {
-   points_func         points;
-   line_func           line;
-   triangle_func       triangle;
-   quad_func           quad;
+   tnl_points_func             points;
+   tnl_line_func               line;
+   tnl_triangle_func   triangle;
+   tnl_quad_func               quad;
 } rast_tab[SAVAGE_MAX_TRIFUNC];
 
 
@@ -291,6 +396,7 @@ static struct {
 #define TAB rast_tab
 
 #define DEPTH_SCALE imesa->depth_scale
+#define REVERSE_DEPTH 1
 #define UNFILLED_TRI unfilled_tri
 #define UNFILLED_QUAD unfilled_quad
 #define VERT_X(_v) _v->v.x
@@ -299,28 +405,27 @@ static struct {
 #define AREA_IS_CCW( a ) (a > 0)
 #define GET_VERTEX(e) (imesa->verts + (e * imesa->vertex_size * sizeof(int)))
 
-#define SAVAGE_COLOR( dst, src )               \
-do {                                           \
-   dst[0] = src[2];                            \
-   dst[1] = src[1];                            \
-   dst[2] = src[0];                            \
-   dst[3] = src[3];                            \
-} while (0)
-
-#define SAVAGE_SPEC( dst, src )                        \
-do {                                           \
-   dst[0] = src[2];                            \
-   dst[1] = src[1];                            \
-   dst[2] = src[0];                            \
+#define VERT_SET_RGBA( v, c )                                  \
+do {                                                           \
+   savage_color_t *color = (savage_color_t *)&((v)->ub4[coloroffset]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);               \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);             \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);              \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);             \
 } while (0)
-
-#define VERT_SET_RGBA( v, c )    SAVAGE_COLOR( v->ub4[coloroffset], c )
 #define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
 #define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
 #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
 
-#define VERT_SET_SPEC( v, c )                                          \
-   if (specoffset) SAVAGE_SPEC( v->ub4[specoffset], c )
+#define VERT_SET_SPEC( v, c )                                  \
+do {                                                           \
+   if (specoffset) {                                           \
+      savage_color_t *spec = (savage_color_t *)&((v)->ub4[specoffset]);        \
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);             \
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);           \
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);            \
+   }                                                           \
+} while (0)
 #define VERT_COPY_SPEC( v0, v1 )                                       \
    if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
 #define VERT_SAVE_SPEC( idx )                                          \
@@ -332,16 +437,17 @@ do {                                              \
    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);               \
    GLuint color[n], spec[n];                                   \
    GLuint coloroffset =                                                \
-      ((imesa->DrawPrimitiveCmd & SAVAGE_HW_NO_W) ? 3 : 4);    \
+      ((imesa->skip & SAVAGE_SKIP_W) ? 3 : 4);                 \
    GLboolean specoffset =                                      \
-      ((imesa->DrawPrimitiveCmd & SAVAGE_HW_NO_CS) ? 0 : coloroffset+1);\
+      ((imesa->skip & SAVAGE_SKIP_C1) ? 0 : coloroffset+1);    \
    (void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
 /***********************************************************************
  *                Helpers for rendering unfilled primitives            *
  ***********************************************************************/
 
-#define RASTERIZE(x)
+#define RASTERIZE(x) if (imesa->raster_primitive != reduced_prim[x]) \
+                        savageRasterPrimitive( ctx, x )
 #define RENDER_PRIMITIVE imesa->render_primitive
 #define IND SAVAGE_FALLBACK_BIT
 #define TAG(x) x
@@ -459,6 +565,8 @@ savage_fallback_tri( savageContextPtr imesa,
 {
    GLcontext *ctx = imesa->glCtx;
    SWvertex v[3];
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
    _swsetup_Translate( ctx, v0, &v[0] );
    _swsetup_Translate( ctx, v1, &v[1] );
    _swsetup_Translate( ctx, v2, &v[2] );
@@ -473,6 +581,8 @@ savage_fallback_line( savageContextPtr imesa,
 {
    GLcontext *ctx = imesa->glCtx;
    SWvertex v[2];
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
    _swsetup_Translate( ctx, v0, &v[0] );
    _swsetup_Translate( ctx, v1, &v[1] );
    _swrast_Line( ctx, &v[0], &v[1] );
@@ -485,6 +595,8 @@ savage_fallback_point( savageContextPtr imesa,
 {
    GLcontext *ctx = imesa->glCtx;
    SWvertex v[1];
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
    _swsetup_Translate( ctx, v0, &v[0] );
    _swrast_Point( ctx, &v[0] );
 }
@@ -586,6 +698,7 @@ static void savageFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
 #define _SAVAGE_NEW_RENDER_STATE (_DD_NEW_LINE_STIPPLE |       \
                                  _DD_NEW_LINE_SMOOTH |         \
                                  _DD_NEW_POINT_SMOOTH |        \
+                                 _DD_NEW_TRI_STIPPLE |         \
                                  _DD_NEW_TRI_SMOOTH |          \
                                  _DD_NEW_TRI_UNFILLED |        \
                                  _DD_NEW_TRI_LIGHT_TWOSIDE |   \
@@ -594,7 +707,7 @@ static void savageFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
 /* original driver didn't have DD_POINT_SMOOTH. really needed? */
 #define POINT_FALLBACK (DD_POINT_SMOOTH)
 #define LINE_FALLBACK (DD_LINE_STIPPLE|DD_LINE_SMOOTH)
-#define TRI_FALLBACK (DD_TRI_SMOOTH)
+#define TRI_FALLBACK (DD_TRI_STIPPLE|DD_TRI_SMOOTH)
 #define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
 #define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
 
@@ -605,11 +718,23 @@ static void savageChooseRenderState(GLcontext *ctx)
    GLuint flags = ctx->_TriangleCaps;
    GLuint index = 0;
 
-   if (flags & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+   /* Hook in fallback functions for the ptex hack. Do this first, so
+    * that a real fallback will overwrite them with the respective
+    * savage_fallback_... function.
+    */
+   if (imesa->ptexHack) {
+      /* Do textures make sense with points? */
+      imesa->draw_point = savage_ptex_point;
+      imesa->draw_line = savage_ptex_line;
+      imesa->draw_tri = savage_ptex_tri;
+      index |= SAVAGE_FALLBACK_BIT;
+   } else {
       imesa->draw_point = savage_draw_point;
       imesa->draw_line = savage_draw_line;
       imesa->draw_tri = savage_draw_triangle;
+   }
 
+   if (flags & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
       if (flags & ANY_RASTER_FLAGS) {
         if (flags & DD_TRI_LIGHT_TWOSIDE) index |= SAVAGE_TWOSIDE_BIT;
         if (flags & DD_TRI_OFFSET)        index |= SAVAGE_OFFSET_BIT;
@@ -618,11 +743,15 @@ static void savageChooseRenderState(GLcontext *ctx)
 
       /* Hook in fallbacks for specific primitives.
        */
-      if (flags & (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)) {
+      if (flags & ANY_FALLBACK_FLAGS) {
         if (flags & POINT_FALLBACK) imesa->draw_point = savage_fallback_point;
         if (flags & LINE_FALLBACK)  imesa->draw_line = savage_fallback_line;
         if (flags & TRI_FALLBACK)   imesa->draw_tri = savage_fallback_tri;
         index |= SAVAGE_FALLBACK_BIT;
+        if (SAVAGE_DEBUG & DEBUG_FALLBACKS) {
+           fprintf (stderr, "Per-primitive fallback, TriangleCaps=0x%x\n",
+                    ctx->_TriangleCaps);
+        }
       }
    }
 
@@ -657,36 +786,41 @@ static void savageRunPipeline( GLcontext *ctx )
 {
    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
 
+   if (imesa->no_rast)
+      FALLBACK(ctx, SAVAGE_FALLBACK_NORAST, GL_TRUE);
+
    if (imesa->new_state)
       savageDDUpdateHwState( ctx );
 
-   if (!imesa->Fallback && imesa->new_gl_state) {
+   if (!imesa->Fallback) {
       if (imesa->new_gl_state & _SAVAGE_NEW_RENDER_STATE)
         savageChooseRenderState( ctx );
 
+      /* choose the correct primitive type for tnl rendering */
+      if (imesa->savageScreen->chipset < S3_SAVAGE4 &&
+         (ctx->_TriangleCaps & DD_FLATSHADE)) {
+        if (imesa->HwPrim != SAVAGE_PRIM_TRILIST_201)
+           savageFlushVertices(imesa);
+        imesa->HwPrim = SAVAGE_PRIM_TRILIST_201;
+      } else {
+        if (imesa->HwPrim != SAVAGE_PRIM_TRILIST)
+           savageFlushVertices(imesa);
+        imesa->HwPrim = SAVAGE_PRIM_TRILIST;
+      }
+
       imesa->new_gl_state = 0;
    }
 
    _tnl_run_pipeline( ctx );
+
+   if (imesa->no_rast)
+      FALLBACK(ctx, SAVAGE_FALLBACK_NORAST, GL_FALSE);
 }
 
 /**********************************************************************/
 /*                 High level hooks for t_vb_render.c                 */
 /**********************************************************************/
 
-static GLenum reduced_prim[GL_POLYGON+1] = {
-   GL_POINTS,
-   GL_LINES,
-   GL_LINES,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES
-};
-
 /* This is called when Mesa switches between rendering triangle
  * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
  * and lines, points and bitmaps.
@@ -700,13 +834,13 @@ static void savageRasterPrimitive( GLcontext *ctx, GLuint prim )
 {
    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
 
-   FLUSH_BATCH( imesa );
-
    /* Update culling */
-   if (imesa->raster_primitive != prim)
-      imesa->dirty |= SAVAGE_UPLOAD_CTX;
+   if (imesa->raster_primitive != prim) {
+      imesa->raster_primitive = prim;
+      imesa->new_state |= SAVAGE_NEW_CULL;
+      savageDDUpdateHwState (ctx);
+   }
 
-   imesa->raster_primitive = prim;
 #if 0
    if (ctx->Polygon.StippleFlag && mmesa->haveHwStipple)
    {
@@ -733,96 +867,343 @@ static void savageRenderPrimitive( GLcontext *ctx, GLenum prim )
    }
 }
 
+/* Check if projective texture coordinates are used and if we can fake
+ * them. Fall back to swrast if we can't. Returns GL_TRUE if projective
+ * texture coordinates must be faked, GL_FALSE otherwise.
+ */
+static GLboolean savageCheckPTexHack( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 ) && VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4) {
+      if (!RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_ATTRIB_TEX1, _TNL_LAST_TEX ))
+        return GL_TRUE; /* apply ptex hack */
+      else
+        FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
+   }
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX1 ) && VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
+      FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
+
+   return GL_FALSE; /* don't apply ptex hack */
+}
+
 
-#define EMIT_ATTR( ATTR, STYLE, SKIP )                                 \
+#define DO_EMIT_ATTR( ATTR, STYLE )                                    \
 do {                                                                   \
    imesa->vertex_attrs[imesa->vertex_attr_count].attrib = (ATTR);      \
    imesa->vertex_attrs[imesa->vertex_attr_count].format = (STYLE);     \
    imesa->vertex_attr_count++;                                         \
-   drawCmd &= ~SKIP;                                                   \
 } while (0)
 
-static void savageRenderStart( GLcontext *ctx )
+#define NEED_ATTR( INDEX, SKIP )                                       \
+do {                                                                   \
+   setupIndex |= (INDEX);                                              \
+   skip &= ~(SKIP);                                                    \
+} while (0)
+
+#define EMIT_ATTR( ATTR, STYLE, INDEX, SKIP )                          \
+do {                                                                   \
+   NEED_ATTR( INDEX, SKIP );                                           \
+   DO_EMIT_ATTR( ATTR, STYLE );                                                \
+} while (0)
+
+#define EMIT_PAD( N )                                                  \
+do {                                                                   \
+   imesa->vertex_attrs[imesa->vertex_attr_count].attrib = 0;           \
+   imesa->vertex_attrs[imesa->vertex_attr_count].format = EMIT_PAD;    \
+   imesa->vertex_attrs[imesa->vertex_attr_count].offset = (N);         \
+   imesa->vertex_attr_count++;                                         \
+} while (0)
+
+#define SAVAGE_EMIT_XYZ  0x0001
+#define SAVAGE_EMIT_W    0x0002
+#define SAVAGE_EMIT_C0   0x0004
+#define SAVAGE_EMIT_C1   0x0008
+#define SAVAGE_EMIT_FOG  0x0010
+#define SAVAGE_EMIT_S0   0x0020
+#define SAVAGE_EMIT_T0   0x0040
+#define SAVAGE_EMIT_Q0   0x0080
+#define SAVAGE_EMIT_ST0  0x0060
+#define SAVAGE_EMIT_STQ0 0x00e0
+#define SAVAGE_EMIT_S1   0x0100
+#define SAVAGE_EMIT_T1   0x0200
+#define SAVAGE_EMIT_ST1  0x0300
+
+
+static INLINE GLuint savageChooseVertexFormat_s3d( GLcontext *ctx )
 {
    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
-   GLuint index = tnl->render_inputs;
-   GLuint drawCmd = SAVAGE_HW_SKIPFLAGS;
-   if (imesa->savageScreen->chipset < S3_SAVAGE4)
-      drawCmd &= ~SAVAGE_HW_NO_UV1;
-   drawCmd &= ~SAVAGE_HW_NO_Z; /* all mesa vertices have a z coordinate */
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;
+   GLubyte skip;
 
-   /* Important:
-    */
-   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
    imesa->vertex_attr_count = 0;
+
+   skip = SAVAGE_SKIP_ALL_S3D;
+   skip &= ~SAVAGE_SKIP_Z; /* all mesa vertices have a z coordinate */
+
    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
     * build up a hardware vertex.
     */
-   if (index & _TNL_BITS_TEX_ANY) {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, SAVAGE_HW_NO_W );
-   }
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) || !(ctx->_TriangleCaps & DD_FLATSHADE))
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, SAVAGE_EMIT_W, SAVAGE_SKIP_W );
    else {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 0 );
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 0, 0 );
+      EMIT_PAD( 4 );
+      skip &= ~SAVAGE_SKIP_W;
    }
 
    /* t_context.c always includes a diffuse color */
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SAVAGE_HW_NO_CD );
-      
-   if (index & (_TNL_BIT_COLOR1|_TNL_BIT_FOG)) {
-      EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, SAVAGE_HW_NO_CS );
-      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, SAVAGE_HW_NO_CS );
-   }
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, SAVAGE_EMIT_C0, SAVAGE_SKIP_C0 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ))
+      EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, SAVAGE_EMIT_C1, SAVAGE_SKIP_C1 );
+   else
+      EMIT_PAD( 3 );
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, SAVAGE_EMIT_FOG, SAVAGE_SKIP_C1 );
+   else
+      EMIT_PAD( 1 );
+   skip &= ~SAVAGE_SKIP_C1;
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (imesa->ptexHack)
+        EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW, SAVAGE_EMIT_STQ0, SAVAGE_SKIP_ST0);
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4)
+        assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size >= 2)
+        /* The chromium menu emits some 3D tex coords even though no
+         * 3D texture is enabled. Ignore the 3rd coordinate. */
+        EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, SAVAGE_EMIT_ST0, SAVAGE_SKIP_ST0 );
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 1) {
+        EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F, SAVAGE_EMIT_S0, SAVAGE_SKIP_S0 );
+        EMIT_PAD( 4 );
+      } else
+        EMIT_PAD( 8 );
+   } else
+      EMIT_PAD( 8 );
+   skip &= ~SAVAGE_SKIP_ST0;
+
+   assert (skip == 0);
+   imesa->skip = skip;
+   return setupIndex;
+}
 
-   if (index & _TNL_BIT_TEX(0)) {
-      if (VB->TexCoordPtr[0]->size > 2) {
-        /* projective textures are not supported by the hardware */
-        FALLBACK(ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
-      }
-      if (VB->TexCoordPtr[0]->size == 2)
-        EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, SAVAGE_HW_NO_UV0 );
+
+static INLINE GLuint savageChooseVertexFormat_s4( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;
+   GLubyte skip;
+   GLuint size, mask;
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+   skip = SAVAGE_SKIP_ALL_S4;
+   skip &= ~SAVAGE_SKIP_Z; /* all mesa vertices have a z coordinate */
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) || !(ctx->_TriangleCaps & DD_FLATSHADE))
+      NEED_ATTR( SAVAGE_EMIT_W, SAVAGE_SKIP_W );
+
+   /* t_context.c always includes a diffuse color */
+   NEED_ATTR( SAVAGE_EMIT_C0, SAVAGE_SKIP_C0 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ))
+      NEED_ATTR( SAVAGE_EMIT_C1, SAVAGE_SKIP_C1 );
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+      NEED_ATTR( SAVAGE_EMIT_FOG, SAVAGE_SKIP_C1 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (imesa->ptexHack)
+        NEED_ATTR( SAVAGE_EMIT_STQ0, SAVAGE_SKIP_ST0);
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4)
+        assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size >= 2)
+        /* The chromium menu emits some 3D tex coords even though no
+         * 3D texture is enabled. Ignore the 3rd coordinate. */
+        NEED_ATTR( SAVAGE_EMIT_ST0, SAVAGE_SKIP_ST0 );
       else
-        EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F, SAVAGE_HW_NO_U0 );
+        NEED_ATTR( SAVAGE_EMIT_S0, SAVAGE_SKIP_S0 );
    }
-   if (index & _TNL_BIT_TEX(1)) {
-      if (VB->TexCoordPtr[1]->size > 2) {
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX1 )) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
         /* projective textures are not supported by the hardware */
-        FALLBACK(ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
-      }
-      if (VB->TexCoordPtr[1]->size == 2)
-        EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, SAVAGE_HW_NO_UV1 );
+        assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size >= 2)
+        NEED_ATTR( SAVAGE_EMIT_ST1, SAVAGE_SKIP_ST1 );
       else
-        EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_1F, SAVAGE_HW_NO_U1 );
+        NEED_ATTR( SAVAGE_EMIT_S1, SAVAGE_SKIP_S1 );
    }
 
-   /* Only need to change the vertex emit code if there has been a
-    * statechange to a new hardware vertex format:
+   /* if nothing changed we can skip the rest */
+   if (setupIndex == imesa->SetupIndex && imesa->vertex_size != 0)
+      return setupIndex;
+
+   if (imesa->enable_vdma) {
+      mask = SAVAGE_SKIP_W;
+      size = 10 - (skip & 1) - (skip >> 1 & 1) -
+        (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+        (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+
+      while (size < 8) {
+        if (skip & mask) {
+           skip &= ~mask;
+           size++;
+        }
+        mask <<= 1;
+      }
+   }
+
+   imesa->vertex_attr_count = 0;
+
+   if (skip & SAVAGE_SKIP_W)
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT );
+   else if (setupIndex & SAVAGE_EMIT_W)
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT );
+   else {
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT );
+      EMIT_PAD( 4 );
+   }
+
+   DO_EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA );
+
+   if (!(skip & SAVAGE_SKIP_C1)) {
+      if (!(setupIndex & (SAVAGE_EMIT_C1|SAVAGE_EMIT_FOG)))
+        EMIT_PAD( 4 );
+      else {
+        if (setupIndex & SAVAGE_EMIT_C1)
+           DO_EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR );
+        else
+           EMIT_PAD( 3 );
+        if (setupIndex & SAVAGE_EMIT_FOG)
+           DO_EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F );
+        else
+           EMIT_PAD( 1 );
+      }
+   }
+
+   if ((skip & SAVAGE_SKIP_ST0) != SAVAGE_SKIP_ST0) {
+      if ((setupIndex & SAVAGE_EMIT_STQ0) == SAVAGE_EMIT_STQ0)
+        DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW );
+      else if ((setupIndex & SAVAGE_EMIT_ST0) == SAVAGE_EMIT_ST0)
+        DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F );
+      else if ((setupIndex & SAVAGE_EMIT_ST0) == SAVAGE_EMIT_S0) {
+        DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F );
+        if (!(skip & SAVAGE_SKIP_T0)) EMIT_PAD( 4 );
+      } else {
+        if (!(skip & SAVAGE_SKIP_S0)) EMIT_PAD( 4 );
+        if (!(skip & SAVAGE_SKIP_T0)) EMIT_PAD( 4 );
+      }
+   }
+
+   if ((skip & SAVAGE_SKIP_ST1) != SAVAGE_SKIP_ST1) {
+      if ((setupIndex & SAVAGE_EMIT_ST1) == SAVAGE_EMIT_ST1)
+        DO_EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F );
+      else if ((setupIndex & SAVAGE_EMIT_ST1) == SAVAGE_EMIT_S1) {
+        DO_EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_1F );
+        if (!(skip & SAVAGE_SKIP_T1)) EMIT_PAD( 4 );
+      } else {
+        if (!(skip & SAVAGE_SKIP_S1)) EMIT_PAD( 4 );
+        if (!(skip & SAVAGE_SKIP_T1)) EMIT_PAD( 4 );
+      }
+   }
+
+   imesa->skip = skip;
+   return setupIndex;
+}
+
+
+static void savageRenderStart( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;
+   GLboolean ptexHack;
+
+   /* Check if we need to apply the ptex hack. Choose a new render
+    * state if necessary. (Note: this can't be done in
+    * savageRunPipeline, since the number of vertex coordinates can
+    * change in the pipeline. texmat or texgen or both?) */
+   ptexHack = savageCheckPTexHack( ctx );
+   if (ptexHack != imesa->ptexHack) {
+      imesa->ptexHack = ptexHack;
+      savageChooseRenderState (ctx);
+   }
+   /* Handle fallback cases identified in savageCheckPTexHack. */
+   if (SAVAGE_CONTEXT(ctx)->Fallback) {
+      tnl->Driver.Render.Start(ctx);
+      return;
+   }
+
+   /* Important:
     */
-   if (drawCmd != (imesa->DrawPrimitiveCmd & SAVAGE_HW_SKIPFLAGS)) {
-      imesa->vertex_size = 
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+      setupIndex = savageChooseVertexFormat_s3d(ctx);
+   } else {
+      setupIndex = savageChooseVertexFormat_s4(ctx);
+   }
+
+   /* Need to change the vertex emit code if the SetupIndex changed or
+    * is set for the first time (indicated by vertex_size == 0). */
+   if (setupIndex != imesa->SetupIndex || imesa->vertex_size == 0) {
+      GLuint hwVertexSize;
+      imesa->vertex_size =
         _tnl_install_attrs( ctx, 
                             imesa->vertex_attrs, 
                             imesa->vertex_attr_count,
                             imesa->hw_viewport, 0 );
       imesa->vertex_size >>= 2;
-
-      imesa->DrawPrimitiveCmd = drawCmd;
-   }
-
-   if (!SAVAGE_CONTEXT(ctx)->Fallback) {
-      /* Update hardware state and get the lock */
-      savageDDRenderStart( ctx );
-   } else {
-      tnl->Driver.Render.Start(ctx);
+      imesa->SetupIndex = setupIndex;
+
+      hwVertexSize = imesa->vertex_size;
+      if (setupIndex & SAVAGE_EMIT_Q0) {
+        /* The vertex setup code emits homogeneous texture
+         * coordinates. They are converted to normal 2D coords by
+         * savage_ptex_tri/line/point. Now we have two different
+         * vertex sizes. Functions that emit vertices to the hardware
+         * need to use HwVertexSize, anything that manipulates the
+         * vertices generated by t_vertex uses vertex_size. */
+        hwVertexSize--;
+        assert (imesa->ptexHack);
+      } else
+        assert (!imesa->ptexHack);
+
+      if (hwVertexSize != imesa->HwVertexSize) {
+        /* Changing the vertex size: flush vertex and command buffer and
+         * discard the DMA buffer, if we were using one. */
+        savageFlushVertices(imesa);
+        savageFlushCmdBuf(imesa, GL_TRUE);
+        if (hwVertexSize == 8 && imesa->enable_vdma) {
+           if (SAVAGE_DEBUG & DEBUG_DMA)
+              fprintf (stderr, "Using DMA, skip=0x%02x\n", imesa->skip);
+           /* we can use vertex dma */
+           imesa->vtxBuf = &imesa->dmaVtxBuf;
+        } else {
+           if (SAVAGE_DEBUG & DEBUG_DMA)
+              fprintf (stderr, "Not using DMA, skip=0x%02x\n", imesa->skip);
+           imesa->vtxBuf = &imesa->clientVtxBuf;
+        }
+        imesa->HwVertexSize = hwVertexSize;
+      }
    }
 }
 
 static void savageRenderFinish( GLcontext *ctx )
 {
-   /* Release the lock */
-   savageDDRenderEnd( ctx );
+   /* Flush the last primitive now, before any state is changed. */
+   savageFlushVertices(SAVAGE_CONTEXT(ctx));
 
    if (SAVAGE_CONTEXT(ctx)->RenderIndex & SAVAGE_FALLBACK_BIT)
       _swrast_flush( ctx );
@@ -833,22 +1214,38 @@ static void savageRenderFinish( GLcontext *ctx )
 /*           Transition to/from hardware rasterization.               */
 /**********************************************************************/
 
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "Draw buffer",
+   "Read buffer",
+   "Color mask",
+   "Specular",
+   "LogicOp",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "Hardware rasterization disabled",
+   "Projective texture",
+};
+
 void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
    GLuint oldfallback = imesa->Fallback;
+   GLuint index;
+   for (index = 0; (1 << index) < bit; ++index);
 
    if (mode) {
       imesa->Fallback |= bit;
       if (oldfallback == 0) {
         /* the first fallback */
-        LOCK_HARDWARE(SAVAGE_CONTEXT(ctx));
-        FLUSH_BATCH( imesa );
-        UNLOCK_HARDWARE(SAVAGE_CONTEXT(ctx));
         _swsetup_Wakeup( ctx );
         imesa->RenderIndex = ~0;
       }
+      if (!(oldfallback & bit) && (SAVAGE_DEBUG & DEBUG_FALLBACKS))
+        fprintf (stderr, "Savage begin fallback: 0x%x %s\n",
+                 bit, fallbackStrings[index]);
    }
    else {
       imesa->Fallback &= ~bit;
@@ -872,6 +1269,9 @@ void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 
         imesa->new_gl_state |= _SAVAGE_NEW_RENDER_STATE;
       }
+      if ((oldfallback & bit) && (SAVAGE_DEBUG & DEBUG_FALLBACKS))
+        fprintf (stderr, "Savage end fallback: 0x%x %s\n",
+                 bit, fallbackStrings[index]);
    }
 }