Convert all uses of CARD32 and CARD8 to int32_t and int8_t.

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_swtcl.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c

index 926b1523d62c3e0597b200c5c984e6707dc9206f..fd14e76d4441a3218e1fc4daa46ea1aef9e07ac9 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -1,4 +1,4 @@
-/* $XFree86$ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c,v 1.6 2003/05/06 23:52:08 daenzer Exp $ */
  /**************************************************************************
  
  Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
@@ -44,8 +44,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "math/m_translate.h"
  #include "tnl/tnl.h"
  #include "tnl/t_context.h"
-#include "tnl/t_imm_exec.h"
  #include "tnl/t_pipeline.h"
+#include "tnl/t_vtx_api.h"     /* for _tnl_FlushVertices */
  
  #include "radeon_context.h"
  #include "radeon_ioctl.h"
@@ -67,7 +67,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define RADEON_MAX_SETUP       0x40
  
  static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
-static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa );
  
  static struct {
     void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
@@ -75,7 +74,6 @@ static struct {
     copy_pv_func                copy_pv;
     GLboolean           (*check_tex_sizes)( GLcontext *ctx );
     GLuint               vertex_size;
-   GLuint               vertex_stride_shift;
     GLuint               vertex_format;
  } setup_tab[RADEON_MAX_SETUP];
  
@@ -135,9 +133,7 @@ static struct {
  #define GET_TEXSOURCE(n)  n
  #define GET_VERTEX_FORMAT() RADEON_CONTEXT(ctx)->swtcl.vertex_format
  #define GET_VERTEX_STORE() RADEON_CONTEXT(ctx)->swtcl.verts
-#define GET_VERTEX_STRIDE_SHIFT() RADEON_CONTEXT(ctx)->swtcl.vertex_stride_shift
-#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
-#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+#define GET_VERTEX_SIZE() (RADEON_CONTEXT(ctx)->swtcl.vertex_size * sizeof(GLuint)) /* vertex size in bytes; parenthesized so it binds as one operand */
  
  #define HAVE_HW_VIEWPORT    1
  /* Tiny vertices don't seem to work atm - haven't looked into why.
@@ -155,10 +151,6 @@ static struct {
  #define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
                                                      DD_TRI_UNFILLED)))
  
-#define IMPORT_QUALIFIER
-#define IMPORT_FLOAT_COLORS radeon_import_float_colors
-#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
-
  #define INTERP_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].interp
  #define COPY_PV_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
  
@@ -272,7 +264,6 @@ static void radeonRenderStart( GLcontext *ctx )
          RADEON_NEWPRIM(rmesa);
          rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
          rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
-        rmesa->swtcl.vertex_stride_shift = setup_tab[ind].vertex_stride_shift;
        }
  
        if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
@@ -282,7 +273,6 @@ static void radeonRenderStart( GLcontext *ctx )
     }
     
     if (rmesa->dma.flush != 0 && 
-       rmesa->dma.flush != flush_last_swtcl_prim_compat &&
         rmesa->dma.flush != flush_last_swtcl_prim)
        rmesa->dma.flush( rmesa );
  }
@@ -292,9 +282,8 @@ void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
                            GLuint newinputs )
  {
     radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   GLubyte *v = ((GLubyte *)rmesa->swtcl.verts + 
-                (start << rmesa->swtcl.vertex_stride_shift));
-   GLuint stride = 1 << rmesa->swtcl.vertex_stride_shift;
+   GLuint stride = rmesa->swtcl.vertex_size * sizeof(int);
+   GLubyte *v = ((GLubyte *)rmesa->swtcl.verts + (start * stride));
  
     newinputs |= rmesa->swtcl.SetupNewInputs;
     rmesa->swtcl.SetupNewInputs = 0;
@@ -339,7 +328,6 @@ void radeonChooseVertexState( GLcontext *ctx )
        RADEON_NEWPRIM(rmesa);
        rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
        rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
-      rmesa->swtcl.vertex_stride_shift = setup_tab[ind].vertex_stride_shift;
     }
  
     {
@@ -381,7 +369,7 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
  
     if (rmesa->dma.current.buf) {
        struct radeon_dma_region *current = &rmesa->dma.current;
-      GLuint current_offset = (rmesa->radeonScreen->agp_buffer_offset +
+      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
                                current->buf->buf->idx * RADEON_BUFFER_SIZE + 
                                current->start);
  
@@ -408,46 +396,6 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
  }
  
  
-static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa )
-{
-   struct radeon_dma_region *current = &rmesa->dma.current;
-
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s buf %p start %d ptr %d\n", 
-             __FUNCTION__,
-             current->buf,
-             current->start,
-             current->ptr);
-
-   assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-   assert (current->start + 
-          rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-          current->ptr);
-   assert (current->start == 0);
-
-   rmesa->dma.flush = 0;
-
-   if (current->ptr && current->buf) {
-      assert (current->buf->refcount == 1);
-
-      radeonCompatEmitPrimitive( rmesa,
-                                rmesa->swtcl.vertex_format,
-                                rmesa->swtcl.hw_primitive,
-                                rmesa->swtcl.numverts);
-      
-      /* The buffer has been released:
-       */
-      FREE(current->buf);
-      current->buf = 0;
-      current->start = 0;
-      current->ptr = current->end;
-
-   }
-
-   rmesa->swtcl.numverts = 0;
-}
-
-
  /* Alloc space in the current dma region.
   */
  static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
@@ -460,22 +408,18 @@ static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
  
     if (!rmesa->dma.flush) {
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-      if (rmesa->dri.drmMinor == 1)
-        rmesa->dma.flush = flush_last_swtcl_prim_compat;
-      else
-        rmesa->dma.flush = flush_last_swtcl_prim;
+      rmesa->dma.flush = flush_last_swtcl_prim;
     }
  
     assert( vsize == rmesa->swtcl.vertex_size * 4 );
-   assert( rmesa->dma.flush == flush_last_swtcl_prim ||
-          rmesa->dma.flush == flush_last_swtcl_prim_compat);
+   assert( rmesa->dma.flush == flush_last_swtcl_prim );
     assert (rmesa->dma.current.start + 
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current.ptr);
  
  
     {
-      char *head = rmesa->dma.current.address + rmesa->dma.current.ptr;
+      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
        rmesa->dma.current.ptr += bytes;
        rmesa->swtcl.numverts += nverts;
        return head;
@@ -486,13 +430,15 @@ static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
  
  
  
-void radeon_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count )
+static void *radeon_emit_contiguous_verts( GLcontext *ctx, 
+                                          GLuint start, 
+                                          GLuint count,
+                                          void *dest)   /* dest: caller-supplied buffer the vertices are emitted into */
  {
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint vertex_size = rmesa->swtcl.vertex_size * 4;
-   CARD32 *dest = radeonAllocDmaLowVerts( rmesa, count-start, vertex_size );
-   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, dest, 
-                                           vertex_size );
+   GLuint stride = rmesa->swtcl.vertex_size * 4;   /* bytes per emitted vertex */
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, dest, stride );
+   return (void *)((char *)dest + stride * (count - start));   /* first byte after the (count - start) emitted vertices */
  }
  
  
@@ -558,37 +504,13 @@ static __inline void radeonEltPrimitive( radeonContextPtr rmesa, GLenum prim )
  }
  
  
-static void VERT_FALLBACK( GLcontext *ctx,
-                          GLuint start,
-                          GLuint count,
-                          GLuint flags )
-{
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
-   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
-   tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
-}
-
-static void ELT_FALLBACK( GLcontext *ctx,
-                         GLuint start,
-                         GLuint count,
-                         GLuint flags )
-{
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
-   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
-   tnl->Driver.Render.PrimTabElts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
-}
  
  
  #define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-#define ELTS_VARS  GLushort *dest
+#define ELTS_VARS( buf )  GLushort *dest = buf
  #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
  #define ELT_INIT(prim) radeonEltPrimitive( rmesa, prim )
-#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
-#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+#define FLUSH()  RADEON_NEWPRIM( rmesa )
  #define GET_CURRENT_VB_MAX_VERTS() \
    (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
  #define GET_SUBSEQUENT_VB_MAX_VERTS() \
@@ -605,38 +527,35 @@ static void ELT_FALLBACK( GLcontext *ctx,
    ((RADEON_CMD_BUF_SZ - 1024) / 2)
  
  
+static void *radeon_alloc_elts( radeonContextPtr rmesa, int nr )
+{  /* Returns room for nr elements (nr*2 bytes): appended in place when it fits, else obtained from radeonAllocEltsOpenEnded(). */
+   if (rmesa->dma.flush == radeonFlushElts &&
+       rmesa->store.cmd_used + nr*2 < RADEON_CMD_BUF_SZ) {
  
-/* How do you extend an existing primitive?
- */
-#define ALLOC_ELTS(nr)                                                 \
-do {                                                                   \
-   if (rmesa->dma.flush == radeonFlushElts &&                          \
-       rmesa->store.cmd_used + nr*2 < RADEON_CMD_BUF_SZ) {             \
-                                                                       \
-      dest = (GLushort *)(rmesa->store.cmd_buf +                       \
-                         rmesa->store.cmd_used);                       \
-      rmesa->store.cmd_used += nr*2;                                   \
-   }                                                                   \
-   else {                                                              \
-      if (rmesa->dma.flush) {                                          \
-        rmesa->dma.flush( rmesa );                                     \
-      }                                                                        \
-                                                                       \
-      radeonEmitVertexAOS( rmesa,                                      \
-                          rmesa->swtcl.vertex_size,                    \
-                          (rmesa->radeonScreen->agp_buffer_offset +            \
-                           rmesa->swtcl.indexed_verts.buf->buf->idx *  \
-                           RADEON_BUFFER_SIZE +                        \
-                           rmesa->swtcl.indexed_verts.start));         \
-                                                                       \
-      dest = radeonAllocEltsOpenEnded( rmesa,                          \
-                                      rmesa->swtcl.vertex_format,      \
-                                      rmesa->swtcl.hw_primitive,       \
-                                      nr );                            \
-   }                                                                   \
-} while (0)
+      void *elts = (void *)(rmesa->store.cmd_buf + rmesa->store.cmd_used); /* start of the new run */
+      rmesa->store.cmd_used += nr*2;   /* reserve only after taking the start address, as the old ALLOC_ELTS did */
+      return elts;
+   }
+   else {
+      if (rmesa->dma.flush) {
+        rmesa->dma.flush( rmesa );   /* run the pending flush callback before starting over */
+      }
  
-#define ALLOC_ELTS_NEW_PRIMITIVE(nr) ALLOC_ELTS( nr )
+      radeonEmitVertexAOS( rmesa,
+                          rmesa->swtcl.vertex_size,
+                          (rmesa->radeonScreen->gart_buffer_offset +
+                           rmesa->swtcl.indexed_verts.buf->buf->idx *
+                           RADEON_BUFFER_SIZE +
+                           rmesa->swtcl.indexed_verts.start));
+
+      return (void *) radeonAllocEltsOpenEnded( rmesa,
+                                               rmesa->swtcl.vertex_format,
+                                               rmesa->swtcl.hw_primitive,
+                                               nr );
+   }
+}
+
+#define ALLOC_ELTS(nr) radeon_alloc_elts(rmesa, nr)
  
  #ifdef MESA_BIG_ENDIAN
  /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
@@ -649,14 +568,18 @@ do {                                                                      \
  #endif
  #define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
  #define INCR_ELTS( nr ) dest += nr
+#define ELTPTR dest
  #define RELEASE_ELT_VERTS() \
    radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
-#define EMIT_VERTS( ctx, j, nr ) \
-  radeon_emit_contiguous_verts(ctx, j, (j)+(nr))
  #define EMIT_INDEXED_VERTS( ctx, start, count ) \
    radeon_emit_indexed_verts( ctx, start, count )
  
  
+#define ALLOC_VERTS( nr ) \
+  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+  radeon_emit_contiguous_verts(ctx, j, (j)+(nr), buf)
+
  #define TAG(x) radeon_dma_##x
  #include "tnl_dd/t_dd_dmatmp.h"
  
@@ -667,52 +590,48 @@ do {                                                                      \
  
  
  static GLboolean radeon_run_render( GLcontext *ctx,
-                                   struct gl_pipeline_stage *stage )
+                                   struct tnl_pipeline_stage *stage )
  {
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
     struct vertex_buffer *VB = &tnl->vb;
-   GLuint i, length, flags = 0;
     render_func *tab = TAG(render_tab_verts);
+   GLuint i;
  
     if (rmesa->swtcl.indexed_verts.buf && (!VB->Elts || stage->changed_inputs)) 
        RELEASE_ELT_VERTS();
         
-   if (VB->ClipOrMask ||            /* No clipping */
-       rmesa->swtcl.RenderIndex != 0 ||    /* No per-vertex manipulations */
-       ctx->Line.StippleFlag)        /* GH: THIS IS A HACK!!! */
+   if (rmesa->swtcl.RenderIndex != 0 ||   
+       !radeon_dma_validate_render( ctx, VB ))
        return GL_TRUE;          
  
-   if (rmesa->dri.drmMinor < 3) {
-      /* drm 1.1 doesn't support vertex primitives starting in the
-       * middle of a buffer.  It doesn't support sane indexed vertices
-       * either.  drm 1.2 fixes both of these problems, but we don't have a
-       * compatibility layer to that version yet.  
-       */
-      return GL_TRUE;
-   }
-
     tnl->Driver.Render.Start( ctx );
  
     if (VB->Elts) {
        tab = TAG(render_tab_elts);
-      if (!rmesa->swtcl.indexed_verts.buf)
-        if (!TAG(emit_elt_verts)(ctx, 0, VB->Count))
-           return GL_TRUE;     /* too many vertices */
+      if (!rmesa->swtcl.indexed_verts.buf) {
+        if (VB->Count > GET_SUBSEQUENT_VB_MAX_VERTS())
+           return GL_TRUE;
+        EMIT_INDEXED_VERTS(ctx, 0, VB->Count);
+      }
     }
  
-   for (i = 0 ; !(flags & PRIM_LAST) ; i += length)
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
     {
-      flags = VB->Primitive[i];
-      length = VB->PrimitiveLength[i];
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+        continue;
  
        if (RADEON_DEBUG & DEBUG_PRIMS)
          fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
-                _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
-                i, i+length);
+                _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
+                start, start+length);
  
        if (length)
-        tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+        tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
     }
  
     tnl->Driver.Render.Finish( ctx );
@@ -723,35 +642,19 @@ static GLboolean radeon_run_render( GLcontext *ctx,
  
  
  static void radeon_check_render( GLcontext *ctx,
-                                struct gl_pipeline_stage *stage )
+                                struct tnl_pipeline_stage *stage )
  {
-   GLuint inputs = VERT_BIT_POS | VERT_BIT_CLIP | VERT_BIT_COLOR0;
-
-   if (ctx->RenderMode == GL_RENDER) {
-      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
-        inputs |= VERT_BIT_COLOR1;
-
-      if (ctx->Texture.Unit[0]._ReallyEnabled)
-        inputs |= VERT_BIT_TEX0;
-
-      if (ctx->Texture.Unit[1]._ReallyEnabled)
-        inputs |= VERT_BIT_TEX1;
-
-      if (ctx->Fog.Enabled)
-        inputs |= VERT_BIT_FOG;
-   }
-
-   stage->inputs = inputs;
+   stage->inputs = TNL_CONTEXT(ctx)->render_inputs;   /* copy the TNL context's render_inputs instead of rebuilding the mask here */
  }
  
  
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
  {
     (void)stage;
  }
  
  
-const struct gl_pipeline_stage _radeon_render_stage =
+const struct tnl_pipeline_stage _radeon_render_stage =
  {
     "radeon render",
     (_DD_NEW_SEPARATE_SPECULAR |
@@ -784,7 +687,7 @@ struct texrect_stage_data {
  
  
  static GLboolean run_texrect_stage( GLcontext *ctx,
-                                   struct gl_pipeline_stage *stage )
+                                   struct tnl_pipeline_stage *stage )
  {
     struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -801,7 +704,7 @@ static GLboolean run_texrect_stage( GLcontext *ctx,
     
        if (stage->changed_inputs & VERT_BIT_TEX(i)) {
          struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
-        struct gl_texture_image *texImage = texObj->Image[texObj->BaseLevel];
+        struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
          const GLfloat iw = 1.0/texImage->Width;
          const GLfloat ih = 1.0/texImage->Height;
          GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
@@ -826,7 +729,7 @@ static GLboolean run_texrect_stage( GLcontext *ctx,
  /* Called the first time stage->run() is invoked.
   */
  static GLboolean alloc_texrect_data( GLcontext *ctx,
-                                    struct gl_pipeline_stage *stage )
+                                    struct tnl_pipeline_stage *stage )
  {
     struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
     struct texrect_stage_data *store;
@@ -848,7 +751,7 @@ static GLboolean alloc_texrect_data( GLcontext *ctx,
  
  
  static void check_texrect( GLcontext *ctx,
-                          struct gl_pipeline_stage *stage )
+                          struct tnl_pipeline_stage *stage )
  {
     GLuint flags = 0;
  
@@ -864,7 +767,7 @@ static void check_texrect( GLcontext *ctx,
  }
  
  
-static void free_texrect_data( struct gl_pipeline_stage *stage )
+static void free_texrect_data( struct tnl_pipeline_stage *stage )
  {
     struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
     GLuint i;
@@ -879,7 +782,7 @@ static void free_texrect_data( struct gl_pipeline_stage *stage )
  }
  
  
-const struct gl_pipeline_stage _radeon_texrect_stage =
+const struct tnl_pipeline_stage _radeon_texrect_stage =
  {
     "radeon texrect stage",                     /* name */
     _NEW_TEXTURE,       /* check_state */
@@ -921,6 +824,7 @@ static void radeonResetLineStipple( GLcontext *ctx );
   ***********************************************************************/
  
  #undef LOCAL_VARS
+#undef ALLOC_VERTS
  #define CTX_ARG radeonContextPtr rmesa
  #define CTX_ARG2 rmesa
  #define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
@@ -928,9 +832,8 @@ static void radeonResetLineStipple( GLcontext *ctx );
  #undef LOCAL_VARS
  #define LOCAL_VARS                                             \
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);               \
-   const GLuint shift = rmesa->swtcl.vertex_stride_shift;      \
     const char *radeonverts = (char *)rmesa->swtcl.verts;
-#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define VERT(x) (radeonVertex *)(radeonverts + ((x) * vertsize * sizeof(int))) /* (x) parenthesized: '*' binds tighter than the old '<<', so VERT(j-1) must not become j - 1*size */
  #define VERTEX radeonVertex 
  #undef TAG
  #define TAG(x) radeon_##x
@@ -952,7 +855,6 @@ static void radeonResetLineStipple( GLcontext *ctx );
  
  #define RADEON_TWOSIDE_BIT     0x01
  #define RADEON_UNFILLED_BIT    0x02
-#define RADEON_OFFSET_BIT      0x04 /* drmMinor == 1 */
  #define RADEON_MAX_TRIFUNC     0x08
  
  
@@ -965,7 +867,7 @@ static struct {
  
  
  #define DO_FALLBACK  0
-#define DO_OFFSET   (IND & RADEON_OFFSET_BIT)
+#define DO_OFFSET    0
  #define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
  #define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
  #define DO_FLAT      0
@@ -989,23 +891,43 @@ static struct {
  #define VERT_Y(_v) _v->v.y
  #define VERT_Z(_v) _v->v.z
  #define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e<<rmesa->swtcl.vertex_stride_shift))
+#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int))) /* byte offset of vertex e; (e) parenthesized so expression arguments scale correctly */
+
+#define VERT_SET_RGBA( v, c )                                          \
+do {                                                           \
+   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);  \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);               \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);             \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);              \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);             \
+} while (0)
  
-#define VERT_SET_RGBA( v, c )    v->ui[coloroffset] = LE32_TO_CPU(*(GLuint *)c)
  #define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
-#define VERT_SAVE_RGBA( idx )    color[idx] = CPU_TO_LE32(v[idx]->ui[coloroffset])
-#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = LE32_TO_CPU(color[idx])
-
-#define VERT_SET_SPEC( v0, c )   if (havespec) {                       \
-                                       v0->v.specular.red   = (c)[0];  \
-                                       v0->v.specular.green = (c)[1];  \
-                                       v0->v.specular.blue  = (c)[2]; }
-#define VERT_COPY_SPEC( v0, v1 ) if (havespec) {                                       \
-                                       v0->v.specular.red   = v1->v.specular.red;      \
-                                       v0->v.specular.green = v1->v.specular.green;    \
-                                       v0->v.specular.blue  = v1->v.specular.blue; }
-#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = CPU_TO_LE32(v[idx]->ui[5])
-#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = LE32_TO_CPU(spec[idx])
+
+#define VERT_SET_SPEC( v0, c )                                 \
+do {                                                           \
+   if (havespec) {                                             \
+      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]);    \
+      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]);  \
+      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]);   \
+   }                                                           \
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )                       \
+do {                                                   \
+   if (havespec) {                                     \
+      v0->v.specular.red   = v1->v.specular.red;       \
+      v0->v.specular.green = v1->v.specular.green;     \
+      v0->v.specular.blue  = v1->v.specular.blue;      \
+   }                                                   \
+} while (0)
+
+/* These don't need LE32_TO_CPU() as they are used to save and restore
+ * colors which are already in the correct format.
+ */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
  
  #undef LOCAL_VARS
  #undef TAG
@@ -1051,22 +973,6 @@ static struct {
  #define TAG(x) x##_twoside_unfilled
  #include "tnl_dd/t_dd_tritmp.h"
  
-#define IND (RADEON_OFFSET_BIT)
-#define TAG(x) x##_offset
-#include "tnl_dd/t_dd_tritmp.h"
-
-#define IND (RADEON_TWOSIDE_BIT|RADEON_OFFSET_BIT)
-#define TAG(x) x##_twoside_offset
-#include "tnl_dd/t_dd_tritmp.h"
-
-#define IND (RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
-#define TAG(x) x##_unfilled_offset
-#include "tnl_dd/t_dd_tritmp.h"
-
-#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
-#define TAG(x) x##_twoside_unfilled_offset
-#include "tnl_dd/t_dd_tritmp.h"
-
  
  static void init_rast_tab( void )
  {
@@ -1074,17 +980,13 @@ static void init_rast_tab( void )
     init_twoside();
     init_unfilled();
     init_twoside_unfilled();
-   init_offset();
-   init_twoside_offset();
-   init_unfilled_offset();
-   init_twoside_unfilled_offset();
  }
  
  /**********************************************************************/
  /*               Render unclipped begin/end objects                   */
  /**********************************************************************/
  
-#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define VERT(x) (radeonVertex *)(radeonverts + ((x) * vertsize * sizeof(int))) /* (x) parenthesized: the render templates may pass expressions such as j-1, which '*' would otherwise split */
  #define RENDER_POINTS( start, count )          \
     for ( ; start < count ; start++)            \
        radeon_point( rmesa, VERT(start) )
@@ -1101,7 +1003,7 @@ static void init_rast_tab( void )
  #undef LOCAL_VARS
  #define LOCAL_VARS                                             \
     radeonContextPtr rmesa = RADEON_CONTEXT(ctx);               \
-   const GLuint shift = rmesa->swtcl.vertex_stride_shift;              \
+   const GLuint vertsize = rmesa->swtcl.vertex_size;           \
     const char *radeonverts = (char *)rmesa->swtcl.verts;               \
     const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;       \
     const GLboolean stipple = ctx->Line.StippleFlag;            \
@@ -1136,8 +1038,6 @@ void radeonChooseRenderState( GLcontext *ctx )
  
     if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
     if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
-   if ((flags & DD_TRI_OFFSET) &&
-       rmesa->dri.drmMinor == 1)  index |= RADEON_OFFSET_BIT;
  
     if (index != rmesa->swtcl.RenderIndex) {
        tnl->Driver.Render.Points = rast_tab[index].points;
@@ -1271,7 +1171,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
  
  void radeonFlushVertices( GLcontext *ctx, GLuint flags )
  {
-   _tnl_flush_vertices( ctx, flags );
+   _tnl_FlushVertices( ctx, flags );
  
     if (flags & FLUSH_STORED_VERTICES)
        RADEON_NEWPRIM( RADEON_CONTEXT( ctx ) );
@@ -1300,7 +1200,7 @@ void radeonInitSwtcl( GLcontext *ctx )
     tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
     tnl->Driver.Render.BuildVertices = radeonBuildVertices;
  
-   rmesa->swtcl.verts = ALIGN_MALLOC( size * 16 * 4, 32 );
+   rmesa->swtcl.verts = (GLubyte *)ALIGN_MALLOC( size * 16 * 4, 32 );
     rmesa->swtcl.RenderIndex = ~0;
     rmesa->swtcl.render_primitive = GL_TRIANGLES;
     rmesa->swtcl.hw_primitive = 0;
@@ -1320,13 +1220,4 @@ void radeonDestroySwtcl( GLcontext *ctx )
        rmesa->swtcl.verts = 0;
     }
  
-   if (rmesa->UbyteSecondaryColor.Ptr) {
-      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
-      rmesa->UbyteSecondaryColor.Ptr = 0;
-   }
-
-   if (rmesa->UbyteColor.Ptr) {
-      ALIGN_FREE(rmesa->UbyteColor.Ptr);
-      rmesa->UbyteColor.Ptr = 0;
-   }
  }