radeon/r200/r300: another big merge upheaval.

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_state.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c

index 3c7767768b2c5a96541f2e8e551c2fb7e1b148d2..a9ec1d51c7e8255970ed6c78fbdbbde2801572fa 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.8 2002/12/16 16:18:58 dawes Exp $ */
  /**************************************************************************
  
  Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
@@ -33,35 +32,38 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
  
-#include "glheader.h"
-#include "imports.h"
-#include "api_arrayelt.h"
-#include "enums.h"
-#include "light.h"
-#include "state.h"
-#include "context.h"
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/api_arrayelt.h"
+#include "main/enums.h"
+#include "main/light.h"
+#include "main/state.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
  
-#include "array_cache/acache.h"
+#include "vbo/vbo.h"
  #include "tnl/tnl.h"
  #include "tnl/t_pipeline.h"
  #include "swrast_setup/swrast_setup.h"
  
  #include "radeon_context.h"
+#include "common_cmdbuf.h"
  #include "radeon_ioctl.h"
  #include "radeon_state.h"
  #include "radeon_tcl.h"
  #include "radeon_tex.h"
  #include "radeon_swtcl.h"
-#include "radeon_vtxfmt.h"
  #include "drirenderbuffer.h"
  
+static void radeonUpdateSpecular( GLcontext *ctx );
+
  /* =============================================================
   * Alpha blending
   */
  
  static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
     GLubyte refByte;
  
@@ -105,7 +107,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
  static void radeonBlendEquationSeparate( GLcontext *ctx,
                                          GLenum modeRGB, GLenum modeA )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
     GLboolean fallback = GL_FALSE;
  
@@ -133,7 +135,8 @@ static void radeonBlendEquationSeparate( GLcontext *ctx,
     if ( !fallback ) {
        RADEON_STATECHANGE( rmesa, ctx );
        rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
-      if ( ctx->Color._LogicOpEnabled ) {
+      if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
+           && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
@@ -145,7 +148,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
                                      GLenum sfactorRGB, GLenum dfactorRGB,
                                      GLenum sfactorA, GLenum dfactorA )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
        ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
     GLboolean fallback = GL_FALSE;
@@ -255,7 +258,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
  
  static void radeonDepthFunc( GLcontext *ctx, GLenum func )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     RADEON_STATECHANGE( rmesa, ctx );
     rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
@@ -291,7 +294,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
  
  static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     RADEON_STATECHANGE( rmesa, ctx );
  
     if ( ctx->Depth.Mask ) {
@@ -303,16 +306,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
  
  static void radeonClearDepth( GLcontext *ctx, GLclampd d )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
                     RADEON_DEPTH_FORMAT_MASK);
  
     switch ( format ) {
     case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x0000ffff;
+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
        break;
     case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x00ffffff;
+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
        break;
     }
  }
@@ -325,13 +328,10 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
  
  static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     union { int i; float f; } c, d;
     GLchan col[4];
  
-   c.i = rmesa->hw.fog.cmd[FOG_C];
-   d.i = rmesa->hw.fog.cmd[FOG_D];
-
     switch (pname) {
     case GL_FOG_MODE:
        if (!ctx->Fog.Enabled)
@@ -341,179 +341,79 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
        switch (ctx->Fog.Mode) {
        case GL_LINEAR:
          rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
-        if (ctx->Fog.Start == ctx->Fog.End) {
-           c.f = 1.0F;
-           d.f = 1.0F;
-        }
-        else {
-           c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
-           d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
-        }
          break;
        case GL_EXP:
          rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
-        c.f = 0.0;
-        d.f = ctx->Fog.Density;
          break;
        case GL_EXP2:
          rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
-        c.f = 0.0;
-        d.f = -(ctx->Fog.Density * ctx->Fog.Density);
          break;
        default:
          return;
        }
-      break;
+   /* fallthrough */
     case GL_FOG_DENSITY:
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (!ctx->Fog.Enabled)
+        return;
+      c.i = rmesa->hw.fog.cmd[FOG_C];
+      d.i = rmesa->hw.fog.cmd[FOG_D];
        switch (ctx->Fog.Mode) {
        case GL_EXP:
          c.f = 0.0;
-        d.f = ctx->Fog.Density;
+        /* While this is the opposite sign from the DDK, it makes the fog test
+         * pass, and matches r200.
+         */
+        d.f = -ctx->Fog.Density;
          break;
        case GL_EXP2:
          c.f = 0.0;
          d.f = -(ctx->Fog.Density * ctx->Fog.Density);
          break;
-      default:
-        break;
-      }
-      break;
-   case GL_FOG_START:
-   case GL_FOG_END:
-      if (ctx->Fog.Mode == GL_LINEAR) {
+      case GL_LINEAR:
          if (ctx->Fog.Start == ctx->Fog.End) {
             c.f = 1.0F;
             d.f = 1.0F;
          } else {
             c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
-           d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+           /* While this is the opposite sign from the DDK, it makes the fog
+            * test pass, and matches r200.
+            */
+           d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
          }
+        break;
+      default:
+        break;
+      }
+      if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+        RADEON_STATECHANGE( rmesa, fog );
+        rmesa->hw.fog.cmd[FOG_C] = c.i;
+        rmesa->hw.fog.cmd[FOG_D] = d.i;
        }
        break;
     case GL_FOG_COLOR: 
        RADEON_STATECHANGE( rmesa, ctx );
        UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
-      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] =
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |=
          radeonPackColor( 4, col[0], col[1], col[2], 0 );
        break;
-   case GL_FOG_COORDINATE_SOURCE_EXT: 
-      /* What to do?
-       */
+   case GL_FOG_COORD_SRC:
+      radeonUpdateSpecular( ctx );
        break;
     default:
        return;
     }
-
-   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
-      RADEON_STATECHANGE( rmesa, fog );
-      rmesa->hw.fog.cmd[FOG_C] = c.i;
-      rmesa->hw.fog.cmd[FOG_D] = d.i;
-   }
-}
-
-
-/* =============================================================
- * Scissoring
- */
-
-
-static GLboolean intersect_rect( drm_clip_rect_t *out,
-                                drm_clip_rect_t *a,
-                                drm_clip_rect_t *b )
-{
-   *out = *a;
-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
-   if ( out->x1 >= out->x2 ) return GL_FALSE;
-   if ( out->y1 >= out->y2 ) return GL_FALSE;
-   return GL_TRUE;
-}
-
-
-void radeonRecalcScissorRects( radeonContextPtr rmesa )
-{
-   drm_clip_rect_t *out;
-   int i;
-
-   /* Grow cliprect store?
-    */
-   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-        rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
-        rmesa->state.scissor.numAllocedClipRects *= 2;
-      }
-
-      if (rmesa->state.scissor.pClipRects)
-        FREE(rmesa->state.scissor.pClipRects);
-
-      rmesa->state.scissor.pClipRects = 
-        MALLOC( rmesa->state.scissor.numAllocedClipRects * 
-                sizeof(drm_clip_rect_t) );
-
-      if ( rmesa->state.scissor.pClipRects == NULL ) {
-        rmesa->state.scissor.numAllocedClipRects = 0;
-        return;
-      }
-   }
-   
-   out = rmesa->state.scissor.pClipRects;
-   rmesa->state.scissor.numClipRects = 0;
-
-   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
-      if ( intersect_rect( out, 
-                          &rmesa->pClipRects[i], 
-                          &rmesa->state.scissor.rect ) ) {
-        rmesa->state.scissor.numClipRects++;
-        out++;
-      }
-   }
-}
-
-
-static void radeonUpdateScissor( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( rmesa->dri.drawable ) {
-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
-      int x = ctx->Scissor.X;
-      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
-      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
-      int h = dPriv->h - ctx->Scissor.Y - 1;
-
-      rmesa->state.scissor.rect.x1 = x + dPriv->x;
-      rmesa->state.scissor.rect.y1 = y + dPriv->y;
-      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
-      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-
-      radeonRecalcScissorRects( rmesa );
-   }
  }
  
-
-static void radeonScissor( GLcontext *ctx,
-                          GLint x, GLint y, GLsizei w, GLsizei h )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( ctx->Scissor.Enabled ) {
-      RADEON_FIREVERTICES( rmesa );    /* don't pipeline cliprect changes */
-      radeonUpdateScissor( ctx );
-   }
-
-}
-
-
  /* =============================================================
   * Culling
   */
  
  static void radeonCullFace( GLcontext *ctx, GLenum unused )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
     GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
  
@@ -550,7 +450,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
  
  static void radeonFrontFace( GLcontext *ctx, GLenum mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     RADEON_STATECHANGE( rmesa, set );
     rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
@@ -575,7 +475,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
   */
  static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     RADEON_STATECHANGE( rmesa, lin );
     RADEON_STATECHANGE( rmesa, set );
@@ -592,7 +492,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
  
  static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     RADEON_STATECHANGE( rmesa, lin );
     rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
@@ -607,8 +507,8 @@ static void radeonColorMask( GLcontext *ctx,
                              GLboolean r, GLboolean g,
                              GLboolean b, GLboolean a )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
                                   ctx->Color.ColorMask[RCOMP],
                                   ctx->Color.ColorMask[GCOMP],
                                   ctx->Color.ColorMask[BCOMP],
@@ -628,17 +528,18 @@ static void radeonColorMask( GLcontext *ctx,
  static void radeonPolygonOffset( GLcontext *ctx,
                                  GLfloat factor, GLfloat units )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat constant = units * rmesa->state.depth.scale;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
+   float_ui32_type factoru = { factor };
  
     RADEON_STATECHANGE( rmesa, zbs );
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = *(GLuint *)&factor;
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = *(GLuint *)&constant;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = factoru.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
  }
  
  static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint i;
     drm_radeon_stipple_t stipple;
  
@@ -650,27 +551,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
  
     /* TODO: push this into cmd mechanism
      */
-   RADEON_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
+   radeon_firevertices(&rmesa->radeon);
+   LOCK_HARDWARE( &rmesa->radeon );
  
     /* FIXME: Use window x,y offsets into stipple RAM.
      */
     stipple.mask = rmesa->state.stipple.mask;
-   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
                      &stipple, sizeof(drm_radeon_stipple_t) );
-   UNLOCK_HARDWARE( rmesa );
+   UNLOCK_HARDWARE( &rmesa->radeon );
  }
  
  static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
  
     /* Can't generally do unfilled via tcl, but some good special
      * cases work. 
      */
     TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
-   if (rmesa->TclFallback) {
+   if (rmesa->radeon.TclFallback) {
        radeonChooseRenderState( ctx );
        radeonChooseVertexState( ctx );
     }
@@ -690,8 +591,9 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
   */
  static void radeonUpdateSpecular( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   GLuint flag = 0;
  
     RADEON_STATECHANGE( rmesa, tcl );
  
@@ -730,13 +632,22 @@ static void radeonUpdateSpecular( GLcontext *ctx )
     }
  
     if (ctx->Fog.Enabled) {
-      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
        rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
-
-      /* Bizzare: have to leave lighting enabled to get fog.
-       */
-      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH) {
+        rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      /* Bizzare: have to leave lighting enabled to get fog. */
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      }
+      else {
+      /* cannot do tcl fog factor calculation with fog coord source
+       * (send precomputed factors). Cannot use precomputed fog
+       * factors together with tcl spec light (need tcl fallback) */
+        flag = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &
+           RADEON_TCL_COMPUTE_SPECULAR) != 0;
+      }
     }
+ 
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag);
  
     if (NEED_SECONDARY_COLOR(ctx)) {
        assert( (p & RADEON_SPECULAR_ENABLE) != 0 );
@@ -751,7 +662,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
  
     /* Update vertex/render formats
      */
-   if (rmesa->TclFallback) { 
+   if (rmesa->radeon.TclFallback) { 
        radeonChooseRenderState( ctx );
        radeonChooseVertexState( ctx );
     }
@@ -768,7 +679,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
   */
  static void update_global_ambient( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     float *fcmd = (float *)RADEON_DB_STATE( glt );
  
     /* Need to do more if both emmissive & ambient are PREMULT:
@@ -803,7 +714,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
  /*     fprintf(stderr, "%s\n", __FUNCTION__); */
  
     if (l->Enabled) {
-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
        float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
  
        COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );    
@@ -843,7 +754,7 @@ static void check_twoside_fallback( GLcontext *ctx )
  
  static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
  {
-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
        GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
  
        light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
@@ -907,7 +818,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
  
  void radeonUpdateMaterial( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
     GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
     GLuint mask = ~0;
@@ -972,7 +883,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
   */
  static void update_light( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     /* Have to check these, or have an automatic shortcircuit mechanism
      * to remove noop statechanges. (Or just do a better job on the
@@ -1037,7 +948,7 @@ static void update_light( GLcontext *ctx )
  static void radeonLightfv( GLcontext *ctx, GLenum light,
                            GLenum pname, const GLfloat *params )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLint p = light - GL_LIGHT0;
     struct gl_light *l = &ctx->Light.Light[p];
     GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
@@ -1158,7 +1069,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
  static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
                                 const GLfloat *param )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     switch (pname) {
        case GL_LIGHT_MODEL_AMBIENT: 
@@ -1182,7 +1093,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
  
          check_twoside_fallback( ctx );
  
-        if (rmesa->TclFallback) {
+        if (rmesa->radeon.TclFallback) {
             radeonChooseRenderState( ctx );
             radeonChooseVertexState( ctx );
          }
@@ -1199,7 +1110,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
  
  static void radeonShadeModel( GLcontext *ctx, GLenum mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
  
     s &= ~(RADEON_DIFFUSE_SHADE_MASK |
@@ -1238,7 +1149,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
  static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
  {
     GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
  
     RADEON_STATECHANGE( rmesa, ucp[p] );
@@ -1250,7 +1161,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
  
  static void radeonUpdateClipPlanes( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint p;
  
     for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
@@ -1275,9 +1186,9 @@ static void
  radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
                             GLint ref, GLuint mask )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint refmask = ((ctx->Stencil.Ref[0] << RADEON_STENCIL_REF_SHIFT) |
-                    (ctx->Stencil.ValueMask[0] << RADEON_STENCIL_MASK_SHIFT));
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
+                    ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
  
     RADEON_STATECHANGE( rmesa, ctx );
     RADEON_STATECHANGE( rmesa, msk );
@@ -1319,18 +1230,18 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
  static void
  radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     RADEON_STATECHANGE( rmesa, msk );
     rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
     rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
-      (ctx->Stencil.WriteMask[0] << RADEON_STENCIL_WRITEMASK_SHIFT);
+      ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT);
  }
  
  static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
                                       GLenum zfail, GLenum zpass )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
        and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
@@ -1343,7 +1254,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
     GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
     GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
     
-   if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_BROKEN_STENCIL) {
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
        tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
        tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
        tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
@@ -1449,12 +1360,12 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
  
  static void radeonClearStencil( GLcontext *ctx, GLint s )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
-   rmesa->state.stencil.clear = 
-      ((GLuint) ctx->Stencil.Clear |
+   rmesa->radeon.state.stencil.clear = 
+      ((GLuint) (ctx->Stencil.Clear & 0xff) |
         (0xff << RADEON_STENCIL_MASK_SHIFT) |
-       (ctx->Stencil.WriteMask[0] << RADEON_STENCIL_WRITEMASK_SHIFT));
+       ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
  }
  
  
@@ -1475,27 +1386,28 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
   */
  void radeonUpdateWindow( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-   GLfloat xoffset = (GLfloat)dPriv->x;
-   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
-   GLfloat sx = v[MAT_SX];
-   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   GLfloat sy = - v[MAT_SY];
-   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
-   GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
-   GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
-   RADEON_FIREVERTICES( rmesa );
+   float_ui32_type sx = { v[MAT_SX] };
+   float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+   float_ui32_type sy = { - v[MAT_SY] };
+   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
+   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
+
+   radeon_firevertices(&rmesa->radeon);
     RADEON_STATECHANGE( rmesa, vpt );
  
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&sx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&sy;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&sz;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&tz;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
  }
  
  
@@ -1506,7 +1418,6 @@ static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
      * setting below.  Could apply deltas to rescue pipelined viewport
      * values, or keep the originals hanging around.
      */
-   RADEON_FIREVERTICES( RADEON_CONTEXT(ctx) );
     radeonUpdateWindow( ctx );
  }
  
@@ -1518,24 +1429,28 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
  
  void radeonUpdateViewportOffset( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
     GLfloat xoffset = (GLfloat)dPriv->x;
     GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
-   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+   float_ui32_type tx;
+   float_ui32_type ty;
  
-   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
-       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
     {
        /* Note: this should also modify whatever data the context reset
         * code uses...
         */
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-      
+      RADEON_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
        /* update polygon stipple x/y screen offset */
        {
           GLuint stx, sty;
@@ -1545,8 +1460,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
                  RADEON_STIPPLE_Y_OFFSET_MASK);
  
           /* add magic offsets, then invert */
-         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
-         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
                       & RADEON_STIPPLE_COORD_MASK);
  
           m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
@@ -1570,20 +1485,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
  
  static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLubyte c[4];
     CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
     CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
     CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
     CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
-   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
                                                c[0], c[1], c[2], c[3] );
  }
  
  
  static void radeonRenderMode( GLcontext *ctx, GLenum mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
  }
  
@@ -1609,7 +1524,7 @@ static GLuint radeon_rop_tab[] = {
  
  static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint rop = (GLuint)opcode - GL_CLEAR;
  
     ASSERT( rop < 16 );
@@ -1619,66 +1534,36 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
  }
  
  
-/**
- * Set up the cliprects for either front or back-buffer drawing.
- */
-void radeonSetCliprects( radeonContextPtr rmesa )
-{
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
-   if (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0]
-       == BUFFER_BIT_BACK_LEFT) {
-      /* Can't ignore 2d windows if we are page flipping.
-       */
-      if ( dPriv->numBackClipRects == 0 || rmesa->doPageFlip ) {
-        rmesa->numClipRects = dPriv->numClipRects;
-        rmesa->pClipRects = dPriv->pClipRects;
-      }
-      else {
-        rmesa->numClipRects = dPriv->numBackClipRects;
-        rmesa->pClipRects = dPriv->pBackClipRects;
-      }
-   }
-   else {
-      /* front buffer (or none, or multiple buffers */
-      rmesa->numClipRects = dPriv->numClipRects;
-      rmesa->pClipRects = dPriv->pClipRects;
-   }
-
-   if (rmesa->state.scissor.enabled)
-      radeonRecalcScissorRects( rmesa );
-}
-
-
  /**
   * Called via glDrawBuffer.
   */
  static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
  
     if (RADEON_DEBUG & DEBUG_DRI)
        fprintf(stderr, "%s %s\n", __FUNCTION__,
               _mesa_lookup_enum_by_nr( mode ));
  
-   RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
+   radeon_firevertices(&rmesa->radeon);        /* don't pipeline cliprect changes */
  
-   /*
-    * _ColorDrawBufferMask is easier to cope with than <mode>.
-    * Check for software fallback, update cliprects.
-    */
-   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
-   case BUFFER_BIT_FRONT_LEFT:
-   case BUFFER_BIT_BACK_LEFT:
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+   case BUFFER_BACK_LEFT:
        FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
        break;
     default:
-      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
        FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
        return;
     }
  
-   radeonSetCliprects( rmesa );
+   radeonSetCliprects( &rmesa->radeon );
  
     /* We'll set the drawing engine's offset/pitch parameters later
      * when we update other state.
@@ -1697,7 +1582,7 @@ static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
  
  static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint p, flag;
  
     if ( RADEON_DEBUG & DEBUG_STATE )
@@ -1729,7 +1614,8 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
        }
-      if ( ctx->Color._LogicOpEnabled ) {
+      if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
+           && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
@@ -1791,10 +1677,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
        RADEON_STATECHANGE(rmesa, ctx );
        if ( state ) {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
        }
        break;
  
@@ -1809,8 +1695,6 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
          rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
        }
        radeonUpdateSpecular( ctx ); /* for PK_SPEC */
-      if (rmesa->TclFallback) 
-        radeonChooseVertexState( ctx );
        _mesa_allow_light_in_model( ctx, !state );
        break;
  
@@ -1869,7 +1753,8 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
  
     case GL_COLOR_LOGIC_OP:
        RADEON_STATECHANGE( rmesa, ctx );
-      if ( ctx->Color._LogicOpEnabled ) {
+      if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
+           && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
@@ -1886,44 +1771,29 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
        break;
  
     case GL_POLYGON_OFFSET_POINT:
-      if (rmesa->dri.drmMinor == 1) {
-        radeonChooseRenderState( ctx );
-      } 
-      else {
-        RADEON_STATECHANGE( rmesa, set );
-        if ( state ) {
-           rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
-        } else {
-           rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
-        }
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
        }
        break;
  
     case GL_POLYGON_OFFSET_LINE:
-      if (rmesa->dri.drmMinor == 1) {
-        radeonChooseRenderState( ctx );
-      } 
-      else {
-        RADEON_STATECHANGE( rmesa, set );
-        if ( state ) {
-           rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
-        } else {
-           rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
-        }
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
        }
        break;
  
     case GL_POLYGON_OFFSET_FILL:
-      if (rmesa->dri.drmMinor == 1) {
-        radeonChooseRenderState( ctx );
-      } 
-      else {
-        RADEON_STATECHANGE( rmesa, set );
-        if ( state ) {
-           rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
-        } else {
-           rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
-        }
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
        }
        break;
  
@@ -1957,13 +1827,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
     }
  
     case GL_SCISSOR_TEST:
-      RADEON_FIREVERTICES( rmesa );
-      rmesa->state.scissor.enabled = state;
+      radeon_firevertices(&rmesa->radeon);
+      rmesa->radeon.state.scissor.enabled = state;
        radeonUpdateScissor( ctx );
        break;
  
     case GL_STENCIL_TEST:
-      if ( rmesa->state.stencil.hwBuffer ) {
+      if ( rmesa->radeon.state.stencil.hwBuffer ) {
          RADEON_STATECHANGE( rmesa, ctx );
          if ( state ) {
             rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
@@ -1996,7 +1866,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
  
  static void radeonLightingSpaceChange( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLboolean tmp;
     RADEON_STATECHANGE( rmesa, tcl );
  
@@ -2025,9 +1895,87 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
   */
  
  
+void radeonUploadTexMatrix( r100ContextPtr rmesa,
+                           int unit, GLboolean swapcols )
+{
+/* Writes rmesa->tmpmat[unit] into the TEXMAT_0 + unit matrix state, reordered.
+   Here's how this works: on r100, only 3 tex coords can be submitted, so the
+   vector looks like this probably: (s t r|q 0) (not sure if the last coord
+   is hardwired to 0, could be 1 too). Interestingly, it actually looks like
+   texgen generates all 4 coords, at least tests with projtex indicated that.
+   So: if we need the q coord in the end (solely determined by the texture
+   target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row.
+   Additionally, if we don't have texgen but 4 tex coords submitted, we swap
+   column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord
+   will get submitted in the "wrong", i.e. 3rd, slot.
+   If an app submits 3 coords for 2d targets, we assume it is saving on vertex
+   size and using the texture matrix to swap the r and q coords around (ut2k3
+   does exactly that), so we don't need the 3rd / 4th column swap - still need
+   the 3rd / 4th row swap of course. This will potentially break for apps which
+   use TexCoord3x just for fun. Additionally, it will never work if an app uses
+   an "advanced" texture matrix and relies on all 4 texcoord inputs to generate
+   the maximum needed 3. This seems impossible to do with hw tcl on r100, and
+   incredibly hard to detect so we can't just fall back in such a case. Assume
+   it never happens... - rs */
+
+   int idx = TEXMAT_0 + unit; /* matrix slot used for this texture unit */
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
+   int i;
+   struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit]; /* NOTE(review): whole struct copied by value; only _ReallyEnabled is read */
+   GLfloat *src = rmesa->tmpmat[unit].m; /* update_texturematrix() fills tmpmat[unit] before calling here */
+
+   rmesa->TexMatColSwap &= ~(1 << unit); /* clear this unit's bit; set again below only if columns are swapped */
+   if ((tUnit._ReallyEnabled & (TEXTURE_3D_BIT | TEXTURE_CUBE_BIT)) == 0) { /* neither 3D nor cube bit set */
+      if (swapcols) { /* caller wants the 3rd / 4th column swap on top of the row swap */
+        rmesa->TexMatColSwap |= 1 << unit;
+        /* attention: some elements are swapped twice (row swap and column swap)! */
+        *dest++ = src[0];
+        *dest++ = src[4];
+        *dest++ = src[12];
+        *dest++ = src[8];
+        *dest++ = src[1];
+        *dest++ = src[5];
+        *dest++ = src[13];
+        *dest++ = src[9];
+        *dest++ = src[2]; /* NOTE(review): the else branch writes src[3], src[7] first in this group -- confirm intended */
+        *dest++ = src[6];
+        *dest++ = src[15];
+        *dest++ = src[11];
+        /* those last 4 are probably never used */
+        *dest++ = src[3];
+        *dest++ = src[7];
+        *dest++ = src[14];
+        *dest++ = src[10];
+      }
+      else {
+        for (i = 0; i < 2; i++) { /* groups for i = 0, 1 in order */
+           *dest++ = src[i];
+           *dest++ = src[i+4];
+           *dest++ = src[i+8];
+           *dest++ = src[i+12];
+        }
+        for (i = 3; i >= 2; i--) { /* then i = 3 before i = 2: the 3rd / 4th row swap */
+           *dest++ = src[i];
+           *dest++ = src[i+4];
+           *dest++ = src[i+8];
+           *dest++ = src[i+12];
+        }
+      }
+   }
+   else { /* 3D or cube bit set: no swapping, groups for i = 0..3 in order */
+      for (i = 0 ; i < 4 ; i++) {
+        *dest++ = src[i];
+        *dest++ = src[i+4];
+        *dest++ = src[i+8];
+        *dest++ = src[i+12];
+      }
+   }
  
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); /* NOTE(review): presumably commits the new contents if they changed -- confirm against the macro */
+}
  
-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+
+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
  {
     float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
     int i;
@@ -2043,7 +1991,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
     RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
  }
  
-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
  {
     float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
     memcpy(dest, src, 16*sizeof(float));
@@ -2053,63 +2001,64 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
  
  static void update_texturematrix( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
     GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
     GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
     int unit;
-
-   rmesa->TexMatEnabled = 0;
-
-   for (unit = 0 ; unit < 2; unit++) {
-      if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
-      }
-      else if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
-        GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-        
-        rmesa->TexMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE|
-                                 RADEON_TEXMAT_0_ENABLE) << unit;
-
-        if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
-           /* Need to preconcatenate any active texgen 
-            * obj/eyeplane matrices:
-            */
-           _math_matrix_mul_matrix( &rmesa->tmpmat,
+   GLuint texMatEnabled = 0;
+   rmesa->NeedTexMatrix = 0;
+   rmesa->TexMatColSwap = 0;
+
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+        GLboolean needMatrix = GL_FALSE;
+        if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+           needMatrix = GL_TRUE;
+           texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE |
+                             RADEON_TEXMAT_0_ENABLE) << unit;
+
+           if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+              /* Need to preconcatenate any active texgen
+               * obj/eyeplane matrices:
+               */
+              _math_matrix_mul_matrix( &rmesa->tmpmat[unit],
                                      ctx->TextureMatrixStack[unit].Top,
                                      &rmesa->TexGenMatrix[unit] );
-           upload_matrix( rmesa, rmesa->tmpmat.m, TEXMAT_0+unit );
+           }
+           else {
+              _math_matrix_copy( &rmesa->tmpmat[unit],
+                 ctx->TextureMatrixStack[unit].Top );
+           }
          }
-        else {
-           rmesa->TexMatEnabled |= 
-              (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-           upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, 
-                          TEXMAT_0+unit );
+        else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+           _math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] );
+           needMatrix = GL_TRUE;
+        }
+        if (needMatrix) {
+           rmesa->NeedTexMatrix |= 1 << unit;
+           radeonUploadTexMatrix( rmesa, unit,
+                       !ctx->Texture.Unit[unit].TexGenEnabled );
          }
-      }
-      else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
-        upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
-                       TEXMAT_0+unit );
        }
     }
  
+   tpc = (texMatEnabled | rmesa->TexGenEnabled);
  
-   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
-
-   vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
-          (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
+   /* TCL_TEX_COMPUTED_x is TCL_TEX_INPUT_x | 0x8 */
+   vs &= ~((RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+          (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+          (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
  
-   if (tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE)
-      vs |= RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
-   else
-      vs |= RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
-
-   if (tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE)
-      vs |= RADEON_TCL_TEX_COMPUTED_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
-   else
-      vs |= RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+   vs |= (((tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE) <<
+        (RADEON_TCL_TEX_0_OUTPUT_SHIFT + 3)) |
+      ((tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE) <<
+        (RADEON_TCL_TEX_1_OUTPUT_SHIFT + 2)) |
+      ((tpc & RADEON_TEXGEN_TEXMAT_2_ENABLE) <<
+        (RADEON_TCL_TEX_2_OUTPUT_SHIFT + 1)));
  
     if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
         vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
-      
+
        RADEON_STATECHANGE(rmesa, tcl);
        rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
        rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
@@ -2124,43 +2073,32 @@ static void update_texturematrix( GLcontext *ctx )
  void
  radeonUpdateDrawBuffer(GLcontext *ctx)
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     struct gl_framebuffer *fb = ctx->DrawBuffer;
-   driRenderbuffer *drb;
-
-   if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
-      /* draw to front */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-   }
-   else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
-      /* draw to back */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-   }
-   else {
-      /* drawing to multiple buffers, or none */
-      return;
+   struct radeon_renderbuffer *rrb;
+
+   if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+     /* draw to front */
+     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+     /* draw to back */
+     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+   } else {
+     /* drawing to multiple buffers, or none */
+     return;
     }
  
-   assert(drb);
-   assert(drb->flippedPitch);
+   assert(rrb);
+   assert(rrb->pitch);
  
     RADEON_STATECHANGE( rmesa, ctx );
-
-   /* Note: we used the (possibly) page-flipped values */
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
-     = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
-       & RADEON_COLOROFFSET_MASK);
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
-   if (rmesa->sarea->tiling_enabled) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
-   }
  }
  
  
  void radeonValidateState( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint new_state = rmesa->NewGLState;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint new_state = rmesa->radeon.NewGLState;
  
     if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
       radeonUpdateDrawBuffer(ctx);
@@ -2168,7 +2106,7 @@ void radeonValidateState( GLcontext *ctx )
  
     if (new_state & _NEW_TEXTURE) {
        radeonUpdateTextureState( ctx );
-      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
     }
  
     /* Need an event driven matrix update?
@@ -2188,7 +2126,7 @@ void radeonValidateState( GLcontext *ctx )
      */
     if (new_state & _NEW_TEXTURE_MATRIX) {
        update_texturematrix( ctx );
-   }      
+   }
  
     if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
        update_light( ctx );
@@ -2202,7 +2140,7 @@ void radeonValidateState( GLcontext *ctx )
     }
  
  
-   rmesa->NewGLState = 0;
+   rmesa->radeon.NewGLState = 0;
  }
  
  
@@ -2210,11 +2148,10 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
  {
     _swrast_InvalidateState( ctx, new_state );
     _swsetup_InvalidateState( ctx, new_state );
-   _ac_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
     _tnl_InvalidateState( ctx, new_state );
     _ae_invalidate_state( ctx, new_state );
-   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
-   radeonVtxfmtInvalidate( ctx );
+   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
  }
  
  
@@ -2238,15 +2175,15 @@ static GLboolean check_material( GLcontext *ctx )
  
  static void radeonWrapRunPipeline( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLboolean has_material;
  
     if (0)
-      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
  
     /* Validate state:
      */
-   if (rmesa->NewGLState)
+   if (rmesa->radeon.NewGLState)
        radeonValidateState( ctx );
  
     has_material = (ctx->Light.Enabled && check_material( ctx ));
@@ -2301,10 +2238,7 @@ void radeonInitStateFuncs( GLcontext *ctx )
     ctx->Driver.LineWidth                = radeonLineWidth;
     ctx->Driver.LogicOpcode             = radeonLogicOpCode;
     ctx->Driver.PolygonMode             = radeonPolygonMode;
-
-   if (RADEON_CONTEXT(ctx)->dri.drmMinor > 1)
-      ctx->Driver.PolygonOffset                = radeonPolygonOffset;
-
+   ctx->Driver.PolygonOffset           = radeonPolygonOffset;
     ctx->Driver.PolygonStipple          = radeonPolygonStipple;
     ctx->Driver.RenderMode              = radeonRenderMode;
     ctx->Driver.Scissor                 = radeonScissor;