r200/r300: get up to speed on renamed files

[mesa.git] / src / mesa / drivers / dri / r200 / r200_state.c
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c

index 36dd9ca8870314ad161423bec3662efbb484b5d9..84b0d90c69fac171237902f050c235f4605de1d9 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -1,4 +1,3 @@
-/* $XFree86$ */
  /**************************************************************************
  
  Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
@@ -34,27 +33,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
  
-#include "glheader.h"
-#include "imports.h"
-#include "api_arrayelt.h"
-#include "enums.h"
-#include "colormac.h"
-#include "light.h"
-#include "buffers.h"
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/api_arrayelt.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/light.h"
+#include "main/framebuffer.h"
  
  #include "swrast/swrast.h"
-#include "array_cache/acache.h"
+#include "vbo/vbo.h"
  #include "tnl/tnl.h"
  #include "tnl/t_pipeline.h"
  #include "swrast_setup/swrast_setup.h"
  
+#include "radeon_buffer.h"
+#include "radeon_cs.h"
+#include "radeon_mipmap_tree.h"
  #include "r200_context.h"
  #include "r200_ioctl.h"
  #include "r200_state.h"
  #include "r200_tcl.h"
  #include "r200_tex.h"
  #include "r200_swtcl.h"
-#include "r200_vtxfmt.h"
+#include "r200_vertprog.h"
  
  #include "drirenderbuffer.h"
  
@@ -115,8 +117,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
     CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
     CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
     CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
-   if (rmesa->r200Screen->drmSupportsBlendColor)
-      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
  }
  
  /**
@@ -214,8 +216,8 @@ static void r200_set_blend_state( GLcontext * ctx )
  
     R200_STATECHANGE( rmesa, ctx );
  
-   if (rmesa->r200Screen->drmSupportsBlendColor) {
-      if (ctx->Color._LogicOpEnabled) {
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+      if (ctx->Color.ColorLogicOpEnabled) {
           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
           rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
           rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
@@ -231,7 +233,7 @@ static void r200_set_blend_state( GLcontext * ctx )
        }
     }
     else {
-      if (ctx->Color._LogicOpEnabled) {
+      if (ctx->Color.ColorLogicOpEnabled) {
           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
           rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
           return;
@@ -279,7 +281,7 @@ static void r200_set_blend_state( GLcontext * ctx )
        return;
     }
  
-   if (!rmesa->r200Screen->drmSupportsBlendColor) {
+   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
        rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
        return;
     }
@@ -384,10 +386,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
  
     switch ( format ) {
     case R200_DEPTH_FORMAT_16BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x0000ffff;
+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
        break;
     case R200_DEPTH_FORMAT_24BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x00ffffff;
+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
        break;
     }
  }
@@ -481,24 +483,21 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
     case GL_FOG_COLOR: 
        R200_STATECHANGE( rmesa, ctx );
        UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
-      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
+      i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
        rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
        rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
        break;
     case GL_FOG_COORD_SRC: {
-      GLuint fmt_0 = rmesa->hw.vtx.cmd[VTX_VTXFMT_0];
        GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0];
        GLuint fog   = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR];
  
        fog &= ~R200_FOG_USE_MASK;
-      if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD ) {
+      if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD || ctx->VertexProgram.Enabled) {
          fog   |= R200_FOG_USE_VTX_FOG;
-        fmt_0 |= R200_VTX_DISCRETE_FOG;
          out_0 |= R200_VTX_DISCRETE_FOG;
        }
        else {
          fog   |=  R200_FOG_USE_SPEC_ALPHA;
-        fmt_0 &= ~R200_VTX_DISCRETE_FOG;
          out_0 &= ~R200_VTX_DISCRETE_FOG;
        }
  
@@ -507,10 +506,8 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
          rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog;
        }
  
-      if ( (fmt_0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0])
-          || (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0])) {
+      if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
          R200_STATECHANGE( rmesa, vtx );
-        rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
          rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;     
        }
  
@@ -527,102 +524,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
     }
  }
  
-
-/* =============================================================
- * Scissoring
- */
-
-
-static GLboolean intersect_rect( drm_clip_rect_t *out,
-                                drm_clip_rect_t *a,
-                                drm_clip_rect_t *b )
-{
-   *out = *a;
-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
-   if ( out->x1 >= out->x2 ) return GL_FALSE;
-   if ( out->y1 >= out->y2 ) return GL_FALSE;
-   return GL_TRUE;
-}
-
-
-void r200RecalcScissorRects( r200ContextPtr rmesa )
-{
-   drm_clip_rect_t *out;
-   int i;
-
-   /* Grow cliprect store?
-    */
-   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-        rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
-        rmesa->state.scissor.numAllocedClipRects *= 2;
-      }
-
-      if (rmesa->state.scissor.pClipRects)
-        FREE(rmesa->state.scissor.pClipRects);
-
-      rmesa->state.scissor.pClipRects = 
-        MALLOC( rmesa->state.scissor.numAllocedClipRects * 
-                sizeof(drm_clip_rect_t) );
-
-      if ( rmesa->state.scissor.pClipRects == NULL ) {
-        rmesa->state.scissor.numAllocedClipRects = 0;
-        return;
-      }
-   }
-   
-   out = rmesa->state.scissor.pClipRects;
-   rmesa->state.scissor.numClipRects = 0;
-
-   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
-      if ( intersect_rect( out, 
-                          &rmesa->pClipRects[i], 
-                          &rmesa->state.scissor.rect ) ) {
-        rmesa->state.scissor.numClipRects++;
-        out++;
-      }
-   }
-}
-
-
-static void r200UpdateScissor( GLcontext *ctx )
-{
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
-   if ( rmesa->dri.drawable ) {
-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
-      int x = ctx->Scissor.X;
-      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
-      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
-      int h = dPriv->h - ctx->Scissor.Y - 1;
-
-      rmesa->state.scissor.rect.x1 = x + dPriv->x;
-      rmesa->state.scissor.rect.y1 = y + dPriv->y;
-      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
-      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-
-      r200RecalcScissorRects( rmesa );
-   }
-}
-
-
-static void r200Scissor( GLcontext *ctx,
-                          GLint x, GLint y, GLsizei w, GLsizei h )
-{
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-
-   if ( ctx->Scissor.Enabled ) {
-      R200_FIREVERTICES( rmesa );      /* don't pipeline cliprect changes */
-      r200UpdateScissor( ctx );
-   }
-
-}
-
-
  /* =============================================================
   * Culling
   */
@@ -690,7 +591,81 @@ static void r200FrontFace( GLcontext *ctx, GLenum mode )
   */
  static void r200PointSize( GLcontext *ctx, GLfloat size )
  {
-   if (0) fprintf(stderr, "%s: %f\n", __FUNCTION__, size );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
+
+   R200_STATECHANGE( rmesa, cst );
+   R200_STATECHANGE( rmesa, ptp );
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff;
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= ((GLuint)(ctx->Point.Size * 16.0));
+/* this is the size param of the point size calculation (point size reg value
+   is not used when calculation is active). */
+   fcmd[PTP_VPORT_SCALE_PTSIZE] = ctx->Point.Size;
+}
+
+static void r200PointParameter( GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
+
+   switch (pname) {
+   case GL_POINT_SIZE_MIN:
+   /* Can clamp both in tcl and setup - just set both (as does fglrx) */
+      R200_STATECHANGE( rmesa, lin );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= 0xffff;
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Point.MinSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MIN] = ctx->Point.MinSize;
+      break;
+   case GL_POINT_SIZE_MAX:
+      R200_STATECHANGE( rmesa, cst );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= 0xffff;
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= (GLuint)(ctx->Point.MaxSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MAX] = ctx->Point.MaxSize;
+      break;
+   case GL_POINT_DISTANCE_ATTENUATION:
+      R200_STATECHANGE( rmesa, vtx );
+      R200_STATECHANGE( rmesa, spr );
+      R200_STATECHANGE( rmesa, ptp );
+      GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
+      rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &=
+        ~(R200_PS_MULT_MASK | R200_PS_LIN_ATT_ZERO | R200_PS_SE_SEL_STATE);
+      /* can't rely on ctx->Point._Attenuated here and test for NEW_POINT in
+        r200ValidateState looks like overkill */
+      if (ctx->Point.Params[0] != 1.0 ||
+         ctx->Point.Params[1] != 0.0 ||
+         ctx->Point.Params[2] != 0.0 ||
+         (ctx->VertexProgram.Enabled && ctx->VertexProgram.PointSizeEnabled)) {
+        /* all we care for vp would be the ps_se_sel_state setting */
+        fcmd[PTP_ATT_CONST_QUAD] = ctx->Point.Params[2];
+        fcmd[PTP_ATT_CONST_LIN] = ctx->Point.Params[1];
+        fcmd[PTP_ATT_CONST_CON] = ctx->Point.Params[0];
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_MULT_ATTENCONST;
+        if (ctx->Point.Params[1] == 0.0)
+           rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_LIN_ATT_ZERO;
+/* FIXME: setting this here doesn't look quite ok - we only want to do
+          that if we're actually drawing points probably */
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_PT_SIZE;
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= R200_VTX_POINT_SIZE;
+      }
+      else {
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+           R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_PT_SIZE;
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~R200_VTX_POINT_SIZE;
+      }
+      break;
+   case GL_POINT_FADE_THRESHOLD_SIZE:
+      /* don't support multisampling, so doesn't matter. */
+      break;
+   /* can't do these but don't need them.
+   case GL_POINT_SPRITE_R_MODE_NV:
+   case GL_POINT_SPRITE_COORD_ORIGIN: */
+   default:
+      fprintf(stderr, "bad pname parameter in r200PointParameter\n");
+      return;
+   }
  }
  
  /* =============================================================
@@ -704,9 +679,11 @@ static void r200LineWidth( GLcontext *ctx, GLfloat widthf )
     R200_STATECHANGE( rmesa, set );
  
     /* Line width is stored in U6.4 format.
+    * Same min/max limits for AA, non-AA lines.
      */
     rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff;
-   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Line._Width * 16.0);
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)
+      (CLAMP(widthf, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth) * 16.0);
  
     if ( widthf > 1.0 ) {
        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_WIDELINE_ENABLE;
@@ -733,7 +710,7 @@ static void r200ColorMask( GLcontext *ctx,
                            GLboolean b, GLboolean a )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
                                 ctx->Color.ColorMask[RCOMP],
                                 ctx->Color.ColorMask[GCOMP],
                                 ctx->Color.ColorMask[BCOMP],
@@ -764,16 +741,17 @@ static void r200PolygonOffset( GLcontext *ctx,
                                GLfloat factor, GLfloat units )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLfloat constant = units * rmesa->state.depth.scale;
+   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
+   float_ui32_type factoru = { factor };
  
  /*    factor *= 2; */
  /*    constant *= 2; */
-   
+
  /*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
  
     R200_STATECHANGE( rmesa, zbs );
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = *(GLuint *)&factor;
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = *(GLuint *)&constant;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = factoru.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
  }
  
  static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
@@ -790,15 +768,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
  
     /* TODO: push this into cmd mechanism
      */
-   R200_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
+   radeon_firevertices(&rmesa->radeon);
+   LOCK_HARDWARE( &rmesa->radeon );
  
     /* FIXME: Use window x,y offsets into stipple RAM.
      */
     stipple.mask = rmesa->state.stipple.mask;
-   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
                      &stipple, sizeof(stipple) );
-   UNLOCK_HARDWARE( rmesa );
+   UNLOCK_HARDWARE( &rmesa->radeon );
  }
  
  static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
@@ -810,7 +788,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
      * cases work. 
      */
     TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
-   if (rmesa->TclFallback) {
+   if (rmesa->radeon.TclFallback) {
        r200ChooseRenderState( ctx );
        r200ChooseVertexState( ctx );
     }
@@ -831,7 +809,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
  static void r200UpdateSpecular( GLcontext *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
  
     R200_STATECHANGE( rmesa, tcl );
     R200_STATECHANGE( rmesa, vtx );
@@ -887,7 +865,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
  
     /* Update vertex/render formats
      */
-   if (rmesa->TclFallback) { 
+   if (rmesa->radeon.TclFallback) { 
        r200ChooseRenderState( ctx );
        r200ChooseVertexState( ctx );
     }
@@ -1319,8 +1297,25 @@ static void r200Lightfv( GLcontext *ctx, GLenum light,
     }
  }
  
-                 
-
+static void r200UpdateLocalViewer ( GLcontext *ctx )
+{
+/* It looks like for the texgen modes GL_SPHERE_MAP, GL_NORMAL_MAP and
+   GL_REFLECTION_MAP we need R200_LOCAL_VIEWER set (fglrx does exactly that
+   for these and only these modes). This means specular highlights may turn out
+   wrong in some cases when lighting is enabled but GL_LIGHT_MODEL_LOCAL_VIEWER
+   is not set, though it seems to happen rarely and the effect seems quite
+   subtle. May need TCL fallback to fix it completely, though I'm not sure
+   how you'd identify the cases where the specular highlights indeed will
+   be wrong. Don't know if fglrx does something special in that case.
+*/
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, tcl );
+   if (ctx->Light.Model.LocalViewer ||
+       ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+}
  
  static void r200LightModelfv( GLcontext *ctx, GLenum pname,
                                 const GLfloat *param )
@@ -1333,11 +1328,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
          break;
  
        case GL_LIGHT_MODEL_LOCAL_VIEWER:
-        R200_STATECHANGE( rmesa, tcl );
-        if (ctx->Light.Model.LocalViewer)
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
-        else
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+        r200UpdateLocalViewer( ctx );
           break;
  
        case GL_LIGHT_MODEL_TWO_SIDE:
@@ -1346,7 +1337,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
             rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
          else
             rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
-        if (rmesa->TclFallback) {
+        if (rmesa->radeon.TclFallback) {
             r200ChooseRenderState( ctx );
             r200ChooseVertexState( ctx );
          }
@@ -1369,20 +1360,23 @@ static void r200ShadeModel( GLcontext *ctx, GLenum mode )
     s &= ~(R200_DIFFUSE_SHADE_MASK |
           R200_ALPHA_SHADE_MASK |
           R200_SPECULAR_SHADE_MASK |
-         R200_FOG_SHADE_MASK);
+         R200_FOG_SHADE_MASK |
+         R200_DISC_FOG_SHADE_MASK);
  
     switch ( mode ) {
     case GL_FLAT:
        s |= (R200_DIFFUSE_SHADE_FLAT |
             R200_ALPHA_SHADE_FLAT |
             R200_SPECULAR_SHADE_FLAT |
-           R200_FOG_SHADE_FLAT);
+           R200_FOG_SHADE_FLAT |
+           R200_DISC_FOG_SHADE_FLAT);
        break;
     case GL_SMOOTH:
        s |= (R200_DIFFUSE_SHADE_GOURAUD |
             R200_ALPHA_SHADE_GOURAUD |
             R200_SPECULAR_SHADE_GOURAUD |
-           R200_FOG_SHADE_GOURAUD);
+           R200_FOG_SHADE_GOURAUD |
+           R200_DISC_FOG_SHADE_GOURAUD);
        break;
     default:
        return;
@@ -1435,12 +1429,13 @@ static void r200UpdateClipPlanes( GLcontext *ctx )
   * Stencil
   */
  
-static void r200StencilFunc( GLcontext *ctx, GLenum func,
-                              GLint ref, GLuint mask )
+static void
+r200StencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                         GLint ref, GLuint mask )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint refmask = ((ctx->Stencil.Ref[0] << R200_STENCIL_REF_SHIFT) |
-                    (ctx->Stencil.ValueMask[0] << R200_STENCIL_MASK_SHIFT));
+   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R200_STENCIL_REF_SHIFT) |
+                    ((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT));
  
     R200_STATECHANGE( rmesa, ctx );
     R200_STATECHANGE( rmesa, msk );
@@ -1479,18 +1474,20 @@ static void r200StencilFunc( GLcontext *ctx, GLenum func,
     rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
  }
  
-static void r200StencilMask( GLcontext *ctx, GLuint mask )
+static void
+r200StencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
  
     R200_STATECHANGE( rmesa, msk );
     rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~R200_STENCIL_WRITE_MASK;
     rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
-      (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+      ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT);
  }
  
-static void r200StencilOp( GLcontext *ctx, GLenum fail,
-                            GLenum zfail, GLenum zpass )
+static void
+r200StencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                       GLenum zfail, GLenum zpass )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
  
@@ -1585,10 +1582,10 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
  
-   rmesa->state.stencil.clear = 
-      ((GLuint) ctx->Stencil.Clear |
+   rmesa->radeon.state.stencil.clear = 
+      ((GLuint) (ctx->Stencil.Clear & 0xff) |
         (0xff << R200_STENCIL_MASK_SHIFT) |
-       (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT));
+       ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
  }
  
  
@@ -1602,30 +1599,35 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
  #define SUBPIXEL_X 0.125
  #define SUBPIXEL_Y 0.125
  
+
+/**
+ * Called when window size or position changes or viewport or depth range
+ * state is changed.  We update the hardware viewport state here.
+ */
  void r200UpdateWindow( GLcontext *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-   GLfloat xoffset = (GLfloat)dPriv->x;
-   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
-   GLfloat sx = v[MAT_SX];
-   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   GLfloat sy = - v[MAT_SY];
-   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
-   GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
-   GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
+   float_ui32_type sx = { v[MAT_SX] };
+   float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+   float_ui32_type sy = { - v[MAT_SY] };
+   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
+   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
  
-   R200_FIREVERTICES( rmesa );
+   radeon_firevertices(&rmesa->radeon);
     R200_STATECHANGE( rmesa, vpt );
  
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&sx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&sy;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&sz;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&tz;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
  }
  
  
@@ -1633,13 +1635,10 @@ void r200UpdateWindow( GLcontext *ctx )
  static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
                             GLsizei width, GLsizei height )
  {
-   /* update size of Mesa/software ancillary buffers */
-   _mesa_ResizeBuffersMESA();
     /* Don't pipeline viewport changes, conflict with window offset
      * setting below.  Could apply deltas to rescue pipelined viewport
      * values, or keep the originals hanging around.
      */
-   R200_FIREVERTICES( R200_CONTEXT(ctx) );
     r200UpdateWindow( ctx );
  }
  
@@ -1652,23 +1651,27 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
  void r200UpdateViewportOffset( GLcontext *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
     GLfloat xoffset = (GLfloat)dPriv->x;
     GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
-   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+   float_ui32_type tx;
+   float_ui32_type ty;
  
-   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
-       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
     {
        /* Note: this should also modify whatever data the context reset
         * code uses...
         */
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-      
+      R200_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
        /* update polygon stipple x/y screen offset */
        {
           GLuint stx, sty;
@@ -1678,8 +1681,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
                  R200_STIPPLE_Y_OFFSET_MASK);
  
           /* add magic offsets, then invert */
-         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
-         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
                       & R200_STIPPLE_COORD_MASK);
  
           m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
@@ -1692,7 +1695,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
        }
     }
  
-   r200UpdateScissor( ctx );
+   radeonUpdateScissor( ctx );
  }
  
  
@@ -1709,7 +1712,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
     CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
     CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
     CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
-   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
                                               color[0], color[1],
                                               color[2], color[3] );
  }
@@ -1753,37 +1756,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
  }
  
  
-void r200SetCliprects( r200ContextPtr rmesa, GLenum mode )
-{
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
-   switch ( mode ) {
-   case GL_FRONT_LEFT:
-      rmesa->numClipRects = dPriv->numClipRects;
-      rmesa->pClipRects = dPriv->pClipRects;
-      break;
-   case GL_BACK_LEFT:
-      /* Can't ignore 2d windows if we are page flipping.
-       */
-      if ( dPriv->numBackClipRects == 0 || rmesa->doPageFlip ) {
-        rmesa->numClipRects = dPriv->numClipRects;
-        rmesa->pClipRects = dPriv->pClipRects;
-      }
-      else {
-        rmesa->numClipRects = dPriv->numBackClipRects;
-        rmesa->pClipRects = dPriv->pBackClipRects;
-      }
-      break;
-   default:
-      fprintf(stderr, "bad mode in r200SetCliprects\n");
-      return;
-   }
-
-   if (rmesa->state.scissor.enabled)
-      r200RecalcScissorRects( rmesa );
-}
-
-
  static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -1792,46 +1764,30 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
        fprintf(stderr, "%s %s\n", __FUNCTION__,
               _mesa_lookup_enum_by_nr( mode ));
  
-   R200_FIREVERTICES(rmesa);   /* don't pipeline cliprect changes */
+   radeon_firevertices(&rmesa->radeon);        /* don't pipeline cliprect changes */
  
-   /*
-    * _ColorDrawBufferMask is easier to cope with than <mode>.
-    * Check for software fallback, update cliprects.
-    */
-   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
-   case BUFFER_BIT_FRONT_LEFT:
-      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_FRONT_LEFT );
-      break;
-   case BUFFER_BIT_BACK_LEFT:
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+   case BUFFER_BACK_LEFT:
        FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_BACK_LEFT );
        break;
     default:
-      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
        FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
        return;
     }
  
-#if 000
-   /* We want to update the s/w rast state too so that r200SetBuffer()
-    * gets called.
-    */
-   _swrast_DrawBuffer(ctx, mode);
+   radeonSetCliprects( &rmesa->radeon );
+   radeonUpdatePageFlipping(&rmesa->radeon);
  
-   R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
-                                              rmesa->r200Screen->fbLocation)
-                                             & R200_COLOROFFSET_MASK);
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
-   if (rmesa->sarea->tiling_enabled) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-   }
-#else
     /* We'll set the drawing engine's offset/pitch parameters later
      * when we update other state.
      */
-#endif
  }
  
  
@@ -1915,10 +1871,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
        R200_STATECHANGE(rmesa, ctx );
        if ( state ) {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
        } else {
          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
        }
        break;
  
@@ -1933,7 +1889,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
          rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
        }
        r200UpdateSpecular( ctx ); /* for PK_SPEC */
-      if (rmesa->TclFallback) 
+      if (rmesa->radeon.TclFallback) 
          r200ChooseVertexState( ctx );
        _mesa_allow_light_in_model( ctx, !state );
        break;
@@ -1969,6 +1925,8 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
  
     case GL_LIGHTING:
        r200UpdateSpecular(ctx);
+      /* for reflection map fixup - might set recheck_texgen for all units too */
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE;
        break;
  
     case GL_LINE_SMOOTH:
@@ -1998,10 +1956,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
        }
        break;
  
-      /* Pointsize registers on r200 don't seem to do anything.  Maybe
-       * have to pass pointsizes as vertex parameters?  In any case,
-       * setting pointmin == pointsizemax == 1.0, and doing nothing
-       * for aa is enough to satisfy conform.
+      /* Pointsize registers on r200 only work for point sprites, and point smooth
+       * doesn't work for point sprites (and isn't needed for 1.0 sized aa points).
+       * In any case, setting pointmin == pointsizemax == 1.0 for aa points
+       * is enough to satisfy conform.
         */
     case GL_POINT_SMOOTH:
        break;
@@ -2029,6 +1987,19 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
        break;
  #endif
  
+   case GL_POINT_SPRITE_ARB:
+      R200_STATECHANGE( rmesa, spr );
+      if ( state ) {
+        int i;
+        for (i = 0; i < 6; i++) {
+           rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+               ctx->Point.CoordReplace[i] << (R200_PS_GEN_TEX_0_SHIFT + i);
+        }
+      } else {
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~R200_PS_GEN_TEX_MASK;
+      }
+      break;
+
     case GL_POLYGON_OFFSET_FILL:
        R200_STATECHANGE( rmesa, set );
        if ( state ) {
@@ -2068,13 +2039,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
     }
  
     case GL_SCISSOR_TEST:
-      R200_FIREVERTICES( rmesa );
-      rmesa->state.scissor.enabled = state;
-      r200UpdateScissor( ctx );
+      radeon_firevertices(&rmesa->radeon);
+      rmesa->radeon.state.scissor.enabled = state;
+      radeonUpdateScissor( ctx );
        break;
  
     case GL_STENCIL_TEST:
-      if ( rmesa->state.stencil.hwBuffer ) {
+      if ( rmesa->radeon.state.stencil.hwBuffer ) {
          R200_STATECHANGE( rmesa, ctx );
          if ( state ) {
             rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
@@ -2100,9 +2071,104 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
        break;
  
     case GL_VERTEX_PROGRAM_ARB:
-      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, state);
+      if (!state) {
+        GLuint i;
+        rmesa->curr_vp_hw = NULL;
+        R200_STATECHANGE( rmesa, vap );
+        rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE;
+        /* mark all tcl atoms (tcl vector state got overwritten) dirty
+           not sure about tcl scalar state - we need at least grd
+           with vert progs too.
+           ucp looks like it doesn't get overwritten (may even work
+           with vp for pos-invariant progs if we're lucky) */
+        R200_STATECHANGE( rmesa, mtl[0] );
+        R200_STATECHANGE( rmesa, mtl[1] );
+        R200_STATECHANGE( rmesa, fog );
+        R200_STATECHANGE( rmesa, glt );
+        R200_STATECHANGE( rmesa, eye );
+        for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) {
+           R200_STATECHANGE( rmesa, mat[i] );
+        }
+        for (i = 0 ; i < 8; i++) {
+           R200_STATECHANGE( rmesa, lit[i] );
+        }
+        R200_STATECHANGE( rmesa, tcl );
+        for (i = 0; i <= ctx->Const.MaxClipPlanes; i++) {
+           if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+              rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i);
+           }
+/*         else {
+              rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i);
+           }*/
+        }
+        /* ugly. Need to call everything which might change compsel. */
+        r200UpdateSpecular( ctx );
+#if 0
+       /* shouldn't be necessary, as it's picked up anyway in r200ValidateState (_NEW_PROGRAM),
+          but without it doom3 always locks up at the same places. Why? */
+       /* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before
+          accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... */
+        r200UpdateTextureState( ctx );
+        /* if we call r200UpdateTextureState we need the code below because we are calling it with
+           non-current derived enabled values which may revert the state atoms for frag progs even when
+           they already got disabled... ugh
+           Should really figure out why we need to call r200UpdateTextureState in the first place */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragmentShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+#endif
+      }
+      else {
+        /* picked up later */
+      }
+      /* call functions which change hw state based on ARB_vp enabled or not. */
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL );
+      break;
+
+   case GL_VERTEX_PROGRAM_POINT_SIZE_ARB:
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      break;
+
+   case GL_FRAGMENT_SHADER_ATI:
+      if ( !state ) {
+        /* restore normal tex env colors and make sure tex env combine will get updated;
+           mark env atoms dirty (as their data was overwritten by afs even
+           if they didn't change) and restore tex coord routing */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragmentShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+      }
+      else {
+        /* need to mark this dirty as pix/tf atoms have overwritten the data
+           even if the data in the atoms didn't change */
+        R200_STATECHANGE( rmesa, atf );
+        R200_STATECHANGE( rmesa, afs[1] );
+        /* everything else picked up in r200UpdateTextureState hopefully */
+      }
        break;
-
     default:
        return;
     }
@@ -2235,52 +2301,116 @@ r200UpdateDrawBuffer(GLcontext *ctx)
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
     struct gl_framebuffer *fb = ctx->DrawBuffer;
-   driRenderbuffer *drb;
-
-   if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
-      /* draw to front */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-   }
-   else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
-      /* draw to back */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-   }
-   else {
-      /* drawing to multiple buffers, or none */
-      return;
+   struct radeon_renderbuffer *rrb;
+
+   if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+     /* draw to front */
+     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+     /* draw to back */
+     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+   } else {
+     /* drawing to multiple buffers, or none */
+     return;
     }
  
-   assert(drb);
-   assert(drb->flippedPitch);
+   assert(rrb);
+   assert(rrb->pitch);
  
     R200_STATECHANGE( rmesa, ctx );
  
+#if 0
     /* Note: we used the (possibly) page-flipped values */
     rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
-     = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+     = ((rrb->flippedOffset + rmesa->radeon.radeonScreen->fbLocation)
         & R200_COLOROFFSET_MASK);
     rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
-   if (rmesa->sarea->tiling_enabled) {
+   if (rmesa->radeon.sarea->tiling_enabled) {
        rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
     }
+#endif
  }
  
+static GLboolean r200ValidateBuffers(GLcontext *ctx)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct radeon_cs_space_check bos[8];
+   struct radeon_renderbuffer *rrb;
+   int num_bo = 0;
+   int i;
+   int flushed = 0, ret;
+again:
+   num_bo = 0;
+   
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   /* color buffer */
+   if (rrb && rrb->bo) {
+      bos[num_bo].bo = rrb->bo;
+      bos[num_bo].read_domains = 0;
+      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
+      bos[num_bo].new_accounted = 0;
+      num_bo++;
+   }
+
+   /* depth buffer */
+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
+   /* same accounting as the color buffer: VRAM write domain, no read domains */
+   if (rrb && rrb->bo) {
+      bos[num_bo].bo = rrb->bo;
+      bos[num_bo].read_domains = 0;
+      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
+      bos[num_bo].new_accounted = 0;
+      num_bo++;
+   }
+
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+      radeonTexObj *t;
+      
+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
+        continue;
+      
+      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+      bos[num_bo].bo = t->mt->bo;
+      bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+      bos[num_bo].write_domain = 0;
+      bos[num_bo].new_accounted = 0;
+      num_bo++;
+   }
+   
+   ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
+   if (ret == RADEON_CS_SPACE_OP_TO_BIG)
+      return GL_FALSE;
+   if (ret == RADEON_CS_SPACE_FLUSH) {
+      r200Flush(ctx);
+      if (flushed)
+        return GL_FALSE;
+      flushed = 1;
+      goto again;
+   }
+   return GL_TRUE;
+}
  
-
-void r200ValidateState( GLcontext *ctx )
+GLboolean r200ValidateState( GLcontext *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint new_state = rmesa->NewGLState;
+   GLuint new_state = rmesa->radeon.NewGLState;
  
     if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
       r200UpdateDrawBuffer(ctx);
     }
  
-   if (new_state & _NEW_TEXTURE) {
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
        r200UpdateTextureState( ctx );
-      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+      r200UpdateLocalViewer( ctx );
     }
  
+   /* we need to do a space check here */
+   if (!r200ValidateBuffers(ctx))
+     return GL_FALSE;
+
+/* FIXME: don't really need most of these when vertex progs are enabled */
+
     /* Need an event driven matrix update?
      */
     if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
@@ -2298,7 +2428,7 @@ void r200ValidateState( GLcontext *ctx )
      */
     if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
        update_texturematrix( ctx );
-   }      
+   }
  
     if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
        update_light( ctx );
@@ -2311,8 +2441,19 @@ void r200ValidateState( GLcontext *ctx )
          r200UpdateClipPlanes( ctx );
     }
  
+   if (new_state & (_NEW_PROGRAM|
+   /* need to test for pretty much anything due to possible parameter bindings */
+       _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
+       _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
+       _NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) {
+      if (ctx->VertexProgram._Enabled) {
+        r200SetupVertexProg( ctx );
+      }
+      else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
+   }
  
-   rmesa->NewGLState = 0;
+   rmesa->radeon.NewGLState = 0;
+   return GL_TRUE;
  }
  
  
@@ -2320,23 +2461,24 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
  {
     _swrast_InvalidateState( ctx, new_state );
     _swsetup_InvalidateState( ctx, new_state );
-   _ac_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
     _tnl_InvalidateState( ctx, new_state );
     _ae_invalidate_state( ctx, new_state );
-   R200_CONTEXT(ctx)->NewGLState |= new_state;
-   r200VtxfmtInvalidate( ctx );
+   R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
  }
  
  /* A hack.  The r200 can actually cope just fine with materials
- * between begin/ends, so fix this. But how ?
+ * between begin/ends, so fix this.
+ * Should map to inputs just like the generic vertex arrays for vertex progs.
+ * In theory there could still be too many and we'd still need a fallback.
   */
  static GLboolean check_material( GLcontext *ctx )
  {
     TNLcontext *tnl = TNL_CONTEXT(ctx);
     GLint i;
  
-   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; 
-       i < _TNL_ATTRIB_MAT_BACK_INDEXES; 
+   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
+       i < _TNL_ATTRIB_MAT_BACK_INDEXES;
         i++)
        if (tnl->vb.AttribPtr[i] &&
           tnl->vb.AttribPtr[i]->stride)
@@ -2344,21 +2486,22 @@ static GLboolean check_material( GLcontext *ctx )
  
     return GL_FALSE;
  }
-      
+
  static void r200WrapRunPipeline( GLcontext *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
     GLboolean has_material;
  
     if (0)
-      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
  
     /* Validate state:
      */
-   if (rmesa->NewGLState)
-      r200ValidateState( ctx );
+   if (rmesa->radeon.NewGLState)
+      if (!r200ValidateState( ctx ))
+        FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
  
-   has_material = (ctx->Light.Enabled && check_material( ctx ));
+   has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
  
     if (has_material) {
        TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
@@ -2411,21 +2554,15 @@ void r200InitStateFuncs( struct dd_function_table *functions )
     functions->PolygonMode              = r200PolygonMode;
     functions->PolygonOffset            = r200PolygonOffset;
     functions->PolygonStipple           = r200PolygonStipple;
+   functions->PointParameterfv         = r200PointParameter;
     functions->PointSize                        = r200PointSize;
     functions->RenderMode               = r200RenderMode;
-   functions->Scissor                  = r200Scissor;
+   functions->Scissor                  = radeonScissor;
     functions->ShadeModel               = r200ShadeModel;
-   functions->StencilFunc              = r200StencilFunc;
-   functions->StencilMask              = r200StencilMask;
-   functions->StencilOp                        = r200StencilOp;
+   functions->StencilFuncSeparate      = r200StencilFuncSeparate;
+   functions->StencilMaskSeparate      = r200StencilMaskSeparate;
+   functions->StencilOpSeparate                = r200StencilOpSeparate;
     functions->Viewport                 = r200Viewport;
-
-   /* Swrast hooks for imaging extensions:
-    */
-   functions->CopyColorTable           = _swrast_CopyColorTable;
-   functions->CopyColorSubTable                = _swrast_CopyColorSubTable;
-   functions->CopyConvolutionFilter1D  = _swrast_CopyConvolutionFilter1D;
-   functions->CopyConvolutionFilter2D  = _swrast_CopyConvolutionFilter2D;
  }