remove CVS/XFree86 keywords
[mesa.git] / src / mesa / drivers / dri / r200 / r200_state.c
index def5d86fbbbcc99bc28389ae1bde34ee0eef1712..2115799b9b6f0402ec3881e79385e25300961b4c 100644 (file)
@@ -1,4 +1,3 @@
-/* $XFree86$ */
 /**************************************************************************
 
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
@@ -39,21 +38,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "api_arrayelt.h"
 #include "enums.h"
 #include "colormac.h"
+#include "light.h"
+#include "framebuffer.h"
 
 #include "swrast/swrast.h"
-#include "array_cache/acache.h"
+#include "vbo/vbo.h"
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 #include "swrast_setup/swrast_setup.h"
 
-
 #include "r200_context.h"
 #include "r200_ioctl.h"
 #include "r200_state.h"
 #include "r200_tcl.h"
 #include "r200_tex.h"
 #include "r200_swtcl.h"
-#include "r200_vtxfmt.h"
+#include "r200_vertprog.h"
+
+#include "drirenderbuffer.h"
 
 
 /* =============================================================
@@ -103,158 +105,234 @@ static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
    rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
 }
 
-static void r200BlendEquation( GLcontext *ctx, GLenum mode )
+static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] ) /* stores the blend constant color cf[0..3] in the ctx state atom */
 {
+   GLubyte color[4]; /* cf[] converted to one byte per channel */
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~R200_COMB_FCN_MASK;
-
-   switch ( mode ) {
-   case GL_FUNC_ADD:
-   case GL_LOGIC_OP:
-      b |= R200_COMB_FCN_ADD_CLAMP;
-      break;
-
-   case GL_FUNC_SUBTRACT:
-      b |= R200_COMB_FCN_SUB_CLAMP;
-      break;
-
-   case GL_FUNC_REVERSE_SUBTRACT:
-      b |= R200_COMB_FCN_RSUB_CLAMP;
-      break;
-
-   case GL_MIN:
-      b |= R200_COMB_FCN_MIN;
-      break;
-
-   case GL_MAX:
-      b |= R200_COMB_FCN_MAX;
-      break;
-
-   default:
-      break;
-   }
-
    R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
-   if ( ctx->Color.ColorLogicOpEnabled ) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-   } else {
-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-   }
+   CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); /* float -> GLubyte, one channel per call */
+   CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+   CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+   CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+   if (rmesa->r200Screen->drmSupportsBlendColor) /* register is written only when the screen reports blend-color support; the state change above is issued either way */
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
 }
 
-static void r200BlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
+/**
+ * Calculate the hardware blend factor setting.  This same function is used
+ * for source and destination of both alpha and RGB.
+ *
+ * \returns
+ * The hardware register value for the specified blend factor.  This value
+ * will need to be shifted into the correct position for either source or
+ * destination factor.
+ *
+ * \todo
+ * Since the two cases where source and destination are handled differently
+ * are essentially error cases, they should never happen.  Determine if these
+ * cases can be removed.
+ */
+static int blend_factor( GLenum factor, GLboolean is_src ) /* is_src: GL_TRUE for a source factor, GL_FALSE for a destination factor */
 {
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
-      ~(R200_SRC_BLEND_MASK | R200_DST_BLEND_MASK);
+   int func; /* unshifted hardware factor; the caller shifts it into the SRC or DST field */
 
-   switch ( ctx->Color.BlendSrcRGB ) {
+   switch ( factor ) {
    case GL_ZERO:
-      b |= R200_SRC_BLEND_GL_ZERO;
+      func = R200_BLEND_GL_ZERO;
       break;
    case GL_ONE:
-      b |= R200_SRC_BLEND_GL_ONE;
+      func = R200_BLEND_GL_ONE;
       break;
    case GL_DST_COLOR:
-      b |= R200_SRC_BLEND_GL_DST_COLOR;
+      func = R200_BLEND_GL_DST_COLOR;
       break;
    case GL_ONE_MINUS_DST_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_DST_COLOR;
       break;
    case GL_SRC_COLOR:
-      b |= R200_SRC_BLEND_GL_SRC_COLOR;
+      func = R200_BLEND_GL_SRC_COLOR;
       break;
    case GL_ONE_MINUS_SRC_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR;
       break;
    case GL_SRC_ALPHA:
-      b |= R200_SRC_BLEND_GL_SRC_ALPHA;
+      func = R200_BLEND_GL_SRC_ALPHA;
       break;
    case GL_ONE_MINUS_SRC_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA;
       break;
    case GL_DST_ALPHA:
-      b |= R200_SRC_BLEND_GL_DST_ALPHA;
+      func = R200_BLEND_GL_DST_ALPHA;
       break;
    case GL_ONE_MINUS_DST_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA;
       break;
    case GL_SRC_ALPHA_SATURATE:
-      b |= R200_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
+      func = (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE : R200_BLEND_GL_ZERO; /* honoured only as a source factor; as a destination it maps to ZERO */
       break;
    case GL_CONSTANT_COLOR:
-      b |= R200_SRC_BLEND_GL_CONST_COLOR;
+      func = R200_BLEND_GL_CONST_COLOR;
       break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_CONST_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR;
       break;
    case GL_CONSTANT_ALPHA:
-      b |= R200_SRC_BLEND_GL_CONST_ALPHA;
+      func = R200_BLEND_GL_CONST_ALPHA;
       break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA;
       break;
    default:
-      break;
+      func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO; /* unrecognized factor: ONE for source, ZERO for destination */
    }
+   return func;
+}
 
-   switch ( ctx->Color.BlendDstRGB ) {
-   case GL_ZERO:
-      b |= R200_DST_BLEND_GL_ZERO;
-      break;
-   case GL_ONE:
-      b |= R200_DST_BLEND_GL_ONE;
-      break;
-   case GL_SRC_COLOR:
-      b |= R200_DST_BLEND_GL_SRC_COLOR;
-      break;
-   case GL_ONE_MINUS_SRC_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
-      break;
-   case GL_SRC_ALPHA:
-      b |= R200_DST_BLEND_GL_SRC_ALPHA;
+/**
+ * Sets both the blend equation and the blend function.
+ * This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ * Also, make sure that blend function and blend equation are set to their default
+ * values if color blending is not enabled, since at least blend equations GL_MIN
+ * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for
+ * unknown reasons.
+ */
+static void r200_set_blend_state( GLcontext * ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &
+      ~(R200_ROP_ENABLE | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE); /* current CNTL with the ROP/blend/separate-alpha enables cleared */
+
+   int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqn = R200_COMB_FCN_ADD_CLAMP; /* defaults: ONE/ZERO factors combined with ADD_CLAMP */
+   int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqnA = R200_COMB_FCN_ADD_CLAMP;
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   if (rmesa->r200Screen->drmSupportsBlendColor) { /* this path programs the separate ABLENDCNTL/CBLENDCNTL registers */
+      if (ctx->Color.ColorLogicOpEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return; /* logic op enabled: blend registers stay at the defaults */
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE;
+      }
+      else {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return; /* blending disabled: defaults written, nothing more to do */
+      }
+   }
+   else { /* this path programs the single BLENDCNTL register */
+      if (ctx->Color.ColorLogicOpEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE;
+      }
+      else {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      }
+   }
+
+   func = (blend_factor( ctx->Color.BlendSrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
+      (blend_factor( ctx->Color.BlendDstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT); /* only reached when blending is enabled and logic op is off */
+
+   switch(ctx->Color.BlendEquationRGB) {
+   case GL_FUNC_ADD:
+      eqn = R200_COMB_FCN_ADD_CLAMP;
       break;
-   case GL_ONE_MINUS_SRC_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+
+   case GL_FUNC_SUBTRACT:
+      eqn = R200_COMB_FCN_SUB_CLAMP;
       break;
-   case GL_DST_COLOR:
-      b |= R200_DST_BLEND_GL_DST_COLOR;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqn = R200_COMB_FCN_RSUB_CLAMP;
       break;
-   case GL_ONE_MINUS_DST_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+
+   case GL_MIN:
+      eqn = R200_COMB_FCN_MIN;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); /* GL_MIN overrides the application's factors with ONE/ONE */
       break;
-   case GL_DST_ALPHA:
-      b |= R200_DST_BLEND_GL_DST_ALPHA;
+
+   case GL_MAX:
+      eqn = R200_COMB_FCN_MAX;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); /* GL_MAX overrides the application's factors with ONE/ONE */
       break;
-   case GL_ONE_MINUS_DST_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
+
+   default:
+      fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB );
+      return; /* NOTE(review): CNTL was already updated above, but no blend control register is written on this path */
+   }
+
+   if (!rmesa->r200Screen->drmSupportsBlendColor) { /* single-register path: the RGB setting is used as-is and alpha is not programmed separately */
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+      return;
+   }
+
+   funcA = (blend_factor( ctx->Color.BlendSrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
+      (blend_factor( ctx->Color.BlendDstA, GL_FALSE ) << R200_DST_BLEND_SHIFT);
+
+   switch(ctx->Color.BlendEquationA) {
+   case GL_FUNC_ADD:
+      eqnA = R200_COMB_FCN_ADD_CLAMP;
       break;
-   case GL_CONSTANT_COLOR:
-      b |= R200_DST_BLEND_GL_CONST_COLOR;
+
+   case GL_FUNC_SUBTRACT:
+      eqnA = R200_COMB_FCN_SUB_CLAMP;
       break;
-   case GL_ONE_MINUS_CONSTANT_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_CONST_COLOR;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnA = R200_COMB_FCN_RSUB_CLAMP;
       break;
-   case GL_CONSTANT_ALPHA:
-      b |= R200_DST_BLEND_GL_CONST_ALPHA;
+
+   case GL_MIN:
+      eqnA = R200_COMB_FCN_MIN;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
-   case GL_ONE_MINUS_CONSTANT_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+
+   case GL_MAX:
+      eqnA = R200_COMB_FCN_MAX;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
+
    default:
-      break;
+      fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationA );
+      return; /* NOTE(review): returns before ABLENDCNTL/CBLENDCNTL are written, although CNTL already enables blending */
    }
 
-   R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqnA | funcA;
+   rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+
+}
+
+static void r200BlendEquationSeparate( GLcontext *ctx,
+                                      GLenum modeRGB, GLenum modeA ) /* modeRGB/modeA are not read here */
+{
+      r200_set_blend_state( ctx ); /* recomputes all blend state from ctx->Color */
 }
 
 static void r200BlendFuncSeparate( GLcontext *ctx,
                                     GLenum sfactorRGB, GLenum dfactorRGB,
                                     GLenum sfactorA, GLenum dfactorA )
 {
-   r200BlendFunc( ctx, sfactorRGB, dfactorRGB );
+      r200_set_blend_state( ctx ); /* factor arguments are not read; blend state is recomputed from ctx->Color */
 }
 
 
@@ -297,6 +375,21 @@ static void r200DepthFunc( GLcontext *ctx, GLenum func )
    }
 }
 
+static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint zformat =
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & R200_DEPTH_FORMAT_MASK;
+
+   /* Scale d to the integer range of the current depth format and cache it in
+    * state.depth.clear; any other format leaves the cached value untouched. */
+   if ( zformat == R200_DEPTH_FORMAT_16BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x0000ffff;
+   }
+   else if ( zformat == R200_DEPTH_FORMAT_24BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x00ffffff;
+   }
+}
 
 static void r200DepthMask( GLcontext *ctx, GLboolean flag )
 {
@@ -391,10 +484,32 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
       break;
-   case GL_FOG_COORDINATE_SOURCE_EXT: 
-      /* What to do?
-       */
+   case GL_FOG_COORD_SRC: {
+      GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0];
+      GLuint fog   = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR];
+
+      fog &= ~R200_FOG_USE_MASK;
+      if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD || ctx->VertexProgram.Enabled) {
+        fog   |= R200_FOG_USE_VTX_FOG;
+        out_0 |= R200_VTX_DISCRETE_FOG;
+      }
+      else {
+        fog   |=  R200_FOG_USE_SPEC_ALPHA;
+        out_0 &= ~R200_VTX_DISCRETE_FOG;
+      }
+
+      if ( fog != rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] ) {
+        R200_STATECHANGE( rmesa, ctx );
+        rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog;
+      }
+
+      if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
+        R200_STATECHANGE( rmesa, vtx );
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;     
+      }
+
       break;
+   }
    default:
       return;
    }
@@ -412,9 +527,9 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
  */
 
 
-static GLboolean intersect_rect( XF86DRIClipRectPtr out,
-                                XF86DRIClipRectPtr a,
-                                XF86DRIClipRectPtr b )
+static GLboolean intersect_rect( drm_clip_rect_t *out,
+                                drm_clip_rect_t *a,
+                                drm_clip_rect_t *b )
 {
    *out = *a;
    if ( b->x1 > out->x1 ) out->x1 = b->x1;
@@ -429,7 +544,7 @@ static GLboolean intersect_rect( XF86DRIClipRectPtr out,
 
 void r200RecalcScissorRects( r200ContextPtr rmesa )
 {
-   XF86DRIClipRectPtr out;
+   drm_clip_rect_t *out;
    int i;
 
    /* Grow cliprect store?
@@ -445,7 +560,7 @@ void r200RecalcScissorRects( r200ContextPtr rmesa )
 
       rmesa->state.scissor.pClipRects = 
         MALLOC( rmesa->state.scissor.numAllocedClipRects * 
-                sizeof(XF86DRIClipRectRec) );
+                sizeof(drm_clip_rect_t) );
 
       if ( rmesa->state.scissor.pClipRects == NULL ) {
         rmesa->state.scissor.numAllocedClipRects = 0;
@@ -569,7 +684,81 @@ static void r200FrontFace( GLcontext *ctx, GLenum mode )
  */
 static void r200PointSize( GLcontext *ctx, GLfloat size )
 {
-   if (0) fprintf(stderr, "%s: %f\n", __FUNCTION__, size );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; /* float view of the ptp atom's command words */
+
+   R200_STATECHANGE( rmesa, cst );
+   R200_STATECHANGE( rmesa, ptp );
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff; /* clear the low 16 bits before inserting the new size */
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= ((GLuint)(ctx->Point.Size * 16.0)); /* uses ctx->Point.Size, not the size argument; x16 is presumably a fixed-point encoding - confirm against the register docs */
+/* this is the size param of the point size calculation (point size reg value
+   is not used when calculation is active). */
+   fcmd[PTP_VPORT_SCALE_PTSIZE] = ctx->Point.Size;
+}
+
+/* Point-parameter driver hook: mirrors ctx->Point state for pname into the lin/cst/ptp/spr/vtx atoms (params itself is not read). */
+static void r200PointParameter( GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; /* float view of the ptp atom, shared by every case below */
+
+   switch (pname) {
+   case GL_POINT_SIZE_MIN:
+   /* Can clamp both in tcl and setup - just set both (as does fglrx) */
+      R200_STATECHANGE( rmesa, lin );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= 0xffff; /* keep the low 16 bits, replace the high half */
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Point.MinSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MIN] = ctx->Point.MinSize;
+      break;
+   case GL_POINT_SIZE_MAX:
+      R200_STATECHANGE( rmesa, cst );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= 0xffff; /* keep the low 16 bits, replace the high half */
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= (GLuint)(ctx->Point.MaxSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MAX] = ctx->Point.MaxSize;
+      break;
+   case GL_POINT_DISTANCE_ATTENUATION:
+      R200_STATECHANGE( rmesa, vtx );
+      R200_STATECHANGE( rmesa, spr );
+      R200_STATECHANGE( rmesa, ptp ); /* the function-scope fcmd already aliases hw.ptp.cmd; no redeclaration inside the switch */
+      rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &=
+        ~(R200_PS_MULT_MASK | R200_PS_LIN_ATT_ZERO | R200_PS_SE_SEL_STATE);
+      /* can't rely on ctx->Point._Attenuated here and test for NEW_POINT in
+        r200ValidateState looks like overkill */
+      if (ctx->Point.Params[0] != 1.0 ||
+         ctx->Point.Params[1] != 0.0 ||
+         ctx->Point.Params[2] != 0.0 ||
+         (ctx->VertexProgram.Enabled && ctx->VertexProgram.PointSizeEnabled)) {
+        /* all we care for vp would be the ps_se_sel_state setting */
+        fcmd[PTP_ATT_CONST_QUAD] = ctx->Point.Params[2];
+        fcmd[PTP_ATT_CONST_LIN] = ctx->Point.Params[1];
+        fcmd[PTP_ATT_CONST_CON] = ctx->Point.Params[0];
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_MULT_ATTENCONST;
+        if (ctx->Point.Params[1] == 0.0)
+           rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_LIN_ATT_ZERO;
+/* FIXME: setting this here doesn't look quite ok - we only want to do
+          that if we're actually drawing points probably */
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_PT_SIZE;
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= R200_VTX_POINT_SIZE;
+      }
+      else {
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+           R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST; /* params are (1,0,0) and no vertex-program point size */
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_PT_SIZE;
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~R200_VTX_POINT_SIZE;
+      }
+      break;
+   case GL_POINT_FADE_THRESHOLD_SIZE:
+      /* don't support multisampling, so doesn't matter. */
+      break;
+   /* can't do these but don't need them.
+   case GL_POINT_SPRITE_R_MODE_NV:
+   case GL_POINT_SPRITE_COORD_ORIGIN: */
+   default:
+      fprintf(stderr, "bad pname parameter in r200PointParameter\n");
+      return;
+   }
 }
 
 /* =============================================================
@@ -643,23 +832,24 @@ static void r200PolygonOffset( GLcontext *ctx,
                               GLfloat factor, GLfloat units )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLfloat constant = units * rmesa->state.depth.scale;
+   float_ui32_type constant =  { units * rmesa->state.depth.scale };
+   float_ui32_type factoru = { factor };
 
 /*    factor *= 2; */
 /*    constant *= 2; */
-   
+
 /*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
 
    R200_STATECHANGE( rmesa, zbs );
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = *(GLuint *)&factor;
-   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = *(GLuint *)&constant;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = factoru.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
 }
 
 static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLuint i;
-   drmRadeonStipple stipple;
+   drm_radeon_stipple_t stipple;
 
    /* Must flip pattern upside down.
     */
@@ -676,7 +866,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
     */
    stipple.mask = rmesa->state.stipple.mask;
    drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-                    &stipple, sizeof(drmRadeonStipple) );
+                    &stipple, sizeof(stipple) );
    UNLOCK_HARDWARE( rmesa );
 }
 
@@ -710,7 +900,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
 static void r200UpdateSpecular( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
 
    R200_STATECHANGE( rmesa, tcl );
    R200_STATECHANGE( rmesa, vtx );
@@ -787,6 +977,8 @@ static void update_global_ambient( GLcontext *ctx )
    float *fcmd = (float *)R200_DB_STATE( glt );
 
    /* Need to do more if both emmissive & ambient are PREMULT:
+    * I believe this is not necessary when using source_material. This condition thus
+    * will never happen currently, and the function has no dependencies on materials now
     */
    if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] &
        ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
@@ -809,9 +1001,6 @@ static void update_global_ambient( GLcontext *ctx )
 /* Update on change to 
  *    - light[p].colors
  *    - light[p].enabled
- *    - material,
- *    - colormaterial enabled
- *    - colormaterial bitmask
  */
 static void update_light_colors( GLcontext *ctx, GLuint p )
 {
@@ -822,102 +1011,115 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
    if (l->Enabled) {
       r200ContextPtr rmesa = R200_CONTEXT(ctx);
       float *fcmd = (float *)R200_DB_STATE( lit[p] );
-      GLuint bitmask = ctx->Light.ColorMaterialBitmask;
-      GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
 
       COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );    
       COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
       COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
       
-      if (!ctx->Light.ColorMaterialEnabled)
-        bitmask = 0;
-
-      if ((bitmask & MAT_BIT_FRONT_AMBIENT) == 0) 
-        SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat[MAT_ATTRIB_FRONT_AMBIENT] );
-
-      if ((bitmask & MAT_BIT_FRONT_DIFFUSE) == 0) 
-        SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat[MAT_ATTRIB_FRONT_DIFFUSE] );
-      
-      if ((bitmask & MAT_BIT_FRONT_SPECULAR) == 0) 
-        SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat[MAT_ATTRIB_FRONT_SPECULAR] );
-
       R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
    }
 }
 
-/* Also fallback for asym colormaterial mode in twoside lighting...
- */
-static void check_twoside_fallback( GLcontext *ctx )
-{
-   GLboolean fallback = GL_FALSE;
-   GLint i;
-
-   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
-      if (ctx->Light.ColorMaterialEnabled &&
-         (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != 
-         ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
-        fallback = GL_TRUE;
-      else {
-        for (i = MAT_ATTRIB_FRONT_AMBIENT; i < MAT_ATTRIB_FRONT_INDEXES; i+=2)
-           if (memcmp( ctx->Light.Material.Attrib[i],
-                       ctx->Light.Material.Attrib[i+1],
-                       sizeof(GLfloat)*4) != 0) {
-              fallback = GL_TRUE;  
-              break;
-           }
-      }
-   }
-
-   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
-}
-
 static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
 {
-   if (ctx->Light.ColorMaterialEnabled) {
       r200ContextPtr rmesa = R200_CONTEXT(ctx);
       GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1];
-      GLuint mask = ctx->Light.ColorMaterialBitmask;
-
-      /* Default to PREMULT:
-       */
       light_model_ctl1 &= ~((0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
                           (0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
                           (0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
-                          (0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT)); 
+                  (0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+                  (0xf << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+                  (0xf << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+                  (0xf << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+                  (0xf << R200_BACK_SPECULAR_SOURCE_SHIFT)); /* NOTE(review): if any of these shifts is 28, 0xf << 28 overflows a signed int; 0xfu would avoid it - confirm against r200_reg.h */
+
+   if (ctx->Light.ColorMaterialEnabled) { /* all eight source fields were cleared above; face/mode are not read, state comes from ctx->Light */
+      GLuint mask = ctx->Light.ColorMaterialBitmask;
    
       if (mask & MAT_BIT_FRONT_EMISSION) {
         light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
                             R200_FRONT_EMISSIVE_SOURCE_SHIFT);
       }
+      else /* attribute not in the color-material mask: source it from MATERIAL_0 (presumably the mtl[0] front material block) */
+        light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+                            R200_FRONT_EMISSIVE_SOURCE_SHIFT);
 
       if (mask & MAT_BIT_FRONT_AMBIENT) {
         light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
                             R200_FRONT_AMBIENT_SOURCE_SHIFT);
       }
+      else
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+                            R200_FRONT_AMBIENT_SOURCE_SHIFT);
         
       if (mask & MAT_BIT_FRONT_DIFFUSE) {
         light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
                             R200_FRONT_DIFFUSE_SOURCE_SHIFT);
       }
+      else
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+                            R200_FRONT_DIFFUSE_SOURCE_SHIFT);
    
       if (mask & MAT_BIT_FRONT_SPECULAR) {
         light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
                             R200_FRONT_SPECULAR_SOURCE_SHIFT);
       }
+      else {
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+                            R200_FRONT_SPECULAR_SOURCE_SHIFT);
+      }
    
-      if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) {
-        GLuint p;
+      if (mask & MAT_BIT_BACK_EMISSION) { /* back-face attributes: same scheme, falling back to MATERIAL_1 */
+        light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                            R200_BACK_EMISSIVE_SOURCE_SHIFT);
+      }
 
-        R200_STATECHANGE( rmesa, tcl );
-        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1;      
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+                            R200_BACK_EMISSIVE_SOURCE_SHIFT);
 
-        for (p = 0 ; p < MAX_LIGHTS; p++) 
-           update_light_colors( ctx, p );
-        update_global_ambient( ctx );
+      if (mask & MAT_BIT_BACK_AMBIENT) {
+        light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                            R200_BACK_AMBIENT_SOURCE_SHIFT);
       }
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+                            R200_BACK_AMBIENT_SOURCE_SHIFT);
+
+      if (mask & MAT_BIT_BACK_DIFFUSE) {
+        light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                            R200_BACK_DIFFUSE_SOURCE_SHIFT);
    }
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+                            R200_BACK_DIFFUSE_SOURCE_SHIFT);
+
+      if (mask & MAT_BIT_BACK_SPECULAR) {
+        light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                            R200_BACK_SPECULAR_SOURCE_SHIFT);
+      }
+      else {
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+                            R200_BACK_SPECULAR_SOURCE_SHIFT);
+      }
+      }
+   else {
+       /* Default to SOURCE_MATERIAL:
+        */
+     light_model_ctl1 |=
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT);
+   }
+
+   if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) { /* only flag the tcl atom when the register value actually changes */
+      R200_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1;
+   }
+   
   
-   check_twoside_fallback( ctx );
 }
 
 void r200UpdateMaterial( GLcontext *ctx )
@@ -925,16 +1127,16 @@ void r200UpdateMaterial( GLcontext *ctx )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
    GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );
-   GLuint p;
+   GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] );
    GLuint mask = ~0;
    
+   /* Might be possible and faster to update everything unconditionally? */
    if (ctx->Light.ColorMaterialEnabled)
       mask &= ~ctx->Light.ColorMaterialBitmask;
 
    if (R200_DEBUG & DEBUG_STATE)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
-      
    if (mask & MAT_BIT_FRONT_EMISSION) {
       fcmd[MTL_EMMISSIVE_RED]   = mat[MAT_ATTRIB_FRONT_EMISSION][0];
       fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
@@ -963,15 +1165,39 @@ void r200UpdateMaterial( GLcontext *ctx )
       fcmd[MTL_SHININESS]       = mat[MAT_ATTRIB_FRONT_SHININESS][0];
    }
 
-   if (R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] )) {
-      for (p = 0 ; p < MAX_LIGHTS; p++) 
-        update_light_colors( ctx, p );
-
-      check_twoside_fallback( ctx );
-      update_global_ambient( ctx );
+   if (mask & MAT_BIT_BACK_EMISSION) {
+      fcmd2[MTL_EMMISSIVE_RED]   = mat[MAT_ATTRIB_BACK_EMISSION][0];
+      fcmd2[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_BACK_EMISSION][1];
+      fcmd2[MTL_EMMISSIVE_BLUE]  = mat[MAT_ATTRIB_BACK_EMISSION][2];
+      fcmd2[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_BACK_EMISSION][3];
+   }
+   if (mask & MAT_BIT_BACK_AMBIENT) {
+      fcmd2[MTL_AMBIENT_RED]     = mat[MAT_ATTRIB_BACK_AMBIENT][0];
+      fcmd2[MTL_AMBIENT_GREEN]   = mat[MAT_ATTRIB_BACK_AMBIENT][1];
+      fcmd2[MTL_AMBIENT_BLUE]    = mat[MAT_ATTRIB_BACK_AMBIENT][2];
+      fcmd2[MTL_AMBIENT_ALPHA]   = mat[MAT_ATTRIB_BACK_AMBIENT][3];
+   }
+   if (mask & MAT_BIT_BACK_DIFFUSE) {
+      fcmd2[MTL_DIFFUSE_RED]     = mat[MAT_ATTRIB_BACK_DIFFUSE][0];
+      fcmd2[MTL_DIFFUSE_GREEN]   = mat[MAT_ATTRIB_BACK_DIFFUSE][1];
+      fcmd2[MTL_DIFFUSE_BLUE]    = mat[MAT_ATTRIB_BACK_DIFFUSE][2];
+      fcmd2[MTL_DIFFUSE_ALPHA]   = mat[MAT_ATTRIB_BACK_DIFFUSE][3];
+   }
+   if (mask & MAT_BIT_BACK_SPECULAR) {
+      fcmd2[MTL_SPECULAR_RED]    = mat[MAT_ATTRIB_BACK_SPECULAR][0];
+      fcmd2[MTL_SPECULAR_GREEN]  = mat[MAT_ATTRIB_BACK_SPECULAR][1];
+      fcmd2[MTL_SPECULAR_BLUE]   = mat[MAT_ATTRIB_BACK_SPECULAR][2];
+      fcmd2[MTL_SPECULAR_ALPHA]  = mat[MAT_ATTRIB_BACK_SPECULAR][3];
+   }
+   if (mask & MAT_BIT_BACK_SHININESS) {
+      fcmd2[MTL_SHININESS]       = mat[MAT_ATTRIB_BACK_SHININESS][0];
    }
-   else if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
-      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] );
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[1] );
+
+   /* currently material changes cannot trigger a global ambient change, I believe this is correct
+    update_global_ambient( ctx ); */
 }
 
 /* _NEW_LIGHT
@@ -1108,6 +1334,10 @@ static void r200Lightfv( GLcontext *ctx, GLenum light,
    case GL_CONSTANT_ATTENUATION:
       R200_STATECHANGE(rmesa, lit[p]);
       fcmd[LIT_ATTEN_CONST] = params[0];
+      if ( params[0] == 0.0 )
+        fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX;
+      else
+        fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0];
       break;
    case GL_LINEAR_ATTENUATION:
       R200_STATECHANGE(rmesa, lit[p]);
@@ -1121,10 +1351,62 @@ static void r200Lightfv( GLcontext *ctx, GLenum light,
       return;
    }
 
-}
+   /* Set RANGE_ATTEN only when needed */
+   switch (pname) {
+   case GL_POSITION:
+   case GL_CONSTANT_ATTENUATION:
+   case GL_LINEAR_ATTENUATION:
+   case GL_QUADRATIC_ATTENUATION: {
+      GLuint *icmd = (GLuint *)R200_DB_STATE( tcl );
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      GLuint atten_flag = ( p&1 ) ? R200_LIGHT_1_ENABLE_RANGE_ATTEN
+                                 : R200_LIGHT_0_ENABLE_RANGE_ATTEN;
+      GLuint atten_const_flag = ( p&1 ) ? R200_LIGHT_1_CONSTANT_RANGE_ATTEN
+                                 : R200_LIGHT_0_CONSTANT_RANGE_ATTEN;
+
+      if ( l->EyePosition[3] == 0.0F ||
+          ( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) &&
+            fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) {
+        /* Disable attenuation */
+        icmd[idx] &= ~atten_flag;
+      } else {
+        if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) {
+           /* Enable only constant portion of attenuation calculation */
+           icmd[idx] |= ( atten_flag | atten_const_flag );
+        } else {
+           /* Enable full attenuation calculation */
+           icmd[idx] &= ~atten_const_flag;
+           icmd[idx] |= atten_flag;
+        }
+      }
 
-                 
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.tcl );
+      break;
+   }
+   default:
+     break;
+   }
+}
 
+static void r200UpdateLocalViewer ( GLcontext *ctx )
+{
+/* It looks like for the texgen modes GL_SPHERE_MAP, GL_NORMAL_MAP and
+   GL_REFLECTION_MAP we need R200_LOCAL_VIEWER set (fglrx does exactly that
+   for these and only these modes). This means specular highlights may turn out
+   wrong in some cases when lighting is enabled but GL_LIGHT_MODEL_LOCAL_VIEWER
+   is not set, though it seems to happen rarely and the effect seems quite
+   subtle. May need TCL fallback to fix it completely, though I'm not sure
+   how you'd identify the cases where the specular highlights indeed will
+   be wrong. Don't know if fglrx does something special in that case.
+*/
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, tcl );
+   if (ctx->Light.Model.LocalViewer ||
+       ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+}
 
 static void r200LightModelfv( GLcontext *ctx, GLenum pname,
                                const GLfloat *param )
@@ -1137,11 +1419,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
         break;
 
       case GL_LIGHT_MODEL_LOCAL_VIEWER:
-        R200_STATECHANGE( rmesa, tcl );
-        if (ctx->Light.Model.LocalViewer)
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
-        else
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+        r200UpdateLocalViewer( ctx );
          break;
 
       case GL_LIGHT_MODEL_TWO_SIDE:
@@ -1149,10 +1427,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
         if (ctx->Light.Model.TwoSide)
            rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
         else
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHT_TWOSIDE;
-
-        check_twoside_fallback( ctx );
-
+           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
         if (rmesa->TclFallback) {
            r200ChooseRenderState( ctx );
            r200ChooseVertexState( ctx );
@@ -1176,20 +1451,23 @@ static void r200ShadeModel( GLcontext *ctx, GLenum mode )
    s &= ~(R200_DIFFUSE_SHADE_MASK |
          R200_ALPHA_SHADE_MASK |
          R200_SPECULAR_SHADE_MASK |
-         R200_FOG_SHADE_MASK);
+         R200_FOG_SHADE_MASK |
+         R200_DISC_FOG_SHADE_MASK);
 
    switch ( mode ) {
    case GL_FLAT:
       s |= (R200_DIFFUSE_SHADE_FLAT |
            R200_ALPHA_SHADE_FLAT |
            R200_SPECULAR_SHADE_FLAT |
-           R200_FOG_SHADE_FLAT);
+           R200_FOG_SHADE_FLAT |
+           R200_DISC_FOG_SHADE_FLAT);
       break;
    case GL_SMOOTH:
       s |= (R200_DIFFUSE_SHADE_GOURAUD |
            R200_ALPHA_SHADE_GOURAUD |
            R200_SPECULAR_SHADE_GOURAUD |
-           R200_FOG_SHADE_GOURAUD);
+           R200_FOG_SHADE_GOURAUD |
+           R200_DISC_FOG_SHADE_GOURAUD);
       break;
    default:
       return;
@@ -1242,12 +1520,13 @@ static void r200UpdateClipPlanes( GLcontext *ctx )
  * Stencil
  */
 
-static void r200StencilFunc( GLcontext *ctx, GLenum func,
-                              GLint ref, GLuint mask )
+static void
+r200StencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                         GLint ref, GLuint mask )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint refmask = ((ctx->Stencil.Ref[0] << R200_STENCIL_REF_SHIFT) |
-                    (ctx->Stencil.ValueMask[0] << R200_STENCIL_MASK_SHIFT));
+   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R200_STENCIL_REF_SHIFT) |
+                    ((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT));
 
    R200_STATECHANGE( rmesa, ctx );
    R200_STATECHANGE( rmesa, msk );
@@ -1286,18 +1565,20 @@ static void r200StencilFunc( GLcontext *ctx, GLenum func,
    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
 }
 
-static void r200StencilMask( GLcontext *ctx, GLuint mask )
+static void
+r200StencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
    R200_STATECHANGE( rmesa, msk );
    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~R200_STENCIL_WRITE_MASK;
    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
-      (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+      ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT);
 }
 
-static void r200StencilOp( GLcontext *ctx, GLenum fail,
-                            GLenum zfail, GLenum zpass )
+static void
+r200StencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                       GLenum zfail, GLenum zpass )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
@@ -1393,9 +1674,9 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
    rmesa->state.stencil.clear = 
-      ((GLuint) ctx->Stencil.Clear |
+      ((GLuint) (ctx->Stencil.Clear & 0xff) |
        (0xff << R200_STENCIL_MASK_SHIFT) |
-       (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT));
+       ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
 }
 
 
@@ -1409,6 +1690,11 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
 #define SUBPIXEL_X 0.125
 #define SUBPIXEL_Y 0.125
 
+
+/**
+ * Called when window size or position changes or viewport or depth range
+ * state is changed.  We update the hardware viewport state here.
+ */
 void r200UpdateWindow( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -1417,22 +1703,22 @@ void r200UpdateWindow( GLcontext *ctx )
    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
    const GLfloat *v = ctx->Viewport._WindowMap.m;
 
-   GLfloat sx = v[MAT_SX];
-   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   GLfloat sy = - v[MAT_SY];
-   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
-   GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
-   GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
+   float_ui32_type sx = { v[MAT_SX] };
+   float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+   float_ui32_type sy = { - v[MAT_SY] };
+   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
+   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
 
    R200_FIREVERTICES( rmesa );
    R200_STATECHANGE( rmesa, vpt );
 
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&sx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&sy;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&sz;
-   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&tz;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
 }
 
 
@@ -1444,7 +1730,6 @@ static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
     * setting below.  Could apply deltas to rescue pipelined viewport
     * values, or keep the originals hanging around.
     */
-   R200_FIREVERTICES( R200_CONTEXT(ctx) );
    r200UpdateWindow( ctx );
 }
 
@@ -1462,18 +1747,22 @@ void r200UpdateViewportOffset( GLcontext *ctx )
    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
    const GLfloat *v = ctx->Viewport._WindowMap.m;
 
-   GLfloat tx = v[MAT_TX] + xoffset;
-   GLfloat ty = (- v[MAT_TY]) + yoffset;
+   float_ui32_type tx;
+   float_ui32_type ty;
+
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
 
-   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
-       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
    {
       /* Note: this should also modify whatever data the context reset
        * code uses...
        */
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
-      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
-      
+      R200_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
       /* update polygon stipple x/y screen offset */
       {
          GLuint stx, sty;
@@ -1558,34 +1847,54 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
 }
 
 
-void r200SetCliprects( r200ContextPtr rmesa, GLenum mode )
+/*
+ * Set up the cliprects for either front or back-buffer drawing.
+ */
+void r200SetCliprects( r200ContextPtr rmesa )
 {
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
+   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
 
-   switch ( mode ) {
-   case GL_FRONT_LEFT:
-      rmesa->numClipRects = dPriv->numClipRects;
-      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
-      break;
-   case GL_BACK_LEFT:
+   if (draw_fb->_ColorDrawBufferMask[0]
+       == BUFFER_BIT_BACK_LEFT) {
       /* Can't ignore 2d windows if we are page flipping.
        */
-      if ( dPriv->numBackClipRects == 0 || rmesa->doPageFlip ) {
-        rmesa->numClipRects = dPriv->numClipRects;
-        rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
+      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
+         rmesa->numClipRects = drawable->numClipRects;
+         rmesa->pClipRects = drawable->pClipRects;
       }
       else {
-        rmesa->numClipRects = dPriv->numBackClipRects;
-        rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pBackClipRects;
+         rmesa->numClipRects = drawable->numBackClipRects;
+         rmesa->pClipRects = drawable->pBackClipRects;
+      }
+   }
+   else {
+     /* front buffer (or none, or multiple buffers) */
+     rmesa->numClipRects = drawable->numClipRects;
+     rmesa->pClipRects = drawable->pClipRects;
+  }
+
+   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
+      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
+                              drawable->w, drawable->h);
+      draw_fb->Initialized = GL_TRUE;
+   }
+
+   if (drawable != readable) {
+      if ((read_fb->Width != readable->w) ||
+         (read_fb->Height != readable->h)) {
+        _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
+                                 readable->w, readable->h);
+        read_fb->Initialized = GL_TRUE;
       }
-      break;
-   default:
-      fprintf(stderr, "bad mode in r200SetCliprects\n");
-      return;
    }
 
    if (rmesa->state.scissor.enabled)
       r200RecalcScissorRects( rmesa );
+
+   rmesa->lastStamp = drawable->lastStamp;
 }
 
 
@@ -1600,33 +1909,25 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
    R200_FIREVERTICES(rmesa);   /* don't pipeline cliprect changes */
 
    /*
-    * _DrawDestMask is easier to cope with than <mode>.
+    * _ColorDrawBufferMask is easier to cope with than <mode>.
+    * Check for software fallback, update cliprects.
     */
-   switch ( ctx->Color._DrawDestMask ) {
-   case FRONT_LEFT_BIT:
+   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
+   case BUFFER_BIT_FRONT_LEFT:
+   case BUFFER_BIT_BACK_LEFT:
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_FRONT_LEFT );
-      break;
-   case BACK_LEFT_BIT:
-      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_BACK_LEFT );
       break;
    default:
-      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
       return;
    }
 
-   /* We want to update the s/w rast state too so that r200SetBuffer()
-    * gets called.
-    */
-   _swrast_DrawBuffer(ctx, mode);
+   r200SetCliprects( rmesa );
 
-   R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
-                                              rmesa->r200Screen->fbLocation)
-                                             & R200_COLOROFFSET_MASK);
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
+   /* We'll set the drawing engine's offset/pitch parameters later
+    * when we update other state.
+    */
 }
 
 
@@ -1667,17 +1968,8 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       break;
 
    case GL_BLEND:
-      R200_STATECHANGE( rmesa, ctx );
-      if (state) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ALPHA_BLEND_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ALPHA_BLEND_ENABLE;
-      }
-      if ( ctx->Color.ColorLogicOpEnabled ) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-      }
+   case GL_COLOR_LOGIC_OP:
+      r200_set_blend_state( ctx );
       break;
 
    case GL_CLIP_PLANE0:
@@ -1699,8 +1991,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
 
    case GL_COLOR_MATERIAL:
       r200ColorMaterial( ctx, 0, 0 );
-      if (!state) 
-        r200UpdateMaterial( ctx );
+      r200UpdateMaterial( ctx );
       break;
 
    case GL_CULL_FACE:
@@ -1731,7 +2022,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       R200_STATECHANGE(rmesa, ctx );
       if ( state ) {
         rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
-        r200Fogfv( ctx, GL_FOG_MODE, 0 );
+        r200Fogfv( ctx, GL_FOG_MODE, NULL );
       } else {
         rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
         R200_STATECHANGE(rmesa, tcl);
@@ -1774,7 +2065,8 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
 
    case GL_LIGHTING:
       r200UpdateSpecular(ctx);
-      check_twoside_fallback( ctx );
+      /* for reflection map fixup - might set recheck_texgen for all units too */
+      rmesa->NewGLState |= _NEW_TEXTURE;
       break;
 
    case GL_LINE_SMOOTH:
@@ -1795,15 +2087,6 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       }
       break;
 
-   case GL_COLOR_LOGIC_OP:
-      R200_STATECHANGE( rmesa, ctx );
-      if ( state ) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-      }
-      break;
-      
    case GL_NORMALIZE:
       R200_STATECHANGE( rmesa, tcl );
       if ( state ) {
@@ -1813,10 +2096,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       }
       break;
 
-      /* Pointsize registers on r200 don't seem to do anything.  Maybe
-       * have to pass pointsizes as vertex parameters?  In any case,
-       * setting pointmin == pointsizemax == 1.0, and doing nothing
-       * for aa is enough to satisfy conform.
+      /* Pointsize registers on r200 only work for point sprites, and point smooth
+       * doesn't work for point sprites (and isn't needed for 1.0 sized aa points).
+       * In any case, setting pointmin == pointsizemax == 1.0 for aa points
+       * is enough to satisfy conform.
        */
    case GL_POINT_SMOOTH:
       break;
@@ -1844,6 +2127,19 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       break;
 #endif
 
+   case GL_POINT_SPRITE_ARB:
+      R200_STATECHANGE( rmesa, spr );
+      if ( state ) {
+        int i;
+        for (i = 0; i < 6; i++) {
+           rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+               ctx->Point.CoordReplace[i] << (R200_PS_GEN_TEX_0_SHIFT + i);
+        }
+      } else {
+        rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~R200_PS_GEN_TEX_MASK;
+      }
+      break;
+
    case GL_POLYGON_OFFSET_FILL:
       R200_STATECHANGE( rmesa, set );
       if ( state ) {
@@ -1914,6 +2210,105 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       r200UpdateSpecular ( ctx );
       break;
 
+   case GL_VERTEX_PROGRAM_ARB:
+      if (!state) {
+        GLuint i;
+        rmesa->curr_vp_hw = NULL;
+        R200_STATECHANGE( rmesa, vap );
+        rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE;
+        /* mark all tcl atoms (tcl vector state got overwritten) dirty
+           not sure about tcl scalar state - we need at least grd
+           with vert progs too.
+           ucp looks like it doesn't get overwritten (may even work
+           with vp for pos-invariant progs if we're lucky) */
+        R200_STATECHANGE( rmesa, mtl[0] );
+        R200_STATECHANGE( rmesa, mtl[1] );
+        R200_STATECHANGE( rmesa, fog );
+        R200_STATECHANGE( rmesa, glt );
+        R200_STATECHANGE( rmesa, eye );
+        for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) {
+           R200_STATECHANGE( rmesa, mat[i] );
+        }
+        for (i = 0 ; i < 8; i++) {
+           R200_STATECHANGE( rmesa, lit[i] );
+        }
+        R200_STATECHANGE( rmesa, tcl );
+        for (i = 0; i <= ctx->Const.MaxClipPlanes; i++) {
+           if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+              rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i);
+           }
+/*         else {
+              rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i);
+           }*/
+        }
+        /* ugly. Need to call everything which might change compsel. */
+        r200UpdateSpecular( ctx );
+#if 0
+       /* shouldn't be necessary, as it's picked up anyway in r200ValidateState (_NEW_PROGRAM),
+          but without it doom3 locks up at always the same places. Why? */
+       /* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before
+          accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... */
+        r200UpdateTextureState( ctx );
+        /* if we call r200UpdateTextureState we need the code below because we are calling it with
+           non-current derived enabled values which may revert the state atoms for frag progs even when
+           they already got disabled... ugh
+           Should really figure out why we need to call r200UpdateTextureState in the first place */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragmentShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+#endif
+      }
+      else {
+        /* picked up later */
+      }
+      /* call functions which change hw state based on ARB_vp enabled or not. */
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL );
+      break;
+
+   case GL_VERTEX_PROGRAM_POINT_SIZE_ARB:
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      break;
+
+   case GL_FRAGMENT_SHADER_ATI:
+      if ( !state ) {
+        /* restore normal tex env colors and make sure tex env combine will get updated
+           mark env atoms dirty (as their data was overwritten by afs even
+           if they didn't change) and restore tex coord routing */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragmentShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+      }
+      else {
+        /* need to mark this dirty as pix/tf atoms have overwritten the data
+           even if the data in the atoms didn't change */
+        R200_STATECHANGE( rmesa, atf );
+        R200_STATECHANGE( rmesa, afs[1] );
+        /* everything else picked up in r200UpdateTextureState hopefully */
+      }
+      break;
    default:
       return;
    }
@@ -1991,7 +2386,7 @@ static void update_texturematrix( GLcontext *ctx )
    rmesa->TexMatEnabled = 0;
    rmesa->TexMatCompSel = 0;
 
-   for (unit = 0 ; unit < 2; unit++) {
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
       if (!ctx->Texture.Unit[unit]._ReallyEnabled) 
         continue;
 
@@ -2005,9 +2400,9 @@ static void update_texturematrix( GLcontext *ctx )
            /* Need to preconcatenate any active texgen 
             * obj/eyeplane matrices:
             */
-           _math_matrix_mul_matrix( &rmesa->tmpmat, 
-                                    &rmesa->TexGenMatrix[unit],
-                                    ctx->TextureMatrixStack[unit].Top );
+           _math_matrix_mul_matrix( &rmesa->tmpmat,
+                                    ctx->TextureMatrixStack[unit].Top, 
+                                    &rmesa->TexGenMatrix[unit] );
            upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
         } 
         else {
@@ -2022,11 +2417,9 @@ static void update_texturematrix( GLcontext *ctx )
    }
 
    tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
-   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] ||
-       rmesa->TexGenInputs != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1]) {
+   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]) {
       R200_STATECHANGE(rmesa, tcg);
       rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
-      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = rmesa->TexGenInputs;
    }
 
    compsel &= ~R200_OUTPUT_TEX_MASK;
@@ -2039,16 +2432,64 @@ static void update_texturematrix( GLcontext *ctx )
 
 
 
+/**
+ * Tell the card where to render (offset, pitch).
+ * Affected by glDrawBuffer, etc.
+ */
+void
+r200UpdateDrawBuffer(GLcontext *ctx)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   driRenderbuffer *drb;
+
+   if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+      /* draw to front */
+      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+   }
+   else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+      /* draw to back */
+      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+   }
+   else {
+      /* drawing to multiple buffers, or none */
+      return;
+   }
+
+   assert(drb);
+   assert(drb->flippedPitch);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   /* Note: we use the (possibly) page-flipped values */
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+     = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+       & R200_COLOROFFSET_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+   if (rmesa->sarea->tiling_enabled) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+   }
+}
+
+
+
 void r200ValidateState( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLuint new_state = rmesa->NewGLState;
 
-   if (new_state & _NEW_TEXTURE) {
+   if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+     r200UpdateDrawBuffer(ctx);
+   }
+
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
       r200UpdateTextureState( ctx );
       new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+      r200UpdateLocalViewer( ctx );
    }
 
+/* FIXME: don't really need most of these when vertex progs are enabled */
+
    /* Need an event driven matrix update?
     */
    if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
@@ -2066,7 +2507,7 @@ void r200ValidateState( GLcontext *ctx )
     */
    if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
       update_texturematrix( ctx );
-   }      
+   }
 
    if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
       update_light( ctx );
@@ -2079,6 +2520,16 @@ void r200ValidateState( GLcontext *ctx )
         r200UpdateClipPlanes( ctx );
    }
 
+   if (new_state & (_NEW_PROGRAM|
+   /* need to test for pretty much anything due to possible parameter bindings */
+       _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
+       _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
+       _NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) {
+      if (ctx->VertexProgram._Enabled) {
+        r200SetupVertexProg( ctx );
+      }
+      else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
+   }
 
    rmesa->NewGLState = 0;
 }
@@ -2088,23 +2539,24 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
 {
    _swrast_InvalidateState( ctx, new_state );
    _swsetup_InvalidateState( ctx, new_state );
-   _ac_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
    _tnl_InvalidateState( ctx, new_state );
    _ae_invalidate_state( ctx, new_state );
    R200_CONTEXT(ctx)->NewGLState |= new_state;
-   r200VtxfmtInvalidate( ctx );
 }
 
 /* A hack.  The r200 can actually cope just fine with materials
  * between begin/ends, so fix this.
+ * Should map to inputs just like the generic vertex arrays for vertex progs.
+ * In theory there could still be too many and we'd still need a fallback.
  */
 static GLboolean check_material( GLcontext *ctx )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLint i;
 
-   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; 
-       i < _TNL_ATTRIB_MAT_BACK_INDEXES; 
+   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
+       i < _TNL_ATTRIB_MAT_BACK_INDEXES;
        i++)
       if (tnl->vb.AttribPtr[i] &&
          tnl->vb.AttribPtr[i]->stride)
@@ -2112,7 +2564,6 @@ static GLboolean check_material( GLcontext *ctx )
 
    return GL_FALSE;
 }
-      
 
 static void r200WrapRunPipeline( GLcontext *ctx )
 {
@@ -2127,7 +2578,7 @@ static void r200WrapRunPipeline( GLcontext *ctx )
    if (rmesa->NewGLState)
       r200ValidateState( ctx );
 
-   has_material = (ctx->Light.Enabled && check_material( ctx ));
+   has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
 
    if (has_material) {
       TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
@@ -2145,57 +2596,55 @@ static void r200WrapRunPipeline( GLcontext *ctx )
 
 /* Initialize the driver's state functions.
  */
-void r200InitStateFuncs( GLcontext *ctx )
+void r200InitStateFuncs( struct dd_function_table *functions )
 {
-   ctx->Driver.UpdateState             = r200InvalidateState;
-   ctx->Driver.LightingSpaceChange      = r200LightingSpaceChange;
-
-   ctx->Driver.DrawBuffer              = r200DrawBuffer;
-   ctx->Driver.ReadBuffer              = r200ReadBuffer;
-
-   ctx->Driver.AlphaFunc               = r200AlphaFunc;
-   ctx->Driver.BlendEquation           = r200BlendEquation;
-   ctx->Driver.BlendFunc               = r200BlendFunc;
-   ctx->Driver.BlendFuncSeparate       = r200BlendFuncSeparate;
-   ctx->Driver.ClearColor              = r200ClearColor;
-   ctx->Driver.ClearDepth              = NULL;
-   ctx->Driver.ClearIndex              = NULL;
-   ctx->Driver.ClearStencil            = r200ClearStencil;
-   ctx->Driver.ClipPlane               = r200ClipPlane;
-   ctx->Driver.ColorMask               = r200ColorMask;
-   ctx->Driver.CullFace                        = r200CullFace;
-   ctx->Driver.DepthFunc               = r200DepthFunc;
-   ctx->Driver.DepthMask               = r200DepthMask;
-   ctx->Driver.DepthRange              = r200DepthRange;
-   ctx->Driver.Enable                  = r200Enable;
-   ctx->Driver.Fogfv                   = r200Fogfv;
-   ctx->Driver.FrontFace               = r200FrontFace;
-   ctx->Driver.Hint                    = NULL;
-   ctx->Driver.IndexMask               = NULL;
-   ctx->Driver.LightModelfv            = r200LightModelfv;
-   ctx->Driver.Lightfv                 = r200Lightfv;
-   ctx->Driver.LineStipple              = r200LineStipple;
-   ctx->Driver.LineWidth                = r200LineWidth;
-   ctx->Driver.LogicOpcode             = r200LogicOpCode;
-   ctx->Driver.PolygonMode             = r200PolygonMode;
-   ctx->Driver.PolygonOffset           = r200PolygonOffset;
-   ctx->Driver.PolygonStipple          = r200PolygonStipple;
-   ctx->Driver.PointSize                = r200PointSize;
-   ctx->Driver.RenderMode              = r200RenderMode;
-   ctx->Driver.Scissor                 = r200Scissor;
-   ctx->Driver.ShadeModel              = r200ShadeModel;
-   ctx->Driver.StencilFunc             = r200StencilFunc;
-   ctx->Driver.StencilMask             = r200StencilMask;
-   ctx->Driver.StencilOp               = r200StencilOp;
-   ctx->Driver.Viewport                        = r200Viewport;
-
-   /* Swrast hooks for imaging extensions:
-    */
-   ctx->Driver.CopyColorTable          = _swrast_CopyColorTable;
-   ctx->Driver.CopyColorSubTable       = _swrast_CopyColorSubTable;
-   ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
-   ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+   functions->UpdateState              = r200InvalidateState; /* fans new_state out to swrast/swsetup/vbo/tnl/ae and ORs it into NewGLState */
+   functions->LightingSpaceChange      = r200LightingSpaceChange;
+
+   functions->DrawBuffer               = r200DrawBuffer;
+   functions->ReadBuffer               = r200ReadBuffer;
+
+   functions->AlphaFunc                        = r200AlphaFunc;
+   functions->BlendColor               = r200BlendColor;
+   functions->BlendEquationSeparate    = r200BlendEquationSeparate;
+   functions->BlendFuncSeparate                = r200BlendFuncSeparate;
+   functions->ClearColor               = r200ClearColor;
+   functions->ClearDepth               = r200ClearDepth;
+   functions->ClearIndex               = NULL; /* no driver handler installed */
+   functions->ClearStencil             = r200ClearStencil;
+   functions->ClipPlane                        = r200ClipPlane;
+   functions->ColorMask                        = r200ColorMask;
+   functions->CullFace                 = r200CullFace;
+   functions->DepthFunc                        = r200DepthFunc;
+   functions->DepthMask                        = r200DepthMask;
+   functions->DepthRange               = r200DepthRange;
+   functions->Enable                   = r200Enable;
+   functions->Fogfv                    = r200Fogfv;
+   functions->FrontFace                        = r200FrontFace;
+   functions->Hint                     = NULL; /* no driver handler installed */
+   functions->IndexMask                        = NULL; /* no driver handler installed */
+   functions->LightModelfv             = r200LightModelfv;
+   functions->Lightfv                  = r200Lightfv;
+   functions->LineStipple              = r200LineStipple;
+   functions->LineWidth                        = r200LineWidth;
+   functions->LogicOpcode              = r200LogicOpCode;
+   functions->PolygonMode              = r200PolygonMode;
+   functions->PolygonOffset            = r200PolygonOffset;
+   functions->PolygonStipple           = r200PolygonStipple;
+   functions->PointParameterfv         = r200PointParameter;
+   functions->PointSize                        = r200PointSize;
+   functions->RenderMode               = r200RenderMode;
+   functions->Scissor                  = r200Scissor;
+   functions->ShadeModel               = r200ShadeModel;
+   functions->StencilFuncSeparate      = r200StencilFuncSeparate;
+   functions->StencilMaskSeparate      = r200StencilMaskSeparate;
+   functions->StencilOpSeparate                = r200StencilOpSeparate;
+   functions->Viewport                 = r200Viewport;
+}
 
+/* Install the r200 hooks in the TNL module: material-change notification and the pipeline-run wrapper. */
+void r200InitTnlFuncs( GLcontext *ctx )
+{
    TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial;
    TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline;
 }