fix up radeon span functions using latest r200 code from Brian,
[mesa.git] / src / mesa / drivers / dri / r200 / r200_state.c
index a755f5be684bd258dd4c52641d8e7d76763f8a3f..79e1093822498ac475102cbaca5c5667f9f9b428 100644 (file)
@@ -47,7 +47,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/t_pipeline.h"
 #include "swrast_setup/swrast_setup.h"
 
-
 #include "r200_context.h"
 #include "r200_ioctl.h"
 #include "r200_state.h"
@@ -56,6 +55,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r200_swtcl.h"
 #include "r200_vtxfmt.h"
 
+#include "drirenderbuffer.h"
+
 
 /* =============================================================
  * Alpha blending
@@ -104,156 +105,234 @@ static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
    rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
 }
 
-static void r200BlendEquationSeparate( GLcontext *ctx, 
-                                      GLenum modeRGB, GLenum modeA )
+static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
 {
+   GLubyte color[4];   /* cf[] converted to 8-bit channels, in the same order, for r200PackColor() */
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~R200_COMB_FCN_MASK;
-
-   assert( modeRGB == modeA );
-
-   switch ( modeRGB ) {
-   case GL_FUNC_ADD:
-   case GL_LOGIC_OP:
-      b |= R200_COMB_FCN_ADD_CLAMP;
-      break;
-
-   case GL_FUNC_SUBTRACT:
-      b |= R200_COMB_FCN_SUB_CLAMP;
-      break;
-
-   case GL_FUNC_REVERSE_SUBTRACT:
-      b |= R200_COMB_FCN_RSUB_CLAMP;
-      break;
-
-   case GL_MIN:
-      b |= R200_COMB_FCN_MIN;
-      break;
-
-   case GL_MAX:
-      b |= R200_COMB_FCN_MAX;
-      break;
-
-   default:
-      break;
-   }
-
    R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
-   if ( ctx->Color._LogicOpEnabled ) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-   } else {
-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-   }
+   CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);   /* one float -> GLubyte conversion per channel */
+   CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+   CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+   CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+   if (rmesa->r200Screen->drmSupportsBlendColor)   /* the register is written only when the screen reports blend-color support */
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
 }
 
-static void r200BlendFuncSeparate( GLcontext *ctx,
-                                    GLenum sfactorRGB, GLenum dfactorRGB,
-                                    GLenum sfactorA, GLenum dfactorA )
+/**
+ * Calculate the hardware blend factor setting.  This same function is used
+ * for source and destination of both alpha and RGB.
+ *
+ * \returns
+ * The hardware register value for the specified blend factor.  This value
+ * will need to be shifted into the correct position for either source or
+ * destination factor.
+ *
+ * \todo
+ * Since the two cases where source and destination are handled differently
+ * are essentially error cases, they should never happen.  Determine if these
+ * cases can be removed.
+ */
+static int blend_factor( GLenum factor, GLboolean is_src )
 {
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
-      ~(R200_SRC_BLEND_MASK | R200_DST_BLEND_MASK);
+   int func;   /* unshifted R200_BLEND_* value handed back to the caller */
 
-   switch ( ctx->Color.BlendSrcRGB ) {
+   switch ( factor ) {
    case GL_ZERO:
-      b |= R200_SRC_BLEND_GL_ZERO;
+      func = R200_BLEND_GL_ZERO;
       break;
    case GL_ONE:
-      b |= R200_SRC_BLEND_GL_ONE;
+      func = R200_BLEND_GL_ONE;
       break;
    case GL_DST_COLOR:
-      b |= R200_SRC_BLEND_GL_DST_COLOR;
+      func = R200_BLEND_GL_DST_COLOR;
       break;
    case GL_ONE_MINUS_DST_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_DST_COLOR;
       break;
    case GL_SRC_COLOR:
-      b |= R200_SRC_BLEND_GL_SRC_COLOR;
+      func = R200_BLEND_GL_SRC_COLOR;
       break;
    case GL_ONE_MINUS_SRC_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR;
       break;
    case GL_SRC_ALPHA:
-      b |= R200_SRC_BLEND_GL_SRC_ALPHA;
+      func = R200_BLEND_GL_SRC_ALPHA;
       break;
    case GL_ONE_MINUS_SRC_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA;
       break;
    case GL_DST_ALPHA:
-      b |= R200_SRC_BLEND_GL_DST_ALPHA;
+      func = R200_BLEND_GL_DST_ALPHA;
       break;
    case GL_ONE_MINUS_DST_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA;
       break;
    case GL_SRC_ALPHA_SATURATE:
-      b |= R200_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
+      func = (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE : R200_BLEND_GL_ZERO;   /* as a destination factor this falls back to ZERO */
       break;
    case GL_CONSTANT_COLOR:
-      b |= R200_SRC_BLEND_GL_CONST_COLOR;
+      func = R200_BLEND_GL_CONST_COLOR;
       break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_CONST_COLOR;
+      func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR;
       break;
    case GL_CONSTANT_ALPHA:
-      b |= R200_SRC_BLEND_GL_CONST_ALPHA;
+      func = R200_BLEND_GL_CONST_ALPHA;
       break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
-      b |= R200_SRC_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+      func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA;
       break;
    default:
-      break;
+      func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO;   /* unrecognised enum: source -> ONE, destination -> ZERO */
    }
+   return func;
+}
 
-   switch ( ctx->Color.BlendDstRGB ) {
-   case GL_ZERO:
-      b |= R200_DST_BLEND_GL_ZERO;
-      break;
-   case GL_ONE:
-      b |= R200_DST_BLEND_GL_ONE;
-      break;
-   case GL_SRC_COLOR:
-      b |= R200_DST_BLEND_GL_SRC_COLOR;
-      break;
-   case GL_ONE_MINUS_SRC_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
-      break;
-   case GL_SRC_ALPHA:
-      b |= R200_DST_BLEND_GL_SRC_ALPHA;
+/**
+ * Sets both the blend equation and the blend function.
+ * This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ * Also, make sure that blend function and blend equation are set to their default
+ * value if color blending is not enabled, since at least blend equations GL_MIN
+ * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for
+ * unknown reasons.
+ */
+static void r200_set_blend_state( GLcontext * ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &
+      ~(R200_ROP_ENABLE | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE);   /* current CNTL with all three enables cleared */
+
+   int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |   /* defaults: src ONE, dst ZERO, ADD_CLAMP */
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqn = R200_COMB_FCN_ADD_CLAMP;
+   int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqnA = R200_COMB_FCN_ADD_CLAMP;
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   if (rmesa->r200Screen->drmSupportsBlendColor) {   /* this path programs ABLENDCNTL/CBLENDCNTL instead of BLENDCNTL */
+      if (ctx->Color._LogicOpEnabled) {   /* logic op takes priority: ROP on, blend registers get the defaults */
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return;
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE;
+      }
+      else {   /* neither logic op nor blending: all enables off, defaults programmed */
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return;
+      }
+   }
+   else {   /* same three cases, but with the single BLENDCNTL register */
+      if (ctx->Color._LogicOpEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE;
+      }
+      else {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      }
+   }
+
+   func = (blend_factor( ctx->Color.BlendSrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |   /* only reached when blending is enabled */
+      (blend_factor( ctx->Color.BlendDstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT);
+
+   switch(ctx->Color.BlendEquationRGB) {
+   case GL_FUNC_ADD:
+      eqn = R200_COMB_FCN_ADD_CLAMP;
       break;
-   case GL_ONE_MINUS_SRC_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+
+   case GL_FUNC_SUBTRACT:
+      eqn = R200_COMB_FCN_SUB_CLAMP;
       break;
-   case GL_DST_COLOR:
-      b |= R200_DST_BLEND_GL_DST_COLOR;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqn = R200_COMB_FCN_RSUB_CLAMP;
       break;
-   case GL_ONE_MINUS_DST_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+
+   case GL_MIN:
+      eqn = R200_COMB_FCN_MIN;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |   /* MIN/MAX: both factors are forced to ONE */
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
-   case GL_DST_ALPHA:
-      b |= R200_DST_BLEND_GL_DST_ALPHA;
+
+   case GL_MAX:
+      eqn = R200_COMB_FCN_MAX;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
-   case GL_ONE_MINUS_DST_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
+
+   default:
+      fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB );
+      return;   /* NOTE(review): CNTL was already updated above; the blend registers keep their old values */
+   }
+
+   if (!rmesa->r200Screen->drmSupportsBlendColor) {   /* single-register path: the alpha equation/factors below are not used */
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+      return;
+   }
+
+   funcA = (blend_factor( ctx->Color.BlendSrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
+      (blend_factor( ctx->Color.BlendDstA, GL_FALSE ) << R200_DST_BLEND_SHIFT);
+
+   switch(ctx->Color.BlendEquationA) {
+   case GL_FUNC_ADD:
+      eqnA = R200_COMB_FCN_ADD_CLAMP;
       break;
-   case GL_CONSTANT_COLOR:
-      b |= R200_DST_BLEND_GL_CONST_COLOR;
+
+   case GL_FUNC_SUBTRACT:
+      eqnA = R200_COMB_FCN_SUB_CLAMP;
       break;
-   case GL_ONE_MINUS_CONSTANT_COLOR:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_CONST_COLOR;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnA = R200_COMB_FCN_RSUB_CLAMP;
       break;
-   case GL_CONSTANT_ALPHA:
-      b |= R200_DST_BLEND_GL_CONST_ALPHA;
+
+   case GL_MIN:
+      eqnA = R200_COMB_FCN_MIN;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |   /* same ONE/ONE override as the RGB switch */
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
-   case GL_ONE_MINUS_CONSTANT_ALPHA:
-      b |= R200_DST_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+
+   case GL_MAX:
+      eqnA = R200_COMB_FCN_MAX;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
       break;
+
    default:
-      break;
+      fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationA );
+      return;   /* NOTE(review): as above, CNTL is already written and neither blend register is updated */
    }
 
-   R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqnA | funcA;
+   rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+
+}
+
+static void
+r200BlendEquationSeparate( GLcontext *ctx, GLenum modeRGB, GLenum modeA )
+{
+   r200_set_blend_state( ctx );   /* modeRGB/modeA are not used here; the equations are re-read from ctx->Color */
+}
+
+static void
+r200BlendFuncSeparate( GLcontext *ctx, GLenum sfactorRGB, GLenum dfactorRGB,
+                       GLenum sfactorA, GLenum dfactorA )
+{
+   r200_set_blend_state( ctx );   /* the factor arguments are not used here; they are re-read from ctx->Color */
 }
 
 
@@ -296,6 +375,21 @@ static void r200DepthFunc( GLcontext *ctx, GLenum func )
    }
 }
 
+static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+{
+   /* Cache the depth clear value, scaled to the integer range of the current Z format. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint zfmt = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+                       R200_DEPTH_FORMAT_MASK;
+
+   if ( zfmt == R200_DEPTH_FORMAT_16BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x0000ffff;
+   }
+   else if ( zfmt == R200_DEPTH_FORMAT_24BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x00ffffff;
+   }
+   /* any other depth format: the cached clear value is left untouched */
+}
 
 static void r200DepthMask( GLcontext *ctx, GLboolean flag )
 {
@@ -390,10 +484,32 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
       break;
-   case GL_FOG_COORDINATE_SOURCE_EXT: 
-      /* What to do?
-       */
+   case GL_FOG_COORD_SRC: {
+      GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0];
+      GLuint fog   = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR];
+
+      fog &= ~R200_FOG_USE_MASK;
+      if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD ) {
+        fog   |= R200_FOG_USE_VTX_FOG;
+        out_0 |= R200_VTX_DISCRETE_FOG;
+      }
+      else {
+        fog   |=  R200_FOG_USE_SPEC_ALPHA;
+        out_0 &= ~R200_VTX_DISCRETE_FOG;
+      }
+
+      if ( fog != rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] ) {
+        R200_STATECHANGE( rmesa, ctx );
+        rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog;
+      }
+
+      if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
+        R200_STATECHANGE( rmesa, vtx );
+        rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;     
+      }
+
       break;
+   }
    default:
       return;
    }
@@ -568,7 +684,11 @@ static void r200FrontFace( GLcontext *ctx, GLenum mode )
  */
 static void r200PointSize( GLcontext *ctx, GLfloat size )
 {
-   if (0) fprintf(stderr, "%s: %f\n", __FUNCTION__, size );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   R200_STATECHANGE( rmesa, cst );
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff;
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= ((GLuint)(ctx->Point.Size * 16.0));
 }
 
 /* =============================================================
@@ -709,7 +829,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
 static void r200UpdateSpecular( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
 
    R200_STATECHANGE( rmesa, tcl );
    R200_STATECHANGE( rmesa, vtx );
@@ -1197,8 +1317,25 @@ static void r200Lightfv( GLcontext *ctx, GLenum light,
    }
 }
 
-                 
-
+static void r200UpdateLocalViewer ( GLcontext *ctx )
+{
+   /* R200_LOCAL_VIEWER apparently has to be set for the texgen modes
+    * GL_SPHERE_MAP, GL_NORMAL_MAP and GL_REFLECTION_MAP (fglrx sets it for
+    * exactly these modes), so it is turned on for them even when the
+    * application did not ask for GL_LIGHT_MODEL_LOCAL_VIEWER.  With lighting
+    * enabled this may make specular highlights wrong in some cases; it seems
+    * rare and the effect subtle.  A complete fix may need a TCL fallback,
+    * but it is unclear how to identify the affected cases or whether fglrx
+    * does anything special there.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, tcl );
+   if (!ctx->Light.Model.LocalViewer &&
+       !(ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS))
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
+}
 
 static void r200LightModelfv( GLcontext *ctx, GLenum pname,
                                const GLfloat *param )
@@ -1211,11 +1348,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
         break;
 
       case GL_LIGHT_MODEL_LOCAL_VIEWER:
-        R200_STATECHANGE( rmesa, tcl );
-        if (ctx->Light.Model.LocalViewer)
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
-        else
-           rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+        r200UpdateLocalViewer( ctx );
          break;
 
       case GL_LIGHT_MODEL_TWO_SIDE:
@@ -1313,8 +1446,9 @@ static void r200UpdateClipPlanes( GLcontext *ctx )
  * Stencil
  */
 
-static void r200StencilFunc( GLcontext *ctx, GLenum func,
-                              GLint ref, GLuint mask )
+static void
+r200StencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                         GLint ref, GLuint mask )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLuint refmask = ((ctx->Stencil.Ref[0] << R200_STENCIL_REF_SHIFT) |
@@ -1357,7 +1491,8 @@ static void r200StencilFunc( GLcontext *ctx, GLenum func,
    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
 }
 
-static void r200StencilMask( GLcontext *ctx, GLuint mask )
+static void
+r200StencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
@@ -1367,8 +1502,9 @@ static void r200StencilMask( GLcontext *ctx, GLuint mask )
       (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
 }
 
-static void r200StencilOp( GLcontext *ctx, GLenum fail,
-                            GLenum zfail, GLenum zpass )
+static void
+r200StencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                       GLenum zfail, GLenum zpass )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
@@ -1533,8 +1669,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
    const GLfloat *v = ctx->Viewport._WindowMap.m;
 
-   GLfloat tx = v[MAT_TX] + xoffset;
-   GLfloat ty = (- v[MAT_TY]) + yoffset;
+   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
 
    if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
        rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
@@ -1671,14 +1807,15 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
    R200_FIREVERTICES(rmesa);   /* don't pipeline cliprect changes */
 
    /*
-    * _DrawDestMask is easier to cope with than <mode>.
+    * _ColorDrawBufferMask is easier to cope with than <mode>.
+    * Check for software fallback, update cliprects.
     */
-   switch ( ctx->Color._DrawDestMask ) {
-   case FRONT_LEFT_BIT:
+   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
+   case BUFFER_BIT_FRONT_LEFT:
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
       r200SetCliprects( rmesa, GL_FRONT_LEFT );
       break;
-   case BACK_LEFT_BIT:
+   case BUFFER_BIT_BACK_LEFT:
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
       r200SetCliprects( rmesa, GL_BACK_LEFT );
       break;
@@ -1688,16 +1825,9 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
       return;
    }
 
-   /* We want to update the s/w rast state too so that r200SetBuffer()
-    * gets called.
+   /* We'll set the drawing engine's offset/pitch parameters later
+    * when we update other state.
     */
-   _swrast_DrawBuffer(ctx, mode);
-
-   R200_STATECHANGE( rmesa, ctx );
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
-                                              rmesa->r200Screen->fbLocation)
-                                             & R200_COLOROFFSET_MASK);
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
 }
 
 
@@ -1738,17 +1868,8 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       break;
 
    case GL_BLEND:
-      R200_STATECHANGE( rmesa, ctx );
-      if (state) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ALPHA_BLEND_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ALPHA_BLEND_ENABLE;
-      }
-      if ( ctx->Color._LogicOpEnabled ) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-      }
+   case GL_COLOR_LOGIC_OP:
+      r200_set_blend_state( ctx );
       break;
 
    case GL_CLIP_PLANE0:
@@ -1801,7 +1922,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       R200_STATECHANGE(rmesa, ctx );
       if ( state ) {
         rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
-        r200Fogfv( ctx, GL_FOG_MODE, 0 );
+        r200Fogfv( ctx, GL_FOG_MODE, NULL );
       } else {
         rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
         R200_STATECHANGE(rmesa, tcl);
@@ -1864,15 +1985,6 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       }
       break;
 
-   case GL_COLOR_LOGIC_OP:
-      R200_STATECHANGE( rmesa, ctx );
-      if ( ctx->Color._LogicOpEnabled ) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
-      } else {
-        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
-      }
-      break;
-      
    case GL_NORMALIZE:
       R200_STATECHANGE( rmesa, tcl );
       if ( state ) {
@@ -1882,10 +1994,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       }
       break;
 
-      /* Pointsize registers on r200 don't seem to do anything.  Maybe
-       * have to pass pointsizes as vertex parameters?  In any case,
-       * setting pointmin == pointsizemax == 1.0, and doing nothing
-       * for aa is enough to satisfy conform.
+      /* Pointsize registers on r200 only work for point sprites, and point smooth
+       * doesn't work for point sprites (and isn't needed for 1.0 sized aa points).
+       * In any case, setting pointmin == pointsizemax == 1.0 for aa points
+       * is enough to satisfy conform.
        */
    case GL_POINT_SMOOTH:
       break;
@@ -1983,6 +2095,38 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
       r200UpdateSpecular ( ctx );
       break;
 
+   case GL_VERTEX_PROGRAM_ARB:
+      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state);
+      break;
+
+   case GL_FRAGMENT_SHADER_ATI:
+      if ( !state ) {
+        /* restore normal tex env colors and make sure tex env combine will get updated
+           mark env atoms dirty (as their data was overwritten by afs even
+           if they didn't change) and restore tex coord routing */
+        GLuint unit;
+        for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+               ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+           /* need to guard this with drmSupportsFragmentShader? Should never get here if
+              we don't announce ATI_fs, right? */
+           rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+           R200_STATECHANGE( rmesa, pix[unit] );
+           R200_STATECHANGE( rmesa, tex[unit] );
+         }
+        rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+        R200_STATECHANGE( rmesa, cst );
+        R200_STATECHANGE( rmesa, tf );
+      }
+      else {
+        /* need to mark this dirty as pix/tf atoms have overwritten the data
+           even if the data in the atoms didn't change */
+        R200_STATECHANGE( rmesa, atf );
+        R200_STATECHANGE( rmesa, afs[1] );
+        /* everything else picked up in r200UpdateTextureState hopefully */
+      }
+      break;
    default:
       return;
    }
@@ -2060,7 +2204,7 @@ static void update_texturematrix( GLcontext *ctx )
    rmesa->TexMatEnabled = 0;
    rmesa->TexMatCompSel = 0;
 
-   for (unit = 0 ; unit < 2; unit++) {
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
       if (!ctx->Texture.Unit[unit]._ReallyEnabled) 
         continue;
 
@@ -2074,9 +2218,9 @@ static void update_texturematrix( GLcontext *ctx )
            /* Need to preconcatenate any active texgen 
             * obj/eyeplane matrices:
             */
-           _math_matrix_mul_matrix( &rmesa->tmpmat, 
-                                    &rmesa->TexGenMatrix[unit],
-                                    ctx->TextureMatrixStack[unit].Top );
+           _math_matrix_mul_matrix( &rmesa->tmpmat,
+                                    ctx->TextureMatrixStack[unit].Top, 
+                                    &rmesa->TexGenMatrix[unit] );
            upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
         } 
         else {
@@ -2091,11 +2235,9 @@ static void update_texturematrix( GLcontext *ctx )
    }
 
    tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
-   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] ||
-       rmesa->TexGenInputs != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1]) {
+   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]) {
       R200_STATECHANGE(rmesa, tcg);
       rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
-      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = rmesa->TexGenInputs;
    }
 
    compsel &= ~R200_OUTPUT_TEX_MASK;
@@ -2108,14 +2250,60 @@ static void update_texturematrix( GLcontext *ctx )
 
 
 
+/**
+ * Program the colour buffer offset and pitch registers so the card renders
+ * into the buffer selected for drawing.
+ * Affected by glDrawBuffer, etc.
+ */
+void
+r200UpdateDrawBuffer(GLcontext *ctx)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   driRenderbuffer *drb;
+
+   switch (fb->_ColorDrawBufferMask[0]) {
+   case BUFFER_BIT_FRONT_LEFT:
+      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+      break;
+   case BUFFER_BIT_BACK_LEFT:
+      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+      break;
+   default:
+      /* drawing to multiple buffers, or none: leave the registers alone */
+      return;
+   }
+
+   assert(drb);
+   assert(drb->flippedPitch);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   /* Note: we use the (possibly) page-flipped offset and pitch */
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] =
+      (drb->flippedOffset + rmesa->r200Screen->fbLocation) &
+      R200_COLOROFFSET_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+   if (rmesa->sarea->tiling_enabled) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+   }
+}
+
+
+
 void r200ValidateState( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLuint new_state = rmesa->NewGLState;
 
-   if (new_state & _NEW_TEXTURE) {
+   if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+     r200UpdateDrawBuffer(ctx);
+   }
+
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
       r200UpdateTextureState( ctx );
       new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+      r200UpdateLocalViewer( ctx );
    }
 
    /* Need an event driven matrix update?
@@ -2135,7 +2323,7 @@ void r200ValidateState( GLcontext *ctx )
     */
    if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
       update_texturematrix( ctx );
-   }      
+   }
 
    if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
       update_light( ctx );
@@ -2222,10 +2410,11 @@ void r200InitStateFuncs( struct dd_function_table *functions )
    functions->ReadBuffer               = r200ReadBuffer;
 
    functions->AlphaFunc                        = r200AlphaFunc;
+   functions->BlendColor               = r200BlendColor;
    functions->BlendEquationSeparate    = r200BlendEquationSeparate;
    functions->BlendFuncSeparate                = r200BlendFuncSeparate;
    functions->ClearColor               = r200ClearColor;
-   functions->ClearDepth               = NULL;
+   functions->ClearDepth               = r200ClearDepth;
    functions->ClearIndex               = NULL;
    functions->ClearStencil             = r200ClearStencil;
    functions->ClipPlane                        = r200ClipPlane;
@@ -2251,9 +2440,9 @@ void r200InitStateFuncs( struct dd_function_table *functions )
    functions->RenderMode               = r200RenderMode;
    functions->Scissor                  = r200Scissor;
    functions->ShadeModel               = r200ShadeModel;
-   functions->StencilFunc              = r200StencilFunc;
-   functions->StencilMask              = r200StencilMask;
-   functions->StencilOp                        = r200StencilOp;
+   functions->StencilFuncSeparate      = r200StencilFuncSeparate;
+   functions->StencilMaskSeparate      = r200StencilMaskSeparate;
+   functions->StencilOpSeparate                = r200StencilOpSeparate;
    functions->Viewport                 = r200Viewport;
 
    /* Swrast hooks for imaging extensions: