merge from master
[mesa.git] / src / mesa / drivers / dri / r300 / r300_state.c
index f998eb8cad65128ba605154f3c1582fa961ad9c3..fff11653de1451faa6f34961c15c17ef148a6840 100644 (file)
@@ -46,7 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "api_arrayelt.h"
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
-#include "array_cache/acache.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
 #include "tnl/tnl.h"
 #include "texformat.h"
 
@@ -58,80 +60,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_reg.h"
 #include "r300_program.h"
 #include "r300_emit.h"
-#include "r300_fixed_pipelines.h"
+#include "r300_fragprog.h"
 #include "r300_tex.h"
 #include "r300_maos.h"
-#include "r300_texprog.h"
 
-static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
-{
-       r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       int pp_misc = rmesa->hw.at.cmd[R300_AT_ALPHA_TEST];
-       GLubyte refByte;
-
-       CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
-       
-       R300_STATECHANGE(rmesa, at);
-
-       pp_misc &= ~(R300_ALPHA_TEST_OP_MASK | R300_REF_ALPHA_MASK);
-       pp_misc |= (refByte & R300_REF_ALPHA_MASK);
-
-       switch (func) {
-       case GL_NEVER:
-               pp_misc |= R300_ALPHA_TEST_FAIL;
-               break;
-       case GL_LESS:
-               pp_misc |= R300_ALPHA_TEST_LESS;
-               break;
-       case GL_EQUAL:
-               pp_misc |= R300_ALPHA_TEST_EQUAL;
-               break;
-       case GL_LEQUAL:
-               pp_misc |= R300_ALPHA_TEST_LEQUAL;
-               break;
-       case GL_GREATER:
-               pp_misc |= R300_ALPHA_TEST_GREATER;
-               break;
-       case GL_NOTEQUAL:
-               pp_misc |= R300_ALPHA_TEST_NEQUAL;
-               break;
-       case GL_GEQUAL:
-               pp_misc |= R300_ALPHA_TEST_GEQUAL;
-               break;
-       case GL_ALWAYS:
-               pp_misc |= R300_ALPHA_TEST_PASS;
-               //pp_misc &= ~R300_ALPHA_TEST_ENABLE;
-               break;
-       }
-
-       rmesa->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
-}
+#include "drirenderbuffer.h"
 
 static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
 {
        GLubyte color[4];
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
-       R300_STATECHANGE(rmesa, unk4E10);
+       R300_STATECHANGE(rmesa, blend_color);
 
-       /* Ordering might be wrong */
        CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
        CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
        CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
        CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
 
-       rmesa->hw.unk4E10.cmd[1]=r300PackColor(4, color[0], color[1], color[2], color[3]);
-       //fprintf(stderr, "%s:%s is not implemented yet. Fixme !\n", __FILE__, __FUNCTION__);
-#if 0
-       R200_STATECHANGE(rmesa, ctx);
-       CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
-       CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
-       CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
-       CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
-       if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-               rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] =
-                   radeonPackColor(4, color[0], color[1], color[2], color[3]);
-#endif
+       rmesa->hw.blend_color.cmd[1] = r300PackColor(4, color[3], color[0],
+                                                color[1], color[2]);
 }
 
 /**
@@ -154,54 +102,54 @@ static int blend_factor(GLenum factor, GLboolean is_src)
 
        switch (factor) {
        case GL_ZERO:
-               func = R200_BLEND_GL_ZERO;
+               func = R300_BLEND_GL_ZERO;
                break;
        case GL_ONE:
-               func = R200_BLEND_GL_ONE;
+               func = R300_BLEND_GL_ONE;
                break;
        case GL_DST_COLOR:
-               func = R200_BLEND_GL_DST_COLOR;
+               func = R300_BLEND_GL_DST_COLOR;
                break;
        case GL_ONE_MINUS_DST_COLOR:
-               func = R200_BLEND_GL_ONE_MINUS_DST_COLOR;
+               func = R300_BLEND_GL_ONE_MINUS_DST_COLOR;
                break;
        case GL_SRC_COLOR:
-               func = R200_BLEND_GL_SRC_COLOR;
+               func = R300_BLEND_GL_SRC_COLOR;
                break;
        case GL_ONE_MINUS_SRC_COLOR:
-               func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR;
+               func = R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
                break;
        case GL_SRC_ALPHA:
-               func = R200_BLEND_GL_SRC_ALPHA;
+               func = R300_BLEND_GL_SRC_ALPHA;
                break;
        case GL_ONE_MINUS_SRC_ALPHA:
-               func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+               func = R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
                break;
        case GL_DST_ALPHA:
-               func = R200_BLEND_GL_DST_ALPHA;
+               func = R300_BLEND_GL_DST_ALPHA;
                break;
        case GL_ONE_MINUS_DST_ALPHA:
-               func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA;
+               func = R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
                break;
        case GL_SRC_ALPHA_SATURATE:
-               func =
-                   (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE :
-                   R200_BLEND_GL_ZERO;
+               func = (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE :
+               R300_BLEND_GL_ZERO;
                break;
        case GL_CONSTANT_COLOR:
-               func = R200_BLEND_GL_CONST_COLOR;
+               func = R300_BLEND_GL_CONST_COLOR;
                break;
        case GL_ONE_MINUS_CONSTANT_COLOR:
-               func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR;
+               func = R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
                break;
        case GL_CONSTANT_ALPHA:
-               func = R200_BLEND_GL_CONST_ALPHA;
+               func = R300_BLEND_GL_CONST_ALPHA;
                break;
        case GL_ONE_MINUS_CONSTANT_ALPHA:
-               func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+               func = R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
                break;
        default:
-               func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO;
+               fprintf(stderr, "unknown blend factor %x\n", factor);
+               func = (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO;
        }
        return func;
 }
@@ -211,14 +159,14 @@ static int blend_factor(GLenum factor, GLboolean is_src)
  * This is done in a single
  * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
  * change the interpretation of the blend function.
- * Also, make sure that blend function and blend equation are set to their default
- * value if color blending is not enabled, since at least blend equations GL_MIN
- * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for
- * unknown reasons.
+ * Also, make sure that blend function and blend equation are set to their
+ * default value if color blending is not enabled, since at least blend
+ * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results
+ * otherwise for unknown reasons.
  */
 
 /* helper function */
-static void r300_set_blend_cntl(r300ContextPtr rmesa, int func, int eqn, int cbits, int funcA, int eqnA)
+static void r300_set_blend_cntl(r300ContextPtr r300, int func, int eqn, int cbits, int funcA, int eqnA)
 {
        GLuint new_ablend, new_cblend;
 
@@ -227,91 +175,48 @@ static void r300_set_blend_cntl(r300ContextPtr rmesa, int func, int eqn, int cbi
 #endif
        new_ablend = eqnA | funcA;
        new_cblend = eqn | func;
-       if(funcA == func){
+
+       /* Some blend factor combinations don't seem to work when the
+        * BLEND_NO_SEPARATE bit is set.
+        *
+        * Especially problematic candidates are the ONE_MINUS_* flags,
+        * but I can't see a real pattern.
+        */
+#if 0
+       if (new_ablend == new_cblend) {
                new_cblend |=  R300_BLEND_NO_SEPARATE;
-               }
+       }
+#endif
        new_cblend |= cbits;
 
-       if((new_ablend != rmesa->hw.bld.cmd[R300_BLD_ABLEND])
-               || (new_cblend != rmesa->hw.bld.cmd[R300_BLD_CBLEND])){
-               R300_STATECHANGE(rmesa, bld);
-               rmesa->hw.bld.cmd[R300_BLD_ABLEND]=new_ablend;
-               rmesa->hw.bld.cmd[R300_BLD_CBLEND]=new_cblend;
-               }
+       if((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) ||
+          (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) {
+               R300_STATECHANGE(r300, bld);
+               r300->hw.bld.cmd[R300_BLD_ABLEND]=new_ablend;
+               r300->hw.bld.cmd[R300_BLD_CBLEND]=new_cblend;
+       }
 }
 
+
 static void r300_set_blend_state(GLcontext * ctx)
 {
-       r300ContextPtr rmesa = R300_CONTEXT(ctx);
-#if 0
-       GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &
-           ~(R300_ROP_ENABLE | R300_ALPHA_BLEND_ENABLE |
-             R300_SEPARATE_ALPHA_ENABLE);
-#endif
-
-       int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-           (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
-       int eqn = R200_COMB_FCN_ADD_CLAMP;
-       int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-           (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
-       int eqnA = R200_COMB_FCN_ADD_CLAMP;
-
-
-       if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-               if (ctx->Color._LogicOpEnabled) {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =
-                           cntl | R300_ROP_ENABLE;
-#endif
-                       r300_set_blend_cntl(rmesa,
-                               func, eqn, 0,
-                               func, eqn);
-                       return;
-               } else if (ctx->Color.BlendEnabled) {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =
-                           cntl | R300_ALPHA_BLEND_ENABLE |
-                           R300_SEPARATE_ALPHA_ENABLE;
-#endif
-               } else {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
-#endif
-                       r300_set_blend_cntl(rmesa,
-                               func, eqn, 0,
-                               func, eqn);
-                       return;
-               }
-       } else {
-               if (ctx->Color._LogicOpEnabled) {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =
-                           cntl | R300_ROP_ENABLE;
-                       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
-#endif
-                       return;
-               } else if (ctx->Color.BlendEnabled) {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =
-                           cntl | R300_ALPHA_BLEND_ENABLE;
-#endif
-               } else {
-#if 0
-                       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
-                       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
-#endif
-                       r300_set_blend_cntl(rmesa,
-                               func, eqn, 0,
-                               func, eqn);
-                       return;
-               }
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+           (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+       int eqn = R300_COMB_FCN_ADD_CLAMP;
+       int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+           (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+       int eqnA = R300_COMB_FCN_ADD_CLAMP;
+
+       if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+               r300_set_blend_cntl(r300,
+                       func, eqn, 0,
+                       func, eqn);
+               return;
        }
 
-       func =
-           (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) <<
-            R200_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB,
-                                                  GL_FALSE) <<
-                                     R200_DST_BLEND_SHIFT);
+       func = (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << R300_SRC_BLEND_SHIFT) |
+               (blend_factor(ctx->Color.BlendDstRGB, GL_FALSE) << R300_DST_BLEND_SHIFT);
 
        switch (ctx->Color.BlendEquationRGB) {
        case GL_FUNC_ADD:
@@ -323,19 +228,19 @@ static void r300_set_blend_state(GLcontext * ctx)
                break;
 
        case GL_FUNC_REVERSE_SUBTRACT:
-               eqn = R200_COMB_FCN_RSUB_CLAMP;
+               eqn = R300_COMB_FCN_RSUB_CLAMP;
                break;
 
        case GL_MIN:
-               eqn = R200_COMB_FCN_MIN;
-               func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-                   (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+               eqn = R300_COMB_FCN_MIN;
+               func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+                   (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
                break;
 
        case GL_MAX:
-               eqn = R200_COMB_FCN_MAX;
-               func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-                   (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+               eqn = R300_COMB_FCN_MAX;
+               func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+                   (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
                break;
 
        default:
@@ -345,18 +250,9 @@ static void r300_set_blend_state(GLcontext * ctx)
                return;
        }
 
-       if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-#if 0
-               rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
-#endif
-               return;
-       }
 
-       funcA =
-           (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) <<
-            R200_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA,
-                                                  GL_FALSE) <<
-                                     R200_DST_BLEND_SHIFT);
+       funcA = (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << R300_SRC_BLEND_SHIFT) |
+               (blend_factor(ctx->Color.BlendDstA, GL_FALSE) << R300_DST_BLEND_SHIFT);
 
        switch (ctx->Color.BlendEquationA) {
        case GL_FUNC_ADD:
@@ -368,19 +264,19 @@ static void r300_set_blend_state(GLcontext * ctx)
                break;
 
        case GL_FUNC_REVERSE_SUBTRACT:
-               eqnA = R200_COMB_FCN_RSUB_CLAMP;
+               eqnA = R300_COMB_FCN_RSUB_CLAMP;
                break;
 
        case GL_MIN:
-               eqnA = R200_COMB_FCN_MIN;
-               funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-                   (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+               eqnA = R300_COMB_FCN_MIN;
+               funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+                   (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
                break;
 
        case GL_MAX:
-               eqnA = R200_COMB_FCN_MAX;
-               funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-                   (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+               eqnA = R300_COMB_FCN_MAX;
+               funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+                   (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
                break;
 
        default:
@@ -389,10 +285,7 @@ static void r300_set_blend_state(GLcontext * ctx)
                return;
        }
 
-       r300_set_blend_cntl(rmesa,
-               func, eqn, R300_BLEND_UNKNOWN | R300_BLEND_ENABLE,
-               funcA, eqnA);
-       r300_set_blend_cntl(rmesa,
+       r300_set_blend_cntl(r300,
                func, eqn, R300_BLEND_UNKNOWN | R300_BLEND_ENABLE,
                funcA, eqnA);
 }
@@ -435,27 +328,130 @@ static void r300UpdateCulling(GLcontext* ctx)
        r300->hw.cul.cmd[R300_CUL_CULL] = val;
 }
 
-static void update_early_z(GLcontext* ctx)
+static void update_early_z(GLcontext *ctx)
 {
-       /* updates register 0x4f14 
-          if depth test is not enabled it should be 0x00000000
-          if depth is enabled and alpha not it should be 0x00000001
-          if depth and alpha is enabled it should be 0x00000000
+       /* updates register R300_RB3D_EARLY_Z (0x4F14)
+          if depth test is not enabled it should be R300_EARLY_Z_DISABLE
+          if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE
+          if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE
        */
        r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-       R300_STATECHANGE(r300, unk4F10);
-       if (ctx->Color.AlphaEnabled)
+       R300_STATECHANGE(r300, zstencil_format);
+       if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
                /* disable early Z */
-               r300->hw.unk4F10.cmd[2] = 0x00000000;
+               r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
        else {
-               if (ctx->Depth.Test)
+               if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER)
                        /* enable early Z */
-                       r300->hw.unk4F10.cmd[2] = 0x00000001;
+                       r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE;
                else
                        /* disable early Z */
-                       r300->hw.unk4F10.cmd[2] = 0x00000000;
+                       r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
+       }
+}
+
+/* Rebuild the alpha-test register word from ctx->Color and emit it. */
+static void update_alpha(GLcontext *ctx)
+{
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       GLboolean test_active = ctx->Color.AlphaEnabled;
+       uint32_t hw_func = 0x0;
+       uint32_t alpha_word = 0x0;
+       GLubyte ref;
+
+       CLAMPED_FLOAT_TO_UBYTE(ref, ctx->Color.AlphaRef);
+
+       switch (ctx->Color.AlphaFunc) {
+       case GL_NEVER:
+               hw_func = R300_ALPHA_TEST_FAIL;
+               break;
+       case GL_LESS:
+               hw_func = R300_ALPHA_TEST_LESS;
+               break;
+       case GL_EQUAL:
+               hw_func = R300_ALPHA_TEST_EQUAL;
+               break;
+       case GL_LEQUAL:
+               hw_func = R300_ALPHA_TEST_LEQUAL;
+               break;
+       case GL_GREATER:
+               hw_func = R300_ALPHA_TEST_GREATER;
+               break;
+       case GL_NOTEQUAL:
+               hw_func = R300_ALPHA_TEST_NEQUAL;
+               break;
+       case GL_GEQUAL:
+               hw_func = R300_ALPHA_TEST_GEQUAL;
+               break;
+       case GL_ALWAYS:
+               /* A test that always passes is emitted as "not enabled". */
+               test_active = GL_FALSE;
+               break;
+       }
+
+       /* An inactive test is written as an all-zero word. */
+       if (test_active)
+               alpha_word = hw_func | R300_ALPHA_TEST_ENABLE |
+                            (ref & R300_REF_ALPHA_MASK);
+
+       R300_STATECHANGE(r300, at);
+       r300->hw.at.cmd[R300_AT_ALPHA_TEST] = alpha_word;
+       /* update_early_z() also keys off the alpha state. */
+       update_early_z(ctx);
+}
+
+/* func/ref are ignored: update_alpha() re-reads both from ctx->Color. */
+static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+       (void) func, (void) ref;
+       update_alpha(ctx);
+}
+
+/*
+ * Map a GL comparison enum (GL_NEVER .. GL_ALWAYS) to the matching
+ * R300_ZS_* code; any other value yields 0.
+ */
+static int translate_func(int func)
+{
+       int zs_code = 0;
+
+       switch (func) {
+       case GL_NEVER:    zs_code = R300_ZS_NEVER;    break;
+       case GL_LESS:     zs_code = R300_ZS_LESS;     break;
+       case GL_EQUAL:    zs_code = R300_ZS_EQUAL;    break;
+       case GL_LEQUAL:   zs_code = R300_ZS_LEQUAL;   break;
+       case GL_GREATER:  zs_code = R300_ZS_GREATER;  break;
+       case GL_NOTEQUAL: zs_code = R300_ZS_NOTEQUAL; break;
+       case GL_GEQUAL:   zs_code = R300_ZS_GEQUAL;   break;
+       case GL_ALWAYS:   zs_code = R300_ZS_ALWAYS;   break;
+       default:          break;
+       }
+
+       return zs_code;
+}
+
+/* Program Z test/write mode and depth compare function from ctx->Depth. */
+static void update_depth(GLcontext* ctx)
+{
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       int hw_func = translate_func(GL_NEVER);
+
+       R300_STATECHANGE(r300, zs);
+
+       /* Keep only the stencil-enable bit; clear the old depth function. */
+       r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
+       r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+
+       if (!ctx->Depth.Test || ctx->Depth.Func == GL_NEVER) {
+               r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1;
+       } else {
+               r300->hw.zs.cmd[R300_ZS_CNTL_0] |= ctx->Depth.Mask ?
+                       R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST;
+               hw_func = translate_func(ctx->Depth.Func);
+       }
+       r300->hw.zs.cmd[R300_ZS_CNTL_1] |= hw_func << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+       update_early_z(ctx);
+}
 
 /**
@@ -466,7 +462,6 @@ static void update_early_z(GLcontext* ctx)
 static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       uint32_t newval;
 
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
@@ -481,16 +476,26 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
        case GL_TEXTURE_3D:
                break;
 
-       case GL_ALPHA_TEST:
-               R300_STATECHANGE(r300, at);
+       case GL_FOG:
+               R300_STATECHANGE(r300, fogs);
                if (state) {
-                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] |=
-                           R300_ALPHA_TEST_ENABLE;
+                       r300->hw.fogs.cmd[R300_FOGS_STATE] |=
+                           R300_FOG_ENABLE;
+                       
+                       ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
+                       ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+                       ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+                       ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+                       ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
                } else {
-                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] &=
-                           ~R300_ALPHA_TEST_ENABLE;
+                       r300->hw.fogs.cmd[R300_FOGS_STATE] &=
+                           ~R300_FOG_ENABLE;
                }
-               update_early_z(ctx);
+               
+               break;
+
+       case GL_ALPHA_TEST:
+               update_alpha(ctx);
                break;
 
        case GL_BLEND:
@@ -499,23 +504,10 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                break;
 
        case GL_DEPTH_TEST:
-               R300_STATECHANGE(r300, zs);
-
-               if (state) {
-                       if (ctx->Depth.Mask)
-                               newval = R300_RB3D_Z_TEST_AND_WRITE;
-                       else
-                               newval = R300_RB3D_Z_TEST;
-               } else
-                       newval = R300_RB3D_Z_DISABLED_1;
-
-               r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
-               r300->hw.zs.cmd[R300_ZS_CNTL_0] |= newval;
-               update_early_z(ctx);
+               update_depth(ctx);
                break;
 
        case GL_STENCIL_TEST:
-               WARN_ONCE("TODO - double side stencil !\n");
                if (r300->state.stencil.hw_stencil) {
                        R300_STATECHANGE(r300, zs);
                        if (state) {
@@ -526,7 +518,9 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                                    ~R300_RB3D_STENCIL_ENABLE;
                        }
                } else {
+#if R200_MERGED
                        FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
+#endif                 
                }
                break;
 
@@ -536,30 +530,16 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 
        case GL_POLYGON_OFFSET_POINT:
        case GL_POLYGON_OFFSET_LINE:
-               WARN_ONCE("Don't know how to enable polygon offset point/line. Help me !\n");
-
-               /* Something is apparently blocking these from working */
-               R300_STATECHANGE(r300, unk42B4);
-               if(state){
-                       r300->hw.unk42B4.cmd[1] |= ~(3<<0);
-               } else {
-                       r300->hw.unk42B4.cmd[1] &= (3<<0);
-               }
                break;
 
        case GL_POLYGON_OFFSET_FILL:
-               R300_STATECHANGE(r300, unk42B4);
+               R300_STATECHANGE(r300, occlusion_cntl);
                if(state){
-                       r300->hw.unk42B4.cmd[1] |= (3<<0);
+                       r300->hw.occlusion_cntl.cmd[1] |= (3<<0);
                } else {
-                       r300->hw.unk42B4.cmd[1] &= ~(3<<0);
+                       r300->hw.occlusion_cntl.cmd[1] &= ~(3<<0);
                }
                break;
-
-       case GL_VERTEX_PROGRAM_ARB:
-               //TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, state);
-       break;
-
        default:
                radeonEnable(ctx, cap, state);
                return;
@@ -567,6 +547,56 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
 }
 
 
+/**
+ * Recompute the polygon-mode register word from ctx->Polygon.
+ *
+ * The word stays 0 while both faces use GL_FILL.  Otherwise R300_PM_ENABLED
+ * is combined with one R300_PM_FRONT_* and one R300_PM_BACK_* value.  When
+ * ctx->Polygon.FrontFace is not GL_CCW, FrontMode picks the BACK value and
+ * BackMode picks the FRONT value.
+ *
+ * The polygon_mode state is only flagged via R300_STATECHANGE when the new
+ * word differs from the cached one.
+ */
+static void r300UpdatePolygonMode(GLcontext *ctx)
+{
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       const GLboolean ccw = (ctx->Polygon.FrontFace == GL_CCW);
+       const GLenum hw_front = ccw ? ctx->Polygon.FrontMode
+                                   : ctx->Polygon.BackMode;
+       const GLenum hw_back = ccw ? ctx->Polygon.BackMode
+                                  : ctx->Polygon.FrontMode;
+       uint32_t mode_word = 0;
+
+       if (ctx->Polygon.FrontMode != GL_FILL ||
+           ctx->Polygon.BackMode != GL_FILL) {
+               mode_word = R300_PM_ENABLED;
+
+               /* R300_PM_FRONT_* selection */
+               if (hw_front == GL_LINE)
+                       mode_word |= R300_PM_FRONT_LINE;
+               else if (hw_front == GL_POINT)
+                       mode_word |= R300_PM_FRONT_POINT;
+               else if (hw_front == GL_FILL)
+                       mode_word |= R300_PM_FRONT_FILL;
+
+               /* R300_PM_BACK_* selection */
+               if (hw_back == GL_LINE)
+                       mode_word |= R300_PM_BACK_LINE;
+               else if (hw_back == GL_POINT)
+                       mode_word |= R300_PM_BACK_POINT;
+               else if (hw_back == GL_FILL)
+                       mode_word |= R300_PM_BACK_FILL;
+       }
+
+       /* Nothing to emit if the cached word already matches. */
+       if (r300->hw.polygon_mode.cmd[1] == mode_word)
+               return;
+
+       R300_STATECHANGE(r300, polygon_mode);
+       r300->hw.polygon_mode.cmd[1] = mode_word;
+}
+
 /**
  * Change the culling mode.
  *
@@ -590,6 +620,7 @@ static void r300FrontFace(GLcontext* ctx, GLenum mode)
        (void)mode;
 
        r300UpdateCulling(ctx);
+       r300UpdatePolygonMode(ctx);
 }
 
 
@@ -600,38 +631,8 @@ static void r300FrontFace(GLcontext* ctx, GLenum mode)
  */
 static void r300DepthFunc(GLcontext* ctx, GLenum func)
 {
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-
-       R300_STATECHANGE(r300, zs);
-
-       r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
-
-       switch(func) {
-       case GL_NEVER:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NEVER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_LESS:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LESS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_EQUAL:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_EQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_LEQUAL:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_GREATER:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GREATER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_NOTEQUAL:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NOTEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_GEQUAL:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       case GL_ALWAYS:
-               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
-               break;
-       }
+       (void) func;
+       update_depth(ctx);
 }
 
 
@@ -642,15 +643,8 @@ static void r300DepthFunc(GLcontext* ctx, GLenum func)
  */
 static void r300DepthMask(GLcontext* ctx, GLboolean mask)
 {
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-
-       if (!ctx->Depth.Test)
-               return;
-
-       R300_STATECHANGE(r300, zs);
-       r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
-       r300->hw.zs.cmd[R300_ZS_CNTL_0] |= mask 
-           ? R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST;
+       (void) mask;
+       update_depth(ctx);
 }
 
 
@@ -661,7 +655,10 @@ static void r300ColorMask(GLcontext* ctx,
                          GLboolean r, GLboolean g, GLboolean b, GLboolean a)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       int mask = (b << 0) | (g << 1) | (r << 2) | (a << 3);
+       int mask = (r ? R300_COLORMASK0_R : 0) |
+                  (g ? R300_COLORMASK0_G : 0) |
+                  (b ? R300_COLORMASK0_B : 0) |
+                  (a ? R300_COLORMASK0_A : 0);
 
        if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) {
                R300_STATECHANGE(r300, cmk);
@@ -669,6 +666,101 @@ static void r300ColorMask(GLcontext* ctx,
        }
 }
 
+/* =============================================================
+ * Fog
+ */
+/**
+ * Update fog mode bits, scale/start or color for pname from ctx->Fog; param is unused.
+ */
+static void r300Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       union { int i; float f; } fogScale, fogStart;
+
+       (void) param;
+       fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE];
+       fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START];
+
+       switch (pname) {
+       case GL_FOG_MODE:
+               if (!ctx->Fog.Enabled)
+                       return;
+               switch (ctx->Fog.Mode) {
+               case GL_LINEAR:
+                       R300_STATECHANGE(r300, fogs);
+                       r300->hw.fogs.cmd[R300_FOGS_STATE] =
+                               (r300->hw.fogs.cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | R300_FOG_MODE_LINEAR;
+                       if (ctx->Fog.Start == ctx->Fog.End) {
+                               fogScale.f = -1.0;
+                               fogStart.f = 1.0;
+                       } else {
+                               fogScale.f = 1.0 / (ctx->Fog.End-ctx->Fog.Start);
+                               fogStart.f = -ctx->Fog.Start / (ctx->Fog.End-ctx->Fog.Start);
+                       }
+                       break;
+               case GL_EXP:
+                       R300_STATECHANGE(r300, fogs);
+                       r300->hw.fogs.cmd[R300_FOGS_STATE] =
+                               (r300->hw.fogs.cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | R300_FOG_MODE_EXP;
+                       fogScale.f = 0.0933*ctx->Fog.Density;
+                       fogStart.f = 0.0;
+                       break;
+               case GL_EXP2:
+                       R300_STATECHANGE(r300, fogs);
+                       r300->hw.fogs.cmd[R300_FOGS_STATE] =
+                               (r300->hw.fogs.cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | R300_FOG_MODE_EXP2;
+                       fogScale.f = 0.3*ctx->Fog.Density;
+                       fogStart.f = 0.0;
+                       break;  /* must reach the fogp update at the end */
+               default:
+                       return;
+               }
+               break;
+       case GL_FOG_DENSITY:
+               switch (ctx->Fog.Mode) {
+               case GL_EXP:
+                       fogScale.f = 0.0933*ctx->Fog.Density;
+                       fogStart.f = 0.0;
+                       break;
+               case GL_EXP2:
+                       fogScale.f = 0.3*ctx->Fog.Density;
+                       fogStart.f = 0.0;  /* falls through to the break */
+               default:
+                       break;
+               }
+               break;
+       case GL_FOG_START:
+       case GL_FOG_END:
+               if (ctx->Fog.Mode == GL_LINEAR) {
+                       if (ctx->Fog.Start == ctx->Fog.End) {
+                               fogScale.f = -1.0;
+                               fogStart.f = 1.0;
+                       } else {
+                               fogScale.f = 1.0 / (ctx->Fog.End-ctx->Fog.Start);
+                               fogStart.f = -ctx->Fog.Start / (ctx->Fog.End-ctx->Fog.Start);
+                       }
+               }
+               break;
+       case GL_FOG_COLOR:
+               R300_STATECHANGE(r300, fogc);
+               r300->hw.fogc.cmd[R300_FOGC_R] = (GLuint) (ctx->Fog.Color[0]*1023.0F) & 0x3FF;
+               r300->hw.fogc.cmd[R300_FOGC_G] = (GLuint) (ctx->Fog.Color[1]*1023.0F) & 0x3FF;
+               r300->hw.fogc.cmd[R300_FOGC_B] = (GLuint) (ctx->Fog.Color[2]*1023.0F) & 0x3FF;
+               break;
+       case GL_FOG_COORD_SRC:
+               break;
+       default:
+               return;
+       }
+
+       if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] ||
+           fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) {
+               R300_STATECHANGE(r300, fogp);
+               r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i;
+               r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i;
+       }
+}
+
 /* =============================================================
  * Point state
  */
@@ -679,15 +771,9 @@ static void r300PointSize(GLcontext * ctx, GLfloat size)
        size = ctx->Point._Size;
 
        R300_STATECHANGE(r300, ps);
-       r300->hw.ps.cmd[R300_PS_POINTSIZE] =
+       r300->hw.ps.cmd[R300_PS_POINTSIZE] = 
                ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) |
                ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT);
-
-#if 0 /* r200 reg? */
-       /* This might need fixing later */
-       R300_STATECHANGE(r300, vps);
-       r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
-#endif
 }
 
 /* =============================================================
@@ -701,195 +787,70 @@ static void r300LineWidth(GLcontext *ctx, GLfloat widthf)
 
        R300_STATECHANGE(r300, lcntl);
        r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0);
-       /* Doesnt look very good without this... */
-       r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_UNK1;
+       r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE;
 }
 
-/*
-
-glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); :  00000091 (  1001 0001)
-glPolygonMode(GL_FRONT_AND_BACK, GL_POINT); : 00000001 (          1)
-
-glPolygonMode(GL_FRONT, GL_LINE); :           00000111 (1 0001 0001)
-glPolygonMode(GL_FRONT, GL_POINT); :          00000101 (1 0000 0001)
-
-glPolygonMode(GL_BACK, GL_LINE); :            000000a1 (  1010 0001)
-glPolygonMode(GL_BACK, GL_POINT); :           00000021 (    10 0001)
-
-*/
-
-/* exclusive */
-#define PM_NOT_BACK   (1<<8)
-#define PM_NOT_FRONT  (1<<5)
-
-#define PM_FRONT_LINE (1<<4)
-#define PM_BACK_LINE  (1<<7)
-
 static void r300PolygonMode(GLcontext *ctx, GLenum face, GLenum mode)
 {
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-       unsigned long hw_mode=0;
-
-       //hw_mode=r300->hw.unk4288.cmd[1];
-       hw_mode |= 1; /* enables point mode by default */
-
-       switch (ctx->Polygon.FrontMode) {
-       case GL_LINE:
-               hw_mode &= ~PM_NOT_FRONT;
-               hw_mode |= PM_FRONT_LINE;
-       break;
-       case GL_POINT:
-               hw_mode &= ~PM_NOT_FRONT;
-               hw_mode &= ~PM_FRONT_LINE;
-       break;
-        /* I dont think fgl properly handles these... In any case, test program is needed */
-       case GL_FILL:
-       break;
-       }
-
-       switch (ctx->Polygon.BackMode) {
-       case GL_LINE:
-               hw_mode &= ~PM_NOT_BACK;
-               hw_mode |= PM_BACK_LINE;
-       break;
-       case GL_POINT:
-               hw_mode &= ~PM_NOT_BACK;
-               hw_mode &= ~PM_BACK_LINE;
-       break;
-       case GL_FILL:
-       break;
-       }
-
-       if(hw_mode == 1)
-               hw_mode = 0;
-
-#if 0
-       switch (face) {
-       case GL_FRONT:
-               //fprintf(stderr, "front\n");
-               hw_mode &= ~PM_NOT_FRONT;
-               switch (mode) {
-               case GL_LINE:
-                       hw_mode |= PM_FRONT_LINE;
-               break;
-               case GL_POINT:
-                       hw_mode &= ~PM_FRONT_LINE;
-               break;
-               case GL_FILL:
-               break;
-               }
-       break;
-
-       case GL_BACK:
-               //fprintf(stderr, "back\n");
-               hw_mode &= ~PM_NOT_BACK;
-               switch (mode) {
-               case GL_LINE:
-                       hw_mode |= PM_BACK_LINE;
-               break;
-               case GL_POINT:
-                       hw_mode &= ~PM_BACK_LINE;
-               break;
-               case GL_FILL:
-               break;
-               }
-       break;
-
-       case GL_FRONT_AND_BACK:
-               //fprintf(stderr, "front and back\n");
-               hw_mode &= ~PM_NOT_FRONT;
-               hw_mode &= ~PM_NOT_BACK;
-               switch (mode) {
-               case GL_LINE:
-                       hw_mode |= PM_FRONT_LINE;
-                       hw_mode |= PM_BACK_LINE;
-               break;
-               case GL_POINT:
-                       hw_mode &= ~PM_FRONT_LINE;
-                       hw_mode &= ~PM_BACK_LINE;
-               break;
-               case GL_FILL:
-                       hw_mode = 0;
-               break;
-               }
-       break;
-       }
-#endif
-
-       //if( front and back fill) hw_mode=0;
-
-       if(r300->hw.unk4288.cmd[1] != hw_mode){
-               R300_STATECHANGE(r300, unk4288);
-               r300->hw.unk4288.cmd[1] = hw_mode;
-       }
+       (void)face;     /* face and mode are intentionally unused by this callback */
+       (void)mode;     /* the casts only mark the parameters as unused */
+       
+       r300UpdatePolygonMode(ctx);     /* only ctx is passed on; face/mode are not forwarded */
 }
 
 /* =============================================================
  * Stencil
  */
 
- static int translate_stencil_func(int func)
- {
-       switch (func) {
-       case GL_NEVER:
-                   return R300_ZS_NEVER;
-               break;
-       case GL_LESS:
-                   return R300_ZS_LESS;
-               break;
-       case GL_EQUAL:
-                   return R300_ZS_EQUAL;
-               break;
-       case GL_LEQUAL:
-                   return R300_ZS_LEQUAL;
-               break;
-       case GL_GREATER:
-                   return R300_ZS_GREATER;
-               break;
-       case GL_NOTEQUAL:
-                   return R300_ZS_NOTEQUAL;
-               break;
-       case GL_GEQUAL:
-                   return R300_ZS_GEQUAL;
-               break;
-       case GL_ALWAYS:
-                   return R300_ZS_ALWAYS;
-               break;
-       }
- return 0;
- }
-
- static int translate_stencil_op(int op)
+static int translate_stencil_op(int op)        /* GL stencil op enum -> R300_ZS_* code; unknown ops warn once and map to R300_ZS_KEEP */
 {
        switch (op) {
        case GL_KEEP:
-                   return R300_ZS_KEEP;
+               return R300_ZS_KEEP;
        case GL_ZERO:
-                   return R300_ZS_ZERO;
+               return R300_ZS_ZERO;
        case GL_REPLACE:
-                   return R300_ZS_REPLACE;
+               return R300_ZS_REPLACE;
        case GL_INCR:
-                   return R300_ZS_INCR;
+               return R300_ZS_INCR;
        case GL_DECR:
-                   return R300_ZS_DECR;
+               return R300_ZS_DECR;
        case GL_INCR_WRAP_EXT:
-                   return R300_ZS_INCR_WRAP;
+               return R300_ZS_INCR_WRAP;
        case GL_DECR_WRAP_EXT:
-                   return R300_ZS_DECR_WRAP;
+               return R300_ZS_DECR_WRAP;
        case GL_INVERT:
-                   return R300_ZS_INVERT;
+               return R300_ZS_INVERT;
        default:
                WARN_ONCE("Do not know how to translate stencil op");
                return R300_ZS_KEEP;
        }
+       return 0;       /* not reached: every case above, including default, returns */
 }
 
-static void r300StencilFunc(GLcontext * ctx, GLenum func,
-                           GLint ref, GLuint mask)
+static void r300ShadeModel(GLcontext * ctx, GLenum mode)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       GLuint refmask = ((ctx->Stencil.Ref[0] << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
-                         (ctx->Stencil.ValueMask[0] << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+       
+       R300_STATECHANGE(rmesa, shade);
+       switch (mode) {
+       case GL_FLAT:
+               rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
+               break;
+       case GL_SMOOTH:
+               rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH;
+               break;
+       default:
+               return;
+       }
+}
+
+static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+                                    GLenum func, GLint ref, GLuint mask)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+                         ((ctx->Stencil.ValueMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
                          
        GLuint flag;
 
@@ -902,25 +863,28 @@ static void r300StencilFunc(GLcontext * ctx, GLenum func,
        rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=  ~((R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
                                                (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
        
-       flag = translate_stencil_func(ctx->Stencil.Function[0]);
-
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
-                                         | (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+       flag = translate_func(ctx->Stencil.Function[0]);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT);
+       
+       if (ctx->Stencil._TestTwoSide)
+               flag = translate_func(ctx->Stencil.Function[1]);
+       
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
        rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
 }
 
-static void r300StencilMask(GLcontext * ctx, GLuint mask)
+static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) /* face and mask are never read here; the value comes from ctx->Stencil.WriteMask[0] */
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
        R300_STATECHANGE(rmesa, zs);
        rmesa->hw.zs.cmd[R300_ZS_CNTL_2]  &= ~(R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= ctx->Stencil.WriteMask[0] << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT;
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= (ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT; /* & 0xff keeps only the low 8 bits before they are shifted into the field */
 }
 
 
-static void r300StencilOp(GLcontext * ctx, GLenum fail,
-                         GLenum zfail, GLenum zpass)
+static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail,
+                                  GLenum zfail, GLenum zpass)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
@@ -934,10 +898,19 @@ static void r300StencilOp(GLcontext * ctx, GLenum fail,
        rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
                 (translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
                |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
-               |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT)
-               |(translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
-               |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
-               |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+               |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT);
+       
+       if (ctx->Stencil._TestTwoSide) {
+               rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+                        (translate_stencil_op(ctx->Stencil.FailFunc[1]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+                       |(translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+                       |(translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+       } else {
+               rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+                        (translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+                       |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+                       |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+       }
 }
 
 static void r300ClearStencil(GLcontext * ctx, GLint s)
@@ -945,9 +918,9 @@ static void r300ClearStencil(GLcontext * ctx, GLint s)
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
        rmesa->state.stencil.clear =
-           ((GLuint) ctx->Stencil.Clear |
+           ((GLuint) (ctx->Stencil.Clear & 0xff) |
             (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT) |
-            (ctx->Stencil.WriteMask[0] << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT));
+            ((ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT));
 }
 
 /* =============================================================
@@ -993,7 +966,6 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
         * setting below.  Could apply deltas to rescue pipelined viewport
         * values, or keep the originals hanging around.
         */
-       R300_FIREVERTICES(R300_CONTEXT(ctx));
        r300UpdateWindow(ctx);
 }
 
@@ -1002,284 +974,174 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
        r300UpdateWindow(ctx);
 }
 
-/* =============================================================
- * Polygon state
- */
-static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+void r300UpdateViewportOffset( GLcontext *ctx )
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       GLfloat constant = units * /*rmesa->state.depth.scale*/4;
+       __DRIdrawablePrivate *dPriv = ((radeonContextPtr)rmesa)->dri.drawable;
+       GLfloat xoffset = (GLfloat)dPriv->x;
+       GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+       const GLfloat *v = ctx->Viewport._WindowMap.m;
 
-       factor *= 12;
+       GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+       GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
 
-/*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
+       if ( rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) ||
+               rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty))
+       {
+       /* Note: this should also modify whatever data the context reset
+        * code uses...
+        */
+       R300_STATECHANGE( rmesa, vpt );
+       rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+       rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+      
+       }
 
-       R300_STATECHANGE(rmesa, zbs);
-       rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor);
-       rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant);
-       rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor);
-       rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant);
+       radeonUpdateScissor( ctx );
 }
 
-/* Routing and texture-related */
-
-void r300_setup_routing(GLcontext *ctx, GLboolean immediate)
+/**
+ * Tell the card where to render (offset, pitch).
+ * Affected by glDrawBuffer, etc
+ */
+void
+r300UpdateDrawBuffer(GLcontext *ctx)
 {
-       int i, count=0,reg=0;
-       GLuint dw, mask;
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *VB = &tnl->vb;
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-
-
-       /* Stage 1 - input to VAP */
-
-       /* Assign register number automatically, retaining it in rmesa->state.reg */
-
-       /* Note: immediate vertex data includes all coordinates.
-       To save bandwidth use either VBUF or state-based vertex generation */
-
-#define CONFIGURE_AOS(v, o, r, f) \
-               {\
-               if (RADEON_DEBUG & DEBUG_STATE)fprintf(stderr, "Enabling "#r "\n"); \
-               r300->state.aos[count].aos_size=4; \
-               r300->state.aos[count].aos_stride=4; \
-               r300->state.aos[count].aos_offset=o; \
-               r300->state.aos[count].aos_reg=reg; \
-               r300->state.aos[count].aos_format=(f); \
-               r300->state.vap_reg.r=reg; \
-               count++; \
-               reg++; \
-               }
-
-               /* All offsets are 0 - for use by immediate mode.
-               Should change later to handle vertex buffers */
-       if(r300->current_vp!=NULL){
-
-       /* VERT_ATTRIB_WEIGHT, VERT_ATTRIB_SIX, VERT_ATTRIB_SEVEN, VERT_ATTRIB_GENERIC0,
-          VERT_ATTRIB_GENERIC1, VERT_ATTRIB_GENERIC2, VERT_ATTRIB_GENERIC3 */
-       r300->state.render_inputs = 0;
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       r300ContextPtr r300 = rmesa;
+       struct gl_framebuffer *fb = ctx->DrawBuffer;
+       driRenderbuffer *drb;
 
-       if(r300->current_vp->inputs[VERT_ATTRIB_POS] != -1){
-               reg=r300->current_vp->inputs[VERT_ATTRIB_POS];
-               CONFIGURE_AOS(VB->ObjPtr, 0, i_coords, AOS_FORMAT_FLOAT);
-               r300->state.render_inputs |= _TNL_BIT_POS;
-       }
-       if(r300->current_vp->inputs[VERT_ATTRIB_NORMAL] != -1){
-               reg=r300->current_vp->inputs[VERT_ATTRIB_NORMAL];
-               CONFIGURE_AOS(VB->NormalPtr, 0, i_normal, AOS_FORMAT_FLOAT);
-               r300->state.render_inputs |= _TNL_BIT_NORMAL;
-       }
-       if(r300->current_vp->inputs[VERT_ATTRIB_COLOR0] != -1){
-               reg=r300->current_vp->inputs[VERT_ATTRIB_COLOR0];
-               CONFIGURE_AOS(VB->ColorPtr[0], 0, i_color[0], AOS_FORMAT_FLOAT_COLOR);
-               r300->state.render_inputs |= _TNL_BIT_COLOR0;
+       if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
+               /* draw to front */
+               drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
        }
-       if(r300->current_vp->inputs[VERT_ATTRIB_COLOR1] != -1){
-               reg=r300->current_vp->inputs[VERT_ATTRIB_COLOR1];
-               CONFIGURE_AOS(VB->SecondaryColorPtr[0], 0, i_color[1], AOS_FORMAT_FLOAT_COLOR);
-               r300->state.render_inputs |= _TNL_BIT_COLOR1;
+       else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) {
+               /* draw to back */
+               drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
        }
-       if(r300->current_vp->inputs[VERT_ATTRIB_FOG] != -1){
-               reg=r300->current_vp->inputs[VERT_ATTRIB_FOG];
-               CONFIGURE_AOS(VB->FogCoordPtr, 0, i_fog, AOS_FORMAT_FLOAT);
-               r300->state.render_inputs |= _TNL_BIT_FOG;
+       else {
+               /* drawing to multiple buffers, or none */
+               return;
        }
-       for(i=0;i < ctx->Const.MaxTextureUnits;i++) // tex 7 is last
-               if(r300->current_vp->inputs[VERT_ATTRIB_TEX0+i] != -1){
-                       reg=r300->current_vp->inputs[VERT_ATTRIB_TEX0+i];
-                       CONFIGURE_AOS(VB->TexCoordPtr[i], 0, i_tex[i], AOS_FORMAT_FLOAT);
-                       r300->state.render_inputs |= _TNL_BIT_TEX0<<i;
-               }
-#if 0
-       if((tnl->render_inputs & _TNL_BIT_INDEX))
-               CONFIGURE_AOS(VB->IndexPtr[0], 0, i_index, AOS_FORMAT_FLOAT);
-
-       if((tnl->render_inputs & _TNL_BIT_POINTSIZE))
-               CONFIGURE_AOS(VB->PointSizePtr, 0, i_pointsize, AOS_FORMAT_FLOAT);
-#endif
-       }else{
-
-       r300->state.render_inputs = tnl->render_inputs;
 
-       if(tnl->render_inputs & _TNL_BIT_POS)
-               CONFIGURE_AOS(VB->ObjPtr, 0, i_coords, AOS_FORMAT_FLOAT);
-       if(tnl->render_inputs & _TNL_BIT_NORMAL)
-               CONFIGURE_AOS(VB->NormalPtr, 0, i_normal, AOS_FORMAT_FLOAT);
+       assert(drb);
+       assert(drb->flippedPitch);
 
-       if(tnl->render_inputs & _TNL_BIT_COLOR0)
-               CONFIGURE_AOS(VB->ColorPtr[0], 0, i_color[0], AOS_FORMAT_FLOAT_COLOR);
-       if(tnl->render_inputs & _TNL_BIT_COLOR1)
-               CONFIGURE_AOS(VB->SecondaryColorPtr[0], 0, i_color[1], AOS_FORMAT_FLOAT_COLOR);
 
-       /*if(tnl->render_inputs & _TNL_BIT_FOG) // Causes lock ups when immediate mode is on
-               CONFIGURE_AOS(VB->FogCoordPtr, 0, i_fog, AOS_FORMAT_FLOAT);*/
-
-       for(i=0;i < ctx->Const.MaxTextureUnits;i++)
-               if(tnl->render_inputs & (_TNL_BIT_TEX0<<i))
-                       CONFIGURE_AOS(VB->TexCoordPtr[i], 0, i_tex[i], AOS_FORMAT_FLOAT);
+       R300_STATECHANGE( rmesa, cb );
+       
+       r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset +
+               r300->radeon.radeonScreen->fbLocation;
+       r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;//r300->radeon.state.color.drawPitch;
+       
+       if (r300->radeon.radeonScreen->cpp == 4)
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+       else
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+       
+       if (r300->radeon.sarea->tiling_enabled)
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+#if 0
+       R200_STATECHANGE( rmesa, ctx );
 
-       if(tnl->render_inputs & _TNL_BIT_INDEX)
-               CONFIGURE_AOS(VB->IndexPtr[0], 0, i_index, AOS_FORMAT_FLOAT);
-       if(tnl->render_inputs & _TNL_BIT_POINTSIZE)
-               CONFIGURE_AOS(VB->PointSizePtr, 0, i_pointsize, AOS_FORMAT_FLOAT);
+       /* Note: we used the (possibly) page-flipped values */
+       rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+               = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+               & R200_COLOROFFSET_MASK);
+       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+       
+       if (rmesa->sarea->tiling_enabled) {
+               rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
        }
+#endif
+}
 
-       r300->state.aos_count=count;
+/* Fetch a driver-internal state variable into value[0..3]; unhandled ones leave value untouched. */
+static void r300FetchStateParameter(GLcontext *ctx,
+                        const gl_state_index state[STATE_LENGTH], GLfloat *value)
+{
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+    switch(state[0])
+    {
+    case STATE_INTERNAL:
+       switch(state[1])
+       {
+       case STATE_R300_WINDOW_DIMENSION:
+           value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
+           value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
+           value[2] = 0.5F;                            /* for moving range [-1 1] -> [0 1] */
+           value[3] = 1.0F;                            /* not used */
+           break;
+       default:;                                       /* other internal states: nothing written */
+       }
+       break;  /* explicit: do not fall through into the outer default */
+    default:;  /* non-internal state classes are not handled by this function */
+    }
+}
 
-       if (RADEON_DEBUG & DEBUG_STATE)
-               fprintf(stderr, "aos_count=%d render_inputs=%08x\n", count, r300->state.render_inputs);
+/**
+ * Update R300's own internal state parameters.
+ * For now just STATE_R300_WINDOW_DIMENSION
+ */
+void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+{
+       struct r300_fragment_program *fp;
+       struct gl_program_parameter_list *paramList;
+       GLuint i;
 
+       if(!(new_state & (_NEW_BUFFERS|_NEW_PROGRAM)))
+           return;
 
-       if(count>R300_MAX_AOS_ARRAYS){
-               fprintf(stderr, "Aieee ! AOS array count exceeded !\n");
-               exit(-1);
-               }
+       fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
+       if (!fp)
+           return;
 
-       /* Implement AOS */
+       paramList = fp->mesa_program.Base.Parameters;
 
-       /* setup INPUT_ROUTE */
-       R300_STATECHANGE(r300, vir[0]);
-       for(i=0;i+1<count;i+=2){
-               dw=(r300->state.aos[i].aos_size-1)
-               | ((r300->state.aos[i].aos_reg)<<8)
-               | (r300->state.aos[i].aos_format<<14)
-               | (((r300->state.aos[i+1].aos_size-1)
-               | ((r300->state.aos[i+1].aos_reg)<<8)
-               | (r300->state.aos[i+1].aos_format<<14))<<16);
+       if (!paramList)
+           return;
 
-               if(i+2==count){
-                       dw|=(1<<(13+16));
-                       }
-               r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
-               }
-       if(count & 1){
-               dw=(r300->state.aos[count-1].aos_size-1)
-               | (r300->state.aos[count-1].aos_format<<14)
-               | ((r300->state.aos[count-1].aos_reg)<<8)
-               | (1<<13);
-               r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(count>>1)]=dw;
-               //fprintf(stderr, "vir0 dw=%08x\n", dw);
-               }
-       /* Set the rest of INPUT_ROUTE_0 to 0 */
-       //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
-       ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = (count+1)>>1;
-
-
-       /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
-#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
-
-#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
-               | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
-
-       R300_STATECHANGE(r300, vir[1]);
-
-       for(i=0;i+1<count;i+=2){
-               /* do i first.. */
-               mask=(1<<(r300->state.aos[i].aos_size*3))-1;
-               dw=(ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE;
-
-               /* i+1 */
-               mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
-               dw|=(
-               (ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE
-               )<<16;
-
-               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
+       for (i = 0; i < paramList->NumParameters; i++) {
+               if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR){
+                       r300FetchStateParameter(ctx,
+                                   paramList->Parameters[i].StateIndexes,
+                                   paramList->ParameterValues[i]);
                }
-       if(count & 1){
-               mask=(1<<(r300->state.aos[count-1].aos_size*3))-1;
-               dw=(ALL_COMPONENTS & mask)
-               | (ALL_DEFAULT & ~mask)
-               | R300_INPUT_ROUTE_ENABLE;
-               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(count>>1)]=dw;
-               //fprintf(stderr, "vir1 dw=%08x\n", dw);
-               }
-       /* Set the rest of INPUT_ROUTE_1 to 0 */
-       //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
-       ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = (count+1)>>1;
-
-       /* Set up input_cntl */
-
-       R300_STATECHANGE(r300, vic);
-       r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555;  /* Hard coded value, no idea what it means */
-
-       r300->hw.vic.cmd[R300_VIC_CNTL_1]=0;
-
-       if(r300->state.render_inputs & _TNL_BIT_POS)
-               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS;
-
-       if(r300->state.render_inputs & _TNL_BIT_NORMAL)
-               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL;
-
-       if(r300->state.render_inputs & _TNL_BIT_COLOR0)
-               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR;
+       }
+}
 
-       for(i=0;i < ctx->Const.MaxTextureUnits;i++)
-               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
-                       r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
+/* =============================================================
+ * Polygon state
+ */
+static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLfloat constant = units;
+       
+       switch (ctx->Visual.depthBits) {
+       case 16:
+               constant *= 4.0;
+       break;
+       case 24:
+               constant *= 2.0;
+       break;
+       }
 
-       /* Stage 3: VAP output */
-       R300_STATECHANGE(r300, vof);
-       r300->hw.vof.cmd[R300_VOF_CNTL_0]=R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
-                                       | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
+       factor *= 12.0;
 
-       r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
-       for(i=0;i < ctx->Const.MaxTextureUnits;i++)
-               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
-                       r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
+/*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
 
+       R300_STATECHANGE(rmesa, zbs);
+       rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor);
+       rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant);
+       rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor);
+       rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant);
 }
 
-static r300TexObj default_tex_obj={
-       filter:R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR,
-       pitch: 0x8000,
-       size: (0xff << R300_TX_WIDTHMASK_SHIFT)
-             | (0xff << R300_TX_HEIGHTMASK_SHIFT)
-             | (0x8 << R300_TX_SIZE_SHIFT),
-       format: 0x88a0c,
-       offset: 0x0,
-       unknown4: 0x0,
-       unknown5: 0x0
-       };
-
-       /* there is probably a system to these value, but, for now,
-          we just try by hand */
+/* Routing and texture-related */
 
-static int inline translate_src(int src)
-{
-       switch (src) {
-       case GL_TEXTURE:
-               return 1;
-               break;
-       case GL_CONSTANT:
-               return 2;
-               break;
-       case GL_PRIMARY_COLOR:
-               return 3;
-               break;
-       case GL_PREVIOUS:
-               return 4;
-               break;
-       case GL_ZERO:
-               return 5;
-               break;
-       case GL_ONE:
-               return 6;
-               break;
-       default:
-               return 0;
-       }
-}
 
 /* r300 doesnt handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when filter is NEAREST.
  * Since texwrap produces same results for GL_CLAMP and GL_CLAMP_TO_EDGE we use them instead.
@@ -1350,20 +1212,23 @@ void r300_setup_textures(GLcontext *ctx)
        int i, mtu;
        struct r300_tex_obj *t;
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       int max_texture_unit=-1; /* -1 translates into no setup costs for fields */
-       struct gl_texture_unit *texUnit;
+       int hw_tmu=0;
+       int last_hw_tmu=-1; /* -1 translates into no setup costs for fields */
+       int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1 };
+       struct r300_fragment_program *rp =
+               (struct r300_fragment_program *)
+               (char *)ctx->FragmentProgram._Current;
 
        R300_STATECHANGE(r300, txe);
        R300_STATECHANGE(r300, tex.filter);
-       R300_STATECHANGE(r300, tex.unknown1);
+       R300_STATECHANGE(r300, tex.filter_1);
        R300_STATECHANGE(r300, tex.size);
        R300_STATECHANGE(r300, tex.format);
+       R300_STATECHANGE(r300, tex.pitch);
        R300_STATECHANGE(r300, tex.offset);
-       R300_STATECHANGE(r300, tex.unknown4);
+       R300_STATECHANGE(r300, tex.chroma_key);
        R300_STATECHANGE(r300, tex.border_color);
        
-       r300->state.texture.tc_count=0;
-
        r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0;
 
        mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
@@ -1375,67 +1240,101 @@ void r300_setup_textures(GLcontext *ctx)
                        mtu, R300_MAX_TEXTURE_UNITS);
                exit(-1);
        }
-       
+
+       /* We cannot let disabled tmu offsets pass DRM */
        for(i=0; i < mtu; i++) {
-               /*if(ctx->Texture.Unit[i].Enabled == 0)
-                       continue;*/
-               if( ((r300->state.render_inputs & (_TNL_BIT_TEX0<<i))!=0) != ((ctx->Texture.Unit[i].Enabled)!=0) ) {
-                       WARN_ONCE("Mismatch between render_inputs and ctx->Texture.Unit[i].Enabled value(%d vs %d).\n",
-                                       ((r300->state.render_inputs & (_TNL_BIT_TEX0<<i))!=0), ((ctx->Texture.Unit[i].Enabled)!=0));
-               }
-               
-               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i)) {
-                       t=r300->state.texture.unit[i].texobj;
-                       //fprintf(stderr, "format=%08x\n", r300->state.texture.unit[i].format);
-                       r300->state.texture.tc_count++;
+               if(TMU_ENABLED(ctx, i)) {
                        
-                       if(t == NULL){
-                               fprintf(stderr, "Texture unit %d enabled, but corresponding texobj is NULL, using default object.\n", i);
-                               //exit(-1);
-                               t=&default_tex_obj;
-                       }
+#if 0 /* Enables old behaviour */
+                       hw_tmu = i;
+#endif
+                       tmu_mappings[i] = hw_tmu;
+                       
+                       t=r300->state.texture.unit[i].texobj;
                        
-                       //fprintf(stderr, "t->format=%08x\n", t->format);
                        if((t->format & 0xffffff00)==0xffffff00) {
                                WARN_ONCE("unknown texture format (entry %x) encountered. Help me !\n", t->format & 0xff);
-                               //fprintf(stderr, "t->format=%08x\n", t->format);
                        }
                        
                        if (RADEON_DEBUG & DEBUG_STATE)
                                fprintf(stderr, "Activating texture unit %d\n", i);
-                       max_texture_unit=i;
-                       r300->hw.txe.cmd[R300_TXE_ENABLE]|=(1<<i);
                        
-                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=gen_fixed_filter(t->filter) | (i << 28); 
-                       r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=0x0;
+                       r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu);
+                       
+                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28);
+                       /* Currently disabled! */
+                       r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
+                       r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size;
+                       r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format;
+                       r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg;
+                       r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->offset;
+                       
+                       if(t->offset & R300_TXO_MACRO_TILE) {
+                               WARN_ONCE("macro tiling enabled!\n");
+                       }
+                       
+                       if(t->offset & R300_TXO_MICRO_TILE) {
+                               WARN_ONCE("micro tiling enabled!\n");
+                       }
+                       
+                       r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0;
+                       r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pp_border_color;
+                       
+                       last_hw_tmu = hw_tmu;
                        
-                       /* No idea why linear filtered textures shake when puting random data */
-                       /*r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=(rand()%0xffffffff) & (~0x1fff);*/
-                       r300->hw.tex.size.cmd[R300_TEX_VALUE_0+i]=t->size;
-                       r300->hw.tex.format.cmd[R300_TEX_VALUE_0+i]=t->format;
-                       //fprintf(stderr, "t->format=%08x\n", t->format);
-                       r300->hw.tex.offset.cmd[R300_TEX_VALUE_0+i]=r300->radeon.radeonScreen->fbLocation+t->offset;
-                       r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0+i]=t->pp_border_color;
+                       hw_tmu++;
                }
        }
        
-       ((drm_r300_cmd_header_t*)r300->hw.tex.filter.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.unknown1.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.size.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.format.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.offset.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.unknown4.cmd)->unchecked_state.count = max_texture_unit+1;
-       ((drm_r300_cmd_header_t*)r300->hw.tex.border_color.cmd)->unchecked_state.count = max_texture_unit+1;
-
+       r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1);
+       r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
+       r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
+       r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
+       r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1);
+       r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
+       r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+       r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+       
+       
+       if (!rp)        /* should only happen once, just after context is created */
+               return;
+       
+       R300_STATECHANGE(r300, fpt);
+       
+       for(i = 0; i < rp->tex.length; i++){
+               int unit;
+               unsigned long val;
+               
+               unit = rp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT;
+               unit &= 15;
+               
+               val = rp->tex.inst[i];
+               val &= ~R300_FPITX_IMAGE_MASK;
+               
+               assert(tmu_mappings[unit] >= 0);
+               
+               val |= tmu_mappings[unit] << R300_FPITX_IMAGE_SHIFT;
+               r300->hw.fpt.cmd[R300_FPT_INSTR_0+i] = val;
+       }
+       
+       r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, rp->tex.length);
+       
        if (RADEON_DEBUG & DEBUG_STATE)
-               fprintf(stderr, "TX_ENABLE: %08x  max_texture_unit=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], max_texture_unit);
+               fprintf(stderr, "TX_ENABLE: %08x  last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
 }
 
+union r300_outputs_written {
+       GLuint vp_outputs;                       /* hw_tcl_on */
+       DECLARE_RENDERINPUTS(index_bitset);      /* !hw_tcl_on */
+};
+/* Nonzero if output is written: tests bit vp_result of vp_outputs when hw_tcl_on, else tnl_attrib in index_bitset. */
+#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \
+       ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \
+       RENDERINPUTS_TEST( ((ow).index_bitset), (tnl_attrib) ))
+
 void r300_setup_rs_unit(GLcontext *ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       int i, cur_reg;
        /* I'm still unsure if these are needed */
        GLuint interp_magic[8] = {
                0x00,
@@ -1447,103 +1346,124 @@ void r300_setup_rs_unit(GLcontext *ctx)
                0x00,
                0x00
        };
-       GLuint vap_outputs;
-       
-       /* This needs to be rewritten - it is a hack at best */
+       union r300_outputs_written OutputsWritten;
+       GLuint InputsRead;
+       int fp_reg, high_rr;
+       int in_texcoords, col_interp_nr;
+       int i;
+
+       if(hw_tcl_on)
+               OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+       else
+               RENDERINPUTS_COPY( OutputsWritten.index_bitset, r300->state.render_inputs_bitset );
+
+       if (ctx->FragmentProgram._Current)
+               InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+       else {
+               fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+               return; /* This should only ever happen once.. */
+       }
 
        R300_STATECHANGE(r300, ri);
        R300_STATECHANGE(r300, rc);
        R300_STATECHANGE(r300, rr);
        
-#if 1
-       cur_reg = 0;
-       r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0;
+       fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
 
-       if (VERTPROG_ACTIVE(ctx))
-               vap_outputs = r300->current_vp->outputs;
-       else
-               vap_outputs = r300->state.render_inputs;
+       r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0;
+       
+       if (InputsRead & FRAG_BIT_WPOS){
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+                       if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+                               break;
+
+               if(i == ctx->Const.MaxTextureUnits){
+                       fprintf(stderr, "\tno free texcoord found...\n");
+                       exit(0);
+               }
 
+               InputsRead |= (FRAG_BIT_TEX0 << i);
+               InputsRead &= ~FRAG_BIT_WPOS;
+       }
+       
        for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
                r300->hw.ri.cmd[R300_RI_INTERP_0+i] = 0
                                | R300_RS_INTERP_USED
-                               | (cur_reg << R300_RS_INTERP_SRC_SHIFT)
+                               | (in_texcoords << R300_RS_INTERP_SRC_SHIFT)
                                | interp_magic[i];
-//             fprintf(stderr, "RS_INTERP[%d] = 0x%x\n", i, r300->hw.ri.cmd[R300_RI_INTERP_0+i]);
 
-               if (r300->state.render_inputs & (_TNL_BIT_TEX0<<i)) {
-                       assert(r300->state.texture.tc_count != 0);
-                       r300->hw.rr.cmd[R300_RR_ROUTE_0 + cur_reg] = 0
-                                       | R300_RS_ROUTE_ENABLE
+               r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0;
+               if (InputsRead & (FRAG_BIT_TEX0<<i)) {
+                       //assert(r300->state.texture.tc_count != 0);
+                       r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |=
+                                         R300_RS_ROUTE_ENABLE
                                        | i /* source INTERP */
-                                       | (cur_reg << R300_RS_ROUTE_DEST_SHIFT);
-//                     fprintf(stderr, "RS_ROUTE[%d] = 0x%x\n", cur_reg, r300->hw.rr.cmd[R300_RR_ROUTE_0 + cur_reg]);
-                       cur_reg++;
+                                       | (fp_reg << R300_RS_ROUTE_DEST_SHIFT);
+                       high_rr = fp_reg;
+
+                       if (!R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_TEX0+i, _TNL_ATTRIB_TEX(i) )) {
+                               /* Passing invalid data here can lock the GPU. */
+                               WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+                               //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
+                               //exit(-1);
+                       }
+                       InputsRead &= ~(FRAG_BIT_TEX0<<i);
+                       fp_reg++;
                } 
+               /* Need to count all coords enabled at vof */
+               if (R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_TEX0+i, _TNL_ATTRIB_TEX(i) ))
+                       in_texcoords++;
        }
-       if (vap_outputs & _TNL_BIT_COLOR0)
-               r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
-                               | R300_RS_ROUTE_0_COLOR
-                               | (cur_reg << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
 
-//     fprintf(stderr, "ADJ_RR0 = 0x%x\n", r300->hw.rr.cmd[R300_RR_ROUTE_0]);
-
-       r300->hw.rc.cmd[1] = 0
-                       | (cur_reg /* count */ << R300_RS_CNTL_TC_CNT_SHIFT)
-                       | R300_RS_CNTL_0_UNKNOWN_7
-                       | R300_RS_CNTL_0_UNKNOWN_18;
+       if (InputsRead & FRAG_BIT_COL0) {
+               if (!R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0 )) {
+                       WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+                       goto out; /* FIXME */
+                       //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base);
+                       //exit(-1);
+               }
 
-       if (r300->state.texture.tc_count > 0) {
-                       r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, cur_reg);
-                       r300->hw.rc.cmd[2] = 0xC0 | (cur_reg-1); /* index of highest */
-       } else {
-                       r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, 1);
-                       r300->hw.rc.cmd[2] = 0x0;
+               r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+                               | R300_RS_ROUTE_0_COLOR
+                               | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+               InputsRead &= ~FRAG_BIT_COL0;
+               col_interp_nr++;
        }
-
-
-//     fprintf(stderr, "rendering with %d texture co-ordinate sets\n", cur_reg);
+       out:
        
-#else
-       for(i = 1; i <= 8; ++i)
-               r300->hw.ri.cmd[i] = 0x00d10000;
-       r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN;
-       r300->hw.ri.cmd[R300_RI_INTERP_2] |= R300_RS_INTERP_2_UNKNOWN;
-       r300->hw.ri.cmd[R300_RI_INTERP_3] |= R300_RS_INTERP_3_UNKNOWN;
-
-#if 1
-       for(i = 2; i <= 8; ++i)
-               r300->hw.ri.cmd[i] |= 4;
-#endif
-
-       for(i = 1; i <= 8; ++i)
-               r300->hw.rr.cmd[i] = 0;
-       /* textures enabled ? */
-       if(r300->state.texture.tc_count>0){
-
-               /* This code only really works with one set of texture coordinates */
-
-               /* The second constant is needed to get glxgears display anything .. */
-               r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7
-                               | R300_RS_CNTL_0_UNKNOWN_18
-                               | (r300->state.texture.tc_count<<R300_RS_CNTL_TC_CNT_SHIFT);
-               r300->hw.rc.cmd[2] = 0xc0;
-
-
-               ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
-               r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x24008;
-
-               } else {
+       if (InputsRead & FRAG_BIT_COL1) {
+               if (!R300_OUTPUTS_WRITTEN_TEST( OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1 )) {
+                       WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+                       //exit(-1);
+               }
 
-               /* The second constant is needed to get glxgears display anything .. */
-               r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18;
-               r300->hw.rc.cmd[2] = 0;
+               r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11
+                               | R300_RS_ROUTE_1_COLOR1
+                               | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT);
+               InputsRead &= ~FRAG_BIT_COL1;
+               if (high_rr < 1) high_rr = 1;
+               col_interp_nr++;
+       }
+       
+       /* Need at least one. This might still lock as the values are undefined... */
+       if (in_texcoords == 0 && col_interp_nr == 0) {
+               r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+                               | R300_RS_ROUTE_0_COLOR
+                               | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+               col_interp_nr++;
+       }
+       
+       r300->hw.rc.cmd[1] = 0
+                       | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
+                       | (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT)
+                       | R300_RS_CNTL_0_UNKNOWN_18;
 
-               ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
-               r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000;
+       assert(high_rr >= 0);
+       r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr+1);
+       r300->hw.rc.cmd[2] = 0xC0 | high_rr;
 
-               }
-#endif
+       if (InputsRead)
+               WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
 }
 
 #define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
@@ -1551,6 +1471,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
 #define bump_vpu_count(ptr, new_count)   do{\
        drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
        int _nc=(new_count)/4; \
+       assert(_nc < 256); \
        if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
        }while(0)
 
@@ -1599,7 +1520,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa);
    while leaving colors intact. Nothing fancy (like lights) 
    
    If implementing lights make a copy first, so it is easy to switch between the two versions */
-void r300GenerateSimpleVertexShader(r300ContextPtr r300)
+static void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 {
        int i;
        GLuint o_reg = 0;
@@ -1671,18 +1592,17 @@ void r300GenerateSimpleVertexShader(r300ContextPtr r300)
                VSF_TMP(0)
                )
        o_reg += 2;
-
-       /* Pass through texture coordinates, if any */
-       for(i=0;i < r300->radeon.glCtx->Const.MaxTextureUnits;i++)
-               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i)){
-                       // fprintf(stderr, "i_tex[%d]=%d\n", i, r300->state.vap_reg.i_tex[i]);
+       
+       for (i = VERT_ATTRIB_COLOR1; i < VERT_ATTRIB_MAX; i++)
+               if (r300->state.sw_tcl_inputs[i] != -1) {
                        WRITE_OP(
                                EASY_VSF_OP(MUL, o_reg++ /* 2+i */, ALL, RESULT),
-                               VSF_REG(r300->state.vap_reg.i_tex[i]),
-                               VSF_ATTR_UNITY(r300->state.vap_reg.i_tex[i]),
-                               VSF_UNITY(r300->state.vap_reg.i_tex[i])
+                               VSF_REG(r300->state.sw_tcl_inputs[i]),
+                               VSF_ATTR_UNITY(r300->state.sw_tcl_inputs[i]),
+                               VSF_UNITY(r300->state.sw_tcl_inputs[i])
                                )
-                       }
+               
+               }
        
        r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */
        r300->state.vertex_shader.program.length=(r300->state.vertex_shader.program_end+1)*4;
@@ -1697,16 +1617,6 @@ void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 void r300SetupVertexShader(r300ContextPtr rmesa)
 {
        GLcontext* ctx = rmesa->radeon.glCtx;
-       struct r300_vertex_shader_fragment unk4={
-                       length: 4,
-                       body: { f: {
-                               /*0.0*/(rand()%100)/10.0,
-                               /*0.0*/(rand()%100)/10.0,
-                               /*1.0*/(rand()%100)/10.0,
-                               /*0.0*/(rand()%100)/10.0
-                               } }
-                       };
-       LOCAL_VARS
 
        /* Reset state, in case we don't use something */
        ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
@@ -1717,7 +1627,7 @@ void r300SetupVertexShader(r300ContextPtr rmesa)
           0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
           0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
        //setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
-       if(VERTPROG_ACTIVE(ctx)){
+       if(hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated){
                r300SetupVertexProgram(rmesa);
                return ;
        }
@@ -1757,7 +1667,7 @@ void r300SetupVertexShader(r300ContextPtr rmesa)
 
        R300_STATECHANGE(rmesa, pvs);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_1]=(rmesa->state.vertex_shader.program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
-               | (rmesa->state.vertex_shader.unknown_ptr1 << R300_PVS_CNTL_1_UNKNOWN_SHIFT)
+               | (rmesa->state.vertex_shader.unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT)
                | (rmesa->state.vertex_shader.program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_2]=(rmesa->state.vertex_shader.param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
                | (rmesa->state.vertex_shader.param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
@@ -1777,36 +1687,36 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
        GLcontext* ctx = rmesa->radeon.glCtx;
        int inst_count;
        int param_count;
-       LOCAL_VARS
+       struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
                        
 
-       /* Reset state, in case we don't use something */
        ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
+       R300_STATECHANGE(rmesa, vpp);
+       param_count = r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *)ctx->VertexProgram._Current/*prog*/, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+       bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+       param_count /= 4;
+       
+       /* Reset state, in case we don't use something */
        ((drm_r300_cmd_header_t*)rmesa->hw.vpi.cmd)->vpu.count = 0;
        ((drm_r300_cmd_header_t*)rmesa->hw.vps.cmd)->vpu.count = 0;
 
-       r300VertexProgUpdateParams(ctx, rmesa->current_vp);
-
-       setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(rmesa->current_vp->program));
-
-       setup_vertex_shader_fragment(rmesa, VSF_DEST_MATRIX0, &(rmesa->current_vp->params));
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program));
 
 #if 0
        setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, &(rmesa->state.vertex_shader.unknown1));
        setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, &(rmesa->state.vertex_shader.unknown2));
 #endif
 
-       inst_count=rmesa->current_vp->program.length/4 - 1;
-       param_count=rmesa->current_vp->params.length/4;
+       inst_count=prog->program.length/4 - 1;
 
        R300_STATECHANGE(rmesa, pvs);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_1]=(0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
-               | (inst_count/*0*/ << R300_PVS_CNTL_1_UNKNOWN_SHIFT)
+               | (inst_count/*pos_end*/ << R300_PVS_CNTL_1_POS_END_SHIFT)
                | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_2]=(0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
                | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_3]=(0/*rmesa->state.vertex_shader.unknown_ptr2*/ << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
-       | ((inst_count-rmesa->current_vp->t2rs) /*rmesa->state.vertex_shader.unknown_ptr3*/ << 0);
+       | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3*/ << 0);
 
        /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
        so I leave it as a reminder */
@@ -1816,140 +1726,113 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
 #endif
 }
 
+extern void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx );
 
-/* just a skeleton for now.. */
-void r300GenerateTexturePixelShader(r300ContextPtr r300)
+extern int future_hw_tcl_on;
+void r300UpdateShaders(r300ContextPtr rmesa)
 {
-       int i, mtu;
-       mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
-       GLenum envMode;
-
-       int tex_inst=0, alu_inst=0;
-
-       for(i=0;i<mtu;i++){
-               /* No need to proliferate {} */
-               if(! (r300->state.render_inputs & (_TNL_BIT_TEX0<<i)))continue;
-
-               envMode = r300->radeon.glCtx->Texture.Unit[i].EnvMode;
-               //fprintf(stderr, "envMode=%s\n", _mesa_lookup_enum_by_nr(envMode));
-
-               /* Fetch textured pixel */
-
-               r300->state.pixel_shader.program.tex.inst[tex_inst]=0x00018000;
-               tex_inst++;
-
-               switch(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeRGB){
-                       case GL_REPLACE:
-                               WARN_ONCE("ModeA==GL_REPLACE is possibly broken.\n");
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst0=
-                                       EASY_PFS_INSTR0(MAD, SRC0C_XYZ, ONE, ZERO);
-
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst1=
-                                       EASY_PFS_INSTR1(0, 0, 0 | PFS_FLAG_CONST, 0 | PFS_FLAG_CONST, NONE, ALL);
-                               break;
-                       case GL_MODULATE:
-                               WARN_ONCE("ModeRGB==GL_MODULATE is possibly broken.\n");
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst0=
-                                       EASY_PFS_INSTR0(MAD, SRC0C_XYZ, SRC1C_XYZ, ZERO);
-
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst1=
-                                       EASY_PFS_INSTR1(0, 0, 1, 0 | PFS_FLAG_CONST, NONE, ALL);
-
-                               break;
-                       default:
-                               WARN_ONCE("ModeRGB=%s is not implemented yet !\n",
-                                        _mesa_lookup_enum_by_nr(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeRGB));
-                               /* PFS_NOP */
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst0=
-                                       EASY_PFS_INSTR0(MAD, SRC0C_XYZ, ONE, ZERO);
-
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst1=
-                                       EASY_PFS_INSTR1(0, 0, 0 | PFS_FLAG_CONST, 0 | PFS_FLAG_CONST, NONE, ALL);
-                       }
-               switch(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeA){
-                       case GL_REPLACE:
-                               WARN_ONCE("ModeA==GL_REPLACE is possibly broken.\n");
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst2=
-                                       EASY_PFS_INSTR2(MAD, SRC0A, ONE, ZERO);
-
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst3=
-                                       EASY_PFS_INSTR3(0, 0, 0| PFS_FLAG_CONST, 0 | PFS_FLAG_CONST, OUTPUT);
-
-#if 0
-                               fprintf(stderr, "numArgsA=%d sourceA[0]=%s op=%d\n",
-                                        r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->_NumArgsA,
-                                        _mesa_lookup_enum_by_nr(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->SourceA[0]),
-                                        r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->OperandA[0]-GL_SRC_ALPHA);
-#endif
-                               break;
-                       case GL_MODULATE:
-                               WARN_ONCE("ModeA==GL_MODULATE is possibly broken.\n");
-
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst2=
-                                       EASY_PFS_INSTR2(MAD, SRC0A, SRC1A, ZERO);
+       GLcontext *ctx;
+       struct r300_vertex_program *vp;
+       int i;
+       
+       ctx = rmesa->radeon.glCtx;
+       
+       if (rmesa->NewGLState && hw_tcl_on) {
+               rmesa->NewGLState = 0;
+               
+               for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+                       rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+                       TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &rmesa->dummy_attrib[i];
+               }
+               
+               _tnl_UpdateFixedFunctionProgram(ctx);
+       
+               for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+                       TNL_CONTEXT(ctx)->vb.AttribPtr[i] = rmesa->temp_attrib[i];
+               }
+               
+               r300_select_vertex_shader(rmesa);
+               vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+               /*if (vp->translated == GL_FALSE)
+                       r300_translate_vertex_shader(vp);*/
+               if (vp->translated == GL_FALSE) {
+                       fprintf(stderr, "Failing back to sw-tcl\n");
+                       hw_tcl_on = future_hw_tcl_on = 0;
+                       r300ResetHwState(rmesa);
+
+                       return ;
+               }
+               r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+       }
+       
+}
 
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst3=
-                                       EASY_PFS_INSTR3(0, 0, 1, 0 | PFS_FLAG_CONST, OUTPUT);
+void r300UpdateShaderStates(r300ContextPtr rmesa)
+{
+       GLcontext *ctx;
+       ctx = rmesa->radeon.glCtx;
+       
+       r300UpdateTextureState(ctx);
 
-                               break;
-                       default:
-                               WARN_ONCE("ModeA=%s is not implemented yet !\n",
-                                        _mesa_lookup_enum_by_nr(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeA));
-                               /* PFS_NOP */
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst2=
-                                       EASY_PFS_INSTR2(MAD, SRC0A, ONE, ZERO);
+       r300SetupPixelShader(rmesa);
+       r300_setup_textures(ctx);
+       
+       r300SetupVertexShader(rmesa);
+       r300_setup_rs_unit(ctx);
+}
 
-                               r300->state.pixel_shader.program.alu.inst[alu_inst].inst3=
-                                       EASY_PFS_INSTR3(0, 0, 0 | PFS_FLAG_CONST, 0 | PFS_FLAG_CONST, OUTPUT);
+/* This is probably wrong for some values, I need to test this
+ * some more.  Range checking would be a good idea also..
+ * 
+ * But it works for most things.  I'll fix it later if someone
+ * else with a better clue doesn't
+ */
+static unsigned int r300PackFloat24(float f)
+{
+       float mantissa;
+       int exponent;
+       unsigned int float24 = 0;
 
-                       }
+       if (f == 0.0) return 0;
 
-               alu_inst++;
-               }
-       
-       r300->state.pixel_shader.program.tex.length=tex_inst;
-       r300->state.pixel_shader.program.tex_offset=0;
-       r300->state.pixel_shader.program.tex_end=tex_inst-1;
+       mantissa = frexpf(f, &exponent);
 
-#if 0
-       /* saturate last instruction, like i915 driver does */
-       r300->state.pixel_shader.program.alu.inst[alu_inst-1].inst0|=R300_FPI0_OUTC_SAT;
-       r300->state.pixel_shader.program.alu.inst[alu_inst-1].inst2|=R300_FPI2_OUTA_SAT;
-#endif
+       /* Handle negative values: set the sign bit (bit 23) */
+       if (mantissa < 0) {
+               float24 |= (1<<23);
+               mantissa = mantissa * -1.0;
+       }
+       /* Handle exponent, bias of 63; frexpf's mantissa is in [0.5, 1), hence +62 */
+       exponent += 62;
+       float24 |= (exponent << 16);
+       /* Kill 7 LSB of mantissa */
+       float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF)  >> 7;
 
-       r300->state.pixel_shader.program.alu.length=alu_inst;
-       r300->state.pixel_shader.program.alu_offset=0;
-       r300->state.pixel_shader.program.alu_end=alu_inst-1;
+       return float24;
 }
 
 void r300SetupPixelShader(r300ContextPtr rmesa)
 {
-int i,k;
+       GLcontext *ctx = rmesa->radeon.glCtx;
+       struct r300_fragment_program *rp =
+               (struct r300_fragment_program *)
+               (char *)ctx->FragmentProgram._Current;
+       int i,k;
 
-       /* This needs to be replaced by pixel shader generation code */
-
-       /* textures enabled ? */
-       if(rmesa->state.texture.tc_count>0){
-#if 1
-               r300GenerateTextureFragmentShader(rmesa);
-#else
-               rmesa->state.pixel_shader=SINGLE_TEXTURE_PIXEL_SHADER;
-               r300GenerateTexturePixelShader(rmesa);
-#endif
-               } else {
-               rmesa->state.pixel_shader=FLAT_COLOR_PIXEL_SHADER;
-               }
+       if (!rp)        /* should only happen once, just after context is created */
+               return;
+       
+       r300_translate_fragment_shader(rmesa, rp);
+       if (!rp->translated) {
+               fprintf(stderr, "%s: No valid fragment shader, exiting\n", __func__);
+               return;
+       }
        
-       R300_STATECHANGE(rmesa, fpt);
-       for(i=0;i<rmesa->state.pixel_shader.program.tex.length;i++)
-               rmesa->hw.fpt.cmd[R300_FPT_INSTR_0+i]=rmesa->state.pixel_shader.program.tex.inst[i];
-       rmesa->hw.fpt.cmd[R300_FPT_CMD_0]=cmducs(R300_PFS_TEXI_0, rmesa->state.pixel_shader.program.tex.length);
-
 #define OUTPUT_FIELD(st, reg, field)  \
                R300_STATECHANGE(rmesa, st); \
-               for(i=0;i<rmesa->state.pixel_shader.program.alu.length;i++) \
-                       rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=rmesa->state.pixel_shader.program.alu.inst[i].field;\
-               rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmducs(reg, rmesa->state.pixel_shader.program.alu.length);
+               for(i=0;i<=rp->alu_end;i++) \
+                       rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=rp->alu.inst[i].field;\
+               rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, rp->alu_end+1);
 
        OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
        OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
@@ -1958,37 +1841,41 @@ int i,k;
 #undef OUTPUT_FIELD
 
        R300_STATECHANGE(rmesa, fp);
-       for(i=0;i<4;i++){
-               rmesa->hw.fp.cmd[R300_FP_NODE0+i]=
-               (rmesa->state.pixel_shader.program.node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
-               | (rmesa->state.pixel_shader.program.node[i].alu_end  << R300_PFS_NODE_ALU_END_SHIFT)
-               | (rmesa->state.pixel_shader.program.node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
-               | (rmesa->state.pixel_shader.program.node[i].tex_end  << R300_PFS_NODE_TEX_END_SHIFT)
-               | ( (i==3) ? R300_PFS_NODE_LAST_NODE : 0);
+       /* I just want to say, the way these nodes are stored.. weird.. */
+       for (i=0,k=(4-(rp->cur_node+1));i<4;i++,k++) {
+               if (i<(rp->cur_node+1)) {
+                       rmesa->hw.fp.cmd[R300_FP_NODE0+k]=
+                               (rp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
+                               | (rp->node[i].alu_end  << R300_PFS_NODE_ALU_END_SHIFT)
+                               | (rp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
+                               | (rp->node[i].tex_end  << R300_PFS_NODE_TEX_END_SHIFT)
+                               | rp->node[i].flags; /*  ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */
+               } else {
+                       rmesa->hw.fp.cmd[R300_FP_NODE0+(3-i)] = 0;
                }
+       }
 
                /*  PFS_CNTL_0 */
        rmesa->hw.fp.cmd[R300_FP_CNTL0]=
-               (rmesa->state.pixel_shader.program.active_nodes-1)
-               | (rmesa->state.pixel_shader.program.first_node_has_tex<<3);
+               rp->cur_node
+               | (rp->first_node_has_tex<<3);
                /* PFS_CNTL_1 */
-       rmesa->hw.fp.cmd[R300_FP_CNTL1]=rmesa->state.pixel_shader.program.temp_register_count;
+       rmesa->hw.fp.cmd[R300_FP_CNTL1]=rp->max_temp_idx;
                /* PFS_CNTL_2 */
        rmesa->hw.fp.cmd[R300_FP_CNTL2]=
-               (rmesa->state.pixel_shader.program.alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
-               | (rmesa->state.pixel_shader.program.alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
-               | (rmesa->state.pixel_shader.program.tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
-               | (rmesa->state.pixel_shader.program.tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
+               (rp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+               | (rp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
+               | (rp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+               | (rp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
 
        R300_STATECHANGE(rmesa, fpp);
-       for(i=0;i<rmesa->state.pixel_shader.param_length;i++){
-               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+0]=r300PackFloat32(rmesa->state.pixel_shader.param[i].x);
-               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+1]=r300PackFloat32(rmesa->state.pixel_shader.param[i].y);
-               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+2]=r300PackFloat32(rmesa->state.pixel_shader.param[i].z);
-               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+3]=r300PackFloat32(rmesa->state.pixel_shader.param[i].w);
-               }
-       rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length);
-
+       for(i=0;i<rp->const_nr;i++){
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+0]=r300PackFloat24(rp->constant[i][0]);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+1]=r300PackFloat24(rp->constant[i][1]);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+2]=r300PackFloat24(rp->constant[i][2]);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+3]=r300PackFloat24(rp->constant[i][3]);
+       }
+       rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmdpacket0(R300_PFS_PARAM_0_X, rp->const_nr*4);
 }
 
 /**
@@ -1997,77 +1884,36 @@ int i,k;
 static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-
+       
        _swrast_InvalidateState(ctx, new_state);
        _swsetup_InvalidateState(ctx, new_state);
-       _ac_InvalidateState(ctx, new_state);
+       _vbo_InvalidateState(ctx, new_state);
        _tnl_InvalidateState(ctx, new_state);
        _ae_invalidate_state(ctx, new_state);
 
-       /* Go inefficiency! */
-       r300ResetHwState(r300);
-}
-
-/* Checks that r300ResetHwState actually modifies all states.
-   Should probably be burried in somewhere else as this file is getting longish. */
-void verify_r300ResetHwState(r300ContextPtr r300, int stage)
-{
-       struct r300_state_atom* atom;
-       int i;
-       drm_r300_cmd_header_t cmd;
-       
-       if(stage){ /* mess around with states */
-               unsigned long fp1, cb1;
-       
-               fp1=r300->hw.fp.cmd[R300_FP_CMD_1]; /* some special cases... */
-               cb1=r300->hw.cb.cmd[R300_CB_CMD_1];
-       
-               fprintf(stderr, "verify begin:\n");
-       
-               foreach(atom, &r300->hw.atomlist) {
-                       for(i=1; i < (*atom->check)(r300, atom); i++)
-                               atom->cmd[i]=0xdeadbeef;
-               }       
-               r300->hw.fp.cmd[R300_FP_CMD_1]=fp1;
-               r300->hw.cb.cmd[R300_CB_CMD_1]=cb1;
-                       
-               foreach(atom, &r300->hw.atomlist) {
-                       cmd.u=atom->cmd[0];
-                       switch(cmd.header.cmd_type){
-                       case R300_CMD_UNCHECKED_STATE:
-                       case R300_CMD_VPU:
-                       case R300_CMD_PACKET3:
-                       case R300_CMD_END3D:
-                       case R300_CMD_CP_DELAY:
-                       case R300_CMD_DMA_DISCARD:
-                               break;
-                       default: fprintf(stderr, "unknown cmd_type %d in atom %s\n",
-                                       cmd.header.cmd_type, atom->name);
-                       }
-               
-               }       
-       } else { /* check that they were set */
-               foreach(atom, &r300->hw.atomlist) {
-                       for(i=1; i < (*atom->check)(r300, atom); i++)
-                               if(atom->cmd[i]==0xdeadbeef)
-                                       fprintf(stderr, "atom %s is untouched\n", atom->name);
-               }       
+       if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+               r300UpdateDrawBuffer(ctx);
        }
+
+       r300UpdateStateParameters(ctx, new_state);
+
+#ifdef HW_VBOS
+       if(new_state & _NEW_ARRAY)
+               r300->state.VB.lock_uptodate = GL_FALSE;
+#endif
+       r300->NewGLState |= new_state;
 }
-               
+
 /**
  * Completely recalculates hardware state based on the Mesa state.
  */
 void r300ResetHwState(r300ContextPtr r300)
 {
        GLcontext* ctx = r300->radeon.glCtx;
-       int i;
 
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
-       //verify_r300ResetHwState(r300, 1);
-                       
                /* This is a place to initialize registers which
                   have bitfields accessed by different functions
                   and not all bits are used */
@@ -2102,21 +1948,26 @@ void r300ResetHwState(r300ContextPtr r300)
        
        /* stencil */
        r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
-       r300StencilMask(ctx, ctx->Stencil.WriteMask[0]);
-       r300StencilFunc(ctx, ctx->Stencil.Function[0], ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
-       r300StencilOp(ctx, ctx->Stencil.FailFunc[0], ctx->Stencil.ZFailFunc[0], ctx->Stencil.ZPassFunc[0]);
+       r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+       r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+       r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0], ctx->Stencil.ZFailFunc[0], ctx->Stencil.ZPassFunc[0]);
 
        r300UpdateCulling(ctx);
 
        r300UpdateTextureState(ctx);
 
 //     r300_setup_routing(ctx, GL_TRUE);
-       r300EmitArrays(ctx, GL_TRUE); /* Just do the routing */
-       r300_setup_textures(ctx);
-       r300_setup_rs_unit(ctx);
-
-       r300SetupVertexShader(r300);
-       r300SetupPixelShader(r300);
+       
+#if 0 /* Done prior to rendering */
+       if(hw_tcl_on == GL_FALSE){
+               r300EmitArrays(ctx, GL_TRUE); /* Just do the routing */
+               r300_setup_textures(ctx);
+               r300_setup_rs_unit(ctx);
+
+               r300SetupVertexShader(r300);
+               r300SetupPixelShader(r300);
+       }
+#endif
 
        r300_set_blend_state(ctx);
 
@@ -2126,7 +1977,7 @@ void r300ResetHwState(r300ContextPtr r300)
                /* Initialize magic registers
                 TODO : learn what they really do, or get rid of
                 those we don't have to touch */
-       r300->hw.unk2080.cmd[1] = 0x0030045A; /* Dangerous */
+       r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
 
        r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
                                | R300_VPORT_X_OFFSET_ENA
@@ -2139,17 +1990,16 @@ void r300ResetHwState(r300ContextPtr r300)
 
        r300->hw.unk2134.cmd[1] = 0x00FFFFFF;
        r300->hw.unk2134.cmd[2] = 0x00000000;
-#ifdef MESA_BIG_ENDIAN
-       r300->hw.unk2140.cmd[1] = 0x00000002;
-#else
-       r300->hw.unk2140.cmd[1] = 0x00000000;
-#endif
+       if (_mesa_little_endian())
+               r300->hw.vap_cntl_status.cmd[1] = 0x00000000;
+       else
+               r300->hw.vap_cntl_status.cmd[1] = 0x00000002;
 
 #if 0 /* Done in setup routing */
-       ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = 1;
+       ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = 1;
        r300->hw.vir[0].cmd[1] = 0x21030003;
 
-       ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = 1;
+       ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = 1;
        r300->hw.vir[1].cmd[1] = 0xF688F688;
 
        r300->hw.vic.cmd[R300_VIR_CNTL_0] = 0x00000001;
@@ -2165,7 +2015,8 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0);
        r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0);
 
-       if (GET_CHIP(r300->radeon.radeonScreen) == RADEON_CHIP_R300)
+       /* what about other chips than r300 or rv350??? */
+       if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300)
                r300->hw.unk2288.cmd[1] = R300_2288_R300;
        else
                r300->hw.unk2288.cmd[1] = R300_2288_RV350;
@@ -2183,15 +2034,20 @@ void r300ResetHwState(r300ContextPtr r300)
 
        r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
                | R300_GB_LINE_STUFF_ENABLE
-               | R300_GB_TRIANGLE_STUFF_ENABLE;
+               | R300_GB_TRIANGLE_STUFF_ENABLE /*| R300_GB_UNK31*/;
 
        r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
        r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
-       if (GET_CHIP(r300->radeon.radeonScreen) == RADEON_CHIP_R300)
+       if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || 
+            (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
                r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
                                                        | R300_GB_TILE_PIPE_COUNT_R300
                                                        | R300_GB_TILE_SIZE_16;
-       else if (GET_CHIP(r300->radeon.radeonScreen) == RADEON_CHIP_R420)
+       else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410)
+               r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
+                                                       | R300_GB_TILE_PIPE_COUNT_RV410
+                                                       | R300_GB_TILE_SIZE_16;
+       else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
                r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
                                                        | R300_GB_TILE_PIPE_COUNT_R420
                                                        | R300_GB_TILE_SIZE_16;
@@ -2199,7 +2055,8 @@ void r300ResetHwState(r300ContextPtr r300)
                r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
                                                        | R300_GB_TILE_PIPE_COUNT_RV300
                                                        | R300_GB_TILE_SIZE_16;
-       r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0x00000000;
+       /* set to 0 when fog is disabled? */
+       r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W;
        r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = 0x00000000; /* No antialiasing */
 
        //r300->hw.txe.cmd[R300_TXE_ENABLE] = 0;
@@ -2217,37 +2074,28 @@ void r300ResetHwState(r300ContextPtr r300)
                                             (6 << R300_POINTSIZE_Y_SHIFT);
 #endif
 
-       r300->hw.unk4230.cmd[1] = 0x01800000;
+       r300->hw.unk4230.cmd[1] = 0x18000006;
        r300->hw.unk4230.cmd[2] = 0x00020006;
        r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0);
 
        r300LineWidth(ctx, 0.0);
 
-#ifdef EXP_C
-       static int foobar=0;
-       r300->hw.lsf.cmd[1] = foobar++; //0x3a088889;
-#endif
        r300->hw.unk4260.cmd[1] = 0;
        r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0);
        r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0);
 
-       r300->hw.unk4274.cmd[1] = 0x00000002;
-       r300->hw.unk4274.cmd[2] = 0x0003AAAA;
-       r300->hw.unk4274.cmd[3] = 0x00000000;
-       r300->hw.unk4274.cmd[4] = 0x00000000;
+       r300->hw.shade.cmd[1] = 0x00000002;
+       r300ShadeModel(ctx, ctx->Light.ShadeModel);
+       r300->hw.shade.cmd[3] = 0x00000000;
+       r300->hw.shade.cmd[4] = 0x00000000;
 
        r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
        r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
-       r300->hw.unk4288.cmd[2] = 0x00000001;
-       r300->hw.unk4288.cmd[3] = 0x00000000;
-       r300->hw.unk4288.cmd[4] = 0x00000000;
-       r300->hw.unk4288.cmd[5] = 0x00000000;
-
-       r300->hw.unk42A0.cmd[1] = 0x00000000;
+       r300->hw.polygon_mode.cmd[2] = 0x00000001;
+       r300->hw.polygon_mode.cmd[3] = 0x00000000;
+       r300->hw.zbias_cntl.cmd[1] = 0x00000000;
 
        r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits);
-       r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
-       r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
        r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
        
        r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF;
@@ -2284,13 +2132,13 @@ void r300ResetHwState(r300ContextPtr r300)
                r300->hw.fpi[3].cmd[i] = FP_SELA(0,W,NO,FP_TMP(0),0,0);
        }
 #endif
-
-       r300->hw.unk4BC0.cmd[1] = 0;
-
-       r300->hw.unk4BC8.cmd[1] = 0;
-       r300->hw.unk4BC8.cmd[2] = 0;
-       r300->hw.unk4BC8.cmd[3] = 0;
-
+       r300Enable(ctx, GL_FOG, ctx->Fog.Enabled);
+       ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
+       ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+       ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+       ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+       ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+       ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
 
        r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
        r300->hw.unk4BD8.cmd[1] = 0;
@@ -2303,24 +2151,21 @@ void r300ResetHwState(r300ContextPtr r300)
 #endif
 
        r300BlendColor(ctx, ctx->Color.BlendColor);
-       r300->hw.unk4E10.cmd[2] = 0;
-       r300->hw.unk4E10.cmd[3] = 0;
+       r300->hw.blend_color.cmd[2] = 0;
+       r300->hw.blend_color.cmd[3] = 0;
        
        /* Again, r300ClearBuffer uses this */
-       if(ctx->Visual.doubleBufferMode){
-               r300->hw.cb.cmd[R300_CB_OFFSET] =
-                       r300->radeon.radeonScreen->backOffset +
-                       r300->radeon.radeonScreen->fbLocation;
-               r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.radeonScreen->backPitch
-                       | R300_COLOR_UNKNOWN_22_23;
-       } else {
-               r300->hw.cb.cmd[R300_CB_OFFSET] =
-                       r300->radeon.radeonScreen->frontOffset +
-                       r300->radeon.radeonScreen->fbLocation;
-               r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.radeonScreen->frontPitch
-                       | R300_COLOR_UNKNOWN_22_23;
-               
-       }
+       r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
+               r300->radeon.radeonScreen->fbLocation;
+       r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+       
+       if (r300->radeon.radeonScreen->cpp == 4)
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+       else
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+       
+       if (r300->radeon.sarea->tiling_enabled)
+               r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
        
        r300->hw.unk4E50.cmd[1] = 0;
        r300->hw.unk4E50.cmd[2] = 0;
@@ -2337,19 +2182,38 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.unk4EA0.cmd[1] = 0x00000000;
        r300->hw.unk4EA0.cmd[2] = 0xffffffff;
 
-       r300->hw.unk4F10.cmd[1] = 0x00000002; // depthbuffer format?
-       //r300->hw.unk4F10.cmd[2] = 0x00000000;
-       r300->hw.unk4F10.cmd[3] = 0x00000003;
-       r300->hw.unk4F10.cmd[4] = 0x00000000;
-
-       /* experiment a bit */
-       //r300->hw.unk4F10.cmd[2] = 0x00000001; // depthbuffer format?
+       switch (ctx->Visual.depthBits) {
+       case 16:
+               r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z;
+       break;
+       case 24:
+               r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z;
+       break;
+       default:
+               fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+                       ctx->Visual.depthBits);
+               exit(-1);
+                       
+       }
+       /* z compress? */
+       //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
+       
+       r300->hw.zstencil_format.cmd[3] = 0x00000003;
+       r300->hw.zstencil_format.cmd[4] = 0x00000000;
 
        r300->hw.zb.cmd[R300_ZB_OFFSET] =
                r300->radeon.radeonScreen->depthOffset +
                r300->radeon.radeonScreen->fbLocation;
        r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
-
+       
+       if (r300->radeon.sarea->tiling_enabled) {
+               /* Turn off when clearing buffers ? */
+               r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE;
+       
+               if (ctx->Visual.depthBits == 24)
+                       r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_MICROTILE_ENABLE;
+       }
+       
        r300->hw.unk4F28.cmd[1] = 0;
 
        r300->hw.unk4F30.cmd[1] = 0;
@@ -2380,7 +2244,6 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
 
 //END: TODO
-       //verify_r300ResetHwState(r300, 0);
        r300->hw.all_dirty = GL_TRUE;
 }
 
@@ -2401,12 +2264,12 @@ void r300InitState(r300ContextPtr r300)
        switch (ctx->Visual.depthBits) {
        case 16:
                r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
-               depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
+               depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z;
                r300->state.stencil.clear = 0x00000000;
                break;
        case 24:
                r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
-               depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
+               depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z;
                r300->state.stencil.clear = 0x00ff0000;
                break;
        default:
@@ -2424,6 +2287,12 @@ void r300InitState(r300ContextPtr r300)
        r300ResetHwState(r300);
 }
 
+static void r300RenderMode( GLcontext *ctx, GLenum mode )
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       (void)rmesa;
+       (void)mode;
+}
 
 /**
  * Initialize driver's state callback functions
@@ -2442,13 +2311,15 @@ void r300InitStateFuncs(struct dd_function_table* functions)
        functions->DepthFunc = r300DepthFunc;
        functions->DepthMask = r300DepthMask;
        functions->CullFace = r300CullFace;
+       functions->Fogfv = r300Fogfv;
        functions->FrontFace = r300FrontFace;
+       functions->ShadeModel = r300ShadeModel;
 
        /* Stencil related */
        functions->ClearStencil = r300ClearStencil;
-       functions->StencilFunc = r300StencilFunc;
-       functions->StencilMask = r300StencilMask;
-       functions->StencilOp = r300StencilOp;
+       functions->StencilFuncSeparate = r300StencilFuncSeparate;
+       functions->StencilMaskSeparate = r300StencilMaskSeparate;
+       functions->StencilOpSeparate = r300StencilOpSeparate;
 
        /* Viewport related */
        functions->Viewport = r300Viewport;
@@ -2458,4 +2329,7 @@ void r300InitStateFuncs(struct dd_function_table* functions)
 
        functions->PolygonOffset = r300PolygonOffset;
        functions->PolygonMode = r300PolygonMode;
+       
+       functions->RenderMode = r300RenderMode;
 }
+