Fixes for clamp modes.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_state.c
index f67025494f0104d41efd85792654bf66335a9c3f..5266c4f90aad9685eca43e64de53d6828c78f947 100644 (file)
@@ -48,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "swrast_setup/swrast_setup.h"
 #include "array_cache/acache.h"
 #include "tnl/tnl.h"
+#include "texformat.h"
 
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
@@ -56,6 +57,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_state.h"
 #include "r300_reg.h"
 #include "r300_program.h"
+#include "r300_emit.h"
+#include "r300_fixed_pipelines.h"
 
 static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
 {
@@ -199,6 +202,25 @@ static int blend_factor(GLenum factor, GLboolean is_src)
  * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for
  * unknown reasons.
  */
+
+/*
+ * Helper: compute the alpha and colour blend control words and store them
+ * in the blend state atom, marking the atom dirty only if a word changes.
+ *
+ *   func,  eqn   - colour blend factor bits and combine function bits
+ *   cbits        - extra control bits OR-ed into the colour word only
+ *   funcA, eqnA  - alpha blend factor bits and combine function bits
+ */
+static void r300_set_blend_cntl(r300ContextPtr rmesa, int func, int eqn, int cbits, int funcA, int eqnA)
+{
+       GLuint new_ablend, new_cblend;
+
+       new_ablend = eqnA | funcA;
+       new_cblend = eqn | func;
+
+       /* Decide on NO_SEPARATE from the words being programmed now, and
+        * before cbits are merged in.  The words cached in hw.bld belong to
+        * the previous call and say nothing about the current request. */
+       if(new_ablend == new_cblend){
+               new_cblend |=  R300_BLEND_NO_SEPARATE;
+               }
+       new_cblend |= cbits;
+
+       /* Skip the state change entirely when nothing differs. */
+       if((new_ablend != rmesa->hw.bld.cmd[R300_BLD_ABLEND])
+               || (new_cblend != rmesa->hw.bld.cmd[R300_BLD_CBLEND])){
+               R300_STATECHANGE(rmesa, bld);
+               rmesa->hw.bld.cmd[R300_BLD_ABLEND]=new_ablend;
+               rmesa->hw.bld.cmd[R300_BLD_CBLEND]=new_cblend;
+               }
+}
+
 static void r300_set_blend_state(GLcontext * ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -215,7 +237,6 @@ static void r300_set_blend_state(GLcontext * ctx)
            (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
        int eqnA = R200_COMB_FCN_ADD_CLAMP;
 
-       R300_STATECHANGE(rmesa, bld);
 
        if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
                if (ctx->Color._LogicOpEnabled) {
@@ -223,8 +244,9 @@ static void r300_set_blend_state(GLcontext * ctx)
                        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =
                            cntl | R300_ROP_ENABLE;
                        #endif
-                       rmesa->hw.bld.cmd[R300_BLD_ABLEND] = eqn | func;
-                       rmesa->hw.bld.cmd[R300_BLD_CBLEND] = eqn | func;
+                       r300_set_blend_cntl(rmesa,
+                               func, eqn, 0,
+                               func, eqn);
                        return;
                } else if (ctx->Color.BlendEnabled) {
                        #if 0
@@ -236,8 +258,9 @@ static void r300_set_blend_state(GLcontext * ctx)
                        #if 0
                        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
                        #endif
-                       rmesa->hw.bld.cmd[R300_BLD_ABLEND] = eqn | func;
-                       rmesa->hw.bld.cmd[R300_BLD_CBLEND] = eqn | func;
+                       r300_set_blend_cntl(rmesa,
+                               func, eqn, 0,
+                               func, eqn);
                        return;
                }
        } else {
@@ -258,6 +281,9 @@ static void r300_set_blend_state(GLcontext * ctx)
                        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
                        rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
                        #endif
+                       r300_set_blend_cntl(rmesa,
+                               func, eqn, 0,
+                               func, eqn);
                        return;
                }
        }
@@ -344,14 +370,12 @@ static void r300_set_blend_state(GLcontext * ctx)
                return;
        }
 
-       rmesa->hw.bld.cmd[R300_BLD_ABLEND] = eqnA | funcA;
-       rmesa->hw.bld.cmd[R300_BLD_CBLEND] = eqn | func ;
-       if(rmesa->hw.bld.cmd[R300_BLD_ABLEND] == rmesa->hw.bld.cmd[R300_BLD_CBLEND]){
-               rmesa->hw.bld.cmd[R300_BLD_CBLEND] |= R300_BLEND_UNKNOWN | R300_BLEND_ENABLE | R300_BLEND_NO_SEPARATE;
-               } else {
-               rmesa->hw.bld.cmd[R300_BLD_CBLEND] |= R300_BLEND_UNKNOWN | R300_BLEND_ENABLE;
-               }
-
+       r300_set_blend_cntl(rmesa,
+               func, eqn, R300_BLEND_UNKNOWN | R300_BLEND_ENABLE,
+               funcA, eqnA);
+       r300_set_blend_cntl(rmesa,
+               func, eqn, R300_BLEND_UNKNOWN | R300_BLEND_ENABLE,
+               funcA, eqnA);
 }
 
 static void r300BlendEquationSeparate(GLcontext * ctx,
@@ -410,8 +434,31 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                        state ? "GL_TRUE" : "GL_FALSE");
 
        switch (cap) {
+               /* Fast track this one...
+                */
+       case GL_TEXTURE_1D:
+       case GL_TEXTURE_2D:
+       case GL_TEXTURE_3D:
+               break;
+
+       case GL_ALPHA_TEST:
+               R300_STATECHANGE(r300, at);
+               if (state) {
+                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] |=
+                           R300_ALPHA_TEST_ENABLE;
+               } else {
+                       /* Clear only the enable bit; OR-ing the complement
+                        * would set every other bit of the word instead. */
+                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] &=
+                           ~R300_ALPHA_TEST_ENABLE;
+               }
+               break;
+
+       case GL_BLEND:
+       case GL_COLOR_LOGIC_OP:
+               r300_set_blend_state(ctx);
+               break;
+
        case GL_DEPTH_TEST:
-               R300_STATECHANGE(r300, zc);
+               R300_STATECHANGE(r300, zs);
 
                if (state) {
                        if (ctx->Depth.Mask)
@@ -421,7 +468,33 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                } else
                        newval = 0;
 
-               r300->hw.zc.cmd[R300_ZC_CNTL_0] = newval;
+               r300->hw.zs.cmd[R300_ZS_CNTL_0] = newval;
+               break;
+
+       case GL_STENCIL_TEST:
+
+               {
+               static int stencil=1;
+               if(stencil){
+                       fprintf(stderr, "%s:%s - do not know how to enable stencil. Help me !\n",
+                               __FILE__, __FUNCTION__);
+                       stencil=0;
+                       }
+               }
+
+               if (r300->state.hw_stencil) {
+                       //fprintf(stderr, "Stencil %s\n", state ? "enabled" : "disabled");
+                       R300_STATECHANGE(r300, zs);
+                       if (state) {
+                               r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+                                   R300_RB3D_STENCIL_ENABLE;
+                       } else {
+                               r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
+                                   ~R300_RB3D_STENCIL_ENABLE;
+                       }
+               } else {
+                       FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
+               }
                break;
 
        case GL_CULL_FACE:
@@ -470,34 +543,37 @@ static void r300DepthFunc(GLcontext* ctx, GLenum func)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-       R300_STATECHANGE(r300, zc);
+       R300_STATECHANGE(r300, zs);
+
+       r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
 
        switch(func) {
        case GL_NEVER:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_NEVER;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NEVER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_LESS:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_LESS;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LESS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_EQUAL:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_EQUAL;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_EQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_LEQUAL:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_LEQUAL;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_LEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_GREATER:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_GREATER;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GREATER << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_NOTEQUAL:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_NEQUAL;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_NOTEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_GEQUAL:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_GEQUAL;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_GEQUAL << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        case GL_ALWAYS:
-               r300->hw.zc.cmd[R300_ZC_CNTL_1] = R300_Z_TEST_ALWAYS;
+               r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        }
+
 }
 
 
@@ -513,8 +589,8 @@ static void r300DepthMask(GLcontext* ctx, GLboolean mask)
        if (!ctx->Depth.Test)
                return;
 
-       R300_STATECHANGE(r300, zc);
-       r300->hw.zc.cmd[R300_ZC_CNTL_0] = mask
+       R300_STATECHANGE(r300, zs);
+       r300->hw.zs.cmd[R300_ZS_CNTL_0] = mask
                ? R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST;
 }
 
@@ -540,11 +616,132 @@ static void r300ColorMask(GLcontext* ctx,
 static void r300PointSize(GLcontext * ctx, GLfloat size)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       
+
        /* This might need fixing later */
        R300_STATECHANGE(r300, vps);
        r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
 }
+/* =============================================================
+ * Stencil
+ */
+
+/* Map a GL comparison enum to the matching R300_ZS_* compare code.
+ * Any value not listed below yields 0. */
+static int translate_stencil_func(int func)
+{
+       int hw_func = 0;
+
+       switch (func) {
+       case GL_NEVER:
+               hw_func = R300_ZS_NEVER;
+               break;
+       case GL_LESS:
+               hw_func = R300_ZS_LESS;
+               break;
+       case GL_EQUAL:
+               hw_func = R300_ZS_EQUAL;
+               break;
+       case GL_LEQUAL:
+               hw_func = R300_ZS_LEQUAL;
+               break;
+       case GL_GREATER:
+               hw_func = R300_ZS_GREATER;
+               break;
+       case GL_NOTEQUAL:
+               hw_func = R300_ZS_NOTEQUAL;
+               break;
+       case GL_GEQUAL:
+               hw_func = R300_ZS_GEQUAL;
+               break;
+       case GL_ALWAYS:
+               hw_func = R300_ZS_ALWAYS;
+               break;
+       }
+
+       return hw_func;
+}
+
+/* Map a GL stencil operation enum to the matching R300_ZS_* operation code.
+ * An unrecognised operation falls back to R300_ZS_KEEP so that the function
+ * always returns a defined value (the result is shifted into a register
+ * word by the caller). */
+static int translate_stencil_op(int op)
+{
+       switch (op) {
+       case GL_KEEP:
+                   return R300_ZS_KEEP;
+       case GL_ZERO:
+                   return R300_ZS_ZERO;
+       case GL_REPLACE:
+                   return R300_ZS_REPLACE;
+       case GL_INCR:
+                   return R300_ZS_INCR;
+       case GL_DECR:
+                   return R300_ZS_DECR;
+       case GL_INCR_WRAP_EXT:
+                   return R300_ZS_INCR_WRAP;
+       case GL_DECR_WRAP_EXT:
+                   return R300_ZS_DECR_WRAP;
+       case GL_INVERT:
+                   return R300_ZS_INVERT;
+       default:
+                   /* Leaving the stencil buffer untouched is the least
+                    * harmful choice for an operation we cannot translate. */
+                   return R300_ZS_KEEP;
+       }
+}
+
+/* Program the stencil compare function, reference value and value mask.
+ * The func/ref/mask arguments are not used directly; the values are read
+ * back from ctx->Stencil (index 0) and the same compare function is written
+ * to both the front and the back function fields. */
+static void r300StencilFunc(GLcontext * ctx, GLenum func,
+                           GLint ref, GLuint mask)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       /* NOTE(review): assumes the ref and value-mask fields of ZS_CNTL_2
+        * are 8 bits wide - confirm against r300_reg.h.  Bounding the values
+        * keeps them from spilling into the neighbouring fields. */
+       GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+                         ((ctx->Stencil.ValueMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+       GLuint flag;
+
+       R300_STATECHANGE(rmesa, zs);
+
+       /* Drop the old compare functions (R300_ZS_MASK covers one function
+        * field) ... */
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(
+               (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+               | (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT));
+       /* ... and the whole of the old reference value and value mask. */
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=  ~((0xff << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+                                               (0xff << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+
+       flag = translate_stencil_func(ctx->Stencil.Function[0]);
+
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+                                         | (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+}
+
+/* Program the stencil write mask.  The mask argument is not used directly;
+ * the value is read back from ctx->Stencil.WriteMask[0]. */
+static void r300StencilMask(GLcontext * ctx, GLuint mask)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+       R300_STATECHANGE(rmesa, zs);
+       /* NOTE(review): assumes an 8-bit write-mask field in ZS_CNTL_2 -
+        * confirm against r300_reg.h.  The whole field is cleared and the
+        * new value bounded so no stray bits land outside the field. */
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2]  &= ~(0xff << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= (ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT;
+}
+
+
+/* Program the stencil fail / depth-fail / depth-pass operations.  The
+ * fail/zfail/zpass arguments are not used directly; the values are read
+ * back from ctx->Stencil (index 0) and written to both the front and the
+ * back operation fields. */
+static void r300StencilOp(GLcontext * ctx, GLenum fail,
+                         GLenum zfail, GLenum zpass)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+       R300_STATECHANGE(rmesa, zs);
+               /* It is easier to mask what's left: keep the depth function
+                * and both stencil compare functions, which share this word
+                * with the operation fields rewritten below. */
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
+                (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT)
+               |(R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+               |(R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+                (translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
+               |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
+               |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT)
+               |(translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+               |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+               |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+
+}
+
+/* Store the stencil clear value together with a full value mask and the
+ * current write mask in ZS_CNTL_2.  The s argument is not used directly;
+ * the value is read back from ctx->Stencil.Clear. */
+static void r300ClearStencil(GLcontext * ctx, GLint s)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+       /* Not sure whether this is correct..
+        * NOTE(review): this overwrites the whole word, including whatever
+        * the stencil func/mask callbacks stored there - confirm that is
+        * intended.  The field positions are the R300 ones used for this
+        * word elsewhere in the file; 8-bit fields are assumed. */
+       R300_STATECHANGE(rmesa, zs);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] =
+           ((GLuint) (ctx->Stencil.Clear & 0xff) |
+            (0xff << R300_RB3D_ZS2_STENCIL_MASK_SHIFT) |
+            ((ctx->Stencil.WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT));
+}
 
 /* =============================================================
  * Window position and viewport transformation
@@ -607,17 +804,18 @@ void r300_setup_routing(GLcontext *ctx, GLboolean immediate)
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct vertex_buffer *VB = &tnl->vb;
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       
-       
+
+
        /* Stage 1 - input to VAP */
-       
+
        /* Assign register number automatically, retaining it in rmesa->state.reg */
-       
+
        /* Note: immediate vertex data includes all coordinates.
        To save bandwidth use either VBUF or state-based vertex generation */
-       
+
        #define CONFIGURE_AOS(v, o, r, f) \
                {\
+               if (RADEON_DEBUG & DEBUG_STATE)fprintf(stderr, "Enabling "#r "\n"); \
                if(immediate){ \
                        r300->state.aos[count].element_size=4; \
                        r300->state.aos[count].stride=4; \
@@ -634,37 +832,53 @@ void r300_setup_routing(GLcontext *ctx, GLboolean immediate)
                count++; \
                reg++; \
                }
-       
-               /* All offsets are 0 - for use by immediate mode. 
+
+               /* All offsets are 0 - for use by immediate mode.
                Should change later to handle vertex buffers */
-       CONFIGURE_AOS(VB->ObjPtr, 0, i_coords, AOS_FORMAT_FLOAT);
-       CONFIGURE_AOS(VB->ColorPtr[0], 0, i_color[0], AOS_FORMAT_FLOAT_COLOR);
+       if(tnl->render_inputs & _TNL_BIT_POS)
+               CONFIGURE_AOS(VB->ObjPtr, 0, i_coords, AOS_FORMAT_FLOAT);
+       if(tnl->render_inputs & _TNL_BIT_NORMAL)
+               CONFIGURE_AOS(VB->NormalPtr, 0, i_normal, AOS_FORMAT_FLOAT);
+
+       if(tnl->render_inputs & _TNL_BIT_COLOR0)
+               CONFIGURE_AOS(VB->ColorPtr[0], 0, i_color[0], AOS_FORMAT_FLOAT_COLOR);
+       if(tnl->render_inputs & _TNL_BIT_COLOR1)
+               CONFIGURE_AOS(VB->ColorPtr[1], 0, i_color[1], AOS_FORMAT_FLOAT_COLOR);
+
+       if(tnl->render_inputs & _TNL_BIT_FOG)
+               CONFIGURE_AOS(VB->FogCoordPtr, 0, i_fog, AOS_FORMAT_FLOAT);
+
        for(i=0;i < ctx->Const.MaxTextureUnits;i++)
-               if(ctx->Texture.Unit[i].Enabled)
+               if(tnl->render_inputs & (_TNL_BIT_TEX0<<i))
                        CONFIGURE_AOS(VB->TexCoordPtr[i], 0, i_tex[i], AOS_FORMAT_FLOAT);
-                       
+
+       if(tnl->render_inputs & _TNL_BIT_INDEX)
+               CONFIGURE_AOS(VB->IndexPtr[0], 0, i_index, AOS_FORMAT_FLOAT);
+       if(tnl->render_inputs & _TNL_BIT_POINTSIZE)
+               CONFIGURE_AOS(VB->PointSizePtr, 0, i_pointsize, AOS_FORMAT_FLOAT);
+
        r300->state.aos_count=count;
-       
+
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "aos_count=%d\n", count);
-       
+
        if(count>R300_MAX_AOS_ARRAYS){
                fprintf(stderr, "Aieee ! AOS array count exceeded !\n");
                exit(-1);
                }
-                       
+
        /* Implement AOS */
-       
+
        /* setup INPUT_ROUTE */
        R300_STATECHANGE(r300, vir[0]);
        for(i=0;i+1<count;i+=2){
-               dw=(r300->state.aos[i].ncomponents-1) 
+               dw=(r300->state.aos[i].ncomponents-1)
                | ((r300->state.aos[i].reg)<<8)
                | (r300->state.aos[i].format<<14)
-               | (((r300->state.aos[i+1].ncomponents-1) 
+               | (((r300->state.aos[i+1].ncomponents-1)
                | ((r300->state.aos[i+1].reg)<<8)
                | (r300->state.aos[i+1].format<<14))<<16);
-               
+
                if(i+2==count){
                        dw|=(1<<(13+16));
                        }
@@ -681,36 +895,36 @@ void r300_setup_routing(GLcontext *ctx, GLboolean immediate)
        /* Set the rest of INPUT_ROUTE_0 to 0 */
        //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
        ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = (count+1)>>1;
-       
-       
+
+
        /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
        #define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
-       
+
        #define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
                | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
-       
+
        R300_STATECHANGE(r300, vir[1]);
-               
+
        for(i=0;i+1<count;i+=2){
                /* do i first.. */
                mask=(1<<(r300->state.aos[i].ncomponents*3))-1;
                dw=(ALL_COMPONENTS & mask)
                | (ALL_DEFAULT & ~mask)
                | R300_INPUT_ROUTE_ENABLE;
-               
+
                /* i+1 */
                mask=(1<<(r300->state.aos[i+1].ncomponents*3))-1;
-               dw|=( 
+               dw|=(
                (ALL_COMPONENTS & mask)
                | (ALL_DEFAULT & ~mask)
                | R300_INPUT_ROUTE_ENABLE
                )<<16;
-       
+
                r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
                }
        if(count & 1){
@@ -724,35 +938,35 @@ void r300_setup_routing(GLcontext *ctx, GLboolean immediate)
        /* Set the rest of INPUT_ROUTE_1 to 0 */
        //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
        ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = (count+1)>>1;
-       
+
        /* Set up input_cntl */
-       
+
        R300_STATECHANGE(r300, vic);
        r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555;  /* Hard coded value, no idea what it means */
-       
+
        r300->hw.vic.cmd[R300_VIC_CNTL_1]=R300_INPUT_CNTL_POS
                                        | R300_INPUT_CNTL_COLOR;
-       
+
        for(i=0;i < ctx->Const.MaxTextureUnits;i++)
                if(ctx->Texture.Unit[i].Enabled)
                        r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
-       
+
        /* Stage 3: VAP output */
        R300_STATECHANGE(r300, vof);
        r300->hw.vof.cmd[R300_VOF_CNTL_0]=R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
                                        | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
-       
+
        r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
        for(i=0;i < ctx->Const.MaxTextureUnits;i++)
                if(ctx->Texture.Unit[i].Enabled)
                        r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
-       
+
 }
 
 static r300TexObj default_tex_obj={
        filter:R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR,
        pitch: 0x8000,
-       size: (0xff << R300_TX_WIDTHMASK_SHIFT) 
+       size: (0xff << R300_TX_WIDTHMASK_SHIFT)
              | (0xff << R300_TX_HEIGHTMASK_SHIFT)
              | (0x8 << R300_TX_SIZE_SHIFT),
        format: 0x88a0c,
@@ -761,13 +975,107 @@ static r300TexObj default_tex_obj={
        unknown5: 0x0
        };
 
+       /* there is probably a system to these values, but, for now,
+          we just try by hand */
+
+/* Map a GL texture-combine source enum to a small integer code.
+ * Sources not listed below map to 0. */
+static inline int translate_src(int src)
+{
+       int code;
+
+       switch (src) {
+       case GL_TEXTURE:
+               code = 1;
+               break;
+       case GL_CONSTANT:
+               code = 2;
+               break;
+       case GL_PRIMARY_COLOR:
+               code = 3;
+               break;
+       case GL_PREVIOUS:
+               code = 4;
+               break;
+       case GL_ZERO:
+               code = 5;
+               break;
+       case GL_ONE:
+               code = 6;
+               break;
+       default:
+               code = 0;
+               break;
+       }
+
+       return code;
+}
+
+/* The r300 does not handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when the filter is NEAREST.
+ * Since texwrap produces the same results for GL_CLAMP and GL_CLAMP_TO_EDGE, we use the latter instead.
+ * The wrap modes have to be recalculated whenever the filter mode changes, because someone might do:
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ * Since the r300 completely ignores R300_TX_CLAMP when either min or mag is nearest, it cannot
+ * handle combinations where only one of them is nearest.
+ *
+ * Takes a filter word and returns the (possibly adjusted) word to program;
+ * the input is returned unchanged when no wrap axis uses the clamp mode or
+ * when neither filter is NEAREST.
+ */
+static unsigned long gen_fixed_filter(unsigned long f)
+{
+       const int wrap_shift[3] = {
+               R300_TX_WRAP_S_SHIFT,
+               R300_TX_WRAP_T_SHIFT,
+               R300_TX_WRAP_Q_SHIFT
+       };
+       unsigned long clamped_axes = 0;
+       int mag_is_nearest, min_is_nearest;
+       int axis;
+
+       /* The MIRROR bit is left out of the comparison so the plain and the
+        * mirrored clamp variants are caught by one test per axis. */
+       for(axis = 0; axis < 3; axis++){
+               if((f & ((7-1) << wrap_shift[axis])) == (R300_TX_CLAMP << wrap_shift[axis]))
+                       clamped_axes |= 1UL << axis;
+       }
+
+       if(clamped_axes == 0)
+               return f;
+
+       mag_is_nearest = (f & R300_TX_MAG_FILTER_MASK) == R300_TX_MAG_FILTER_NEAREST;
+       min_is_nearest = (f & R300_TX_MIN_FILTER_MASK) == R300_TX_MIN_FILTER_NEAREST;
+
+       /* TODO: Check for anisto filters too */
+       if(!mag_is_nearest && !min_is_nearest)
+               return f;
+
+       /* Exactly one filter is nearest: the r300 cannot handle that with a
+        * clamp wrap mode, so the nearest one is forced to linear. */
+       if(mag_is_nearest && !min_is_nearest){
+               f &= ~R300_TX_MAG_FILTER_NEAREST;
+               f |= R300_TX_MAG_FILTER_LINEAR;
+               return f;
+       }
+
+       if(min_is_nearest && !mag_is_nearest){
+               f &= ~R300_TX_MIN_FILTER_NEAREST;
+               f |= R300_TX_MIN_FILTER_LINEAR;
+               return f;
+       }
+
+       /* Both are nearest: swap clamp for clamp-to-edge on each flagged axis */
+       for(axis = 0; axis < 3; axis++){
+               if(clamped_axes & (1UL << axis)){
+                       f &= ~((7-1) << wrap_shift[axis]);
+                       f |= R300_TX_CLAMP_TO_EDGE << wrap_shift[axis];
+               }
+       }
+       return f;
+}
+
 void r300_setup_textures(GLcontext *ctx)
 {
        int i, mtu;
        struct r300_tex_obj *t;
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        int max_texture_unit=-1; /* -1 translates into no setup costs for fields */
-       
+       struct gl_texture_unit *texUnit;
+
        R300_STATECHANGE(r300, txe);
        R300_STATECHANGE(r300, tex.filter);
        R300_STATECHANGE(r300, tex.unknown1);
@@ -776,23 +1084,25 @@ void r300_setup_textures(GLcontext *ctx)
        R300_STATECHANGE(r300, tex.offset);
        R300_STATECHANGE(r300, tex.unknown4);
        R300_STATECHANGE(r300, tex.unknown5);
-       
+       //R300_STATECHANGE(r300, tex.border_color);
+
        r300->state.texture.tc_count=0;
-       
+
        r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0;
-       
+
        mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "mtu=%d\n", mtu);
-       
+
        if(mtu>R300_MAX_TEXTURE_UNITS){
-               fprintf(stderr, "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n", 
+               fprintf(stderr, "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n",
                        mtu, R300_MAX_TEXTURE_UNITS);
                exit(-1);
                }
        for(i=0;i<mtu;i++){
                if(ctx->Texture.Unit[i].Enabled){
                        t=r300->state.texture.unit[i].texobj;
+                       fprintf(stderr, "format=%08x\n", r300->state.texture.unit[i].format);
                        r300->state.texture.tc_count++;
                        if(t==NULL){
                                fprintf(stderr, "Texture unit %d enabled, but corresponding texobj is NULL, using default object.\n", i);
@@ -803,31 +1113,19 @@ void r300_setup_textures(GLcontext *ctx)
                                fprintf(stderr, "Activating texture unit %d\n", i);
                        max_texture_unit=i;
                        r300->hw.txe.cmd[R300_TXE_ENABLE]|=(1<<i);
-                       
-                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=t->filter;
-                       r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=t->pitch;
+
+                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=gen_fixed_filter(t->filter);
+                       /* No idea why linear filtered textures shake when puting random data */
+                       /*r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=(rand()%0xffffffff) & (~0x1fff);*/
                        r300->hw.tex.size.cmd[R300_TEX_VALUE_0+i]=t->size;
                        r300->hw.tex.format.cmd[R300_TEX_VALUE_0+i]=t->format;
+                       //fprintf(stderr, "t->format=%08x\n", t->format);
                        r300->hw.tex.offset.cmd[R300_TEX_VALUE_0+i]=r300->radeon.radeonScreen->fbLocation+t->offset;
                        r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0+i]=0x0;
                        r300->hw.tex.unknown5.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       
-                       /* We don't know how to set this yet */
-                       r300->hw.tex.format.cmd[R300_TEX_VALUE_0+i]=0x88a0c;
-                       
-                       } else {
-                       /* Fill in with 0's */
-                       #if 0 /* No need.. */
-                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.size.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.format.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.offset.cmd[R300_TEX_VALUE_0+i]=r300->radeon.radeonScreen->fbLocation;
-                       r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       r300->hw.tex.unknown5.cmd[R300_TEX_VALUE_0+i]=0x0;
-                       #endif
+                       //r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0+i]=t->pp_border_color;
                        }
-                       
+
                }
        ((drm_r300_cmd_header_t*)r300->hw.tex.filter.cmd)->unchecked_state.count = max_texture_unit+1;
        ((drm_r300_cmd_header_t*)r300->hw.tex.unknown1.cmd)->unchecked_state.count = max_texture_unit+1;
@@ -836,7 +1134,8 @@ void r300_setup_textures(GLcontext *ctx)
        ((drm_r300_cmd_header_t*)r300->hw.tex.offset.cmd)->unchecked_state.count = max_texture_unit+1;
        ((drm_r300_cmd_header_t*)r300->hw.tex.unknown4.cmd)->unchecked_state.count = max_texture_unit+1;
        ((drm_r300_cmd_header_t*)r300->hw.tex.unknown5.cmd)->unchecked_state.count = max_texture_unit+1;
-       
+       //((drm_r300_cmd_header_t*)r300->hw.tex.border_color.cmd)->unchecked_state.count = max_texture_unit+1;
+
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "TX_ENABLE: %08x  max_texture_unit=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], max_texture_unit);
 }
@@ -845,46 +1144,218 @@ void r300_setup_rs_unit(GLcontext *ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        int i;
-       
+
        /* This needs to be rewritten - it is a hack at best */
-       
+
        R300_STATECHANGE(r300, ri);
        R300_STATECHANGE(r300, rc);
        R300_STATECHANGE(r300, rr);
-       
+
        for(i = 1; i <= 8; ++i)
                r300->hw.ri.cmd[i] = 0x00d10000;
        r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN;
        r300->hw.ri.cmd[R300_RI_INTERP_2] |= R300_RS_INTERP_2_UNKNOWN;
        r300->hw.ri.cmd[R300_RI_INTERP_3] |= R300_RS_INTERP_3_UNKNOWN;
-       
+
+       #if 1
+       for(i = 2; i <= 8; ++i)
+               r300->hw.ri.cmd[i] |= 4;
+       #endif
+
        for(i = 1; i <= 8; ++i)
                r300->hw.rr.cmd[i] = 0;
        /* textures enabled ? */
        if(r300->state.texture.tc_count>0){
-       
+
                /* This code only really works with one set of texture coordinates */
-               
+
                /* The second constant is needed to get glxgears display anything .. */
-               r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 
-                               | R300_RS_CNTL_0_UNKNOWN_18 
+               r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7
+                               | R300_RS_CNTL_0_UNKNOWN_18
                                | (r300->state.texture.tc_count<<R300_RS_CNTL_TC_CNT_SHIFT);
                r300->hw.rc.cmd[2] = 0xc0;
-       
-       
+
+
                ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
                r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x24008;
-               
+
                } else {
-               
+
                /* The second constant is needed to get glxgears display anything .. */
                r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18;
                r300->hw.rc.cmd[2] = 0;
-               
+
                ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
                r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000;
-               
+
+               }
+}
+/* Access the vpu.count field of the drm_r300_cmd_header_t stored at ptr. */
+#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+/* Raise the vpu.count of the header at ptr to new_count/4 when that is larger than the current value; the count is never lowered. */
+#define bump_vpu_count(ptr, new_count)   do{\
+       drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+       int _nc=(new_count)/4; \
+       if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
+       }while(0)
+/* Copy the vsf->length entries of vsf->body.d into the vpi, vpp or vps command buffer selected by dest, then grow that buffer's vpu.count to cover them. */
+void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf)
+{
+       int i;
+       /* An empty fragment leaves the hardware state untouched. */
+       if(vsf->length==0)return;
+       /* The length must be a multiple of 4; anything else is treated as fatal. */
+       if(vsf->length & 0x3){
+               fprintf(stderr,"VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
+               exit(-1);
+               }
+       /* Bits 8-11 of dest choose the target buffer; the low 8 bits are an offset counted in groups of 4 cmd entries. */
+       switch((dest>>8) & 0xf){
+       case 0:
+               R300_STATECHANGE(r300, vpi);
+               for(i=0;i<vsf->length;i++)
+                       r300->hw.vpi.cmd[R300_VPI_INSTR_0+i+4*(dest & 0xff)]=(vsf->body.d[i]);
+               bump_vpu_count(r300->hw.vpi.cmd, vsf->length+4*(dest & 0xff));
+               break;
+       /* Cases 2 and 4 perform the same copy into the vpp and vps buffers. NOTE(review): none of the three copies checks the index against the cmd array size - confirm callers keep dest and length in range. */
+       case 2:
+               R300_STATECHANGE(r300, vpp);
+               for(i=0;i<vsf->length;i++)
+                       r300->hw.vpp.cmd[R300_VPP_PARAM_0+i+4*(dest & 0xff)]=(vsf->body.d[i]);
+               bump_vpu_count(r300->hw.vpp.cmd, vsf->length+4*(dest & 0xff));
+               break;
+       case 4:
+               R300_STATECHANGE(r300, vps);
+               for(i=0;i<vsf->length;i++)
+                       r300->hw.vps.cmd[1+i+4*(dest & 0xff)]=(vsf->body.d[i]);
+               bump_vpu_count(r300->hw.vps.cmd, vsf->length+4*(dest & 0xff));
+               break;
+       default:
+               fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+               exit(-1);
+       }
+}
+
+/* Pick SINGLE_TEXTURE_VERTEX_SHADER or FLAT_COLOR_VERTEX_SHADER depending on tc_count, copy ctx->_ModelProjectMatrix into matrix[0], upload the program and matrix, and fill the three PVS control words. */
+void r300SetupVertexShader(r300ContextPtr rmesa)
+{
+       GLcontext* ctx = rmesa->radeon.glCtx;
+
+       /* Reset state, in case we don't use something */
+       ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t*)rmesa->hw.vpi.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t*)rmesa->hw.vps.cmd)->vpu.count = 0;
+       /* setup_vertex_shader_fragment() below raises these counts again for every buffer it writes to. */
+
+/* This needs to be replaced by vertex shader generation code */
+
+
+       /* textures enabled ? */
+       if(rmesa->state.texture.tc_count>0){
+               rmesa->state.vertex_shader=SINGLE_TEXTURE_VERTEX_SHADER;
+               } else {
+               rmesa->state.vertex_shader=FLAT_COLOR_VERTEX_SHADER;
+               }
+
+       /* matrix[0] receives 16 entries (16*4 bytes) copied from the context's _ModelProjectMatrix. */
+        rmesa->state.vertex_shader.matrix[0].length=16;
+        memcpy(rmesa->state.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
+       /* Only the program and matrix[0] are uploaded; the other destinations below are compiled out with #if 0. */
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(rmesa->state.vertex_shader.program));
+
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_MATRIX0, &(rmesa->state.vertex_shader.matrix[0]));
+       #if 0
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_MATRIX1, &(rmesa->state.vertex_shader.matrix[0]));
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_MATRIX2, &(rmesa->state.vertex_shader.matrix[0]));
+
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_VECTOR0, &(rmesa->state.vertex_shader.vector[0]));
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_VECTOR1, &(rmesa->state.vertex_shader.vector[1]));
+       #endif
+
+       #if 0
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, &(rmesa->state.vertex_shader.unknown1));
+       setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, &(rmesa->state.vertex_shader.unknown2));
+       #endif
+       /* Pack program start/end, parameter offset/count and the unknown pointers from state.vertex_shader into PVS_CNTL_1..3. */
+       R300_STATECHANGE(rmesa, pvs);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1]=(rmesa->state.vertex_shader.program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT)
+               | (rmesa->state.vertex_shader.unknown_ptr1 << R300_PVS_CNTL_1_UNKNOWN_SHIFT)
+               | (rmesa->state.vertex_shader.program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2]=(rmesa->state.vertex_shader.param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT)
+               | (rmesa->state.vertex_shader.param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3]=(rmesa->state.vertex_shader.unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT)
+       | (rmesa->state.vertex_shader.unknown_ptr3 << 0);
+
+       /* This is done for vertex shader fragments, but also needs to be done for vap_pvs,
+       so I leave it as a reminder */
+       #if 0
+       reg_start(R300_VAP_PVS_WAITIDLE,0);
+               e32(0x00000000);
+       #endif
+}
+/* Pick SINGLE_TEXTURE_PIXEL_SHADER or FLAT_COLOR_PIXEL_SHADER depending on tc_count and write its texture instructions, ALU instructions, node/control words and parameters into the fpt, fpi[], fp and fpp state. */
+void r300SetupPixelShader(r300ContextPtr rmesa)
+{
+int i,k; /* NOTE(review): k is not referenced anywhere in this function */
+
+       /* This needs to be replaced by pixel shader generation code */
+
+       /* textures enabled ? */
+       if(rmesa->state.texture.tc_count>0){
+               rmesa->state.pixel_shader=SINGLE_TEXTURE_PIXEL_SHADER;
+               } else {
+               rmesa->state.pixel_shader=FLAT_COLOR_PIXEL_SHADER;
                }
+       /* Texture instructions: copy program.tex.inst[] and build the command word with cmducs() from the instruction count. */
+       R300_STATECHANGE(rmesa, fpt);
+       for(i=0;i<rmesa->state.pixel_shader.program.tex.length;i++)
+               rmesa->hw.fpt.cmd[R300_FPT_INSTR_0+i]=rmesa->state.pixel_shader.program.tex.inst[i];
+       rmesa->hw.fpt.cmd[R300_FPT_CMD_0]=cmducs(R300_PFS_TEXI_0, rmesa->state.pixel_shader.program.tex.length);
+       /* OUTPUT_FIELD copies one member (inst0..inst3) of every ALU instruction into the given fpi[] buffer and sets that buffer's command word. */
+       #define OUTPUT_FIELD(st, reg, field)  \
+               R300_STATECHANGE(rmesa, st); \
+               for(i=0;i<rmesa->state.pixel_shader.program.alu.length;i++) \
+                       rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=rmesa->state.pixel_shader.program.alu.inst[i].field;\
+               rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmducs(reg, rmesa->state.pixel_shader.program.alu.length);
+
+       OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
+       OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
+       OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2);
+       OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3);
+       #undef OUTPUT_FIELD
+       /* One node word for each of the four nodes; only the last (i==3) also carries R300_PFS_NODE_LAST_NODE. */
+       R300_STATECHANGE(rmesa, fp);
+       for(i=0;i<4;i++){
+               rmesa->hw.fp.cmd[R300_FP_NODE0+i]=
+               (rmesa->state.pixel_shader.program.node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
+               | (rmesa->state.pixel_shader.program.node[i].alu_end  << R300_PFS_NODE_ALU_END_SHIFT)
+               | (rmesa->state.pixel_shader.program.node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
+               | (rmesa->state.pixel_shader.program.node[i].tex_end  << R300_PFS_NODE_TEX_END_SHIFT)
+               | ( (i==3) ? R300_PFS_NODE_LAST_NODE : 0);
+               }
+
+               /*  PFS_CNTL_0 */
+       rmesa->hw.fp.cmd[R300_FP_CNTL0]=
+               (rmesa->state.pixel_shader.program.active_nodes-1)
+               | (rmesa->state.pixel_shader.program.first_node_has_tex<<3);
+               /* PFS_CNTL_1 */
+       rmesa->hw.fp.cmd[R300_FP_CNTL1]=rmesa->state.pixel_shader.program.temp_register_count;
+               /* PFS_CNTL_2 */
+       rmesa->hw.fp.cmd[R300_FP_CNTL2]=
+               (rmesa->state.pixel_shader.program.alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+               | (rmesa->state.pixel_shader.program.alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
+               | (rmesa->state.pixel_shader.program.tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+               | (rmesa->state.pixel_shader.program.tex_end << R300_PFS_CNTL_TEX_END_SHIFT);
+       /* Each parameter contributes four components (x, y, z, w), packed with r300PackFloat32 into consecutive cmd slots. */
+       R300_STATECHANGE(rmesa, fpp);
+       for(i=0;i<rmesa->state.pixel_shader.param_length;i++){
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+0]=r300PackFloat32(rmesa->state.pixel_shader.param[i].x);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+1]=r300PackFloat32(rmesa->state.pixel_shader.param[i].y);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+2]=r300PackFloat32(rmesa->state.pixel_shader.param[i].z);
+               rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+3]=r300PackFloat32(rmesa->state.pixel_shader.param[i].w);
+               }
+       rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length);
+
 }
 
 /**
@@ -916,8 +1387,19 @@ void r300ResetHwState(r300ContextPtr r300)
        if (RADEON_DEBUG & DEBUG_STATE)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
+               /* This is a place to initialize registers which
+                  have bitfields accessed by different functions
+                  and not all bits are used */
+       #if 0
+       r300->hw.zs.cmd[R300_ZS_CNTL_0] = 0;
+       r300->hw.zs.cmd[R300_ZS_CNTL_1] = 0;
+       r300->hw.zs.cmd[R300_ZS_CNTL_2] = 0xffff00;
+       #endif
+
+               /* go and compute register values from GL state */
+
        r300UpdateWindow(ctx);
-       
+
        r300ColorMask(ctx,
                ctx->Color.ColorMask[RCOMP],
                ctx->Color.ColorMask[GCOMP],
@@ -929,23 +1411,24 @@ void r300ResetHwState(r300ContextPtr r300)
        r300DepthFunc(ctx, ctx->Depth.Func);
 
        r300UpdateCulling(ctx);
-        
+
        r300_setup_routing(ctx, GL_TRUE);
-       
+
        r300UpdateTextureState(ctx);
        r300_setup_textures(ctx);
        r300_setup_rs_unit(ctx);
-       
-       
+
+       r300SetupVertexShader(r300);
+       r300SetupPixelShader(r300);
+
        r300_set_blend_state(ctx);
        r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
 
-//BEGIN: TODO
+               /* Initialize magic registers
+                TODO : learn what they really do, or get rid of
+                those we don't have to touch */
        r300->hw.unk2080.cmd[1] = 0x0030045A;
 
-       r300->hw.ovf.cmd[R300_OVF_FMT_0] = 0x00000003;
-       r300->hw.ovf.cmd[R300_OVF_FMT_1] = 0x00000000;
-
        r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
                                | R300_VPORT_X_OFFSET_ENA
                                | R300_VPORT_Y_SCALE_ENA
@@ -970,7 +1453,7 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.vic.cmd[R300_VIR_CNTL_0] = 0x00000001;
        r300->hw.vic.cmd[R300_VIR_CNTL_1] = 0x00000405;
        #endif
-       
+
        r300->hw.unk21DC.cmd[1] = 0xAAAAAAAA;
 
        r300->hw.unk221C.cmd[1] = R300_221C_NORMAL;
@@ -989,11 +1472,12 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.vof.cmd[R300_VOF_CNTL_0] = R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
                                | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
        r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0; /* no textures */
-       #endif  
-       
+
+
        r300->hw.pvs.cmd[R300_PVS_CNTL_1] = 0;
        r300->hw.pvs.cmd[R300_PVS_CNTL_2] = 0;
        r300->hw.pvs.cmd[R300_PVS_CNTL_3] = 0;
+       #endif
 
        r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
                | R300_GB_LINE_STUFF_ENABLE
@@ -1056,6 +1540,7 @@ void r300ResetHwState(r300ContextPtr r300)
 
        r300->hw.unk43E8.cmd[1] = 0x00FFFFFF;
 
+       #if 0
        r300->hw.fp.cmd[R300_FP_CNTL0] = 0;
        r300->hw.fp.cmd[R300_FP_CNTL1] = 0;
        r300->hw.fp.cmd[R300_FP_CNTL2] = 0;
@@ -1063,6 +1548,7 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.fp.cmd[R300_FP_NODE1] = 0;
        r300->hw.fp.cmd[R300_FP_NODE2] = 0;
        r300->hw.fp.cmd[R300_FP_NODE3] = 0;
+       #endif
 
        r300->hw.unk46A4.cmd[1] = 0x00001B01;
        r300->hw.unk46A4.cmd[2] = 0x00001B0F;
@@ -1070,6 +1556,7 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.unk46A4.cmd[4] = 0x00001B0F;
        r300->hw.unk46A4.cmd[5] = 0x00000001;
 
+       #if 0
        for(i = 1; i <= 64; ++i) {
                /* create NOP instructions */
                r300->hw.fpi[0].cmd[i] = FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO));
@@ -1077,6 +1564,7 @@ void r300ResetHwState(r300ContextPtr r300)
                r300->hw.fpi[2].cmd[i] = FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO));
                r300->hw.fpi[3].cmd[i] = FP_SELA(0,W,NO,FP_TMP(0),0,0);
        }
+       #endif
 
        r300->hw.unk4BC0.cmd[1] = 0;
 
@@ -1088,6 +1576,7 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.at.cmd[R300_AT_ALPHA_TEST] = 0;
        #endif
 
+       r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
        r300->hw.unk4BD8.cmd[1] = 0;
 
        r300->hw.unk4E00.cmd[1] = 0;
@@ -1122,13 +1611,14 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.unk4EA0.cmd[1] = 0x00000000;
        r300->hw.unk4EA0.cmd[2] = 0xffffffff;
 
-       r300->hw.unk4F08.cmd[1] = 0x00FFFF00;
-
        r300->hw.unk4F10.cmd[1] = 0x00000002; // depthbuffer format?
        r300->hw.unk4F10.cmd[2] = 0x00000000;
        r300->hw.unk4F10.cmd[3] = 0x00000003;
        r300->hw.unk4F10.cmd[4] = 0x00000000;
 
+       /* experiment a bit */
+       r300->hw.unk4F10.cmd[2] = 0x00000001; // depthbuffer format?
+
        r300->hw.zb.cmd[R300_ZB_OFFSET] =
                r300->radeon.radeonScreen->depthOffset +
                r300->radeon.radeonScreen->fbLocation;
@@ -1143,6 +1633,7 @@ void r300ResetHwState(r300ContextPtr r300)
 
        r300->hw.unk4F54.cmd[1] = 0;
 
+       #if 0
        ((drm_r300_cmd_header_t*)r300->hw.vpi.cmd)->vpu.count = 0;
        for(i = 1; i < R300_VPI_CMDSIZE; i += 4) {
                /* MOV t0, t0 */
@@ -1155,14 +1646,15 @@ void r300ResetHwState(r300ContextPtr r300)
        ((drm_r300_cmd_header_t*)r300->hw.vpp.cmd)->vpu.count = 0;
        for(i = 1; i < R300_VPP_CMDSIZE; ++i)
                r300->hw.vpp.cmd[i] = 0;
+       #endif
 
        r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
        r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
        r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
        r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
-       
+
 //END: TODO
-       
+
        r300->hw.all_dirty = GL_TRUE;
 }
 
@@ -1179,7 +1671,7 @@ void r300InitState(r300ContextPtr r300)
        GLuint depth_fmt;
 
        radeonInitState(&r300->radeon);
-       
+
        switch (ctx->Visual.depthBits) {
        case 16:
                r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
@@ -1196,7 +1688,11 @@ void r300InitState(r300ContextPtr r300)
                        ctx->Visual.depthBits);
                exit(-1);
        }
-       
+
+       /* Only have hw stencil when depth buffer is 24 bits deep */
+       r300->state.hw_stencil = (ctx->Visual.stencilBits > 0 &&
+                                        ctx->Visual.depthBits == 24);
+
        memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
 
        r300ResetHwState(r300);
@@ -1212,7 +1708,7 @@ void r300InitStateFuncs(struct dd_function_table* functions)
        radeonInitStateFuncs(functions);
 
        functions->UpdateState = r300InvalidateState;
-       //functions->AlphaFunc = r300AlphaFunc;
+       functions->AlphaFunc = r300AlphaFunc;
        functions->BlendColor = r300BlendColor;
        functions->BlendEquationSeparate = r300BlendEquationSeparate;
        functions->BlendFuncSeparate = r300BlendFuncSeparate;
@@ -1223,6 +1719,12 @@ void r300InitStateFuncs(struct dd_function_table* functions)
        functions->CullFace = r300CullFace;
        functions->FrontFace = r300FrontFace;
 
+       /* Stencil related */
+       functions->ClearStencil = r300ClearStencil;
+       functions->StencilFunc = r300StencilFunc;
+       functions->StencilMask = r300StencilMask;
+       functions->StencilOp = r300StencilOp;
+
        /* Viewport related */
        functions->Viewport = r300Viewport;
        functions->DepthRange = r300DepthRange;