Initial multitexturing support. Old behaviour can be re-enabled by changing ifdefs...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_state.c
index b47957054dbaa06542b463eb864428081cdd5b62..aa9eb5e28bdc25ce49d0c79160030dd13bdcbd9f 100644 (file)
@@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_fixed_pipelines.h"
 #include "r300_tex.h"
 #include "r300_maos.h"
+#include "r300_texprog.h"
 
 static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
 {
@@ -69,7 +70,7 @@ static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
        GLubyte refByte;
 
        CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
-
+       
        R300_STATECHANGE(rmesa, at);
 
        pp_misc &= ~(R300_ALPHA_TEST_OP_MASK | R300_REF_ALPHA_MASK);
@@ -459,12 +460,12 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                break;
 
        case GL_ALPHA_TEST:
-               R200_STATECHANGE(r300, at);
+               R300_STATECHANGE(r300, at);
                if (state) {
                        r300->hw.at.cmd[R300_AT_ALPHA_TEST] |=
                            R300_ALPHA_TEST_ENABLE;
                } else {
-                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] |=
+                       r300->hw.at.cmd[R300_AT_ALPHA_TEST] &=
                            ~R300_ALPHA_TEST_ENABLE;
                }
                break;
@@ -483,17 +484,15 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state)
                        else
                                newval = R300_RB3D_Z_TEST;
                } else
-                       newval = 0;
+                       newval = R300_RB3D_Z_DISABLED_1;
 
-               r300->hw.zs.cmd[R300_ZS_CNTL_0] = newval;
+               r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
+               r300->hw.zs.cmd[R300_ZS_CNTL_0] |= newval;
                break;
 
        case GL_STENCIL_TEST:
-
-               WARN_ONCE("Do not know how to enable stencil. Help me !\n");
-
-               if (r300->state.hw_stencil) {
-                       //fprintf(stderr, "Stencil %s\n", state ? "enabled" : "disabled");
+               WARN_ONCE("TODO - double side stencil !\n");
+               if (r300->state.stencil.hw_stencil) {
                        R300_STATECHANGE(r300, zs);
                        if (state) {
                                r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
@@ -609,7 +608,6 @@ static void r300DepthFunc(GLcontext* ctx, GLenum func)
                r300->hw.zs.cmd[R300_ZS_CNTL_1] |= R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
                break;
        }
-
 }
 
 
@@ -626,8 +624,9 @@ static void r300DepthMask(GLcontext* ctx, GLboolean mask)
                return;
 
        R300_STATECHANGE(r300, zs);
-       r300->hw.zs.cmd[R300_ZS_CNTL_0] = mask
-               ? R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST;
+       r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
+       r300->hw.zs.cmd[R300_ZS_CNTL_0] |= mask 
+           ? R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST;
 }
 
 
@@ -866,8 +865,8 @@ static void r300StencilFunc(GLcontext * ctx, GLenum func,
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        GLuint refmask = ((ctx->Stencil.Ref[0] << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
-                         (ctx->Stencil.
-                          ValueMask[0] << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+                         (ctx->Stencil.ValueMask[0] << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+                         
        GLuint flag;
 
        R300_STATECHANGE(rmesa, zs);
@@ -875,9 +874,10 @@ static void r300StencilFunc(GLcontext * ctx, GLenum func,
        rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(
                (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
                | (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT));
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=  ~((R300_ZS_MASK << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
-                                               (R300_ZS_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
-
+       
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=  ~((R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_REF_SHIFT) |
+                                               (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT));
+       
        flag = translate_stencil_func(ctx->Stencil.Function[0]);
 
        rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
@@ -890,7 +890,7 @@ static void r300StencilMask(GLcontext * ctx, GLuint mask)
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
        R300_STATECHANGE(rmesa, zs);
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_2]  &= ~(R300_ZS_MASK << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_2]  &= ~(R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT);
        rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= ctx->Stencil.WriteMask[0] << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT;
 }
 
@@ -902,7 +902,10 @@ static void r300StencilOp(GLcontext * ctx, GLenum fail,
 
        R300_STATECHANGE(rmesa, zs);
                /* It is easier to mask what's left.. */
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+       rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= 
+           (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | 
+           (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | 
+           (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
 
        rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
                 (translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
@@ -911,19 +914,16 @@ static void r300StencilOp(GLcontext * ctx, GLenum fail,
                |(translate_stencil_op(ctx->Stencil.FailFunc[0]) << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
                |(translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
                |(translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
-
 }
 
 static void r300ClearStencil(GLcontext * ctx, GLint s)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
-       /* Not sure whether this is correct.. */
-       R300_STATECHANGE(rmesa, zs);
-       rmesa->hw.zs.cmd[R300_ZS_CNTL_2] =
+       rmesa->state.stencil.clear =
            ((GLuint) ctx->Stencil.Clear |
-            (0xff << R200_STENCIL_MASK_SHIFT) |
-            (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT));
+            (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT) |
+            (ctx->Stencil.WriteMask[0] << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT));
 }
 
 /* =============================================================
@@ -1353,7 +1353,8 @@ void r300_setup_textures(GLcontext *ctx)
        }
        
        for(i=0; i < mtu; i++) {
-               
+               /*if(ctx->Texture.Unit[i].Enabled == 0)
+                       continue;*/
                if( ((r300->state.render_inputs & (_TNL_BIT_TEX0<<i))!=0) != ((ctx->Texture.Unit[i].Enabled)!=0) ) {
                        WARN_ONCE("Mismatch between render_inputs and ctx->Texture.Unit[i].Enabled value.\n");
                }
@@ -1380,7 +1381,7 @@ void r300_setup_textures(GLcontext *ctx)
                        max_texture_unit=i;
                        r300->hw.txe.cmd[R300_TXE_ENABLE]|=(1<<i);
                        
-                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=gen_fixed_filter(t->filter);
+                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0+i]=gen_fixed_filter(t->filter) | (i << 28); 
                        r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0+i]=0x0;
                        
                        /* No idea why linear filtered textures shake when puting random data */
@@ -1409,14 +1410,57 @@ void r300_setup_textures(GLcontext *ctx)
 void r300_setup_rs_unit(GLcontext *ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       int i;
-
+       int i, cur_reg;
+       GLuint interp_magic[8] = {
+               0x00,
+               0x40,
+               0x80,
+               0xC0,
+               0x00,
+               0x00,
+               0x00,
+               0x00
+       };
+       
        /* This needs to be rewritten - it is a hack at best */
 
        R300_STATECHANGE(r300, ri);
        R300_STATECHANGE(r300, rc);
        R300_STATECHANGE(r300, rr);
-
+       
+#if 1
+       cur_reg = 0;
+       for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
+               r300->hw.ri.cmd[R300_RI_INTERP_0+i] = 0
+                               | R300_RS_INTERP_USED
+                               | (cur_reg << R300_RS_INTERP_SRC_SHIFT)
+                               | interp_magic[i];
+//             fprintf(stderr, "RS_INTERP[%d] = 0x%x\n", i, r300->hw.ri.cmd[R300_RI_INTERP_0+i]);
+
+               if (r300->state.render_inputs & (_TNL_BIT_TEX0<<i)) {
+                       r300->hw.rr.cmd[R300_RR_ROUTE_0 + cur_reg] = 0
+                                       | R300_RS_ROUTE_ENABLE
+                                       | i /* source INTERP */
+                                       | (cur_reg << R300_RS_ROUTE_DEST_SHIFT);
+//                     fprintf(stderr, "RS_ROUTE[%d] = 0x%x\n", cur_reg, r300->hw.rr.cmd[R300_RR_ROUTE_0 + cur_reg]);
+                       cur_reg++;
+               } 
+       }
+       if (r300->state.render_inputs & _TNL_BIT_COLOR0)
+               r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
+                               | R300_RS_ROUTE_0_COLOR
+                               | (cur_reg << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+       r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, cur_reg);
+//     fprintf(stderr, "ADJ_RR0 = 0x%x\n", r300->hw.rr.cmd[R300_RR_ROUTE_0]);
+
+//     fprintf(stderr, "rendering with %d texture co-ordinate sets\n", cur_reg);
+
+       r300->hw.rc.cmd[1] = 0
+                       | (cur_reg /* count */ << R300_RS_CNTL_TC_CNT_SHIFT)
+                       | R300_RS_CNTL_0_UNKNOWN_7
+                       | R300_RS_CNTL_0_UNKNOWN_18;
+       r300->hw.rc.cmd[2] = (0xC0 | (cur_reg-1) /* index of highest */ );
+#else
        for(i = 1; i <= 8; ++i)
                r300->hw.ri.cmd[i] = 0x00d10000;
        r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN;
@@ -1455,6 +1499,7 @@ void r300_setup_rs_unit(GLcontext *ctx)
                r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000;
 
                }
+#endif
 }
 
 #define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
@@ -1504,31 +1549,146 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s
 
 void r300SetupVertexProgram(r300ContextPtr rmesa);
 
-void r300SetupVertexShader(r300ContextPtr rmesa)
+/* just a skeleton for now.. */
+
+/* Generate a vertex shader that simply transforms vertex and texture coordinates,
+   while leaving colors intact. Nothing fancy (like lights) 
+   
+   If implementing lights make a copy first, so it is easy to switch between the two versions */
+void r300GenerateSimpleVertexShader(r300ContextPtr r300)
 {
-       GLcontext* ctx = rmesa->radeon.glCtx;
+       int i;
 
-       if(rmesa->current_vp != NULL){
-               r300SetupVertexProgram(rmesa);
-               return ;
+       /* Allocate parameters */
+       r300->state.vap_param.transform_offset=0x0;  /* transform matrix */
+       r300->state.vertex_shader.param_offset=0x0;
+       r300->state.vertex_shader.param_count=0x4;  /* 4 vector values - 4x4 matrix */
+       
+       r300->state.vertex_shader.program_start=0x0;
+       r300->state.vertex_shader.unknown_ptr1=0x4; /* magic value ? */
+       r300->state.vertex_shader.program_end=0x0;
+       
+       r300->state.vertex_shader.unknown_ptr2=0x0; /* magic value */
+       r300->state.vertex_shader.unknown_ptr3=0x4; /* magic value */
+       
+       /* Initialize matrix and vector parameters.. these should really be restructured */
+       /* TODO: fix vertex_shader structure */
+       r300->state.vertex_shader.matrix[0].length=16;
+       r300->state.vertex_shader.matrix[1].length=0;
+       r300->state.vertex_shader.matrix[2].length=0;
+       r300->state.vertex_shader.vector[0].length=0;
+       r300->state.vertex_shader.vector[1].length=0;
+       r300->state.vertex_shader.unknown1.length=0;
+       r300->state.vertex_shader.unknown2.length=0;
+       
+#define WRITE_OP(oper,source1,source2,source3) {\
+       r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \
+       r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src1=(source1); \
+       r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src2=(source2); \
+       r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src3=(source3); \
+       r300->state.vertex_shader.program_end++; \
        }
 
+       /* Multiply vertex coordinates with transform matrix */
+                       
+       WRITE_OP(
+               EASY_VSF_OP(MUL, 0, ALL, TMP),
+               VSF_PARAM(3),
+               VSF_ATTR_W(0),
+               EASY_VSF_SOURCE(0, W, W, W, W, NONE, NONE)
+               )
+       
+       WRITE_OP(
+               EASY_VSF_OP(MUL, 1, ALL, RESULT),
+               VSF_REG(1),
+               VSF_ATTR_UNITY(1),
+               VSF_UNITY(1)
+               )
+       
+       WRITE_OP(
+               EASY_VSF_OP(MAD, 0, ALL, TMP),
+               VSF_PARAM(2),
+               VSF_ATTR_Z(0),
+               VSF_TMP(0)
+               )
+       
+       WRITE_OP(
+               EASY_VSF_OP(MAD, 0, ALL, TMP),
+               VSF_PARAM(1),
+               VSF_ATTR_Y(0),
+               VSF_TMP(0)
+               )
+       
+       WRITE_OP(
+               EASY_VSF_OP(MAD, 0, ALL, RESULT),
+               VSF_PARAM(0),
+               VSF_ATTR_X(0),
+               VSF_TMP(0)
+               )
+               
+       /* Pass through texture coordinates, if any */
+       for(i=0;i < r300->radeon.glCtx->Const.MaxTextureUnits;i++)
+               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i)){
+                       // fprintf(stderr, "i_tex[%d]=%d\n", i, r300->state.vap_reg.i_tex[i]);
+                       WRITE_OP(
+                               EASY_VSF_OP(MUL, 2+i, ALL, RESULT),
+                               VSF_REG(r300->state.vap_reg.i_tex[i]),
+                               VSF_ATTR_UNITY(r300->state.vap_reg.i_tex[i]),
+                               VSF_UNITY(r300->state.vap_reg.i_tex[i])
+                               )
+                       }
+       
+       r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */
+       r300->state.vertex_shader.program.length=(r300->state.vertex_shader.program_end+1)*4;
+       
+       r300->state.vertex_shader.unknown_ptr1=r300->state.vertex_shader.program_end; /* magic value ? */
+       r300->state.vertex_shader.unknown_ptr2=r300->state.vertex_shader.program_end; /* magic value ? */
+       r300->state.vertex_shader.unknown_ptr3=r300->state.vertex_shader.program_end; /* magic value ? */
+       
+}
+
+
+void r300SetupVertexShader(r300ContextPtr rmesa)
+{
+       GLcontext* ctx = rmesa->radeon.glCtx;
+       struct r300_vertex_shader_fragment unk4={
+                       length: 4,
+                       body: { f: {
+                               /*0.0*/(rand()%100)/10.0,
+                               /*0.0*/(rand()%100)/10.0,
+                               /*1.0*/(rand()%100)/10.0,
+                               /*0.0*/(rand()%100)/10.0
+                               } }
+                       };
+       LOCAL_VARS
+
        /* Reset state, in case we don't use something */
        ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
        ((drm_r300_cmd_header_t*)rmesa->hw.vpi.cmd)->vpu.count = 0;
        ((drm_r300_cmd_header_t*)rmesa->hw.vps.cmd)->vpu.count = 0;
 
+       /* Not sure why this doesn't work...
+          0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
+          0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
+       //setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
+       if(rmesa->current_vp != NULL){
+               r300SetupVertexProgram(rmesa);
+               return ;
+       }
 
 /* This needs to be replaced by vertex shader generation code */
 
 
+#if 0
        /* textures enabled ? */
        if(rmesa->state.texture.tc_count>0){
                rmesa->state.vertex_shader=SINGLE_TEXTURE_VERTEX_SHADER;
                } else {
                rmesa->state.vertex_shader=FLAT_COLOR_VERTEX_SHADER;
                }
+#endif
 
+       r300GenerateSimpleVertexShader(rmesa);
 
         rmesa->state.vertex_shader.matrix[0].length=16;
         memcpy(rmesa->state.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
@@ -1571,6 +1731,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
        GLcontext* ctx = rmesa->radeon.glCtx;
        int inst_count;
        int param_count;
+       LOCAL_VARS
+                       
 
        /* Reset state, in case we don't use something */
        ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0;
@@ -1649,7 +1811,7 @@ void r300GenerateTexturePixelShader(r300ContextPtr r300)
 
                                break;
                        default:
-                               fprintf(stderr, "ModeRGB=%s is not implemented yet !\n",
+                               WARN_ONCE("ModeRGB=%s is not implemented yet !\n",
                                         _mesa_lookup_enum_by_nr(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeRGB));
                                /* PFS_NOP */
                                r300->state.pixel_shader.program.alu.inst[alu_inst].inst0=
@@ -1685,7 +1847,7 @@ void r300GenerateTexturePixelShader(r300ContextPtr r300)
 
                                break;
                        default:
-                               fprintf(stderr, "ModeA=%s is not implemented yet !\n",
+                               WARN_ONCE("ModeA=%s is not implemented yet !\n",
                                         _mesa_lookup_enum_by_nr(r300->radeon.glCtx->Texture.Unit[i]._CurrentCombine->ModeA));
                                /* PFS_NOP */
                                r300->state.pixel_shader.program.alu.inst[alu_inst].inst2=
@@ -1698,7 +1860,7 @@ void r300GenerateTexturePixelShader(r300ContextPtr r300)
 
                alu_inst++;
                }
-
+       
        r300->state.pixel_shader.program.tex.length=tex_inst;
        r300->state.pixel_shader.program.tex_offset=0;
        r300->state.pixel_shader.program.tex_end=tex_inst-1;
@@ -1722,12 +1884,16 @@ int i,k;
 
        /* textures enabled ? */
        if(rmesa->state.texture.tc_count>0){
+#if 1
+               r300GenerateTextureFragmentShader(rmesa);
+#else
                rmesa->state.pixel_shader=SINGLE_TEXTURE_PIXEL_SHADER;
                r300GenerateTexturePixelShader(rmesa);
+#endif
                } else {
                rmesa->state.pixel_shader=FLAT_COLOR_PIXEL_SHADER;
                }
-
+       
        R300_STATECHANGE(rmesa, fpt);
        for(i=0;i<rmesa->state.pixel_shader.program.tex.length;i++)
                rmesa->hw.fpt.cmd[R300_FPT_INSTR_0+i]=rmesa->state.pixel_shader.program.tex.inst[i];
@@ -1860,9 +2026,18 @@ void r300ResetHwState(r300ContextPtr r300)
                   have bitfields accessed by different functions
                   and not all bits are used */
 #if 0
+       /* initialize similar to r200 */
        r300->hw.zs.cmd[R300_ZS_CNTL_0] = 0;
-       r300->hw.zs.cmd[R300_ZS_CNTL_1] = 0;
-       r300->hw.zs.cmd[R300_ZS_CNTL_2] = 0xffff00;
+       r300->hw.zs.cmd[R300_ZS_CNTL_1] =
+           (R300_ZS_ALWAYS << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) |
+           (R300_ZS_ALWAYS << R300_RB3D_ZS1_BACK_FUNC_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) |
+           (R300_ZS_KEEP << R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT);
+       r300->hw.zs.cmd[R300_ZS_CNTL_2] = 0x00ffff00;
 #endif
 
                /* go and compute register values from GL state */
@@ -1878,6 +2053,12 @@ void r300ResetHwState(r300ContextPtr r300)
        r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
        r300DepthMask(ctx, ctx->Depth.Mask);
        r300DepthFunc(ctx, ctx->Depth.Func);
+       
+       /* stencil */
+       r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+       r300StencilMask(ctx, ctx->Stencil.WriteMask[0]);
+       r300StencilFunc(ctx, ctx->Stencil.Function[0], ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+       r300StencilOp(ctx, ctx->Stencil.FailFunc[0], ctx->Stencil.ZFailFunc[0], ctx->Stencil.ZPassFunc[0]);
 
        r300UpdateCulling(ctx);
 
@@ -1892,8 +2073,10 @@ void r300ResetHwState(r300ContextPtr r300)
        r300SetupPixelShader(r300);
 
        r300_set_blend_state(ctx);
-       r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
 
+       r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+       r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+               
                /* Initialize magic registers
                 TODO : learn what they really do, or get rid of
                 those we don't have to touch */
@@ -1962,6 +2145,10 @@ void r300ResetHwState(r300ContextPtr r300)
                r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
                                                        | R300_GB_TILE_PIPE_COUNT_R300
                                                        | R300_GB_TILE_SIZE_16;
+       else if (GET_CHIP(r300->radeon.radeonScreen) == RADEON_CHIP_R420)
+               r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
+                                                       | R300_GB_TILE_PIPE_COUNT_R420
+                                                       | R300_GB_TILE_SIZE_16;
        else
                r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE
                                                        | R300_GB_TILE_PIPE_COUNT_RV300
@@ -2058,10 +2245,6 @@ void r300ResetHwState(r300ContextPtr r300)
        r300->hw.unk4BC8.cmd[2] = 0;
        r300->hw.unk4BC8.cmd[3] = 0;
 
-       //r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
-#if 0
-       r300->hw.at.cmd[R300_AT_ALPHA_TEST] = 0;
-#endif
 
        r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
        r300->hw.unk4BD8.cmd[1] = 0;
@@ -2152,7 +2335,6 @@ void r300ResetHwState(r300ContextPtr r300)
 
 //END: TODO
        //verify_r300ResetHwState(r300, 0);
-
        r300->hw.all_dirty = GL_TRUE;
 }
 
@@ -2174,12 +2356,12 @@ void r300InitState(r300ContextPtr r300)
        case 16:
                r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
                depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
-               //r300->state.stencil.clear = 0x00000000;
+               r300->state.stencil.clear = 0x00000000;
                break;
        case 24:
                r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
                depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
-               //r300->state.stencil.clear = 0xff000000;
+               r300->state.stencil.clear = 0x00ff0000;
                break;
        default:
                fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
@@ -2188,7 +2370,7 @@ void r300InitState(r300ContextPtr r300)
        }
 
        /* Only have hw stencil when depth buffer is 24 bits deep */
-       r300->state.hw_stencil = (ctx->Visual.stencilBits > 0 &&
+       r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
                                         ctx->Visual.depthBits == 24);
 
        memset(&(r300->state.texture), 0, sizeof(r300->state.texture));