r300: Remove calls to rcommonEnsureCmdBufSpace.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_state.c
index 873ac4aaec6993e77b437a90468f3b9f11ccec08..4fe9175b61d883b7fa322a54ac331a282e747238 100644 (file)
@@ -62,8 +62,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_emit.h"
 #include "r300_tex.h"
 #include "r300_fragprog_common.h"
-#include "r300_fragprog.h"
-#include "r500_fragprog.h"
+#include "r300_render.h"
+#include "r300_vertprog.h"
 
 #include "drirenderbuffer.h"
 
@@ -371,6 +371,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
        p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
        ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
+       R300_STATECHANGE( rmesa, vap_flush );
        R300_STATECHANGE( rmesa, vpucp[p] );
        rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
        rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
@@ -432,6 +433,10 @@ static void r300UpdateCulling(GLcontext * ctx)
                break;
        }
 
+       /* Winding is inverted when rendering to FBO */
+       if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+               val ^= R300_FRONT_FACE_CW;
+
        R300_STATECHANGE(r300, cul);
        r300->hw.cul.cmd[R300_CUL_CULL] = val;
 }
@@ -450,15 +455,16 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
 
 static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
 {
-       struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-       return (fp && fp->writes_depth);
+       return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth;
 }
 
 static void r300SetEarlyZState(GLcontext * ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        GLuint topZ = R300_ZTOP_ENABLE;
+       GLuint w_fmt, fgdepthsrc;
 
        if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
                topZ = R300_ZTOP_DISABLE;
@@ -466,6 +472,8 @@ static void r300SetEarlyZState(GLcontext * ctx)
                topZ = R300_ZTOP_DISABLE;
        else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill)
                topZ = R300_ZTOP_DISABLE;
+       else if (r300->radeon.query.current)
+               topZ = R300_ZTOP_DISABLE;
 
        if (topZ != r300->hw.zstencil_format.cmd[2]) {
                /* Note: This completely reemits the stencil format.
@@ -475,6 +483,26 @@ static void r300SetEarlyZState(GLcontext * ctx)
                R300_STATECHANGE(r300, zstencil_format);
                r300->hw.zstencil_format.cmd[2] = topZ;
        }
+
+       /* w_fmt value is set to get best performance
+       * see p.130 R5xx 3D acceleration guide v1.3 */
+       if (current_fragment_program_writes_depth(ctx)) {
+               fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
+               w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
+       } else {
+               fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+               w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
+       }
+
+       if (w_fmt != r300->hw.us_out_fmt.cmd[5]) {
+               R300_STATECHANGE(r300, us_out_fmt);
+               r300->hw.us_out_fmt.cmd[5] = w_fmt;
+       }
+
+       if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) {
+               R300_STATECHANGE(r300, fg_depth_src);
+               r300->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+       }
 }
 
 static void r300SetAlphaState(GLcontext * ctx)
@@ -574,10 +602,26 @@ static void r300SetDepthState(GLcontext * ctx)
        }
 }
 
+static void r300CatchStencilFallback(GLcontext *ctx)
+{
+       const unsigned back = ctx->Stencil._BackFace;
+
+       if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+               || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+               || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
+               r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE);
+       } else {
+               r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+       }
+}
+
 static void r300SetStencilState(GLcontext * ctx, GLboolean state)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        GLboolean hw_stencil = GL_FALSE;
+
+       r300CatchStencilFallback(ctx);
+
        if (ctx->DrawBuffer) {
                struct radeon_renderbuffer *rrbStencil
                        = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
@@ -593,10 +637,6 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
                        r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
                            ~R300_STENCIL_ENABLE;
                }
-       } else {
-#if R200_MERGED
-               FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state);
-#endif
        }
 }
 
@@ -846,11 +886,14 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
                                    GLenum func, GLint ref, GLuint mask)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       GLuint refmask =
-           ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
-            | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
-       const unsigned back = ctx->Stencil._BackFace;
+       GLuint refmask;
        GLuint flag;
+       const unsigned back = ctx->Stencil._BackFace;
+
+       r300CatchStencilFallback(ctx);
+
+       refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
+            | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
 
        R300_STATECHANGE(rmesa, zs);
        rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK;
@@ -878,6 +921,8 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
+       r300CatchStencilFallback(ctx);
+
        R300_STATECHANGE(rmesa, zs);
        rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
            ~(R300_STENCILREF_MASK <<
@@ -894,6 +939,8 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        const unsigned back = ctx->Stencil._BackFace;
 
+       r300CatchStencilFallback(ctx);
+
        R300_STATECHANGE(rmesa, zs);
        /* It is easier to mask what's left.. */
        rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
@@ -925,7 +972,7 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
 static void r300UpdateWindow(GLcontext * ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+       __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
        GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
        GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
        const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -978,7 +1025,7 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
 void r300UpdateViewportOffset(GLcontext * ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable;
+       __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
        GLfloat xoffset = (GLfloat) dPriv->x;
        GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
        const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -1000,81 +1047,27 @@ void r300UpdateViewportOffset(GLcontext * ctx)
        radeonUpdateScissor(ctx);
 }
 
-static void
-r300FetchStateParameter(GLcontext * ctx,
-                       const gl_state_index state[STATE_LENGTH],
-                       GLfloat * value)
-{
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
-
-       switch (state[0]) {
-       case STATE_INTERNAL:
-               switch (state[1]) {
-               case STATE_R300_WINDOW_DIMENSION:
-                       value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */
-                       value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */
-                       value[2] = 0.5F;        /* for moving range [-1 1] -> [0 1] */
-                       value[3] = 1.0F;        /* not used */
-                       break;
-
-               case STATE_R300_TEXRECT_FACTOR:{
-                               struct gl_texture_object *t =
-                                   ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
-
-                               if (t && t->Image[0][t->BaseLevel]) {
-                                       struct gl_texture_image *image =
-                                           t->Image[0][t->BaseLevel];
-                                       value[0] = 1.0 / image->Width2;
-                                       value[1] = 1.0 / image->Height2;
-                               } else {
-                                       value[0] = 1.0;
-                                       value[1] = 1.0;
-                               }
-                               value[2] = 1.0;
-                               value[3] = 1.0;
-                               break;
-                       }
-
-               default:
-                       break;
-               }
-               break;
-
-       default:
-               break;
-       }
-}
-
 /**
  * Update R300's own internal state parameters.
  * For now just STATE_R300_WINDOW_DIMENSION
  */
-void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
 {
-       struct r300_fragment_program *fp;
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
        struct gl_program_parameter_list *paramList;
-       GLuint i;
 
        if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
                return;
 
-       fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
-       if (!fp)
+       if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
                return;
 
-       paramList = fp->Base.Base.Parameters;
+       paramList = ctx->FragmentProgram._Current->Base.Parameters;
 
        if (!paramList)
                return;
 
-       for (i = 0; i < paramList->NumParameters; i++) {
-               if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
-                       r300FetchStateParameter(ctx,
-                                               paramList->Parameters[i].
-                                               StateIndexes,
-                                               paramList->ParameterValues[i]);
-               }
-       }
+       _mesa_load_state_parameters(ctx, paramList);
 }
 
 /* =============================================================
@@ -1181,8 +1174,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        int i;
-       struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
-       struct r300_fragment_program_code *code = &fp->code.r300;
+       struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
 
        R300_STATECHANGE(r300, fpt);
 
@@ -1222,9 +1214,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
 
 static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
 {
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
        int i;
-       struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
-       struct r500_fragment_program_code *code = &fp->code.r500;
+       struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
 
        /* find all the texture instructions and relocate the texture units */
        for (i = 0; i < code->inst_end + 1; i++) {
@@ -1264,6 +1256,7 @@ static GLuint translate_lod_bias(GLfloat bias)
        return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
 }
 
+
 static void r300SetupTextures(GLcontext * ctx)
 {
        int i, mtu;
@@ -1272,8 +1265,6 @@ static void r300SetupTextures(GLcontext * ctx)
        int hw_tmu = 0;
        int last_hw_tmu = -1;   /* -1 translates into no setup costs for fields */
        int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
-       struct r300_fragment_program *fp = (struct r300_fragment_program *)
-           (char *)ctx->FragmentProgram._Current;
 
        R300_STATECHANGE(r300, txe);
        R300_STATECHANGE(r300, tex.filter);
@@ -1358,6 +1349,28 @@ static void r300SetupTextures(GLcontext * ctx)
                }
        }
 
+       /* R3xx and R4xx chips require that the texture unit corresponding to
+        * KIL instructions is really enabled.
+        *
+        * We do some fakery here and in the state atom emit logic to enable
+        * the texture without tripping up the CS checker in the kernel.
+        */
+       if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+               if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
+                       last_hw_tmu++;
+
+                       r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+
+                       r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0;
+                       r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0;
+                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+                       r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0;
+                       r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */
+                       r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */
+                       r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0;
+               }
+       }
+
        r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
            cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
        r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
@@ -1375,19 +1388,6 @@ static void r300SetupTextures(GLcontext * ctx)
        r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
            cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
 
-       if (!fp)                /* should only happenen once, just after context is created */
-               return;
-
-       if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
-               if (fp->Base.UsesKill && last_hw_tmu < 0) {
-                       // The KILL operation requires the first texture unit
-                       // to be enabled.
-                       r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
-                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
-                       r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
-                               cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
-               }
-       }
        r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings);
 
        if (RADEON_DEBUG & DEBUG_STATE)
@@ -1407,27 +1407,21 @@ union r300_outputs_written {
 static void r300SetupRSUnit(GLcontext * ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-        TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *VB = &tnl->vb;
        union r300_outputs_written OutputsWritten;
        GLuint InputsRead;
        int fp_reg, high_rr;
        int col_ip, tex_ip;
        int rs_tex_count = 0;
-       int i, count, col_fmt, hw_tcl_on;
+       int i, col_fmt, hw_tcl_on;
 
        hw_tcl_on = r300->options.hw_tcl_enabled;
+
        if (hw_tcl_on)
-               OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+               OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
        else
                RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
 
-       if (ctx->FragmentProgram._Current)
-               InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-       else {
-               fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
-               return;         /* This should only ever happen once.. */
-       }
+       InputsRead = r300->selected_fp->InputsRead;
 
        R300_STATECHANGE(r300, ri);
        R300_STATECHANGE(r300, rc);
@@ -1446,15 +1440,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
 
        if (InputsRead & FRAG_BIT_COL0) {
                if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-                       count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-                       if (count == 4)
-                           col_fmt = R300_RS_COL_FMT_RGBA;
-                       else if (count == 3)
-                           col_fmt = R300_RS_COL_FMT_RGB1;
-                       else
-                           col_fmt = R300_RS_COL_FMT_0001;
-
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
                        r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
                        InputsRead &= ~FRAG_BIT_COL0;
                        ++col_ip;
@@ -1466,15 +1452,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
 
        if (InputsRead & FRAG_BIT_COL1) {
                if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-                       count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-                       if (count == 4)
-                           col_fmt = R300_RS_COL_FMT_RGBA;
-                       else if (count == 3)
-                           col_fmt = R300_RS_COL_FMT_RGB1;
-                       else
-                           col_fmt = R300_RS_COL_FMT_0001;
-
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
                        r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
                        InputsRead &= ~FRAG_BIT_COL1;
                        ++col_ip;
@@ -1502,32 +1480,10 @@ static void r300SetupRSUnit(GLcontext * ctx)
                ++fp_reg;
        }
 
-       if (InputsRead & FRAG_BIT_WPOS) {
-               r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
-               r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
-               InputsRead &= ~FRAG_BIT_WPOS;
-               rs_tex_count += 4;
-               ++tex_ip;
-               ++fp_reg;
-       }
-
-       if (InputsRead & FRAG_BIT_FOGC) {
-               if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0);
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count);
-                       r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
-                       InputsRead &= ~FRAG_BIT_FOGC;
-                       rs_tex_count += 1;
-                       ++tex_ip;
-                       ++fp_reg;
-               } else {
-                       WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
-               }
-       }
-
        /* Setup default color if no color or tex was set */
        if (rs_tex_count == 0 && col_ip == 0) {
-               r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+               r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
+               r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
                ++col_ip;
        }
 
@@ -1536,6 +1492,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
        r300->hw.rc.cmd[2] |= high_rr - 1;
 
        r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr);
+       r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr);
 
        if (InputsRead)
                WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
@@ -1544,27 +1501,21 @@ static void r300SetupRSUnit(GLcontext * ctx)
 static void r500SetupRSUnit(GLcontext * ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-        TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *VB = &tnl->vb;
        union r300_outputs_written OutputsWritten;
        GLuint InputsRead;
        int fp_reg, high_rr;
        int col_ip, tex_ip;
        int rs_tex_count = 0;
-       int i, count, col_fmt, hw_tcl_on;
+       int i, col_fmt, hw_tcl_on;
 
        hw_tcl_on = r300->options.hw_tcl_enabled;
+
        if (hw_tcl_on)
-               OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+               OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
        else
                RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
 
-       if (ctx->FragmentProgram._Current)
-               InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-       else {
-               fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
-               return;         /* This should only ever happen once.. */
-       }
+       InputsRead = r300->selected_fp->InputsRead;
 
        R300_STATECHANGE(r300, ri);
        R300_STATECHANGE(r300, rc);
@@ -1583,15 +1534,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
 
        if (InputsRead & FRAG_BIT_COL0) {
                if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-                       count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-                       if (count == 4)
-                           col_fmt = R300_RS_COL_FMT_RGBA;
-                       else if (count == 3)
-                           col_fmt = R300_RS_COL_FMT_RGB1;
-                       else
-                           col_fmt = R300_RS_COL_FMT_0001;
-
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
                        r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
                        InputsRead &= ~FRAG_BIT_COL0;
                        ++col_ip;
@@ -1603,15 +1546,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
 
        if (InputsRead & FRAG_BIT_COL1) {
                if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-                       count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-                       if (count == 4)
-                           col_fmt = R300_RS_COL_FMT_RGBA;
-                       else if (count == 3)
-                           col_fmt = R300_RS_COL_FMT_RGB1;
-                       else
-                           col_fmt = R300_RS_COL_FMT_0001;
-
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+                       r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
                        r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
                        InputsRead &= ~FRAG_BIT_COL1;
                        ++col_ip;
@@ -1643,104 +1578,27 @@ static void r500SetupRSUnit(GLcontext * ctx)
                ++fp_reg;
        }
 
-       if (InputsRead & FRAG_BIT_WPOS) {
-               r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
-                       ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
-                       ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
-                       ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
-               r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
-               InputsRead &= ~FRAG_BIT_WPOS;
-               rs_tex_count += 4;
-               ++tex_ip;
-               ++fp_reg;
-       }
-
-       if (InputsRead & FRAG_BIT_FOGC) {
-               if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
-                       r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) |
-                               (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-                               (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-                               (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
-                       r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
-                       InputsRead &= ~FRAG_BIT_FOGC;
-                       rs_tex_count += 1;
-                       ++tex_ip;
-                       ++fp_reg;
-               } else {
-                       WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
-               }
-       }
-
        /* Setup default color if no color or tex was set */
        if (rs_tex_count == 0 && col_ip == 0) {
-               r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+               r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
+               r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
                ++col_ip;
        }
 
        high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
-       r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
-       r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1);
+       r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+       r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1);
 
        r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr);
+       r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr);
 
        if (InputsRead)
                WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
 }
 
-
-
-
-#define bump_vpu_count(ptr, new_count)   do{\
-       drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
-       int _nc=(new_count)/4; \
-       assert(_nc < 256); \
-       if(_nc>_p->vpu.count)_p->vpu.count=_nc;\
-       }while(0)
-
-static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf)
-{
-       int i;
-
-       if (vsf->length == 0)
-               return;
-
-       if (vsf->length & 0x3) {
-               fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n");
-               _mesa_exit(-1);
-       }
-
-       switch ((dest >> 8) & 0xf) {
-       case 0:
-               R300_STATECHANGE(r300, vpi);
-               for (i = 0; i < vsf->length; i++)
-                       r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
-               bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff));
-               break;
-
-       case 2:
-               R300_STATECHANGE(r300, vpp);
-               for (i = 0; i < vsf->length; i++)
-                       r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
-               bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff));
-               break;
-       case 4:
-               R300_STATECHANGE(r300, vps);
-               for (i = 0; i < vsf->length; i++)
-                       r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]);
-               bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff));
-               break;
-       default:
-               fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
-               _mesa_exit(-1);
-       }
-}
-
 #define MIN3(a, b, c)  ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
 
-
-static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
                        GLuint output_count, GLuint temp_count)
 {
     int vtx_mem_size;
@@ -1794,115 +1652,6 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
 
 }
 
-static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
-{
-       struct r300_vertex_shader_state *prog = &(rmesa->vertex_shader);
-       GLuint o_reg = 0;
-       GLuint i_reg = 0;
-       int i;
-       int inst_count = 0;
-       int param_count = 0;
-       int program_end = 0;
-
-       for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) {
-               if (rmesa->swtcl.sw_tcl_inputs[i] != -1) {
-                       prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT);
-                       prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-                       prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-                       prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-                       program_end += 4;
-                       i_reg++;
-               }
-       }
-
-       prog->program.length = program_end;
-
-       r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START,
-                                      &(prog->program));
-       inst_count = (prog->program.length / 4) - 1;
-
-       r300VapCntl(rmesa, i_reg, o_reg, 0);
-
-       R300_STATECHANGE(rmesa, pvs);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
-           (0 << R300_PVS_FIRST_INST_SHIFT) |
-           (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
-           (inst_count << R300_PVS_LAST_INST_SHIFT);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
-           (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-           (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
-           (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-}
-
-static int bit_count (int x)
-{
-    x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
-    x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
-    x = (x >> 16) + (x & 0xffff);
-    x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
-    return (x >> 8) + (x & 0x00ff);
-}
-
-static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
-{
-       GLcontext *ctx = rmesa->radeon.glCtx;
-       struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-       int inst_count = 0;
-       int param_count = 0;
-
-       /* FIXME: r300SetupVertexProgramFragment */
-       R300_STATECHANGE(rmesa, vpp);
-       param_count =
-           r300VertexProgUpdateParams(ctx,
-                                      (struct r300_vertex_program_cont *)
-                                      ctx->VertexProgram._Current,
-                                      (float *)&rmesa->hw.vpp.
-                                      cmd[R300_VPP_PARAM_0]);
-       bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
-       param_count /= 4;
-
-       r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program));
-       inst_count = (prog->program.length / 4) - 1;
-
-       r300VapCntl(rmesa, bit_count(prog->key.InputsRead),
-                   bit_count(prog->key.OutputsWritten), prog->num_temporaries);
-
-       R300_STATECHANGE(rmesa, pvs);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
-         (0 << R300_PVS_FIRST_INST_SHIFT) |
-         (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
-         (inst_count << R300_PVS_LAST_INST_SHIFT);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
-         (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-         (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
-       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
-         (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-}
-
-
-static void r300SetupVertexProgram(r300ContextPtr rmesa)
-{
-       GLcontext *ctx = rmesa->radeon.glCtx;
-
-       /* Reset state, in case we don't use something */
-       ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
-       ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
-       ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
-
-       /* Not sure why this doesnt work...
-          0x400 area might have something to do with pixel shaders as it appears right after pfs programming.
-          0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */
-       //setup_vertex_shader_fragment(rmesa, 0x406, &unk4);
-       if (rmesa->options.hw_tcl_enabled && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) {
-               r300SetupRealVertexProgram(rmesa);
-       } else {
-               /* FIXME: This needs to be replaced by vertex shader generation code. */
-               r300SetupDefaultVertexProgram(rmesa);
-       }
-
-}
-
 /**
  * Enable/Disable states.
  *
@@ -1917,14 +1666,6 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
                        state ? "GL_TRUE" : "GL_FALSE");
 
        switch (cap) {
-       case GL_TEXTURE_1D:
-       case GL_TEXTURE_2D:
-       case GL_TEXTURE_3D:
-               /* empty */
-               break;
-       case GL_FOG:
-               /* empty */
-               break;
        case GL_ALPHA_TEST:
                r300SetAlphaState(ctx);
                break;
@@ -1942,14 +1683,31 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
        case GL_CLIP_PLANE5:
                r300SetClipPlaneState(ctx, cap, state);
                break;
+       case GL_CULL_FACE:
+               r300UpdateCulling(ctx);
+               break;
        case GL_DEPTH_TEST:
                r300SetDepthState(ctx);
                break;
-       case GL_STENCIL_TEST:
-               r300SetStencilState(ctx, state);
+       case GL_LINE_SMOOTH:
+               if (rmesa->options.conformance_mode)
+                       r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag);
                break;
-       case GL_CULL_FACE:
-               r300UpdateCulling(ctx);
+       case GL_LINE_STIPPLE:
+               if (rmesa->options.conformance_mode)
+                       r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag);
+               break;
+       case GL_POINT_SMOOTH:
+               if (rmesa->options.conformance_mode)
+                       r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag);
+               break;
+       case GL_POLYGON_SMOOTH:
+               if (rmesa->options.conformance_mode)
+                       r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag);
+               break;
+       case GL_POLYGON_STIPPLE:
+               if (rmesa->options.conformance_mode)
+                       r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag);
                break;
        case GL_POLYGON_OFFSET_POINT:
        case GL_POLYGON_OFFSET_LINE:
@@ -1961,6 +1719,9 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
                rmesa->radeon.state.scissor.enabled = state;
                radeonUpdateScissor( ctx );
                break;
+       case GL_STENCIL_TEST:
+               r300SetStencilState(ctx, state);
+               break;
        default:
                break;
        }
@@ -2151,13 +1912,8 @@ static void r300ResetHwState(r300ContextPtr r300)
 
        r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0;
 
-       if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-           r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
-       r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
-    } else {
-           r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = (2 << 30);
-       r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = (2 << 30);
-    }
+    r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+    r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
 
        r300->hw.zb_depthclearvalue.cmd[1] = 0;
 
@@ -2186,82 +1942,118 @@ static void r300ResetHwState(r300ContextPtr r300)
 
 void r300UpdateShaders(r300ContextPtr rmesa)
 {
-       GLcontext *ctx;
-       struct r300_vertex_program *vp;
-       int i;
+       GLcontext *ctx = rmesa->radeon.glCtx;
 
-       ctx = rmesa->radeon.glCtx;
+       /* should only happenen once, just after context is created */
+       /* TODO: shouldn't we fallback to sw here? */
+       if (!ctx->FragmentProgram._Current) {
+               _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+               return;
+       }
 
-       if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) {
-               rmesa->radeon.NewGLState = 0;
+       {
+               struct r300_fragment_program *fp;
 
-               for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
-                       rmesa->temp_attrib[i] =
-                           TNL_CONTEXT(ctx)->vb.AttribPtr[i];
-                       TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
-                           &rmesa->dummy_attrib[i];
-               }
+               fp = r300SelectAndTranslateFragmentShader(ctx);
 
-               _tnl_UpdateFixedFunctionProgram(ctx);
+               r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
+       }
 
-               for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
-                       TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
-                           rmesa->temp_attrib[i];
-               }
+       if (rmesa->options.hw_tcl_enabled) {
+               struct r300_vertex_program *vp;
 
-               r300SelectVertexShader(rmesa);
-               vp = (struct r300_vertex_program *)
-                   CURRENT_VERTEX_SHADER(ctx);
-               /*if (vp->translated == GL_FALSE)
-                  r300TranslateVertexShader(vp); */
-               if (vp->translated == GL_FALSE) {
-                       fprintf(stderr, "Failing back to sw-tcl\n");
-                       rmesa->options.hw_tcl_enabled = 0;
-                       r300ResetHwState(rmesa);
-
-                       r300UpdateStateParameters(ctx, _NEW_PROGRAM |
-                                      _NEW_PROGRAM_CONSTANTS);
-                       return;
+               if (rmesa->radeon.NewGLState) {
+                       int i;
+                       for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+                               rmesa->temp_attrib[i] =
+                                   TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+                               TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+                                   &rmesa->dummy_attrib[i];
+                       }
+
+                       _tnl_UpdateFixedFunctionProgram(ctx);
+
+                       for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+                               TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+                                   rmesa->temp_attrib[i];
+                       }
                }
+
+               vp = r300SelectAndTranslateVertexShader(ctx);
+
+               r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
        }
+
        r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+       rmesa->radeon.NewGLState = 0;
 }
 
-static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
-       struct gl_program *program, struct prog_src_register srcreg)
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
 {
        static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index];
+
+       switch(rcc->Type) {
+       case RC_CONSTANT_EXTERNAL:
+               return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External];
+       case RC_CONSTANT_IMMEDIATE:
+               return rcc->u.Immediate;
+       case RC_CONSTANT_STATE:
+               switch(rcc->u.State[0]) {
+               case RC_STATE_SHADOW_AMBIENT: {
+                       const int unit = (int) rcc->u.State[1];
+                       const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+                       if (texObj) {
+                               buffer[0] =
+                               buffer[1] =
+                               buffer[2] =
+                               buffer[3] = texObj->CompareFailValue;
+                       }
+                       return buffer;
+               }
 
-       switch(srcreg.File) {
-       case PROGRAM_LOCAL_PARAM:
-               return program->LocalParams[srcreg.Index];
-       case PROGRAM_ENV_PARAM:
-               return ctx->FragmentProgram.Parameters[srcreg.Index];
-       case PROGRAM_STATE_VAR:
-       case PROGRAM_NAMED_PARAM:
-       case PROGRAM_CONSTANT:
-               return program->Parameters->ParameterValues[srcreg.Index];
-       default:
-               _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n");
-               return dummy;
+               case RC_STATE_R300_WINDOW_DIMENSION: {
+                       __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon);
+                       buffer[0] = drawable->w * 0.5f; /* width*0.5 */
+                       buffer[1] = drawable->h * 0.5f; /* height*0.5 */
+                       buffer[2] = 0.5F;       /* for moving range [-1 1] -> [0 1] */
+                       buffer[3] = 1.0F;       /* not used */
+                       return buffer;
+               }
+
+               case RC_STATE_R300_TEXRECT_FACTOR: {
+                       struct gl_texture_object *t =
+                               ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
+
+                       if (t && t->Image[0][t->BaseLevel]) {
+                               struct gl_texture_image *image =
+                                       t->Image[0][t->BaseLevel];
+                               buffer[0] = 1.0 / image->Width2;
+                               buffer[1] = 1.0 / image->Height2;
+                       } else {
+                               buffer[0] = 1.0;
+                               buffer[1] = 1.0;
+                       }
+                       buffer[2] = 1.0;
+                       buffer[3] = 1.0;
+                       return buffer;
+               }
+               }
        }
+
+       return dummy;
 }
 
 
-static GLboolean r300SetupPixelShader(GLcontext *ctx)
+static void r300SetupPixelShader(GLcontext *ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+       struct r300_fragment_program *fp = rmesa->selected_fp;
        struct r300_fragment_program_code *code;
-       int i, k;
-
-       /* Program is not native, fallback to software */
-       if (fp->error)
-               return GL_FALSE;
-
-       code = &fp->code.r300;
+       int i;
 
-       r300SetupTextures(ctx);
+       code = &fp->code.code.r300;
 
        R300_STATECHANGE(rmesa, fpi[0]);
        R300_STATECHANGE(rmesa, fpi[1]);
@@ -2272,46 +2064,29 @@ static GLboolean r300SetupPixelShader(GLcontext *ctx)
        rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
        rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
        for (i = 0; i < code->alu.length; i++) {
-               rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
-               rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
-               rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2;
-               rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3;
+               rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst;
+               rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr;
+               rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst;
+               rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr;
        }
 
        R300_STATECHANGE(rmesa, fp);
-       rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3);
-       rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx;
-       rmesa->hw.fp.cmd[R300_FP_CNTL2] =
-         (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
-         ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
-         (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
-         ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
-       /* I just want to say, the way these nodes are stored.. weird.. */
-       for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) {
-               if (i < (code->cur_node + 1)) {
-                       rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
-                         (code->node[i].alu_offset << R300_ALU_START_SHIFT) |
-                         (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) |
-                         (code->node[i].tex_offset << R300_TEX_START_SHIFT) |
-                         (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) |
-                         code->node[i].flags;
-               } else {
-                       rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
-               }
-       }
+       rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config;
+       rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize;
+       rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset;
+       for (i = 0; i < 4; i++)
+               rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i];
 
        R300_STATECHANGE(rmesa, fpp);
-       rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
-       for (i = 0; i < code->const_nr; i++) {
-               const GLfloat *constant = get_fragmentprogram_constant(ctx,
-                       &fp->Base.Base, code->constant[i]);
+       rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
+       for (i = 0; i < fp->code.constants.Count; i++) {
+               GLfloat buffer[4];
+               const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
                rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
                rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
                rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
                rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);
        }
-
-       return GL_TRUE;
 }
 
 #define bump_r500fp_count(ptr, new_count)   do{\
@@ -2328,35 +2103,29 @@ static GLboolean r300SetupPixelShader(GLcontext *ctx)
        if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
 } while(0)
 
-static GLboolean r500SetupPixelShader(GLcontext *ctx)
+static void r500SetupPixelShader(GLcontext *ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+       struct r300_fragment_program *fp = rmesa->selected_fp;
        int i;
        struct r500_fragment_program_code *code;
 
        ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
        ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
 
-       /* Program is not native, fallback to software */
-       if (fp->error)
-               return GL_FALSE;
-
-       code = &fp->code.r500;
-
-       r300SetupTextures(ctx);
+       code = &fp->code.code.r500;
 
        R300_STATECHANGE(rmesa, fp);
        rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
 
        rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
-           R500_US_CODE_START_ADDR(code->inst_offset) |
+           R500_US_CODE_START_ADDR(0) |
            R500_US_CODE_END_ADDR(code->inst_end);
        rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
-           R500_US_CODE_RANGE_ADDR(code->inst_offset) |
+           R500_US_CODE_RANGE_ADDR(0) |
            R500_US_CODE_RANGE_SIZE(code->inst_end);
        rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
-           R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */
+           R500_US_CODE_OFFSET_ADDR(0);
 
        R300_STATECHANGE(rmesa, r500fp);
        /* Emit our shader... */
@@ -2372,17 +2141,73 @@ static GLboolean r500SetupPixelShader(GLcontext *ctx)
        bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
 
        R300_STATECHANGE(rmesa, r500fp_const);
-       for (i = 0; i < code->const_nr; i++) {
-               const GLfloat *constant = get_fragmentprogram_constant(ctx,
-                       &fp->Base.Base, code->constant[i]);
+       for (i = 0; i < fp->code.constants.Count; i++) {
+               GLfloat buffer[4];
+               const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
                rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
                rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
                rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
                rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
        }
-       bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
+       bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4);
+}
+
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
+{
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
+       struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+       int i, j, reg_count;
+       uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
+       uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
+
+       for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
+               vir0[i] = vir1[i] = 0;
+
+       for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) {
+               int tmp;
+
+               tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
+               if (attrs[i]._signed)
+                       tmp |= R300_SIGNED;
+               if (attrs[i].normalize)
+                       tmp |= R300_NORMALIZE;
+
+               if (i % 2 == 0) {
+                       vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
+                       vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
+               } else {
+                       vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
+                       vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+                       ++j;
+               }
+       }
+
+       reg_count = (rmesa->vbuf.num_attribs + 1) >> 1;
+       if (rmesa->vbuf.num_attribs % 2 != 0) {
+               vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
+       } else {
+               vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
+       }
 
-       return GL_TRUE;
+       R300_STATECHANGE(rmesa, vir[0]);
+       R300_STATECHANGE(rmesa, vir[1]);
+       R300_STATECHANGE(rmesa, vof);
+       R300_STATECHANGE(rmesa, vic);
+
+       if (rmesa->radeon.radeonScreen->kernel_mm) {
+               rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
+               rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
+               rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
+               rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
+       } else {
+               ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
+               ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
+       }
+
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
 }
 
 void r300UpdateShaderStates(r300ContextPtr rmesa)
@@ -2396,36 +2221,15 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
 
        r300SetEarlyZState(ctx);
 
-       /* w_fmt value is set to get best performance
-        * see p.130 R5xx 3D acceleration guide v1.3 */
-       GLuint w_fmt, fgdepthsrc;
-       if (current_fragment_program_writes_depth(ctx)) {
-               fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
-               w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
-       } else {
-               fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
-               w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
-       }
-
-       if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) {
-               R300_STATECHANGE(rmesa, us_out_fmt);
-               rmesa->hw.us_out_fmt.cmd[5] = w_fmt;
-       }
-
-       if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) {
-               R300_STATECHANGE(rmesa, fg_depth_src);
-               rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
-       }
-
-       r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
+       r300SetupTextures(ctx);
 
-       if (!rmesa->vtbl.SetupPixelShader(ctx))
-               return;
+       rmesa->vtbl.SetupPixelShader(ctx);
 
        rmesa->vtbl.SetupRSUnit(ctx);
 
-       if (rmesa->options.hw_tcl_enabled)
+       if (rmesa->options.hw_tcl_enabled) {
                r300SetupVertexProgram(rmesa);
+       }
 }
 
 /**
@@ -2446,10 +2250,9 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
                _mesa_update_draw_buffer_bounds(ctx);
 
                R300_STATECHANGE(r300, cb);
+               R300_STATECHANGE(r300, zb);
        }
 
-       r300UpdateStateParameters(ctx, new_state);
-
        r300->radeon.NewGLState |= new_state;
 }
 
@@ -2465,9 +2268,7 @@ void r300InitState(r300ContextPtr r300)
 
 static void r300RenderMode(GLcontext * ctx, GLenum mode)
 {
-       r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       (void)rmesa;
-       (void)mode;
+       r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER);
 }
 
 /**
@@ -2522,13 +2323,9 @@ void r300InitShaderFunctions(r300ContextPtr r300)
                r300->vtbl.SetupRSUnit = r500SetupRSUnit;
                r300->vtbl.SetupPixelShader = r500SetupPixelShader;
                r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
-               r300->vtbl.FragmentProgramEmit = r500FragmentProgramEmit;
-               r300->vtbl.FragmentProgramDump = r500FragmentProgramDump;
        } else {
                r300->vtbl.SetupRSUnit = r300SetupRSUnit;
                r300->vtbl.SetupPixelShader = r300SetupPixelShader;
                r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
-               r300->vtbl.FragmentProgramEmit = r300FragmentProgramEmit;
-               r300->vtbl.FragmentProgramDump = r300FragmentProgramDump;
        }
 }