Merge commit 'origin/gallium-0.1'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
index 76740df81fc941df7ef0b4d528d16a3b83ceb761..80bd3389aefe115ac436b8e50c01d47e72da0fbd 100644 (file)
@@ -33,12 +33,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * \author Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "glheader.h"
-#include "mtypes.h"
-#include "colormac.h"
-#include "imports.h"
-#include "macros.h"
-#include "image.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/image.h"
 
 #include "swrast_setup/swrast_setup.h"
 #include "math/m_translate.h"
@@ -86,8 +86,7 @@ do {                                          \
 } while (0)
 #endif
 
-static void r300EmitVec4(GLcontext * ctx,
-                        struct r300_dma_region *rvb,
+static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
                         GLvoid * data, int stride, int count)
 {
        int i;
@@ -107,8 +106,7 @@ static void r300EmitVec4(GLcontext * ctx,
                }
 }
 
-static void r300EmitVec8(GLcontext * ctx,
-                        struct r300_dma_region *rvb,
+static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
                         GLvoid * data, int stride, int count)
 {
        int i;
@@ -129,8 +127,7 @@ static void r300EmitVec8(GLcontext * ctx,
                }
 }
 
-static void r300EmitVec12(GLcontext * ctx,
-                         struct r300_dma_region *rvb,
+static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
                          GLvoid * data, int stride, int count)
 {
        int i;
@@ -152,8 +149,7 @@ static void r300EmitVec12(GLcontext * ctx,
                }
 }
 
-static void r300EmitVec16(GLcontext * ctx,
-                         struct r300_dma_region *rvb,
+static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
                          GLvoid * data, int stride, int count)
 {
        int i;
@@ -176,8 +172,7 @@ static void r300EmitVec16(GLcontext * ctx,
                }
 }
 
-static void r300EmitVec(GLcontext * ctx,
-                       struct r300_dma_region *rvb,
+static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
                        GLvoid * data, int size, int stride, int count)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -212,57 +207,72 @@ static void r300EmitVec(GLcontext * ctx,
        }
 }
 
-static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
+#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) |     \
+                   (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
+/* Packs the nr attributes listed in tab[] two per dword into dst[]; returns the dword count, (nr + 1) / 2. */
+GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
                                 int *inputs, GLint * tab, GLuint nr)
 {
        GLuint i, dw;
 
-       /* dw: size, inputs, stop bit, type */
-       for (i = 0; i + 1 < nr; i += 2) {
-               dw = (attribptr[tab[i]]->size - 1) | (inputs[tab[i]] << 8) | (AOS_FORMAT_FLOAT << 14);
-               dw |= ((attribptr[tab[i + 1]]->size - 1) | (inputs[tab[i + 1]] << 8) | (AOS_FORMAT_FLOAT << 14)) << 16;
-               if (i + 2 == nr) {
-                       dw |= (1 << (13 + 16));
+       /* per attribute: R300_SIGNED, destination location (inputs[]), R300_LAST_VEC on the final one, size - 1 */
+       for (i = 0; i < nr; i += 2) {
+               /* inputs[] == -1 marks an attribute that is not read; routing it would lock up the GPU */
+               assert(inputs[tab[i]] != -1);
+               dw = (R300_SIGNED | DW_SIZE(i));
+               if (i + 1 == nr) {      /* odd nr: this attribute is the last one */
+                       dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
+               } else {
+                       assert(inputs[tab[i + 1]] != -1);
+                       dw |= (R300_SIGNED |
+                              DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
+                       if (i + 2 == nr) {      /* second attribute of the pair is the last one */
+                               dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
+                       }
                }
                dst[i >> 1] = dw;
        }
 
-       if (nr & 1) {
-               dw = (attribptr[tab[nr - 1]]->size - 1) | (inputs[tab[nr - 1]] << 8) | (AOS_FORMAT_FLOAT << 14);
-               dw |= 1 << 13;
-               dst[nr >> 1] = dw;
-       }
-
        return (nr + 1) >> 1;
 }
 
 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
 {
-       return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
-           (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
-           (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
-           (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
+       return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |    /* OR of the four selects, each at its own shift */
+           (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
+           (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
+           (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
 }
 
-static GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
+GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
 {
-       GLuint i;
-
-       for (i = 0; i + 1 < nr; i += 2) {
-               dst[i >> 1] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
-               dst[i >> 1] |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
-       }
+       GLuint i, dw;   /* dw: one output dword; (nr + 1) / 2 of them are stored to dst and that count is returned */
 
-       if (nr & 1) {
-               dst[nr >> 1] = r300VAPInputRoute1Swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
+       for (i = 0; i < nr; i += 2) {   /* two swizzle entries per dword, all four write enables set on each */
+               dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
+                     ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
+                       R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
+               if (i + 1 < nr) {       /* false only for an odd trailing entry, which fills just the SWIZZLE0 part */
+                       dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
+                              ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
+                                R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+               }
+               dst[i >> 1] = dw;
        }
 
        return (nr + 1) >> 1;
 }
 
-static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
+GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
 {
-       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       /* The meaning of 0x5555 is unknown; other values have been seen written
+        * to this register. Neither ctx nor InputsRead affects the result. */
+       return 0x5555;
+}
+
+GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
        GLuint i, vic_1 = 0;
 
        if (InputsRead & (1 << VERT_ATTRIB_POS))
@@ -274,17 +284,17 @@ static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
        if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
                vic_1 |= R300_INPUT_CNTL_COLOR;
 
-       r300->state.texture.tc_count = 0;
+       rmesa->state.texture.tc_count = 0;
        for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
                if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
-                       r300->state.texture.tc_count++;
+                       rmesa->state.texture.tc_count++;
                        vic_1 |= R300_INPUT_CNTL_TC0 << i;
                }
 
        return vic_1;
 }
 
-static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
 {
        GLuint ret = 0;
 
@@ -292,18 +302,19 @@ static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
                ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
 
        if (OutputsWritten & (1 << VERT_RESULT_COL0))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
+               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
 
        if (OutputsWritten & (1 << VERT_RESULT_COL1))
                ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
 
-#if 0
-       if (OutputsWritten & (1 << VERT_RESULT_BFC0))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_BFC1))
-               ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+       if (OutputsWritten & (1 << VERT_RESULT_BFC0)
+           || OutputsWritten & (1 << VERT_RESULT_BFC1))
+               ret |=
+                   R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
+                   R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
+                   R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
 
+#if 0
        if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
 #endif
 
@@ -313,7 +324,7 @@ static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
        return ret;
 }
 
-static GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
 {
        GLuint i, ret = 0;
 
@@ -333,7 +344,6 @@ static GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
 int r300EmitArrays(GLcontext * ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       r300ContextPtr r300 = rmesa;
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct vertex_buffer *vb = &tnl->vb;
        GLuint nr;
@@ -344,114 +354,105 @@ int r300EmitArrays(GLcontext * ctx)
        int vir_inputs[VERT_ATTRIB_MAX];
        GLint tab[VERT_ATTRIB_MAX];
        int swizzle[VERT_ATTRIB_MAX][4];
+       struct r300_vertex_program *prog =
+           (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
 
        if (hw_tcl_on) {
-               struct r300_vertex_program *prog =
-                   (struct r300_vertex_program *)
-                   CURRENT_VERTEX_SHADER(ctx);
                inputs = prog->inputs;
-               InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
-               OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+               InputsRead = prog->key.InputsRead;
+               OutputsWritten = prog->key.OutputsWritten;
        } else {
-               DECLARE_RENDERINPUTS(inputs_bitset);
-               inputs = r300->state.sw_tcl_inputs;
+               inputs = rmesa->state.sw_tcl_inputs;
+
+               DECLARE_RENDERINPUTS(render_inputs_bitset);
+               RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
 
-               RENDERINPUTS_COPY(inputs_bitset,
-                                 TNL_CONTEXT(ctx)->render_inputs_bitset);
+               vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
 
-               assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
-               InputsRead |= 1 << VERT_ATTRIB_POS;
-               OutputsWritten |= 1 << VERT_RESULT_HPOS;
+               assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
+               assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
+               //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
 
-               assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
-                      == 0);
+               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
+                       InputsRead |= 1 << VERT_ATTRIB_POS;
+                       OutputsWritten |= 1 << VERT_RESULT_HPOS;
+               }
 
-               assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
-               InputsRead |= 1 << VERT_ATTRIB_COLOR0;
-               OutputsWritten |= 1 << VERT_RESULT_COL0;
+               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
+                       InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+                       OutputsWritten |= 1 << VERT_RESULT_COL0;
+               }
 
-               if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
+               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
                        InputsRead |= 1 << VERT_ATTRIB_COLOR1;
                        OutputsWritten |= 1 << VERT_RESULT_COL1;
                }
 
-               for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-                       if (RENDERINPUTS_TEST
-                           (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+                       if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
                                InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
                                OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
                        }
+               }
 
-               for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
-                       if (InputsRead & (1 << i))
+               for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+                       if (InputsRead & (1 << i)) {
                                inputs[i] = nr++;
-                       else
+                       } else {
                                inputs[i] = -1;
-
-               if (!
-                   (r300->radeon.radeonScreen->
-                    chip_flags & RADEON_CHIPSET_TCL)) {
-                       /* Fixed, apply to vir0 only */
-                       memcpy(vir_inputs, inputs,
-                              VERT_ATTRIB_MAX * sizeof(int));
-                       inputs = vir_inputs;
-
-                       if (InputsRead & VERT_ATTRIB_POS)
-                               inputs[VERT_ATTRIB_POS] = 0;
-
-                       if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-                               inputs[VERT_ATTRIB_COLOR0] = 2;
-
-                       if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
-                               inputs[VERT_ATTRIB_COLOR1] = 3;
-
-                       for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
-                               if (InputsRead & (1 << i))
-                                       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+                       }
                }
 
-               RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
-                                 inputs_bitset);
+               /* Fixed, apply to vir0 only */
+               memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
+               inputs = vir_inputs;
+               if (InputsRead & VERT_ATTRIB_POS)
+                       inputs[VERT_ATTRIB_POS] = 0;
+               if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+                       inputs[VERT_ATTRIB_COLOR0] = 2;
+               if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
+                       inputs[VERT_ATTRIB_COLOR1] = 3;
+               for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+                       if (InputsRead & (1 << i))
+                               inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+
+               RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
        }
 
        assert(InputsRead);
        assert(OutputsWritten);
 
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
-               if (InputsRead & (1 << i))
+       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i)) {
                        tab[nr++] = i;
+               }
+       }
 
-       if (nr > R300_MAX_AOS_ARRAYS)
+       if (nr > R300_MAX_AOS_ARRAYS) {
                return R300_FALLBACK_TCL;
+       }
 
        for (i = 0; i < nr; i++) {
-               int ci;
-               int comp_size, fix, found = 0;
+               int ci, fix, found = 0;
 
                swizzle[i][0] = SWIZZLE_ZERO;
                swizzle[i][1] = SWIZZLE_ZERO;
                swizzle[i][2] = SWIZZLE_ZERO;
                swizzle[i][3] = SWIZZLE_ONE;
 
-               for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
+               for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
                        swizzle[i][ci] = ci;
+               }
 
                if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
-                       if (vb->AttribPtr[tab[i]]->stride % 4)
+                       if (vb->AttribPtr[tab[i]]->stride % 4) {
                                return R300_FALLBACK_TCL;
-
-                       rmesa->state.aos[i].address =
-                           (void *)(vb->AttribPtr[tab[i]]->data);
+                       }
+                       rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
                        rmesa->state.aos[i].start = 0;
-                       rmesa->state.aos[i].aos_offset =
-                           r300GartOffsetFromVirtual(rmesa,
-                                                     vb->
-                                                     AttribPtr[tab[i]]->data);
-                       rmesa->state.aos[i].aos_stride =
-                           vb->AttribPtr[tab[i]]->stride / 4;
-
-                       rmesa->state.aos[i].aos_size =
-                           vb->AttribPtr[tab[i]]->size;
+                       rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
+                       rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
+                       rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
                } else {
                        r300EmitVec(ctx, &rmesa->state.aos[i],
                                    vb->AttribPtr[tab[i]]->data,
@@ -461,13 +462,10 @@ int r300EmitArrays(GLcontext * ctx)
 
                rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
 
-               comp_size = _mesa_sizeof_type(GL_FLOAT);
-
                for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
-                       if ((rmesa->state.aos[i].aos_offset -
-                            comp_size * fix) % 4)
+                       if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
                                continue;
-
+                       }
                        found = 1;
                        break;
                }
@@ -476,11 +474,10 @@ int r300EmitArrays(GLcontext * ctx)
                        if (fix > 0) {
                                WARN_ONCE("Feeling lucky?\n");
                        }
-
-                       rmesa->state.aos[i].aos_offset -= comp_size * fix;
-
-                       for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
+                       rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
+                       for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
                                swizzle[i][ci] += fix;
+                       }
                } else {
                        WARN_ONCE
                            ("Cannot handle offset %x with stride %d, comp %d\n",
@@ -492,27 +489,25 @@ int r300EmitArrays(GLcontext * ctx)
        }
 
        /* Setup INPUT_ROUTE. */
-       R300_STATECHANGE(r300, vir[0]);
-       ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
-           r300VAPInputRoute0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0],
+       R300_STATECHANGE(rmesa, vir[0]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+           r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
                               vb->AttribPtr, inputs, tab, nr);
-
-       R300_STATECHANGE(r300, vir[1]);
-       ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
-           r300VAPInputRoute1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+       R300_STATECHANGE(rmesa, vir[1]);
+       ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+           r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
                               nr);
 
        /* Setup INPUT_CNTL. */
-       /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
-       R300_STATECHANGE(r300, vic);
-       r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555;     /* Hard coded value, no idea what it means */
-       r300->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+       R300_STATECHANGE(rmesa, vic);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
 
        /* Setup OUTPUT_VTX_FMT. */
-       R300_STATECHANGE(r300, vof);
-       r300->hw.vof.cmd[R300_VOF_CNTL_0] =
+       R300_STATECHANGE(rmesa, vof);
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
            r300VAPOutputCntl0(ctx, OutputsWritten);
-       r300->hw.vof.cmd[R300_VOF_CNTL_1] =
+       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
            r300VAPOutputCntl1(ctx, OutputsWritten);
 
        rmesa->state.aos_count = nr;
@@ -546,3 +541,19 @@ void r300ReleaseArrays(GLcontext * ctx)
                r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
        }
 }
+/* Issues reg_start()/e32() for the RB3D destination-cache and ZB Z-cache CTLSTAT registers with their flush and free flags. */
+void r300EmitCacheFlush(r300ContextPtr rmesa)
+{
+       int cmd_reserved = 0;
+       int cmd_written = 0;
+
+       drm_radeon_cmd_header_t *cmd = NULL;
+       /* NOTE(review): the three locals above are never named below; presumably reg_start()/e32() are macros that use them -- confirm */
+       reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+       e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+           R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+       /* Same pattern for the second register, R300_ZB_ZCACHE_CTLSTAT. */
+       reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
+       e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+           R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+}