r300: Working trivial/clear for RV410.
author Corbin Simpson <MostAwesomeDude@gmail.com>
Mon, 26 Jan 2009 05:35:26 +0000 (21:35 -0800)
committer Corbin Simpson <MostAwesomeDude@gmail.com>
Mon, 2 Feb 2009 07:30:29 +0000 (23:30 -0800)
This might work for other people too.

src/gallium/drivers/r300/r300_cs.h
src/gallium/drivers/r300/r300_cs_inlines.h [new file with mode: 0644]
src/gallium/drivers/r300/r300_surface.c

index edcfb9628f7d190554792889f739688302ebfd93..d515c2f0253b63b7b723e327d4d3fbbffcfa24cd 100644 (file)
@@ -81,7 +81,7 @@ static uint32_t pack_float_32(float f)
 } while (0)
 
 #define OUT_CS_REG(register, value) do { \
-    debug_printf("writing 0x%x to register 0x%x\n", value, register); \
+    debug_printf("r300: writing 0x%x to register 0x%x\n", value, register); \
     OUT_CS(CP_PACKET0(register, 0)); \
     OUT_CS(value); \
 } while (0)
@@ -89,11 +89,13 @@ static uint32_t pack_float_32(float f)
 /* Note: This expects count to be the number of registers,
  * not the actual packet0 count! */
 #define OUT_CS_REG_SEQ(register, count) do { \
-    debug_printf("writing register sequence 0x%x\n", register); \
+    debug_printf("r300: writing register sequence 0x%x\n", register); \
     OUT_CS(CP_PACKET0(register, ((count) - 1))); \
 } while (0)
 
 #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
+    debug_printf("r300: writing relocation for buffer %p, offset %d\n", \
+        bo, offset); \
     OUT_CS(offset); \
     cs_winsys->write_cs_reloc(cs, bo, rd, wd, flags); \
 } while (0)
@@ -110,4 +112,6 @@ static uint32_t pack_float_32(float f)
     cs_winsys->flush_cs(cs); \
 } while (0)
 
+#include "r300_cs_inlines.h"
+
 #endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h
new file mode 100644 (file)
index 0000000..aa0e647
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_cs_inlines: This is just a handful of useful inlines for sending
+ * (very) common instructions to the CS buffer. Should only be included from
+ * r300_cs.h, probably. */
+
+#ifdef R300_CS_H
+
+/* Emit one RADEON_WAIT_UNTIL write with bits 15, 17, 18 and 31 set. */
+#define R300_PACIFY do { \
+    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | \
+        (1 << 18) | (1u << 31)); /* 1u: 1 << 31 overflows int */ \
+} while (0)
+
+#endif /* R300_CS_H */
index 8afa06dec8f8e60582bc24c2755c4e0a6b590a80..226cc7fc6cf2c08b080cfdef5ebedb0182b69871 100644 (file)
@@ -32,356 +32,18 @@ static void r300_surface_fill(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
     CS_LOCALS(r300);
-#if 0
-    struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
-    boolean has_tcl = caps->has_tcl;
-    boolean is_r500 = caps->is_r500;
-    /* For the for loops. */
-    int i;
-    /* Emit a shitload of state, and then draw a point to clear the buffer.
-     * XXX it goes without saying that this needs to be cleaned up and
-     * shifted around to work with the rest of the driver's state handling.
-     */
-    BEGIN_CS(450);
-    /* XXX */
-    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | (1 << 18));
-    OUT_CS_REG(R300_TX_INVALTAGS, 0x0);
-    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | (1 << 18));
-    OUT_CS_REG(R300_TX_INVALTAGS, 0x0);
+    float r, g, b, a;
+    r = (float)((color >> 16) & 0xff) / 255.0f;
+    g = (float)((color >>  8) & 0xff) / 255.0f;
+    b = (float)((color >>  0) & 0xff) / 255.0f;
+    debug_printf("r300: Filling surface %p at (%d,%d),"
+        " dimensions %dx%d, color 0x%x\n",
+        dest, x, y, w, h, color);
 
-    /* Sequence starting at R300_VAP_PROG_STREAM_CNTL_0 */
-    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
-    if (has_tcl) {
-        OUT_CS(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) <<
-                R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (1 <<
-                R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) <<
-                R300_DATA_TYPE_1_SHIFT)));
-    } else {
-        OUT_CS(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) <<
-                R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (2 <<
-                R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) <<
-                R300_DATA_TYPE_1_SHIFT)));
-    }
-
-    /* Disable fog */
-    OUT_CS_REG(R300_FG_FOG_BLEND, 0);
-    OUT_CS_REG(R300_FG_ALPHA_FUNC, 0);
-
-    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-               ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-                       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
-                       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
-                       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-                       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
-                       R300_WRITE_ENA_Z | R300_WRITE_ENA_W) <<
-                       R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT) |
-                       (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-                       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
-                       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
-                       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-                       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
-                       R300_WRITE_ENA_Z | R300_WRITE_ENA_W) <<
-                       R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT)));
-    /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
-    OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
-    OUT_CS((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-    OUT_CS(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
-
-    /* comes from fglrx startup of clear */
-    OUT_CS_REG_SEQ(R300_SE_VTE_CNTL, 2);
-    OUT_CS(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-            R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-            R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-            R300_VPORT_Z_OFFSET_ENA);
-    OUT_CS(0x8);
-
-    /* XXX */
-    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
-    OUT_CS(0xFFFFFF);
-    OUT_CS(0x0);
-
-    OUT_CS_REG(R300_VAP_CNTL_STATUS, 0x0);
-
-    OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
-    OUT_CS(0x3f800000);
-    OUT_CS(0x3f800000);
-    OUT_CS(0x3f800000);
-    OUT_CS(0x3f800000);
-
-    OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
-
-    OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
-
-    OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-    OUT_CS(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-            R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-    OUT_CS(0); /* no textures */
-
-    OUT_CS_REG(R300_TX_ENABLE, 0);
-
-    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(x);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(y);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(0.0);
-
-    OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2);
-    OUT_CS(0x0);
-    OUT_CS(0x0);
-
-    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-
-    OUT_CS_REG(R300_GA_POINT_SIZE, ((w * 6) << R300_POINTSIZE_X_SHIFT) |
-            ((h * 6) << R300_POINTSIZE_Y_SHIFT));
-
-    if (is_r500) {
-        OUT_CS_REG_SEQ(R500_RS_IP_0, 8);
-        for (i = 0; i < 8; ++i) {
-            OUT_CS((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-                    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-                    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-                    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-        }
-
-        OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
-        /* XXX could hires be disabled for a speed boost? */
-        OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-        OUT_CS(0x0);
-
-        OUT_CS_REG(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
-    } else {
-        OUT_CS_REG_SEQ(R300_RS_IP_0, 8);
-        for (i = 0; i < 8; ++i) {
-            OUT_CS(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-        }
-
-        OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
-        /* XXX could hires be disabled for a speed boost? */
-        OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-        OUT_CS(0x0);
-
-        OUT_CS_REG(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
-    }
-
-    if (is_r500) {
-        OUT_CS_REG_SEQ(R500_US_CONFIG, 2);
-        OUT_CS(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-        OUT_CS(0x0);
-        OUT_CS_REG_SEQ(R500_US_CODE_ADDR, 3);
-        OUT_CS(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-        OUT_CS(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-        OUT_CS(R500_US_CODE_OFFSET_ADDR(0));
-
-        OUT_CS_REG(R500_GA_US_VECTOR_INDEX, 0x0);
-
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_INST_TYPE_OUT |
-                R500_INST_TEX_SEM_WAIT |
-                R500_INST_LAST |
-                R500_INST_RGB_OMASK_R |
-                R500_INST_RGB_OMASK_G |
-                R500_INST_RGB_OMASK_B |
-                R500_INST_ALPHA_OMASK |
-                R500_INST_RGB_CLAMP |
-                R500_INST_ALPHA_CLAMP);
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_RGB_ADDR0(0) |
-                R500_RGB_ADDR1(0) |
-                R500_RGB_ADDR1_CONST |
-                R500_RGB_ADDR2(0) |
-                R500_RGB_ADDR2_CONST);
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_ALPHA_ADDR0(0) |
-                R500_ALPHA_ADDR1(0) |
-                R500_ALPHA_ADDR1_CONST |
-                R500_ALPHA_ADDR2(0) |
-                R500_ALPHA_ADDR2_CONST);
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_ALU_RGB_SEL_A_SRC0 |
-                R500_ALU_RGB_R_SWIZ_A_R |
-                R500_ALU_RGB_G_SWIZ_A_G |
-                R500_ALU_RGB_B_SWIZ_A_B |
-                R500_ALU_RGB_SEL_B_SRC0 |
-                R500_ALU_RGB_R_SWIZ_B_R |
-                R500_ALU_RGB_B_SWIZ_B_G |
-                R500_ALU_RGB_G_SWIZ_B_B);
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_ALPHA_OP_CMP |
-                R500_ALPHA_SWIZ_A_A |
-                R500_ALPHA_SWIZ_B_A);
-        OUT_CS_REG(R500_GA_US_VECTOR_DATA, R500_ALU_RGBA_OP_CMP |
-                R500_ALU_RGBA_R_SWIZ_0 |
-                R500_ALU_RGBA_G_SWIZ_0 |
-                R500_ALU_RGBA_B_SWIZ_0 |
-                R500_ALU_RGBA_A_SWIZ_0);
-
-    } else {
-        OUT_CS_REG_SEQ(R300_US_CONFIG, 3);
-        OUT_CS(0x0);
-        OUT_CS(0x0);
-        OUT_CS(0x0);
-        OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4);
-        OUT_CS(0x0);
-        OUT_CS(0x0);
-        OUT_CS(0x0);
-        OUT_CS(R300_RGBA_OUT);
-
-        OUT_CS_REG(R300_US_ALU_RGB_INST_0,
-                   FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-        OUT_CS_REG(R300_US_ALU_RGB_ADDR_0,
-                   FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-        OUT_CS_REG(R300_US_ALU_ALPHA_INST_0,
-                   FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-        OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0,
-                   FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-    }
-
-    /* XXX */
-    uint32_t vap_cntl;
-    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-    if (has_tcl) {
-        vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-                (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-                (12 << R300_VF_MAX_VTX_NUM_SHIFT));
-        if (CHIP_FAMILY_RV515)
-            vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-    } else {
-        vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-                (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-                (5 << R300_VF_MAX_VTX_NUM_SHIFT));
-    }
-
-    vap_cntl |= (caps->num_vert_pipes <<
-                 R300_PVS_NUM_FPUS_SHIFT);
-
-    OUT_CS_REG(R300_VAP_CNTL, vap_cntl);
-
-    /* XXX unbreak this
-    if (has_tcl) {
-        OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
-        OUT_CS((0 << R300_PVS_FIRST_INST_SHIFT) |
-                (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-                (1 << R300_PVS_LAST_INST_SHIFT));
-        OUT_CS((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-                (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-        OUT_CS(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-
-        OUT_CS_REG(R300_SC_SCREENDOOR, 0x0);
-        OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 28));
-        OUT_CS_REG(R300_SC_SCREENDOOR, 0x00FFFFFF);
-        OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1);
-        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_CODE_START);
-
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
-                                        0, 0xf, PVS_DST_REG_OUT));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
-                                     PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
-                                     PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0);
-
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
-                                        PVS_DST_REG_OUT));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
-                                     PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-                                     PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-                                     VSF_FLAG_NONE));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_SELECT_FORCE_0,
-                                     PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
-        OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0);
-    } */
-
-    /* TODO in bufmgr */
-    /* XXX this should be split off, also figure out WTF with the numbers */
-    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | (1 << 18));
-    /* XXX might have to switch to 2D */
-
-    OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
-    OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-    /* XXX this needs more TLC (or TCL, as it were) */
-    OUT_CS_REG(R300_RB3D_COLORPITCH0, R300_COLOR_FORMAT_ARGB8888);
-    if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
-        assert(rrbd != 0);
-        cbpitch = (rrbd->pitch / rrbd->cpp);
-        if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-            cbpitch |= R300_DEPTHMACROTILE_ENABLE;
-        }
-        if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-            cbpitch |= R300_DEPTHMICROTILE_TILED;
-        }
-        BEGIN_BATCH_NO_AUTOSTATE(4);
-        OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-        OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-        OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
-        END_BATCH();
-    }
-
-    {
-        uint32_t t1, t2;
-
-        t1 = 0x0;
-        t2 = 0x0;
-
-        if (flags & CLEARBUFFER_DEPTH) {
-            t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE;
-            t2 |=
-                    (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT);
-        }
-
-        if (flags & CLEARBUFFER_STENCIL) {
-            t1 |= R300_STENCIL_ENABLE;
-            t2 |=
-                    (R300_ZS_ALWAYS <<
-                    R300_S_FRONT_FUNC_SHIFT) |
-                    (R300_ZS_REPLACE <<
-                    R300_S_FRONT_SFAIL_OP_SHIFT) |
-                    (R300_ZS_REPLACE <<
-                    R300_S_FRONT_ZPASS_OP_SHIFT) |
-                    (R300_ZS_REPLACE <<
-                    R300_S_FRONT_ZFAIL_OP_SHIFT);
-        }
-
-        OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
-        OUT_BATCH(t1);
-        OUT_BATCH(t2);
-        OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
-                R300_STENCILWRITEMASK_SHIFT) |
-                (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-        END_BATCH();
-    }
-
-    OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
-    OUT_CS(0x0);
-    OUT_CS(0x0);
-    OUT_CS(0x0);
-
-    OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
-    OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-               (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-    OUT_CS_32F(w / 2.0);
-    OUT_CS_32F(h / 2.0);
-    /* XXX this should be the depth value to clear to */
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(color);
-    OUT_CS_32F(color);
-    OUT_CS_32F(color);
-    OUT_CS_32F(color);
-
-    /* XXX this should be split off, also figure out WTF with the numbers */
-    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | (1 << 18));
-
-    END_CS;
-    FLUSH_CS;
-#endif
 BEGIN_CS(276);
-OUT_CS_REG(0x1720, 0x00068000);
+R300_PACIFY;
 OUT_CS_REG(0x4100, 0x00000000);
-OUT_CS_REG(0x1720, 0x00068000);
+R300_PACIFY;
 OUT_CS_REG(0x1D98, 0x43000000);
 OUT_CS_REG(0x1D9C, 0x43002000);
 OUT_CS_REG(0x1DA0, 0xC3000000);
@@ -423,7 +85,6 @@ OUT_CS_REG(0x4204, 0x00000000);
 OUT_CS_REG(0x4208, 0x3F800000);
 OUT_CS_REG(0x420C, 0x3F800000);
 OUT_CS_REG(0x4214, 0x00050005);
-OUT_CS_REG(0x421C, 0x00060006);
 OUT_CS_REG(0x4230, 0x18000006);
 OUT_CS_REG(0x4234, 0x00020006);
 OUT_CS_REG(0x4238, 0x3BAAAAAB);
@@ -492,8 +153,6 @@ OUT_CS_REG(0x4E08, 0x20210000);
 OUT_CS_REG(0x4E0C, 0x0000000F);
 OUT_CS_REG(0x4E10, 0x00000000);
 OUT_CS_REG(0x4E18, 0x00000000);
-OUT_CS_REG(0x4E28, 0x00000000);
-OUT_CS_REG(0x4E38, 0x00C00100);
 OUT_CS_REG(0x4E50, 0x00000000);
 OUT_CS_REG(0x4E54, 0x00000000);
 OUT_CS_REG(0x4E58, 0x00000000);
@@ -513,15 +172,13 @@ OUT_CS_REG(0x4F10, 0x00000002);
 OUT_CS_REG(0x4F14, 0x00000001);
 OUT_CS_REG(0x4F18, 0x00000003);
 OUT_CS_REG(0x4F1C, 0x00000000);
-OUT_CS_REG(0x4F20, 0x00000000);
-OUT_CS_REG(0x4F24, 0x00000100);
 OUT_CS_REG(0x4F28, 0x00000000);
 OUT_CS_REG(0x4F30, 0x00000000);
 OUT_CS_REG(0x4F34, 0x00000000);
 OUT_CS_REG(0x4F44, 0x00000000);
 OUT_CS_REG(0x4F54, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000406);
@@ -530,7 +187,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x3F800000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000400);
@@ -539,7 +196,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000401);
@@ -548,7 +205,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000402);
@@ -557,7 +214,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000403);
@@ -566,7 +223,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000404);
@@ -575,7 +232,7 @@ OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x2208, 0x00000000);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000405);
@@ -604,7 +261,8 @@ OUT_CS_REG(0x4BD4, 0x00000000);
 OUT_CS_REG(0x4E04, 0x00000000);
 OUT_CS_REG(0x4E08, 0x00000000);
 OUT_CS_REG(0x221C, 0x0001C000);
-OUT_CS_REG(0x421C, 0x06000600);
+OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) |
+    ((w * 6) << R300_POINTSIZE_X_SHIFT));
 OUT_CS_REG(0x4310, 0x00D10000);
 OUT_CS_REG(0x4314, 0x00D10000);
 OUT_CS_REG(0x4318, 0x00D10000);
@@ -633,7 +291,7 @@ OUT_CS_REG(0x22D0, 0x00100000);
 OUT_CS_REG(0x22D4, 0x00000000);
 OUT_CS_REG(0x22D8, 0x00000001);
 OUT_CS_REG(0x43E8, 0x00000000);
-OUT_CS_REG(0x1720, 0x10008000);
+R300_PACIFY;
 OUT_CS_REG(0x43E8, 0x00FFFFFF);
 OUT_CS_REG(0x2284, 0x00000001);
 OUT_CS_REG(0x2200, 0x00000000);
@@ -645,16 +303,32 @@ OUT_CS_REG(0x2208, 0x00F02203);
 OUT_CS_REG(0x2208, 0x00D10021);
 OUT_CS_REG(0x2208, 0x01248021);
 OUT_CS_REG(0x2208, 0x00000000);
-OUT_CS_REG(0x1720, 0x00068000);
-OUT_CS_REG(0x4E28, 0x00000000);
-OUT_CS_REG(0x4E38, 0x00C00100);
+R300_PACIFY;
+OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
+OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+//OUT_CS_REG(0x4E38, 0x00C00100);
 OUT_CS_REG(0x4E0C, 0x0000000F);
 OUT_CS_REG(0x4F00, 0x00000000);
 OUT_CS_REG(0x4F04, 0x00000000);
 OUT_CS_REG(0x4F08, 0x00FF0000);
+
+/* XXX Packet3 */
+OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
+OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
+(1 << R300_PRIM_NUM_VERTICES_SHIFT));
+OUT_CS_32F(w / 2.0);
+OUT_CS_32F(h / 2.0);
+/* XXX this should be the depth value to clear to */
+OUT_CS_32F(1.0);
+OUT_CS_32F(1.0);
+OUT_CS_32F(r);
+OUT_CS_32F(g);
+OUT_CS_32F(b);
+OUT_CS_32F(1.0);
+
 OUT_CS_REG(0x4E4C, 0x0000000A);
 OUT_CS_REG(0x4F18, 0x00000003);
-OUT_CS_REG(0x1720, 0x00068000);
+R300_PACIFY;
 
 END_CS;
 FLUSH_CS;