r300-gallium: Die on bad texture formats.
[mesa.git] / src / gallium / drivers / r300 / r300_surface.c
index c0b020f81d380509840840f3dbed08fd49876cbd..17b42504d447b98e5207d56fba8a3a800e88d310 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *                Joakim Sindholt <opensource@zhasha.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
 
 #include "r300_surface.h"
 
+/* Emits the state shared by the surface fill and copy paths.
+ *
+ * Adds dest's buffer to the winsys buffer list (flushing the context if
+ * validation reports a problem), emits the clear blend, blend color, DSA and
+ * rasterizer state, and then programs the viewport, the pixel scissors and
+ * colorbuffer 0 so that rendering lands in the w x h rectangle whose top-left
+ * corner is (x, y) in dest. */
+static void r300_surface_setup(struct r300_context* r300,
+                               struct r300_texture* dest,
+                               unsigned x, unsigned y,
+                               unsigned w, unsigned h)
+{
+    struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
+    /* Colorbuffer pitch in pixels: byte stride over bytes per block. */
+    unsigned pixpitch = dest->stride / dest->tex.block.size;
+    /* Non-R500 chipsets have an offset of 1440 in their scissors. */
+    unsigned scissor_bias = caps->is_r500 ? 0 : 1440;
+    CS_LOCALS(r300);
+
+    /* Make sure our target BO is okay. */
+    r300->winsys->add_buffer(r300->winsys, dest->buffer,
+            0, RADEON_GEM_DOMAIN_VRAM);
+    if (r300->winsys->validate(r300->winsys)) {
+        r300->context.flush(&r300->context, 0, NULL);
+    }
+
+    r300_emit_blend_state(r300, &blend_clear_state);
+    r300_emit_blend_color_state(r300, &blend_color_clear_state);
+    r300_emit_dsa_state(r300, &dsa_clear_state);
+    r300_emit_rs_state(r300, &rs_clear_state);
+
+    BEGIN_CS(24);
+
+    /* Viewport setup: scale/offset pairs for X, Y and Z, in that order.
+     * Scaling by (w, h) and offsetting by (x, y) maps the unit square onto
+     * the target rectangle. */
+    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+    OUT_CS_32F((float)w);
+    OUT_CS_32F((float)x);
+    OUT_CS_32F((float)h);
+    OUT_CS_32F((float)y);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(0.0);
+
+    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
+            R300_VPORT_X_OFFSET_ENA |
+            R300_VPORT_Y_SCALE_ENA |
+            R300_VPORT_Y_OFFSET_ENA |
+            R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+
+    /* Pixel scissors: top-left corner, then bottom-right corner. The second
+     * dword is a coordinate, not a size, so it has to include the (x, y)
+     * origin; otherwise rectangles that do not start at (0, 0) get clipped
+     * short. For x == y == 0 this emits the same values as before. */
+    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_CS(((x + scissor_bias) << R300_SCISSORS_X_SHIFT) |
+            ((y + scissor_bias) << R300_SCISSORS_Y_SHIFT));
+    OUT_CS(((x + w + scissor_bias) << R300_SCISSORS_X_SHIFT) |
+            ((y + h + scissor_bias) << R300_SCISSORS_Y_SHIFT));
+
+    /* Flush colorbuffer and blend caches. */
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D |
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+    /* Setup colorbuffer. */
+    OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
+    OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch |
+        r300_translate_colorformat(dest->tex.format));
+    OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf);
+
+    END_CS;
+}
+
 /* Provides pipe_context's "surface_fill". Commonly used for clearing
  * buffers. */
 static void r300_surface_fill(struct pipe_context* pipe,
@@ -30,260 +100,227 @@ static void r300_surface_fill(struct pipe_context* pipe,
                               unsigned w, unsigned h,
                               unsigned color)
 {
+    int i;
+    float r, g, b, a, depth;
     struct r300_context* r300 = r300_context(pipe);
+    struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
+    struct r300_texture* tex = (struct r300_texture*)dest->texture;
+    unsigned pixpitch = tex->stride / tex->tex.block.size;
     CS_LOCALS(r300);
-    struct r300_capabilities* caps = ((struct r300_screen*)pipe->screen)->caps;
-    float r, g, b, a;
+
+    a = (float)((color >> 24) & 0xff) / 255.0f;
     r = (float)((color >> 16) & 0xff) / 255.0f;
     g = (float)((color >>  8) & 0xff) / 255.0f;
     b = (float)((color >>  0) & 0xff) / 255.0f;
     debug_printf("r300: Filling surface %p at (%d,%d),"
-        " dimensions %dx%d, color 0x%x\n",
-        dest, x, y, w, h, color);
-
-BEGIN_CS(276);
-R300_PACIFY;
-OUT_CS_REG(R300_TX_INVALTAGS, 0x0);
-R300_PACIFY;
-/* Viewport setup */
-OUT_CS_REG(0x1D98, 0x43000000);
-OUT_CS_REG(0x1D9C, 0x43002000);
-OUT_CS_REG(0x1DA0, 0xC3000000);
-OUT_CS_REG(0x1DA4, 0x43002000);
-OUT_CS_REG(0x1DA8, 0x3F000000);
-OUT_CS_REG(0x1DAC, 0x3F000000);
-/* Flush PVS. */
-OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
-
-OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
-    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-    R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
-/* Vertex size. */
-OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
-/* Max and min vertex index clamp. */
-OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF);
-OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
-/* XXX endian */
-OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP);
-OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x0);
-OUT_CS_REG(0x21DC, 0xAAAAAAAA);
-OUT_CS_REG(0x221C, 0x00000000);
-OUT_CS_REG(0x2220, 0x3F800000);
-OUT_CS_REG(0x2224, 0x3F800000);
-OUT_CS_REG(0x2228, 0x3F800000);
-OUT_CS_REG(0x222C, 0x3F800000);
-OUT_CS_REG(0x2288, 0x0000FFFF);
-OUT_CS_REG(0x2090, 0x00000000);
-OUT_CS_REG(0x2094, 0x00000000);
-OUT_CS_REG(0x22D0, 0x00000000);
-OUT_CS_REG(0x22D4, 0x00000000);
-OUT_CS_REG(0x22D8, 0x00000000);
-OUT_CS_REG(0x4008, 0x00000007);
-OUT_CS_REG(0x4010, 0x66666666);
-OUT_CS_REG(0x4014, 0x06666666);
-/* XXX why doesn't classic Mesa write the number of pipes, too? */
-OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16);
-OUT_CS_REG(0x401C, 0x00000004);
-OUT_CS_REG(0x4020, 0x00000000);
-OUT_CS_REG(0x4104, 0x00000000);
-OUT_CS_REG(0x4200, 0x00000000);
-OUT_CS_REG(0x4204, 0x00000000);
-OUT_CS_REG(0x4208, 0x3F800000);
-OUT_CS_REG(0x420C, 0x3F800000);
-OUT_CS_REG(0x4214, 0x00050005);
-OUT_CS_REG(0x4230, 0x18000006);
-OUT_CS_REG(0x4234, 0x00020006);
-OUT_CS_REG(0x4238, 0x3BAAAAAB);
-OUT_CS_REG(0x4234, 0x00030006);
-OUT_CS_REG(0x4260, 0x00000000);
-OUT_CS_REG(0x4264, 0x00000000);
-OUT_CS_REG(0x4268, 0x3F800000);
-OUT_CS_REG(0x4274, 0x00000002);
-OUT_CS_REG(0x4278, 0x0003AAAA);
-OUT_CS_REG(0x427C, 0x00000000);
-OUT_CS_REG(0x4280, 0x00000000);
-OUT_CS_REG(0x4288, 0x00000000);
-OUT_CS_REG(0x428C, 0x00000001);
-OUT_CS_REG(0x4290, 0x00000000);
-OUT_CS_REG(0x4294, 0x3DBF1412);
-OUT_CS_REG(0x4298, 0x00000000);
-OUT_CS_REG(0x42A0, 0x00000000);
-OUT_CS_REG(0x42A4, 0x00000000);
-OUT_CS_REG(0x42A8, 0x00000000);
-OUT_CS_REG(0x42AC, 0x00000000);
-OUT_CS_REG(0x42B0, 0x00000000);
-OUT_CS_REG(0x42B4, 0x00000000);
-OUT_CS_REG(0x42B8, 0x00000000);
-OUT_CS_REG(0x42C0, 0x4B7FFFFF);
-OUT_CS_REG(0x42C4, 0x00000000);
-OUT_CS_REG(0x4300, 0x00000000);
-OUT_CS_REG(0x4304, 0x00000000);
-OUT_CS_REG(0x4310, 0x00000000);
-OUT_CS_REG(0x4314, 0x00000000);
-OUT_CS_REG(0x4318, 0x00000000);
-OUT_CS_REG(0x431C, 0x00000000);
-OUT_CS_REG(0x4320, 0x00000000);
-OUT_CS_REG(0x4324, 0x00000000);
-OUT_CS_REG(0x4328, 0x00000000);
-OUT_CS_REG(0x432C, 0x00000000);
-OUT_CS_REG(0x4330, 0x00000000);
-OUT_CS_REG(0x43A4, 0x0000001C);
-OUT_CS_REG(0x43A8, 0x2DA49525);
-OUT_CS_REG(0x43E8, 0x00FFFFFF);
-OUT_CS_REG(0x46A4, 0x00001B00);
-OUT_CS_REG(0x46A8, 0x00001B0F);
-OUT_CS_REG(0x46AC, 0x00001B0F);
-OUT_CS_REG(0x46B0, 0x00001B0F);
-OUT_CS_REG(0x46B4, 0x00000001);
-OUT_CS_REG(0x4600, 0x00000000);
-OUT_CS_REG(0x4604, 0x00000000);
-OUT_CS_REG(0x4608, 0x00000000);
-OUT_CS_REG(0x4610, 0x00000000);
-OUT_CS_REG(0x4614, 0x00000000);
-OUT_CS_REG(0x4618, 0x00000000);
-OUT_CS_REG(0x461C, 0x00000000);
-OUT_CS_REG(0x48C0, 0x00000000);
-OUT_CS_REG(0x46C0, 0x00000000);
-OUT_CS_REG(0x49C0, 0x00000000);
-OUT_CS_REG(0x47C0, 0x00000000);
-OUT_CS_REG(0x4BC0, 0x00000002);
-OUT_CS_REG(0x4BC8, 0x00000000);
-OUT_CS_REG(0x4BCC, 0x00000000);
-OUT_CS_REG(0x4BD0, 0x00000000);
-OUT_CS_REG(0x4BD8, 0x00000000);
-OUT_CS_REG(0x4BD8, 0x00000000);
-OUT_CS_REG(0x4E00, 0x00000000);
-OUT_CS_REG(0x4E0C, 0x0000000F);
-
-r300_emit_blend_color_state(r300, &blend_color_clear_state);
-
-OUT_CS_REG(0x4E10, 0x00000000);
-OUT_CS_REG(0x4E54, 0x00000000);
-OUT_CS_REG(0x4E58, 0x00000000);
-OUT_CS_REG(0x4E5C, 0x00000000);
-OUT_CS_REG(0x4E60, 0x00000000);
-OUT_CS_REG(0x4E64, 0x00000000);
-OUT_CS_REG(0x4E68, 0x00000000);
-OUT_CS_REG(0x4E6C, 0x00000000);
-OUT_CS_REG(0x4E70, 0x00000000);
-OUT_CS_REG(0x4E88, 0x00000000);
-OUT_CS_REG(0x4EA0, 0x00000000);
-OUT_CS_REG(0x4EA4, 0xFFFFFFFF);
-OUT_CS_REG(0x4F00, 0x00000010);
-OUT_CS_REG(0x4F04, 0x00038038);
-OUT_CS_REG(0x4F08, 0x00FFFF00);
-OUT_CS_REG(0x4F10, 0x00000002);
-OUT_CS_REG(0x4F18, 0x00000003);
-OUT_CS_REG(0x4F1C, 0x00000000);
-OUT_CS_REG(0x4F28, 0x00000000);
-OUT_CS_REG(0x4F30, 0x00000000);
-OUT_CS_REG(0x4F34, 0x00000000);
-OUT_CS_REG(0x4F44, 0x00000000);
-OUT_CS_REG(0x4F54, 0x00000000);
-OUT_CS_REG(0x43E8, 0x00000000);
-R300_PACIFY;
-OUT_CS_REG(0x43E8, 0x00FFFFFF);
-OUT_CS_REG(0x2150, 0x21030003);
-OUT_CS_REG(0x4BC0, 0x00000000);
-OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xF688F688);
-OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
-OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
-OUT_CS_REG(0x20B0, 0x0000043F);
-OUT_CS_REG(0x20B4, 0x00000008);
-OUT_CS_REG(0x21DC, 0xAAAAAAAA);
-OUT_CS_REG(0x2090, 0x00000003);
-OUT_CS_REG(0x2094, 0x00000000);
-OUT_CS_REG(0x4104, 0x00000000);
-OUT_CS_REG(0x1D98, 0x3F800000);
-OUT_CS_REG(0x1D9C, 0x00000000);
-OUT_CS_REG(0x1DA0, 0x3F800000);
-OUT_CS_REG(0x1DA4, 0x00000000);
-OUT_CS_REG(0x1DA8, 0x3F800000);
-OUT_CS_REG(0x1DAC, 0x00000000);
-OUT_CS_REG(0x4BD4, 0x00000000);
-
-r300_emit_blend_state(r300, &blend_clear_state);
-
-OUT_CS_REG(0x221C, 0x0001C000);
-OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) |
-    ((w * 6) << R300_POINTSIZE_X_SHIFT));
-OUT_CS_REG(0x4310, 0x00D10000);
-OUT_CS_REG(0x4314, 0x00D10000);
-OUT_CS_REG(0x4318, 0x00D10000);
-OUT_CS_REG(0x431C, 0x00D10000);
-OUT_CS_REG(0x4320, 0x00D10000);
-OUT_CS_REG(0x4324, 0x00D10000);
-OUT_CS_REG(0x4328, 0x00D10000);
-OUT_CS_REG(0x432C, 0x00D10000);
-OUT_CS_REG(0x4300, 0x00040080);
-OUT_CS_REG(0x4304, 0x00000000);
-OUT_CS_REG(0x4330, 0x00004000);
-OUT_CS_REG(0x4600, 0x00000000);
-OUT_CS_REG(0x4604, 0x00000000);
-OUT_CS_REG(0x4608, 0x00000000);
-OUT_CS_REG(0x4610, 0x00000000);
-OUT_CS_REG(0x4614, 0x00000000);
-OUT_CS_REG(0x4618, 0x00000000);
-OUT_CS_REG(0x461C, 0x00400000);
-OUT_CS_REG(0x48C0, 0x00050A80);
-OUT_CS_REG(0x46C0, 0x1C000000);
-OUT_CS_REG(0x49C0, 0x00040889);
-OUT_CS_REG(0x47C0, 0x01000000);
-/* XXX these magic numbers should be explained when
- * this becomes a cached state object */
-OUT_CS_REG(R300_VAP_CNTL, 0xA | (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-    (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT));
-OUT_CS_REG(0x22D0, 0x00100000);
-OUT_CS_REG(0x22D4, 0x00000000);
-OUT_CS_REG(0x22D8, 0x00000001);
-OUT_CS_REG(0x43E8, 0x00000000);
-R300_PACIFY;
-OUT_CS_REG(0x43E8, 0x00FFFFFF);
-/* XXX translate these back into normal instructions */
-OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1);
-OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF00203);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10001);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248001);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF02203);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10021);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248021);
-OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0);
-
-r300_emit_dsa_state(r300, &dsa_clear_state);
-
-R300_PACIFY;
-OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
-OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-//OUT_CS_REG(0x4E38, 0x00C00100);
-OUT_CS_REG(0x4E0C, 0x0000000F);
-/* XXX Packet3 */
-OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
-OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-(1 << R300_PRIM_NUM_VERTICES_SHIFT));
-OUT_CS_32F(w / 2.0);
-OUT_CS_32F(h / 2.0);
-/* XXX this should be the depth value to clear to */
-OUT_CS_32F(1.0);
-OUT_CS_32F(1.0);
-OUT_CS_32F(r);
-OUT_CS_32F(g);
-OUT_CS_32F(b);
-OUT_CS_32F(1.0);
-
-OUT_CS_REG(0x4E4C, 0x0000000A);
-OUT_CS_REG(0x4F18, 0x00000003);
-R300_PACIFY;
-
-END_CS;
-FLUSH_CS;
-
-    r300->dirty_state = R300_NEW_KITCHEN_SINK;
+        " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
+        dest, x, y, w, h, pixpitch, color);
+
+    /* Fallback? */
+    if (FALSE) {
+        debug_printf("r300: Falling back on surface clear...");
+        util_surface_fill(pipe, dest, x, y, w, h, color);
+        return;
+    }
+
+    r300_surface_setup(r300, tex, x, y, w, h);
+
+    /* Vertex shader setup */
+    if (caps->has_tcl) {
+        r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader);
+    } else {
+        BEGIN_CS(4);
+        OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS);
+        OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
+                R300_PVS_NUM_CNTLRS(5) |
+                R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
+                R300_PVS_VF_MAX_VTX_NUM(12));
+        END_CS;
+    }
+
+    /* Fragment shader setup */
+    if (caps->is_r500) {
+        r500_emit_fragment_shader(r300, &r500_passthrough_fragment_shader);
+        r300_emit_rs_block_state(r300, &r500_rs_block_clear_state);
+    } else {
+        r300_emit_fragment_shader(r300, &r300_passthrough_fragment_shader);
+        r300_emit_rs_block_state(r300, &r300_rs_block_clear_state);
+    }
+
+    BEGIN_CS(24);
+
+    /* VAP stream control, mapping from input memory to PVS/RS memory */
+    if (caps->has_tcl) {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+    } else {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+    }
+    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
+            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
+
+    /* VAP format controls */
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
+            R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+            R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x0);
+
+    /* Disable textures */
+    OUT_CS_REG(R300_TX_ENABLE, 0x0);
+
+    /* The size of the point we're about to draw, in sixths of pixels */
+    OUT_CS_REG(R300_GA_POINT_SIZE,
+        ((h * 6)  & R300_POINTSIZE_Y_MASK) |
+        ((w * 6) << R300_POINTSIZE_X_SHIFT));
+
+    /* Packet3 with our point vertex */
+    OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8);
+    OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
+            (1 << R300_PRIM_NUM_VERTICES_SHIFT));
+    /* Position */
+    OUT_CS_32F(0.5);
+    OUT_CS_32F(0.5);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(1.0);
+    /* Color */
+    OUT_CS_32F(r);
+    OUT_CS_32F(g);
+    OUT_CS_32F(b);
+    OUT_CS_32F(a);
+
+    /* XXX figure out why this is 0xA and not 0x2 */
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+    /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */
+
+    END_CS;
+
+    r300->dirty_hw++;
+}
+
+/* Provides pipe_context's "surface_copy".
+ *
+ * Copies the w x h rectangle at (srcx, srcy) in src to (destx, desty) in dest
+ * by binding src's texture to sampler 0 and drawing one textured quad into
+ * dest. When src and dest are backed by the same texture the copy is handed
+ * to util_surface_copy instead and nothing is emitted to the hardware. */
+static void r300_surface_copy(struct pipe_context* pipe,
+                              struct pipe_surface* dest,
+                              unsigned destx, unsigned desty,
+                              struct pipe_surface* src,
+                              unsigned srcx, unsigned srcy,
+                              unsigned w, unsigned h)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
+    struct r300_texture* srctex = (struct r300_texture*)src->texture;
+    struct r300_texture* desttex = (struct r300_texture*)dest->texture;
+    unsigned pixpitch = srctex->stride / srctex->tex.block.size;
+    /* Quad corners, normalized to [0, 1] over each surface. The division is
+     * done in floating point; dividing the unsigned values first would
+     * truncate every coordinate to 0 or 1. Positions are relative to the
+     * destination surface, texcoords to the source surface. */
+    float dest_x0 = (float)destx / (float)dest->width;
+    float dest_y0 = (float)desty / (float)dest->height;
+    float dest_x1 = (float)(destx + w) / (float)dest->width;
+    float dest_y1 = (float)(desty + h) / (float)dest->height;
+    float src_x0 = (float)srcx / (float)src->width;
+    float src_y0 = (float)srcy / (float)src->height;
+    float src_x1 = (float)(srcx + w) / (float)src->width;
+    float src_y1 = (float)(srcy + h) / (float)src->height;
+    CS_LOCALS(r300);
+
+    debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
+        " dimensions %dx%d (pixel pitch %d)\n",
+        src, srcx, srcy, dest, destx, desty, w, h, pixpitch);
+
+    /* NOTE(review): for any non-zero w and h each "||" pair below is always
+     * true, so this test amounts to "source and destination are the same
+     * texture". That is the conservative choice and is left unchanged. */
+    if ((srctex == desttex) &&
+            ((destx < srcx + w) || (srcx < destx + w)) &&
+            ((desty < srcy + h) || (srcy < desty + h))) {
+        debug_printf("r300: Falling back on surface_copy\n");
+        util_surface_copy(pipe, FALSE, dest, destx, desty, src,
+                srcx, srcy, w, h);
+        /* The fallback has done the whole copy; do not also emit the
+         * hardware path on top of it. */
+        return;
+    }
+
+    /* Add our source texture to the BO list before emitting anything.
+     * r300_surface_setup will flush if needed for us. */
+    r300->winsys->add_buffer(r300->winsys, srctex->buffer,
+            RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+    /* The quad positions are fractions of the whole destination surface, so
+     * the viewport has to span that surface for them to land on
+     * (destx, desty); a viewport covering only the copied rectangle would
+     * scale and offset them a second time. */
+    r300_surface_setup(r300, desttex, 0, 0, dest->width, dest->height);
+
+    r300_emit_sampler(r300, &r300_sampler_copy_state, 0);
+    r300_emit_texture(r300, srctex, 0);
+    r300_flush_textures(r300);
+
+    /* Vertex shader setup */
+    if (caps->has_tcl) {
+        r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader);
+    } else {
+        BEGIN_CS(4);
+        OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS);
+        OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
+                R300_PVS_NUM_CNTLRS(5) |
+                R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
+                R300_PVS_VF_MAX_VTX_NUM(12));
+        END_CS;
+    }
+
+    /* Fragment shader setup */
+    if (caps->is_r500) {
+        r500_emit_fragment_shader(r300, &r500_texture_fragment_shader);
+        r300_emit_rs_block_state(r300, &r500_rs_block_copy_state);
+    } else {
+        r300_emit_fragment_shader(r300, &r300_texture_fragment_shader);
+        r300_emit_rs_block_state(r300, &r300_rs_block_copy_state);
+    }
+
+    BEGIN_CS(28);
+    /* VAP stream control, mapping from input memory to PVS/RS memory */
+    if (caps->has_tcl) {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
+    } else {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (6 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
+    }
+    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
+            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
+
+    /* VAP format controls */
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
+            R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
+    /* Two components of texture 0 */
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2);
+
+    /* Packet3 with our texcoords: four vertices, each a two-float position
+     * followed by a two-float texcoord. */
+    OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16);
+    OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING |
+            (4 << R300_PRIM_NUM_VERTICES_SHIFT));
+    /* (x    , y    ) */
+    OUT_CS_32F(dest_x0);
+    OUT_CS_32F(dest_y0);
+    OUT_CS_32F(src_x0);
+    OUT_CS_32F(src_y0);
+    /* (x    , y + h) */
+    OUT_CS_32F(dest_x0);
+    OUT_CS_32F(dest_y1);
+    OUT_CS_32F(src_x0);
+    OUT_CS_32F(src_y1);
+    /* (x + w, y + h) */
+    OUT_CS_32F(dest_x1);
+    OUT_CS_32F(dest_y1);
+    OUT_CS_32F(src_x1);
+    OUT_CS_32F(src_y1);
+    /* (x + w, y    ) */
+    OUT_CS_32F(dest_x1);
+    OUT_CS_32F(dest_y0);
+    OUT_CS_32F(src_x1);
+    OUT_CS_32F(src_y0);
+
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+
+    END_CS;
+
+    r300->dirty_hw++;
 }
 
+/* Installs this file's surface_fill and surface_copy implementations as the
+ * corresponding hooks on the r300 context's pipe_context. */
 void r300_init_surface_functions(struct r300_context* r300)
 {
     r300->context.surface_fill = r300_surface_fill;
+    r300->context.surface_copy = r300_surface_copy;
 }