r300g: implement fast color clear
author Marek Olšák <maraeo@gmail.com>
Mon, 12 Jul 2010 11:23:24 +0000 (13:23 +0200)
committer Marek Olšák <maraeo@gmail.com>
Mon, 12 Jul 2010 11:26:00 +0000 (13:26 +0200)
An initial implementation made by Dave Airlie.

For it to be used, a color-only clear must be invoked and exactly one
point-sampled render target must be set. The render target must be
macrotiled (for us to overcome alignment issues) and bpp must be either
16 or 32.

I can't see a difference in performance. :(

Conflicts:

src/gallium/drivers/r300/r300_blit.c

src/gallium/drivers/r300/r300_blit.c
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_emit.h
src/gallium/drivers/r300/r300_flush.c
src/gallium/drivers/r300/r300_hyperz.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_texture.c

index 3cc054788bc3b0df42e06a4c4d5d374803d7b648..895efaa1c4ac146948f86f24e536979ed60a9c51 100644 (file)
@@ -24,6 +24,7 @@
 #include "r300_texture.h"
 
 #include "util/u_format.h"
+#include "util/u_pack_color.h"
 
 enum r300_blitter_op /* bitmask */
 {
@@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300)
     }
 }
 
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+                                         const float* rgba)
+{
+    union util_color uc;
+    util_pack_color(rgba, format, &uc); /* clear color packed per 'format' */
+
+    if (util_format_get_blocksizebits(format) == 32)
+        return uc.ui;
+    else /* not 32 bpp: duplicate uc.us into the upper 16 bits as well */
+        return uc.us | ((uint32_t)uc.us << 16); /* unsigned shift; presumably uc.us promotes to signed int otherwise */
+}
+
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+                                       unsigned clear_buffers)
+{
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct r300_surface *surf = r300_surface(fb->cbufs[0]); /* only dereferenced after the nr_cbufs check */
+    unsigned bpp;
+
+    /* CBZB clear needs a color-only clear and exactly one colorbuffer. */
+    if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
+        return FALSE;
+
+    /* The colorbuffer must be point-sampled (at most one sample). */
+    if (surf->base.texture->nr_samples > 1)
+        return FALSE;
+
+    bpp = util_format_get_blocksizebits(surf->base.format);
+
+    /* ZB can only work with two pixel sizes: 16 and 32 bits. */
+    if (bpp != 16 && bpp != 32)
+        return FALSE;
+
+    /* If the midpoint ZB offset is not aligned to 2048, it returns garbage
+     * with certain texture sizes. Macrotiling ensures the alignment. */
+    if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level])
+        return FALSE;
+
+    return TRUE; /* every CBZB precondition above holds */
+}
+
 /* Clear currently bound buffers. */
 static void r300_clear(struct pipe_context* pipe,
                        unsigned buffers,
@@ -124,16 +167,40 @@ static void r300_clear(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct r300_hyperz_state *hyperz =
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+    uint32_t width = fb->width;
+    uint32_t height = fb->height;
+
+    /* Enable CBZB clear. */
+    if (r300_cbzb_clear_allowed(r300, buffers)) {
+        struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+        hyperz->zb_depthclearvalue =
+                r300_depth_clear_cb_value(surf->base.format, rgba);
+
+        width = surf->cbzb_width;
+        height = surf->cbzb_height;
+
+        r300->cbzb_clear = TRUE;
+        r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+    }
 
     /* Clear. */
     r300_blitter_begin(r300, R300_CLEAR);
     util_blitter_clear(r300->blitter,
-                       fb->width,
-                       fb->height,
+                       width,
+                       height,
                        fb->nr_cbufs,
                        buffers, rgba, depth, stencil);
     r300_blitter_end(r300);
 
+    /* Disable CBZB clear. */
+    if (r300->cbzb_clear) {
+        r300->cbzb_clear = FALSE;
+        r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+    }
+
     /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
     if (r300->flush_counter == 0)
         pipe->flush(pipe, 0, NULL);
index cce76cb1dfe5b2475d3e2c702e700846063bfe82..1beab7628a03fd7fd1c3c56632b577369061b8db 100644 (file)
@@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe)
         BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
         OUT_CB_REG(R300_ZB_BW_CNTL, 0);
         OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
-        OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
+        OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
         END_CB;
     }
 }
index 2483af7fb5e315b2ae75cdc943cec9f98054b208..df4299b7ea3380412b23e6e228931d45cf52ff94 100644 (file)
@@ -311,6 +311,13 @@ struct r300_surface {
     uint32_t offset;    /* COLOROFFSET or DEPTHOFFSET. */
     uint32_t pitch;     /* COLORPITCH or DEPTHPITCH. */
     uint32_t format;    /* US_OUT_FMT or ZB_FORMAT. */
+
+    /* Parameters dedicated to the CBZB clear. */
+    uint32_t cbzb_width;            /* Aligned width. */
+    uint32_t cbzb_height;           /* Half of the height. */
+    uint32_t cbzb_midpoint_offset;  /* DEPTHOFFSET. */
+    uint32_t cbzb_pitch;            /* DEPTHPITCH. */
+    uint32_t cbzb_format;           /* ZB_FORMAT. */
 };
 
 struct r300_texture {
@@ -525,6 +532,7 @@ struct r300_context {
     /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
     boolean incompatible_vb_layout;
 
+    boolean cbzb_clear;
     /* upload managers */
     struct u_upload_mgr *upload_vb;
     struct u_upload_mgr *upload_ib;
@@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
 
 /* r300_state.c */
 enum r300_fb_state_change {
-    R300_CHANGED_FB_STATE = 0
+    R300_CHANGED_FB_STATE = 0,
+    R300_CHANGED_CBZB_FLAG
 };
 
 void r300_mark_fb_state_dirty(struct r300_context *r300,
index 5ce3eb63c56393079fed12ba1f606d4214d814ae..e1cb2bf5012178a77021ee63d38759671f757a08 100644 (file)
@@ -32,6 +32,7 @@
 #include "r300_emit.h"
 #include "r300_fs.h"
 #include "r300_screen.h"
+#include "r300_texture.h"
 #include "r300_screen_buffer.h"
 #include "r300_vs.h"
 
@@ -272,8 +273,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
     struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
     struct pipe_framebuffer_state* fb =
             (struct pipe_framebuffer_state*)r300->fb_state.state;
+    uint32_t height = fb->height;
+    uint32_t width = fb->width;
     CS_LOCALS(r300);
 
+    if (r300->cbzb_clear) {
+        struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+        height = surf->cbzb_height;
+        width = surf->cbzb_width;
+    }
+
     BEGIN_CS(size);
 
     /* Set up scissors.
@@ -281,13 +291,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
     OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
     if (r300->screen->caps.is_r500) {
         OUT_CS(0);
-        OUT_CS(((fb->width  - 1) << R300_SCISSORS_X_SHIFT) |
-               ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+        OUT_CS(((width  - 1) << R300_SCISSORS_X_SHIFT) |
+               ((height - 1) << R300_SCISSORS_Y_SHIFT));
     } else {
         OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
                (1440 << R300_SCISSORS_Y_SHIFT));
-        OUT_CS(((fb->width  + 1440-1) << R300_SCISSORS_X_SHIFT) |
-               ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+        OUT_CS(((width  + 1440-1) << R300_SCISSORS_X_SHIFT) |
+               ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
     }
 
     /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
@@ -344,8 +354,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
         OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
     }
 
+    /* Set up the ZB part of the CBZB clear. */
+    if (r300->cbzb_clear) {
+        surf = r300_surface(fb->cbufs[0]);
+
+        OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
+
+        OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+        OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
+
+        OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+        OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
+    }
     /* Set up a zbuffer. */
-    if (fb->zsbuf) {
+    else if (fb->zsbuf) {
         surf = r300_surface(fb->zsbuf);
 
         OUT_CS_REG(R300_ZB_FORMAT, surf->format);
@@ -377,6 +399,18 @@ void r300_emit_hyperz_state(struct r300_context *r300,
     WRITE_CS_TABLE(state, size);
 }
 
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+    struct r300_hyperz_state z = /* local copy; the stored hyperz state is not modified */
+            *(struct r300_hyperz_state*)r300->hyperz_state.state;
+
+    z.zb_bw_cntl = 0; /* clears any bits set for a CBZB clear */
+    z.zb_depthclearvalue = 0;
+    z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+    r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); /* emit the reset copy with the regular hyperz emitter */
+}
+
 void r300_emit_fb_state_pipelined(struct r300_context *r300,
                                   unsigned size, void *state)
 {
index 586ccda620b9ac34a1bc2034608fd2f60e484774..5d05039669ffad85bab221c7574ec9859489aa86 100644 (file)
@@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
 void r300_emit_dsa_state(struct r300_context* r300,
                          unsigned size, void* state);
 
+void r300_emit_hyperz_state(struct r300_context *r300,
+                            unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
+
 void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
 
 void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
 
 void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
 
-void r300_emit_hyperz_state(struct r300_context *r300,
-                            unsigned size, void *state);
-
 void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
 
 void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
index 2ebf1c814b4ec61872eb61831fd8944571bb7c8a..6f31ba159aca017ca51ca7dd43bf015ac82b42bf 100644 (file)
@@ -48,6 +48,7 @@ static void r300_flush(struct pipe_context* pipe,
     }
 
     if (r300->dirty_hw) {
+        r300_emit_hyperz_end(r300);
         r300_emit_query_end(r300);
 
         r300->flush_counter++;
index 2c4e6c7211061c1fe8cd0aea995e1b2601d7d2e2..e9528956019fada19635d65acae1fea681d07d79 100644 (file)
 #include "r300_reg.h"
 #include "r300_fs.h"
 
+/*****************************************************************************/
+/* The HyperZ setup                                                          */
+/*****************************************************************************/
+
+static void r300_update_hyperz(struct r300_context* r300)
+{
+    struct r300_hyperz_state *z = /* updated in place; zb_depthclearvalue is left as is */
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+
+    z->zb_bw_cntl = 0; /* start from no ZB_BW_CNTL bits set */
+    z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+    if (r300->cbzb_clear) /* flag is set by r300_clear around the CBZB clear */
+        z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
+}
+
 /*****************************************************************************/
 /* The ZTOP state                                                            */
 /*****************************************************************************/
@@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
 void r300_update_hyperz_state(struct r300_context* r300)
 {
     r300_update_ztop(r300);
+    if (r300->hyperz_state.dirty) {
+        r300_update_hyperz(r300);
+    }
 }
index adb02b4e639c2e14caaebacac15ce0ee5fd9660d..1e0369b37749b138c0fa3a8e846d66d8c8946fa8 100644 (file)
@@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
 
     /* Emitted in flush. */
     end_dwords += 26; /* emit_query_end */
+    end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
 
     cs_dwords += end_dwords;
 
index b0722cb95f694a51e13088673a44709ba297fc9b..f4c6a262d4a4549f30dd91f253f1570292d82964 100644 (file)
@@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
     /* Now compute the fb_state atom size. */
     r300->fb_state.size = 2 + (8 * state->nr_cbufs);
 
-    if (state->zsbuf)
+    if (r300->cbzb_clear)
+        r300->fb_state.size += 10;
+    else if (state->zsbuf)
         r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
 
     /* The size of the rest of atoms stays the same. */
index d378a7150d6bc4efd3b227059b6fc3b6e36a9132..e8b1d67007094757fb49889787abdeb6a1eb1c69 100644 (file)
@@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
     struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
 
     if (surface) {
+        uint32_t stride, offset, tile_height;
+
         pipe_reference_init(&surface->base.reference, 1);
         pipe_resource_reference(&surface->base.texture, texture);
         surface->base.format = texture->format;
@@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
         surface->offset = r300_texture_get_offset(tex, level, zslice, face);
         surface->pitch = tex->fb_state.pitch[level];
         surface->format = tex->fb_state.format;
+
+        /* Parameters for the CBZB clear. */
+        surface->cbzb_width = align(surface->base.width, 64);
+
+        /* Height must be aligned to the size of a tile. */
+        tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+                                               DIM_HEIGHT);
+        surface->cbzb_height = align((surface->base.height + 1) / 2,
+                                     tile_height);
+
+        /* Offset must be aligned to 2K and must point at the beginning
+         * of a scanline. */
+        stride = r300_texture_get_stride(r300_screen(screen), tex, level);
+        offset = surface->offset + stride * surface->cbzb_height;
+        surface->cbzb_midpoint_offset = offset & ~2047;
+
+        surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+        if (util_format_get_blocksizebits(surface->base.format) == 32)
+            surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+        else
+            surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+        SCREEN_DBG(r300_screen(screen), DBG_TEX,
+                   "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
+                   surface->cbzb_width, surface->cbzb_height,
+                   offset & 2047,
+                   tex->mip_macrotile[level] ? "YES" : " NO");
     }
 
     return &surface->base;