An initial implementation made by Dave Airlie.
For the CBZB clear to be used, a color-only clear must be invoked and exactly
one point-sampled render target must be set. The render target must be
macrotiled (so that we can overcome alignment issues) and its bpp must be
either 16 or 32.
I can't see a difference in performance. :(
Conflicts:
src/gallium/drivers/r300/r300_blit.c
#include "r300_texture.h"
#include "util/u_format.h"
+#include "util/u_pack_color.h"
enum r300_blitter_op /* bitmask */
{
}
}
+/**
+ * Pack a clear color into the 32-bit value that the CBZB clear stores in
+ * zb_depthclearvalue.
+ *
+ * \param format  Format of the colorbuffer being cleared.
+ * \param rgba    Clear color, handed to util_pack_color() unchanged.
+ * \return The packed color as-is when the format's block size is 32 bits;
+ *         otherwise the short packed value replicated into the low and
+ *         high halves of the result.
+ */
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+                                          const float* rgba)
+{
+    union util_color uc;
+    uint32_t packed;
+
+    util_pack_color(rgba, format, &uc);
+
+    if (util_format_get_blocksizebits(format) == 32)
+        return uc.ui;
+
+    /* Widen to an unsigned 32-bit value before shifting. uc.us is
+     * presumably a 16-bit unsigned short (confirm against u_pack_color.h);
+     * shifting it directly promotes it to signed int, and a packed value
+     * with its top bit set would then be shifted into the sign bit, which
+     * is undefined behaviour. */
+    packed = uc.us;
+    return packed | (packed << 16);
+}
+
+/**
+ * Decide whether the requested clear can be done as a CBZB clear with the
+ * currently bound framebuffer state.
+ *
+ * \param r300           Context whose fb_state is inspected.
+ * \param clear_buffers  Mask of the buffers requested to be cleared.
+ * \return TRUE when every condition below holds, FALSE otherwise.
+ */
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+                                       unsigned clear_buffers)
+{
+    struct pipe_framebuffer_state *framebuffer =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct r300_surface *cbuf = r300_surface(framebuffer->cbufs[0]);
+
+    /* Nothing but a color clear qualifies... */
+    if (clear_buffers != PIPE_CLEAR_COLOR)
+        return FALSE;
+
+    /* ...and exactly one colorbuffer must be bound. */
+    if (framebuffer->nr_cbufs != 1)
+        return FALSE;
+
+    /* The colorbuffer must be point-sampled. */
+    if (cbuf->base.texture->nr_samples > 1)
+        return FALSE;
+
+    /* ZB can only work with 16-bit and 32-bit pixels. */
+    switch (util_format_get_blocksizebits(cbuf->base.format)) {
+    case 16:
+    case 32:
+        break;
+    default:
+        return FALSE;
+    }
+
+    /* If the midpoint ZB offset is not aligned to 2048, it returns garbage
+     * with certain texture sizes. Macrotiling ensures the alignment, so
+     * the cleared mip level has to be macrotiled. */
+    if (r300_texture(cbuf->base.texture)->mip_macrotile[cbuf->base.level])
+        return TRUE;
+
+    return FALSE;
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ uint32_t width = fb->width;
+ uint32_t height = fb->height;
+
+ /* Enable CBZB clear. */
+ if (r300_cbzb_clear_allowed(r300, buffers)) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ hyperz->zb_depthclearvalue =
+ r300_depth_clear_cb_value(surf->base.format, rgba);
+
+ width = surf->cbzb_width;
+ height = surf->cbzb_height;
+
+ r300->cbzb_clear = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
/* Clear. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
- fb->width,
- fb->height,
+ width,
+ height,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
+ /* Disable CBZB clear. */
+ if (r300->cbzb_clear) {
+ r300->cbzb_clear = FALSE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
if (r300->flush_counter == 0)
pipe->flush(pipe, 0, NULL);
BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
- OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
+ OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
END_CB;
}
}
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
+
+ /* Parameters dedicated to the CBZB clear. */
+ uint32_t cbzb_width; /* Aligned width. */
+ uint32_t cbzb_height; /* Half of the height. */
+ uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
+ uint32_t cbzb_pitch; /* DEPTHPITCH. */
+ uint32_t cbzb_format; /* ZB_FORMAT. */
};
struct r300_texture {
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
+ boolean cbzb_clear;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
/* r300_state.c */
enum r300_fb_state_change {
- R300_CHANGED_FB_STATE = 0
+ R300_CHANGED_FB_STATE = 0,
+ R300_CHANGED_CBZB_FLAG /* Passed to r300_mark_fb_state_dirty() when r300->cbzb_clear is switched on or off around a clear. */
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
#include "r300_emit.h"
#include "r300_fs.h"
#include "r300_screen.h"
+#include "r300_texture.h"
#include "r300_screen_buffer.h"
#include "r300_vs.h"
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height = fb->height;
+ uint32_t width = fb->width;
CS_LOCALS(r300);
+ if (r300->cbzb_clear) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ height = surf->cbzb_height;
+ width = surf->cbzb_width;
+ }
+
BEGIN_CS(size);
/* Set up scissors.
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
if (r300->screen->caps.is_r500) {
OUT_CS(0);
- OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
} else {
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
(1440 << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
}
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
}
+ /* Set up the ZB part of the CBZB clear. */
+ if (r300->cbzb_clear) {
+ surf = r300_surface(fb->cbufs[0]);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
+ }
/* Set up a zbuffer. */
- if (fb->zsbuf) {
+ else if (fb->zsbuf) {
surf = r300_surface(fb->zsbuf);
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
WRITE_CS_TABLE(state, size);
}
+/**
+ * Emit a copy of the current hyperz state with zb_bw_cntl and
+ * zb_depthclearvalue zeroed and sc_hyperz set to R300_SC_HYPERZ_ADJ_2.
+ *
+ * The overrides are applied to a local copy only; this function does not
+ * write to the state stored in the context.
+ *
+ * \param r300  Context whose hyperz state is copied and emitted.
+ */
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+    struct r300_hyperz_state *stored =
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+    struct r300_hyperz_state end_state = *stored;
+
+    end_state.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+    end_state.zb_depthclearvalue = 0;
+    end_state.zb_bw_cntl = 0;
+
+    r300_emit_hyperz_state(r300, r300->hyperz_state.size, &end_state);
+}
+
void r300_emit_fb_state_pipelined(struct r300_context *r300,
unsigned size, void *state)
{
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
+
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
-void r300_emit_hyperz_state(struct r300_context *r300,
- unsigned size, void *state);
-
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
}
if (r300->dirty_hw) {
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
r300->flush_counter++;
#include "r300_reg.h"
#include "r300_fs.h"
+/*****************************************************************************/
+/* The HyperZ setup */
+/*****************************************************************************/
+
+/**
+ * Recompute the zb_bw_cntl and sc_hyperz fields of the context's hyperz
+ * state.
+ *
+ * sc_hyperz is always R300_SC_HYPERZ_ADJ_2. zb_bw_cntl carries
+ * R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY while a CBZB clear is in
+ * progress (r300->cbzb_clear) and is zero otherwise.
+ *
+ * \param r300  Context whose hyperz state is updated in place.
+ */
+static void r300_update_hyperz(struct r300_context* r300)
+{
+    struct r300_hyperz_state *hyperz =
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+
+    hyperz->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+    hyperz->zb_bw_cntl = r300->cbzb_clear
+                             ? R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY
+                             : 0;
+}
+
/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300); /* Called unconditionally on every update. */
+ if (r300->hyperz_state.dirty) { /* Recompute the HyperZ fields only while the hyperz atom is flagged dirty. */
+ r300_update_hyperz(r300);
+ }
}
/* Emitted in flush. */
end_dwords += 26; /* emit_query_end */
+ end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
cs_dwords += end_dwords;
/* Now compute the fb_state atom size. */
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
- if (state->zsbuf)
+ if (r300->cbzb_clear)
+ r300->fb_state.size += 10;
+ else if (state->zsbuf)
r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
/* The size of the rest of atoms stays the same. */
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
if (surface) {
+ uint32_t stride, offset, tile_height;
+
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.format = texture->format;
surface->offset = r300_texture_get_offset(tex, level, zslice, face);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
+
+ /* Parameters for the CBZB clear. */
+ surface->cbzb_width = align(surface->base.width, 64);
+
+ /* Height must be aligned to the size of a tile. */
+ tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+ DIM_HEIGHT);
+ surface->cbzb_height = align((surface->base.height + 1) / 2,
+ tile_height);
+
+ /* Offset must be aligned to 2K and must point at the beginning
+ * of a scanline. */
+ stride = r300_texture_get_stride(r300_screen(screen), tex, level);
+ offset = surface->offset + stride * surface->cbzb_height;
+ surface->cbzb_midpoint_offset = offset & ~2047;
+
+ surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+ if (util_format_get_blocksizebits(surface->base.format) == 32)
+ surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ else
+ surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+ SCREEN_DBG(r300_screen(screen), DBG_TEX,
+ "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->mip_macrotile[level] ? "YES" : " NO");
}
return &surface->base;