From fdd37af3f76ea3ac32f21e9a9c41979a9b33cc5c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 7 May 2011 19:55:45 +0200 Subject: [PATCH] r300g: dynamically ask for and release Hyper-Z access We ask for Hyper-Z access when clearing a zbuffer. We release it if no zbuffer clear has been done for 2 seconds. --- src/gallium/drivers/r300/r300_blit.c | 50 ++++++--- src/gallium/drivers/r300/r300_context.c | 26 ++--- src/gallium/drivers/r300/r300_context.h | 27 ++--- src/gallium/drivers/r300/r300_emit.c | 3 +- src/gallium/drivers/r300/r300_flush.c | 82 +++++++++----- src/gallium/drivers/r300/r300_hyperz.c | 3 +- src/gallium/drivers/r300/r300_state.c | 3 +- .../winsys/radeon/drm/radeon_drm_winsys.c | 100 ++++++++++++++---- .../winsys/radeon/drm/radeon_drm_winsys.h | 10 +- src/gallium/winsys/radeon/drm/radeon_winsys.h | 16 ++- 10 files changed, 225 insertions(+), 95 deletions(-) diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 8ec6479bb9e..4ec77df8fb7 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -206,23 +206,47 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; - boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. 
*/ - if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - if (r300_fast_zclear_allowed(r300)) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - - r300_mark_atom_dirty(r300, &r300->zmask_clear); - buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; - } - - if (r300_hiz_clear_allowed(r300)) { - r300->hiz_clear_value = r300_hiz_clear_value(depth); - r300_mark_atom_dirty(r300, &r300->hiz_clear); + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + boolean zmask_clear, hiz_clear; + + zmask_clear = r300_fast_zclear_allowed(r300); + hiz_clear = r300_hiz_clear_allowed(r300); + + /* If we need Hyper-Z. */ + if (zmask_clear || hiz_clear) { + r300->num_z_clears++; + + /* Try to obtain the access to Hyper-Z buffers if we don't have one. */ + if (!r300->hyperz_enabled) { + r300->hyperz_enabled = + r300->rws->cs_request_feature(r300->cs, + RADEON_FID_HYPERZ_RAM_ACCESS, + TRUE); + if (r300->hyperz_enabled) { + /* Need to emit HyperZ buffer regs for the first time. */ + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } + } + + /* Setup Hyper-Z clears. 
*/ + if (r300->hyperz_enabled) { + if (zmask_clear) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_atom_dirty(r300, &r300->zmask_clear); + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + } + + if (hiz_clear) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); + r300_mark_atom_dirty(r300, &r300->hiz_clear); + } + } } } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 15d1278c3bb..0554c40eef0 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -26,6 +26,7 @@ #include "util/u_sampler.h" #include "util/u_simple_list.h" #include "util/u_upload_mgr.h" +#include "os/os_time.h" #include "r300_cb.h" #include "r300_context.h" @@ -95,6 +96,10 @@ static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); + if (r300->cs && r300->hyperz_enabled) { + r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE); + } + if (r300->blitter) util_blitter_destroy(r300->blitter); if (r300->draw) @@ -167,8 +172,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0); - boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); - boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -219,13 +222,10 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); - if (can_hyperz) { - /* HiZ Clear */ - if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 4); - /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 4); - } + /* HiZ Clear */ + R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 
4 : 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 4 : 0); /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -503,6 +503,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, &dsa); } + r300->hyperz_time_of_last_flush = os_time_get(); + /* Print driver info. */ #ifdef DEBUG { @@ -512,7 +514,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, fprintf(stderr, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" "r300: GART size: %d MB, VRAM size: %d MB\n" - "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", + "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n", rws->get_value(rws, RADEON_VID_DRM_MAJOR), rws->get_value(rws, RADEON_VID_DRM_MINOR), rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL), @@ -522,10 +524,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->get_value(rws, RADEON_VID_R300_Z_PIPES), rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20, rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20, - rws->get_value(rws, RADEON_VID_CAN_AACOMPRESS) ? "YES" : "NO", - rws->get_value(rws, RADEON_VID_CAN_HYPERZ) && + "YES", /* XXX really? */ r300->screen->caps.zmask_ram ? "YES" : "NO", - rws->get_value(rws, RADEON_VID_CAN_HYPERZ) && r300->screen->caps.hiz_ram ? "YES" : "NO"); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 0704021c319..139dd210b8f 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -592,18 +592,6 @@ struct r300_context { boolean frag_clamp; /* Whether fast color clear is enabled. */ boolean cbzb_clear; - /* Whether ZMASK is enabled. */ - boolean zmask_in_use; - /* Whether ZMASK is being decompressed. */ - boolean zmask_decompress; - /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ - struct pipe_surface *locked_zbuffer; - /* Whether HIZ is enabled. 
*/ - boolean hiz_in_use; - /* HiZ function. Can be either MIN or MAX. */ - enum r300_hiz_func hiz_func; - /* HiZ clear value. */ - uint32_t hiz_clear_value; /* Whether fragment shader needs to be validated. */ enum r300_fs_validity_status fs_status; /* Framebuffer multi-write. */ @@ -627,6 +615,21 @@ struct r300_context { int vertex_arrays_offset; int vertex_arrays_instance_id; boolean instancing_enabled; + + /* Hyper-Z stats. */ + boolean hyperz_enabled; /* Whether it owns Hyper-Z access. */ + int64_t hyperz_time_of_last_flush; /* Time of the last flush with Z clear. */ + unsigned num_z_clears; /* Since the last flush. */ + + /* ZMask state. */ + boolean zmask_in_use; /* Whether ZMASK is enabled. */ + boolean zmask_decompress; /* Whether ZMASK is being decompressed. */ + struct pipe_surface *locked_zbuffer; /* Unbound zbuffer which still has data in ZMASK. */ + + /* HiZ state. */ + boolean hiz_in_use; /* Whether HIZ is enabled. */ + enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */ + uint32_t hiz_clear_value; /* HiZ clear value. */ }; #define foreach_atom(r300, atom) \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 62435c5e2e2..874037ed9fd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -375,7 +375,6 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; - boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); uint32_t rb3d_cctl = 0; CS_LOCALS(r300); @@ -432,7 +431,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); OUT_CS_RELOC(surf); - if (can_hyperz) { + if (r300->hyperz_enabled) { /* HiZ RAM. 
*/ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index de7d77d608b..34f5419a864 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -27,17 +27,46 @@ #include "util/u_simple_list.h" #include "util/u_upload_mgr.h" +#include "os/os_time.h" + #include "r300_context.h" #include "r300_cs.h" #include "r300_emit.h" +static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags) +{ + struct r300_atom *atom; + + r300_emit_hyperz_end(r300); + r300_emit_query_end(r300); + if (r300->screen->caps.is_r500) + r500_emit_index_bias(r300, 0); + + r300->flush_counter++; + r300->rws->cs_flush(r300->cs, flags); + r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach_atom(r300, atom) { + if (atom->state || atom->allow_null_state) { + r300_mark_atom_dirty(r300, atom); + } + } + r300->vertex_arrays_dirty = TRUE; + + /* Unmark HWTCL state for SWTCL. */ + if (!r300->screen->caps.has_tcl) { + r300->vs_state.dirty = FALSE; + r300->vs_constants.dirty = FALSE; + } +} + void r300_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct r300_context *r300 = r300_context(pipe); - struct r300_atom *atom; struct pb_buffer **rfence = (struct pb_buffer**)fence; if (r300->draw && !r300->draw_vbo_locked) @@ -56,32 +85,11 @@ void r300_flush(struct pipe_context *pipe, } if (r300->dirty_hw) { - r300_emit_hyperz_end(r300); - r300_emit_query_end(r300); - if (r300->screen->caps.is_r500) - r500_emit_index_bias(r300, 0); - - r300->flush_counter++; - r300->rws->cs_flush(r300->cs, flags); - r300->dirty_hw = 0; - - /* New kitchen sink, baby. */ - foreach_atom(r300, atom) { - if (atom->state || atom->allow_null_state) { - r300_mark_atom_dirty(r300, atom); - } - } - r300->vertex_arrays_dirty = TRUE; - - /* Unmark HWTCL state for SWTCL. 
*/ - if (!r300->screen->caps.has_tcl) { - r300->vs_state.dirty = FALSE; - r300->vs_constants.dirty = FALSE; - } + r300_flush_and_cleanup(r300, flags); } else { if (rfence) { /* We have to create a fence object, but the command stream is empty - * and we cannot emit an empty CS. We must write some regs then. */ + * and we cannot emit an empty CS. Let's write to some reg. */ CS_LOCALS(r300); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); r300->rws->cs_flush(r300->cs, flags); @@ -91,6 +99,38 @@ void r300_flush(struct pipe_context *pipe, r300->rws->cs_flush(r300->cs, flags); } } + + /* Update Hyper-Z status. */ + if (r300->num_z_clears) { + r300->hyperz_time_of_last_flush = os_time_get(); + } else if (r300->hyperz_enabled && + os_time_get() - r300->hyperz_time_of_last_flush > 2000000) { + /* 2 seconds (os_time_get() counts microseconds) without a Z clear + * pretty much means a dead context for HyperZ. Only a context that + * owns the access has anything to release. */ + + r300->hiz_in_use = FALSE; + + /* Decompress Z buffer. */ + if (r300->zmask_in_use) { + if (r300->locked_zbuffer) { + r300_decompress_zmask_locked(r300); + } else { + r300_decompress_zmask(r300); + } + + r300_flush_and_cleanup(r300, flags); + } + + /* Release HyperZ. */ + r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, + FALSE); + r300->hyperz_enabled = FALSE; + + /* The HyperZ buffer regs must not be emitted anymore. */ + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } + r300->num_z_clears = 0; } static void r300_flush_wrapped(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index b68a346b37d..544aa1f066c 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -151,8 +151,7 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex || - !r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ)) + if (!zstex || !r300->hyperz_enabled) return; /* Zbuffer compression. 
*/ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index fc7dcb1960f..bc6c67dd034 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -768,7 +768,6 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ); r300_mark_atom_dirty(r300, &r300->gpu_flush); r300_mark_atom_dirty(r300, &r300->fb_state); @@ -797,7 +796,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, r300->fb_state.size += 10; else if (state->zsbuf) { r300->fb_state.size += 10; - if (can_hyperz) + if (r300->hyperz_enabled) r300->fb_state.size += 8; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 37f6d18689d..3ac57d25b5e 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -48,22 +48,59 @@ #define RADEON_INFO_WANT_CMASK 8 #endif -/* Enable/disable feature access. Return TRUE on success. */ -static boolean radeon_set_fd_access(int fd, unsigned request, boolean enable) +/* Enable/disable feature access for one command stream. + * If enable == TRUE, return TRUE on success. + * Otherwise, return FALSE. + * + * We basically do the same thing kernel does, because we have to deal + * with multiple contexts (here command streams) backed by one winsys. */ +static boolean radeon_set_fd_access(struct radeon_drm_cs *applier, + struct radeon_drm_cs **owner, + pipe_mutex *mutex, + unsigned request, boolean enable) { struct drm_radeon_info info = {0}; unsigned value = enable ? 1 : 0; + pipe_mutex_lock(*mutex); + + /* Early exit if we are sure the request will fail. 
*/ + if (enable) { + if (*owner) { + pipe_mutex_unlock(*mutex); + return FALSE; + } + } else { + if (*owner != applier) { + pipe_mutex_unlock(*mutex); + return FALSE; + } + } + + /* Pass through the request to the kernel. */ info.value = (unsigned long)&value; info.request = request; - - if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) + if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO, + &info, sizeof(info)) != 0) { + pipe_mutex_unlock(*mutex); return FALSE; + } - if (enable && !value) - return FALSE; + /* Update the rights in the winsys. */ + if (enable) { + if (value) { + *owner = applier; + fprintf(stderr, "radeon: Acquired Hyper-Z.\n"); + pipe_mutex_unlock(*mutex); + return TRUE; + } + } else { + *owner = NULL; + fprintf(stderr, "radeon: Released Hyper-Z.\n"); + } - return TRUE; + pipe_mutex_unlock(*mutex); + return FALSE; } /* Helper function to do the ioctls needed for setup and init. */ @@ -138,16 +175,6 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) } winsys->z_pipes = target; - if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { - winsys->hyperz = radeon_set_fd_access(winsys->fd, - RADEON_INFO_WANT_HYPERZ, TRUE); - } - - if (debug_get_bool_option("RADEON_CMASK", FALSE)) { - winsys->aacompress = radeon_set_fd_access(winsys->fd, - RADEON_INFO_WANT_CMASK, TRUE); - } - retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { @@ -167,6 +194,9 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + pipe_mutex_destroy(ws->hyperz_owner_mutex); + pipe_mutex_destroy(ws->cmask_owner_mutex); + ws->cman->destroy(ws->cman); ws->kman->destroy(ws->kman); FREE(rws); @@ -198,14 +228,38 @@ static uint32_t radeon_get_value(struct radeon_winsys *rws, return ws->drm_major*100 + ws->drm_minor >= 206; case RADEON_VID_DRM_2_8_0: return ws->drm_major*100 + ws->drm_minor >= 208; - case RADEON_VID_CAN_HYPERZ: - return 
ws->hyperz; - case RADEON_VID_CAN_AACOMPRESS: - return ws->aacompress; } return 0; } +static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, + enum radeon_feature_id fid, + boolean enable) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + switch (fid) { + case RADEON_FID_HYPERZ_RAM_ACCESS: + if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { + return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, + &cs->ws->hyperz_owner_mutex, + RADEON_INFO_WANT_HYPERZ, enable); + } else { + return FALSE; + } + + case RADEON_FID_CMASK_RAM_ACCESS: + if (debug_get_bool_option("RADEON_CMASK", FALSE)) { + return radeon_set_fd_access(cs, &cs->ws->cmask_owner, + &cs->ws->cmask_owner_mutex, + RADEON_INFO_WANT_CMASK, enable); + } else { + return FALSE; + } + } + return FALSE; +} + struct radeon_winsys *radeon_drm_winsys_create(int fd) { struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys); @@ -231,10 +285,14 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) /* Set functions. 
*/ ws->base.destroy = radeon_winsys_destroy; ws->base.get_value = radeon_get_value; + ws->base.cs_request_feature = radeon_cs_request_feature; radeon_bomgr_init_functions(ws); radeon_drm_cs_init_functions(ws); + pipe_mutex_init(ws->hyperz_owner_mutex); + pipe_mutex_init(ws->cmask_owner_mutex); + return &ws->base; fail: diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index e1b9493fc10..d5186bc4d17 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -32,6 +32,8 @@ #include "radeon_winsys.h" +#include "os/os_thread.h" + struct radeon_drm_winsys { struct radeon_winsys base; @@ -52,10 +54,10 @@ struct radeon_drm_winsys { unsigned drm_minor; unsigned drm_patchlevel; - /* Hyper-Z user */ - boolean hyperz; - /* AA compression (CMask) */ - boolean aacompress; + struct radeon_drm_cs *hyperz_owner; + pipe_mutex hyperz_owner_mutex; + struct radeon_drm_cs *cmask_owner; + pipe_mutex cmask_owner_mutex; }; static INLINE struct radeon_drm_winsys * diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index ca0e6624138..3a64e4abc35 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -87,9 +87,11 @@ enum radeon_value_id { * - TBD */ RADEON_VID_DRM_2_8_0, +}; - RADEON_VID_CAN_HYPERZ, /* ZMask + HiZ */ - RADEON_VID_CAN_AACOMPRESS, /* CMask */ +enum radeon_feature_id { + RADEON_FID_HYPERZ_RAM_ACCESS, /* ZMask + HiZ */ + RADEON_FID_CMASK_RAM_ACCESS, }; struct radeon_winsys { @@ -314,6 +316,18 @@ struct radeon_winsys { */ boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs, struct radeon_winsys_cs_handle *buf); + + /** + * Request or release access to a feature for a command stream. + * + * \param cs A command stream. + * \param fid The feature, see enum radeon_feature_id. + * \param enable TRUE to request access, FALSE to release it. + * \return TRUE if access was requested and granted, FALSE otherwise. 
+ */ + boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, + enum radeon_feature_id fid, + boolean enable); }; #endif -- 2.30.2