From 12498553462c7807034814cf843d86d52c407380 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 15 Nov 2011 18:20:31 -0800 Subject: [PATCH] i965/gen6: Manipulate state batches for HiZ meta-ops [v4] A lot of the state manipulation is handled by the meta-op state setup. However, some batches need manual intervention. v2: Do not special-case the 3DSTATE_DEPTH_STENCIL.Depth_Test_Enable bit for HiZ in gen6_upload_depth_stencil(). The HiZ meta-op sets ctx->Depth.Test, just read the value from that. v3: Add a new dirty flag, BRW_STATE_HIZ, for brw_tracked_state. Flag it immediately before and after executing the HiZ operation in gen6_resolve_slice(). Add the flag to the dirty bits for the following state packets: gen6_clip_state gen6_depth_stencil_state gen6_sf_state gen6_wm_state v4: - Add BRW_NEW_HIZ to the dirty bit table in brw_state_upload.c. This is needed for INTEL_DEBUG=state. - Align brw dirty bit for gen6_depth_stencil_state. Reviewed-by: Eric Anholt Signed-off-by: Chad Versace --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_draw.c | 9 ++++++++- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen6_clip_state.c | 20 ++++++++++++++++++- src/mesa/drivers/dri/i965/gen6_depthstencil.c | 11 +++++++--- src/mesa/drivers/dri/i965/gen6_hiz.c | 2 ++ src/mesa/drivers/dri/i965/gen6_sf_state.c | 18 ++++++++++++++--- src/mesa/drivers/dri/i965/gen6_wm_state.c | 20 ++++++++++++++++++- 8 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index fa2fc72e435..ec05fb75869 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -144,6 +144,7 @@ enum brw_state_id { BRW_STATE_VS_CONSTBUF, BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, + BRW_STATE_HIZ, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -172,6 +173,7 @@ enum brw_state_id { #define 
BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) +#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ) struct brw_state_flags { /** State update flags signalled by mesa internals */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 1571fb74d6c..d2ae0877e8d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -117,10 +117,17 @@ static void brw_set_prim(struct brw_context *brw, static void gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) { - uint32_t hw_prim = prim_to_hw_prim[prim->mode]; + uint32_t hw_prim; DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); + if (brw->hiz.op) { + assert(prim->mode == GL_TRIANGLES); + hw_prim = _3DPRIM_RECTLIST; + } else { + hw_prim = prim_to_hw_prim[prim->mode]; + } + if (hw_prim != brw->primitive) { brw->primitive = hw_prim; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 730a0e8a12b..bd32815d08c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -368,6 +368,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), + DEFINE_BIT(BRW_NEW_HIZ), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index b3bb8aee3ec..d2a5f755726 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -67,6 +67,23 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } + if (brw->hiz.op) { + /* HiZ operations emit a rectangle primitive, which requires clipping to + * be disabled. 
From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 3D Primitives Overview: + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + return; + } + if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -107,7 +124,8 @@ const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT, .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM), + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_HIZ), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 72e86879b2b..eec1bf69015 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -77,8 +77,12 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) } /* _NEW_DEPTH */ - if (ctx->Depth.Test) { - ds->ds2.depth_test_enable = 1; + if (ctx->Depth.Test || brw->hiz.op) { + assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test); + assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test); + assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test); + + ds->ds2.depth_test_enable = ctx->Depth.Test; ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func); ds->ds2.depth_write_enable = ctx->Depth.Mask; } @@ -89,7 +93,8 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) const struct brw_tracked_state gen6_depth_stencil_state = { .dirty = { .mesa = _NEW_DEPTH | _NEW_STENCIL, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_HIZ), .cache = 0, }, .emit = gen6_upload_depth_stencil_state, diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c index a71c64c3f72..e28251182c9 100644 --- 
a/src/mesa/drivers/dri/i965/gen6_hiz.c +++ b/src/mesa/drivers/dri/i965/gen6_hiz.c @@ -301,7 +301,9 @@ gen6_resolve_slice(struct intel_context *intel, /* Execute the HiZ operation. */ brw->hiz.op = op; + brw->state.dirty.brw |= BRW_NEW_HIZ; _mesa_DrawArrays(GL_TRIANGLES, 0, 3); + brw->state.dirty.brw |= BRW_NEW_HIZ; brw->hiz.op = BRW_HIZ_OP_NONE; /* Restore state. diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 67119d8d182..4c4ff308054 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -147,8 +147,19 @@ upload_sf_state(struct brw_context *brw) num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | - GEN6_SF_STATISTICS_ENABLE; + + dw2 = GEN6_SF_STATISTICS_ENABLE; + + /* Enable viewport transform only if no HiZ operation is in progress. + * + * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen-space coordinates). 
+ */ + if (!brw->hiz.op) + dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw3 = 0; dw4 = 0; dw16 = 0; @@ -334,7 +345,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POINT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM), + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_HIZ), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 271a9aeca78..070220a5077 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -147,6 +147,23 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2); + switch (brw->hiz.op) { + case BRW_HIZ_OP_NONE: + break; + case BRW_HIZ_OP_DEPTH_CLEAR: + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case BRW_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case BRW_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ @@ -215,7 +232,8 @@ const struct brw_tracked_state gen6_wm_state = { _NEW_POLYGON), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | - BRW_NEW_BATCH), + BRW_NEW_BATCH | + BRW_NEW_HIZ), .cache = (CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG) }, -- 2.30.2