From 7139ab80ca9b1b3572ef6da98765ca783e8e5e3b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 8 Aug 2012 09:03:26 -0700 Subject: [PATCH] i965: Fix rendering to small mipmaps of depth/stencil buffers using a temp mt. Fixes 51 piglit tests (fbo-clear-formats, and most of the remaining failures in depthstencil). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_draw.c | 5 + src/mesa/drivers/dri/i965/brw_misc_state.c | 187 +++++++++++++------- src/mesa/drivers/dri/i965/gen7_misc_state.c | 100 +++++------ 4 files changed, 172 insertions(+), 121 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 16bcb9cecc7..eae41d16d84 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1134,6 +1134,7 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt, struct intel_mipmap_tree *stencil_mt, uint32_t *out_tile_mask_x, uint32_t *out_tile_mask_y); +void brw_workaround_depthstencil_alignment(struct brw_context *brw); /*====================================================================== * brw_queryobj.c diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 21abfbe71a3..323310a5bc6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -440,6 +440,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx, */ brw_predraw_resolve_buffers(brw); + /* This workaround has to happen outside of brw_state_upload() because it + * may flush the batchbuffer for a blit, affecting the state flags. + */ + brw_workaround_depthstencil_alignment(brw); + /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index be8dcc45274..ce23fa0293f 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -317,6 +317,91 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt, *out_tile_mask_y = tile_mask_y; } +void +brw_workaround_depthstencil_alignment(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + bool rebase_depth = false; + bool rebase_stencil = false; + struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); + struct intel_mipmap_tree *depth_mt = NULL; + struct intel_mipmap_tree *stencil_mt = NULL; + + if (depth_irb) + depth_mt = depth_irb->mt; + if (stencil_irb) + stencil_mt = stencil_irb->mt; + + uint32_t tile_mask_x, tile_mask_y; + brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, + &tile_mask_x, &tile_mask_y); + + if (depth_irb) { + uint32_t depth_tile_x = depth_irb->draw_x & tile_mask_x; + uint32_t depth_tile_y = depth_irb->draw_y & tile_mask_y; + + /* The low 3 bits of x and y tile offset are ignored by the hardware. + * Rebase if they're set, so that we can actually render to the buffer. + */ + if (depth_tile_x & 7 || depth_tile_y & 7) + rebase_depth = true; + + /* We didn't even have intra-tile offsets before g45. */ + if (intel->gen == 4 && !intel->is_g4x) { + if (depth_tile_x || depth_tile_y) + rebase_depth = true; + } + + if (stencil_irb) { + int stencil_tile_x = stencil_irb->draw_x & tile_mask_x; + int stencil_tile_y = stencil_irb->draw_y & tile_mask_y; + + /* If the two don't match up, then we need to move them to a + * temporary so that the x/y draw offsets will end up being 0. + */ + if (depth_tile_x != stencil_tile_x || + depth_tile_y != stencil_tile_y) { + rebase_depth = true; + rebase_stencil = true; + } + } + } + + /* If we have (just) stencil, check it for ignored low bits as well */ + if (stencil_irb) { + uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x; + uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y; + + if (stencil_tile_x & 7 || stencil_tile_y & 7) + rebase_stencil = true; + + if (intel->gen == 4 && !intel->is_g4x) { + if (stencil_tile_x || stencil_tile_y) + rebase_stencil = true; + } + } + + if (rebase_depth) { + intel_renderbuffer_move_to_temp(intel, depth_irb); + + if (stencil_irb && stencil_irb->mt == depth_mt) { + intel_miptree_reference(&stencil_irb->mt, depth_irb->mt); + intel_renderbuffer_set_draw_offset(stencil_irb); + } + } + if (rebase_stencil) { + intel_renderbuffer_move_to_temp(intel, stencil_irb); + + if (depth_irb && depth_irb->mt == stencil_mt) { + intel_miptree_reference(&depth_irb->mt, stencil_irb->mt); + intel_renderbuffer_set_draw_offset(depth_irb); + } + } +} + static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -331,12 +416,6 @@ static void emit_depthbuffer(struct brw_context *brw) unsigned int len; bool separate_stencil = false; - /* Amount by which drawing should be offset in order to draw to the - * appropriate miplevel/zoffset/cubeface. We will extract these values - * from depth_irb or stencil_irb once we determine which is present. - */ - uint32_t draw_x = 0, draw_y = 0; - if (depth_irb && depth_irb->mt && depth_irb->mt->hiz_mt) { @@ -367,6 +446,39 @@ static void emit_depthbuffer(struct brw_context *brw) brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, &tile_mask_x, &tile_mask_y); + /* The intra-tile offsets should already have been forced into agreement by + * gen7_workaround_depthstencil_alignment(). + */ + uint32_t tile_x = 0, tile_y = 0; + if (depth_mt) { + tile_x = depth_irb->draw_x & tile_mask_x; + tile_y = depth_irb->draw_y & tile_mask_y; + + if (stencil_mt) { + assert((stencil_irb->draw_x & tile_mask_x) == tile_x); + assert((stencil_irb->draw_y & tile_mask_y) == tile_y); + } + } else if (stencil_mt) { + tile_x = stencil_irb->draw_x & tile_mask_x; + tile_y = stencil_irb->draw_y & tile_mask_y; + } + + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" + * + * This should already have been corrected by + * gen6_workaround_depthstencil_alignment. + */ + WARN_ONCE((tile_x & 7) || (tile_y & 7), + "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" + "Truncating offset, bad rendering may occur.\n"); + tile_x &= ~7; + tile_y &= ~7; + /* If there's a packed depth/stencil bound to stencil only, we need to * emit the packed depth/stencil buffer packet. */ @@ -398,8 +510,6 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else if (!depth_irb && separate_stencil) { - uint32_t tile_x, tile_y; - /* * There exists a separate stencil buffer but no depth buffer. * @@ -422,29 +532,6 @@ static void emit_depthbuffer(struct brw_context *brw) */ assert(intel->has_separate_stencil); - draw_x = stencil_irb->draw_x; - draw_y = stencil_irb->draw_y; - tile_x = draw_x & tile_mask_x; - tile_y = draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; - BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | @@ -470,7 +557,7 @@ static void emit_depthbuffer(struct brw_context *brw) } else { struct intel_region *region = depth_irb->mt->region; - uint32_t tile_x, tile_y, offset; + uint32_t offset; /* If using separate stencil, hiz must be enabled. */ assert(!separate_stencil || hiz_region); @@ -478,32 +565,10 @@ static void emit_depthbuffer(struct brw_context *brw) assert(intel->gen < 6 || region->tiling == I915_TILING_Y); assert(!hiz_region || region->tiling == I915_TILING_Y); - draw_x = depth_irb->draw_x; - draw_y = depth_irb->draw_y; - tile_x = draw_x & tile_mask_x; - tile_y = draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; - offset = intel_region_get_aligned_offset(region, - draw_x & ~tile_mask_x, - draw_y & ~tile_mask_y, false); + depth_irb->draw_x & ~tile_mask_x, + depth_irb->draw_y & ~tile_mask_y, + false); BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); @@ -546,8 +611,8 @@ static void emit_depthbuffer(struct brw_context *brw) if (hiz_region) { uint32_t hiz_offset = intel_region_get_aligned_offset(hiz_region, - draw_x & ~tile_mask_x, - (draw_y & ~tile_mask_y) / 2, + depth_irb->draw_x & ~tile_mask_x, + (depth_irb->draw_y & ~tile_mask_y) / 2, false); BEGIN_BATCH(3); @@ -574,8 +639,8 @@ static void emit_depthbuffer(struct brw_context *brw) * that the region is untiled; in fact it's W tiled. */ uint32_t stencil_offset = - (draw_y & ~tile_mask_y) * region->pitch + - (draw_x & ~tile_mask_x) * 64; + (stencil_irb->draw_y & ~tile_mask_y) * region->pitch + + (stencil_irb->draw_x & ~tile_mask_x) * 64; BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 337143c58cf..50fa0ecc44b 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -42,12 +42,6 @@ static void emit_depthbuffer(struct brw_context *brw) *stencil_mt = NULL, *hiz_mt = NULL; - /* Amount by which drawing should be offset in order to draw to the - * appropriate miplevel/zoffset/cubeface. We will extract these values - * from depth_irb or stencil_irb once we determine which is present. - */ - uint32_t draw_x = 0, draw_y = 0; - if (drb) depth_mt = drb->mt; @@ -66,6 +60,39 @@ static void emit_depthbuffer(struct brw_context *brw) brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, &tile_mask_x, &tile_mask_y); + /* The intra-tile offsets should already have been forced into agreement by + * gen7_workaround_depthstencil_alignment(). + */ + uint32_t tile_x = 0, tile_y = 0; + if (depth_mt) { + tile_x = drb->draw_x & tile_mask_x; + tile_y = drb->draw_y & tile_mask_y; + + if (stencil_mt) { + assert((srb->draw_x & tile_mask_x) == tile_x); + assert((srb->draw_y & tile_mask_y) == tile_y); + } + } else if (stencil_mt) { + tile_x = srb->draw_x & tile_mask_x; + tile_y = srb->draw_y & tile_mask_y; + } + + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" + * + * This should already have been corrected by + * gen6_workaround_depthstencil_alignment. + */ + WARN_ONCE((tile_x & 7) || (tile_y & 7), + "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" + "Truncating offset, bad rendering may occur.\n"); + tile_x &= ~7; + tile_y &= ~7; + /* Gen7 doesn't support packed depth/stencil */ assert(stencil_mt == NULL || depth_mt != stencil_mt); assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format)); @@ -75,7 +102,6 @@ static void emit_depthbuffer(struct brw_context *brw) if (depth_mt == NULL) { uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18; uint32_t dw3 = 0; - uint32_t tile_x = 0, tile_y = 0; if (stencil_mt == NULL) { dw1 |= (BRW_SURFACE_NULL << 29); @@ -83,29 +109,6 @@ static void emit_depthbuffer(struct brw_context *brw) /* _NEW_STENCIL: enable stencil buffer writes */ dw1 |= ((ctx->Stencil.WriteMask != 0) << 27); - draw_x = srb->draw_x; - draw_y = srb->draw_y; - tile_x = draw_x & tile_mask_x; - tile_y = draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only - * aligned to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we - * come up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; - /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */ dw1 |= (BRW_SURFACE_2D << 29); dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) | @@ -123,34 +126,11 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { struct intel_region *region = depth_mt->region; - uint32_t tile_x, tile_y, offset; - - draw_x = drb->draw_x; - draw_y = drb->draw_y; - tile_x = draw_x & tile_mask_x; - tile_y = draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; + uint32_t offset; offset = intel_region_get_aligned_offset(region, - draw_x & ~tile_mask_x, - draw_y & ~tile_mask_y, + drb->draw_x & ~tile_mask_x, + drb->draw_y & ~tile_mask_y, false); assert(region->tiling == I915_TILING_Y); @@ -184,8 +164,8 @@ static void emit_depthbuffer(struct brw_context *brw) } else { uint32_t hiz_offset = intel_region_get_aligned_offset(hiz_mt->region, - draw_x & ~tile_mask_x, - (draw_y & ~tile_mask_y) / 2, + drb->draw_x & ~tile_mask_x, + (drb->draw_y & ~tile_mask_y) / 2, false); BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); @@ -211,8 +191,8 @@ static void emit_depthbuffer(struct brw_context *brw) * that the region is untiled; in fact it's W tiled. */ uint32_t stencil_offset = - (draw_y & ~tile_mask_y) * stencil_mt->region->pitch + - (draw_x & ~tile_mask_x) * 64; + (srb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch + + (srb->draw_x & ~tile_mask_x) * 64; BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); -- 2.30.2