From: Eric Anholt Date: Sun, 4 Nov 2012 20:47:02 +0000 (-0800) Subject: i965: Move all the depth/stencil/hiz offset logic into the workaround. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=52ee1a7269b84b60558760655a4c722d827703d4;p=mesa.git i965: Move all the depth/stencil/hiz offset logic into the workaround. Given that we have the mask information here (assuming the rebase is to the same tiling, which is safe), we can just save a set of miptrees and offsets and the global intra-tile offset in the context and cut out a bunch of logic. This will also save emitting the next fix I need to do twice. Acked-by: Chad Versace --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4863c40c8c1..1abaee3db1d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1072,6 +1072,21 @@ struct brw_context bool enable_cut_index; } prim_restart; + /** Computed depth/stencil/hiz state from the current attached + * renderbuffers, valid only during the drawing state upload loop after + * brw_workaround_depthstencil_alignment(). + */ + struct { + struct intel_mipmap_tree *depth_mt; + struct intel_mipmap_tree *stencil_mt; + struct intel_mipmap_tree *hiz_mt; + + /* Inter-tile (page-aligned) byte offsets. */ + uint32_t depth_offset, hiz_offset, stencil_offset; + /* Intra-tile x,y offsets for drawing to depth/stencil/hiz */ + uint32_t tile_x, tile_y; + } depthstencil; + uint32_t num_instances; int basevertex; }; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 8e80d70b897..f6cfdc41b11 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -329,7 +329,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct intel_mipmap_tree *depth_mt = NULL; struct intel_mipmap_tree *stencil_mt = NULL; - uint32_t depth_tile_x = 0, depth_tile_y = 0; + uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0; if (depth_irb) depth_mt = depth_irb->mt; @@ -341,18 +341,22 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) &tile_mask_x, &tile_mask_y); if (depth_irb) { - depth_tile_x = depth_irb->draw_x & tile_mask_x; - depth_tile_y = depth_irb->draw_y & tile_mask_y; + tile_x = depth_irb->draw_x & tile_mask_x; + tile_y = depth_irb->draw_y & tile_mask_y; - /* The low 3 bits of x and y tile offset are ignored by the hardware. - * Rebase if they're set, so that we can actually render to the buffer. + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" */ - if (depth_tile_x & 7 || depth_tile_y & 7) + if (tile_x & 7 || tile_y & 7) rebase_depth = true; /* We didn't even have intra-tile offsets before g45. */ if (intel->gen == 4 && !intel->is_g4x) { - if (depth_tile_x || depth_tile_y) + if (tile_x || tile_y) rebase_depth = true; } @@ -368,8 +372,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) intel_renderbuffer_set_draw_offset(stencil_irb); } - depth_tile_x = depth_irb->draw_x & tile_mask_x; - depth_tile_y = depth_irb->draw_y & tile_mask_y; + tile_x = depth_irb->draw_x & tile_mask_x; + tile_y = depth_irb->draw_y & tile_mask_y; } if (stencil_irb) { @@ -381,8 +385,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) * post-stencil depth test will also rebase depth to try to match it * up). */ - if (depth_tile_x != stencil_tile_x || - depth_tile_y != stencil_tile_y) { + if (tile_x != stencil_tile_x || + tile_y != stencil_tile_y) { rebase_stencil = true; } } @@ -390,8 +394,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) /* If we have (just) stencil, check it for ignored low bits as well */ if (stencil_irb) { - uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x; - uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y; + stencil_tile_x = stencil_irb->draw_x & tile_mask_x; + stencil_tile_y = stencil_irb->draw_y & tile_mask_y; if (stencil_tile_x & 7 || stencil_tile_y & 7) rebase_stencil = true; @@ -405,24 +409,96 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw) if (rebase_stencil) { intel_renderbuffer_move_to_temp(intel, stencil_irb); - uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x; - uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y; + stencil_tile_x = stencil_irb->draw_x & tile_mask_x; + stencil_tile_y = stencil_irb->draw_y & tile_mask_y; if (depth_irb && depth_irb->mt == stencil_mt) { intel_miptree_reference(&depth_irb->mt, stencil_irb->mt); intel_renderbuffer_set_draw_offset(depth_irb); } else if (depth_irb && !rebase_depth) { - if (depth_tile_x != stencil_tile_x || - depth_tile_y != stencil_tile_y) { + if (tile_x != stencil_tile_x || + tile_y != stencil_tile_y) { intel_renderbuffer_move_to_temp(intel, depth_irb); + tile_x = depth_irb->draw_x & tile_mask_x; + tile_y = depth_irb->draw_y & tile_mask_y; + if (stencil_irb && stencil_irb->mt == depth_mt) { intel_miptree_reference(&stencil_irb->mt, depth_irb->mt); intel_renderbuffer_set_draw_offset(stencil_irb); } + + WARN_ONCE(stencil_tile_x != tile_x || + stencil_tile_y != tile_y, + "Rebased stencil tile offset (%d,%d) doesn't match depth " + "tile offset (%d,%d).\n", + stencil_tile_x, stencil_tile_y, + tile_x, tile_y); } } } + + if (!depth_irb) { + tile_x = stencil_tile_x; + tile_y = stencil_tile_y; + } + + /* While we just tried to get everything aligned, we may have failed to do + * so in the case of rendering to array or 3D textures, where nonzero faces + * will still have an offset post-rebase. At least give an informative + * warning. + */ + WARN_ONCE((tile_x & 7) || (tile_y & 7), + "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" + "Truncating offset, bad rendering may occur.\n"); + tile_x &= ~7; + tile_y &= ~7; + + /* Now, after rebasing, save off the new dephtstencil state so the hardware + * packets can just dereference that without re-calculating tile offsets. + */ + brw->depthstencil.tile_x = tile_x; + brw->depthstencil.tile_y = tile_y; + brw->depthstencil.depth_offset = 0; + brw->depthstencil.stencil_offset = 0; + brw->depthstencil.hiz_offset = 0; + brw->depthstencil.depth_mt = NULL; + brw->depthstencil.stencil_mt = NULL; + brw->depthstencil.hiz_mt = NULL; + if (depth_irb) { + depth_mt = depth_irb->mt; + brw->depthstencil.depth_mt = depth_mt; + brw->depthstencil.depth_offset = + intel_region_get_aligned_offset(depth_mt->region, + depth_irb->draw_x & ~tile_mask_x, + depth_irb->draw_y & ~tile_mask_y, + false); + if (depth_mt->hiz_mt) { + brw->depthstencil.hiz_mt = depth_mt->hiz_mt; + brw->depthstencil.hiz_offset = + intel_region_get_aligned_offset(depth_mt->region, + depth_irb->draw_x & ~tile_mask_x, + (depth_irb->draw_y & ~tile_mask_y) / + 2, + false); + } + } + if (stencil_irb) { + stencil_mt = stencil_irb->mt; + if (stencil_mt->stencil_mt) + stencil_mt = stencil_mt->stencil_mt; + + brw->depthstencil.stencil_mt = stencil_mt; + if (stencil_mt->format == MESA_FORMAT_S8) { + /* Note: we can't compute the stencil offset using + * intel_region_get_aligned_offset(), because stencil_region claims + * that the region is untiled even though it's W tiled. + */ + brw->depthstencil.stencil_offset = + (stencil_irb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch + + (stencil_irb->draw_x & ~tile_mask_x) * 64; + } + } } static void emit_depthbuffer(struct brw_context *brw) @@ -433,19 +509,16 @@ static void emit_depthbuffer(struct brw_context *brw) /* _NEW_BUFFERS */ struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); - struct intel_mipmap_tree *depth_mt = NULL; - struct intel_mipmap_tree *stencil_mt = NULL; - struct intel_region *hiz_region = NULL; + struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt; + struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt; + struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt; + uint32_t tile_x = brw->depthstencil.tile_x; + uint32_t tile_y = brw->depthstencil.tile_y; unsigned int len; bool separate_stencil = false; - if (depth_irb){ - depth_mt = depth_irb->mt; - if (depth_mt && - depth_mt->hiz_mt) { - hiz_region = depth_irb->mt->hiz_mt->region; - } - } + if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8) + separate_stencil = true; /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both * non-pipelined state that will need the PIPE_CONTROL workaround. @@ -455,54 +528,6 @@ static void emit_depthbuffer(struct brw_context *brw) intel_emit_depth_stall_flushes(intel); } - /* Find the real separate stencil mt if present. */ - if (stencil_irb) { - stencil_mt = stencil_irb->mt; - if (stencil_mt->stencil_mt) - stencil_mt = stencil_mt->stencil_mt; - - if (stencil_mt->format == MESA_FORMAT_S8) { - separate_stencil = true; - } - } - - uint32_t tile_mask_x, tile_mask_y; - brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, - &tile_mask_x, &tile_mask_y); - - /* The intra-tile offsets should already have been forced into agreement by - * gen7_workaround_depthstencil_alignment(). - */ - uint32_t tile_x = 0, tile_y = 0; - if (depth_mt) { - tile_x = depth_irb->draw_x & tile_mask_x; - tile_y = depth_irb->draw_y & tile_mask_y; - - if (stencil_mt) { - assert((stencil_irb->draw_x & tile_mask_x) == tile_x); - assert((stencil_irb->draw_y & tile_mask_y) == tile_y); - } - } else if (stencil_mt) { - tile_x = stencil_irb->draw_x & tile_mask_x; - tile_y = stencil_irb->draw_y & tile_mask_y; - } - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * This should already have been corrected by - * gen6_workaround_depthstencil_alignment. - */ - WARN_ONCE((tile_x & 7) || (tile_y & 7), - "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" - "Truncating offset, bad rendering may occur.\n"); - tile_x &= ~7; - tile_y &= ~7; - /* If there's a packed depth/stencil bound to stencil only, we need to * emit the packed depth/stencil buffer packet. */ @@ -580,32 +605,26 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { - struct intel_region *region = depth_irb->mt->region; - uint32_t offset; + struct intel_region *region = depth_mt->region; /* If using separate stencil, hiz must be enabled. */ - assert(!separate_stencil || hiz_region); + assert(!separate_stencil || hiz_mt); assert(intel->gen < 6 || region->tiling == I915_TILING_Y); - assert(!hiz_region || region->tiling == I915_TILING_Y); - - offset = intel_region_get_aligned_offset(region, - depth_irb->draw_x & ~tile_mask_x, - depth_irb->draw_y & ~tile_mask_y, - false); + assert(!hiz_mt || region->tiling == I915_TILING_Y); BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (brw_depthbuffer_format(brw) << 18) | - ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */ - ((hiz_region ? 1 : 0) << 22) | /* hiz enable */ + ((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */ + ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */ (BRW_TILEWALK_YMAJOR << 26) | ((region->tiling != I915_TILING_NONE) << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); + brw->depthstencil.depth_offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) | (((depth_irb->Base.Base.Height + tile_y) - 1) << 19)); @@ -622,7 +641,7 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } - if (hiz_region || separate_stencil) { + if (hiz_mt || separate_stencil) { /* * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate * stencil enable' and 'hiz enable' bits were set. Therefore we must @@ -632,19 +651,13 @@ static void emit_depthbuffer(struct brw_context *brw) */ /* Emit hiz buffer. */ - if (hiz_region) { - uint32_t hiz_offset = - intel_region_get_aligned_offset(hiz_region, - depth_irb->draw_x & ~tile_mask_x, - (depth_irb->draw_y & ~tile_mask_y) / 2, - false); - + if (hiz_mt) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); - OUT_RELOC(hiz_region->bo, + OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1); + OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - hiz_offset); + brw->depthstencil.hiz_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); @@ -658,14 +671,6 @@ static void emit_depthbuffer(struct brw_context *brw) if (separate_stencil) { struct intel_region *region = stencil_mt->region; - /* Note: we can't compute the stencil offset using - * intel_region_get_aligned_offset(), because stencil_region claims - * that the region is untiled; in fact it's W tiled. - */ - uint32_t stencil_offset = - (stencil_irb->draw_y & ~tile_mask_y) * region->pitch + - (stencil_irb->draw_x & ~tile_mask_x) * 64; - BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); /* The stencil buffer has quirky pitch requirements. From Vol 2a, @@ -676,7 +681,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(2 * region->pitch * region->cpp - 1); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - stencil_offset); + brw->depthstencil.stencil_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); @@ -695,7 +700,7 @@ static void emit_depthbuffer(struct brw_context *brw) * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ - if (intel->gen >= 6 || hiz_region) { + if (intel->gen >= 6 || hiz_mt) { if (intel->gen == 6) intel_emit_post_sync_nonzero_flush(intel); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 50fa0ecc44b..75e917926c4 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -38,63 +38,14 @@ static void emit_depthbuffer(struct brw_context *brw) /* _NEW_BUFFERS */ struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH); struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL); - struct intel_mipmap_tree *depth_mt = NULL, - *stencil_mt = NULL, - *hiz_mt = NULL; - - if (drb) - depth_mt = drb->mt; - - if (depth_mt) - hiz_mt = depth_mt->hiz_mt; - - if (srb) { - stencil_mt = srb->mt; - if (stencil_mt->stencil_mt) - stencil_mt = stencil_mt->stencil_mt; - - assert(stencil_mt->format == MESA_FORMAT_S8); - } - - uint32_t tile_mask_x, tile_mask_y; - brw_get_depthstencil_tile_masks(depth_mt, stencil_mt, - &tile_mask_x, &tile_mask_y); - - /* The intra-tile offsets should already have been forced into agreement by - * gen7_workaround_depthstencil_alignment(). - */ - uint32_t tile_x = 0, tile_y = 0; - if (depth_mt) { - tile_x = drb->draw_x & tile_mask_x; - tile_y = drb->draw_y & tile_mask_y; - - if (stencil_mt) { - assert((srb->draw_x & tile_mask_x) == tile_x); - assert((srb->draw_y & tile_mask_y) == tile_y); - } - } else if (stencil_mt) { - tile_x = srb->draw_x & tile_mask_x; - tile_y = srb->draw_y & tile_mask_y; - } - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * This should already have been corrected by - * gen6_workaround_depthstencil_alignment. - */ - WARN_ONCE((tile_x & 7) || (tile_y & 7), - "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" - "Truncating offset, bad rendering may occur.\n"); - tile_x &= ~7; - tile_y &= ~7; - - /* Gen7 doesn't support packed depth/stencil */ - assert(stencil_mt == NULL || depth_mt != stencil_mt); + struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt; + struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt; + struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt; + uint32_t tile_x = brw->depthstencil.tile_x; + uint32_t tile_y = brw->depthstencil.tile_y; + + /* Gen7 only supports separate stencil */ + assert(!stencil_mt || stencil_mt->format == MESA_FORMAT_S8); assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format)); intel_emit_depth_stall_flushes(intel); @@ -126,12 +77,6 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { struct intel_region *region = depth_mt->region; - uint32_t offset; - - offset = intel_region_get_aligned_offset(region, - drb->draw_x & ~tile_mask_x, - drb->draw_y & ~tile_mask_y, - false); assert(region->tiling == I915_TILING_Y); @@ -146,7 +91,7 @@ static void emit_depthbuffer(struct brw_context *brw) (BRW_SURFACE_2D << 29)); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); + brw->depthstencil.depth_offset); OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) | (((drb->Base.Base.Height + tile_y) - 1) << 18)); OUT_BATCH(0); @@ -162,18 +107,13 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - uint32_t hiz_offset = - intel_region_get_aligned_offset(hiz_mt->region, - drb->draw_x & ~tile_mask_x, - (drb->draw_y & ~tile_mask_y) / 2, - false); BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1); OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - hiz_offset); + brw->depthstencil.hiz_offset); ADVANCE_BATCH(); } @@ -186,14 +126,6 @@ static void emit_depthbuffer(struct brw_context *brw) } else { const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0; - /* Note: We can't compute the stencil offset using - * intel_region_get_aligned_offset(), because the stencil region claims - * that the region is untiled; in fact it's W tiled. - */ - uint32_t stencil_offset = - (srb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch + - (srb->draw_x & ~tile_mask_x) * 64; - BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); /* The stencil buffer has quirky pitch requirements. From the Graphics @@ -214,7 +146,7 @@ static void emit_depthbuffer(struct brw_context *brw) (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1)); OUT_RELOC(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - stencil_offset); + brw->depthstencil.stencil_offset); ADVANCE_BATCH(); }