From bf25ee284045c5d2ddf3b2d234d7344187e6da29 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 9 Jul 2013 15:36:32 -0700 Subject: [PATCH] gen7 depth surface: program 3DSTATE_DEPTH_BUFFER to top of surface Previously we would always find the 2D sub-surface of interest, and then program the surface to this location. Now we always program the 3DSTATE_DEPTH_BUFFER at the start of the surface. To select the lod/slice, we utilize the lod & minimum array element fields. As part of this change, we must revert 1f112ccf: Revert "i965/gen7: Align all depth miplevels to 8 in the X direction." We also must disable brw_workaround_depthstencil_alignment for gen >= 7. Now the hardware will handle alignment when rendering to additional slices/LODs. v2: * Merge with recent MOCS changes Signed-off-by: Jordan Justen Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_misc_state.c | 6 +++ src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 +--- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 59 ++++----------------- src/mesa/drivers/dri/i965/gen7_misc_state.c | 36 ++++++++++--- 4 files changed, 45 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1e31ad8cc41..3bf37b9561c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -355,6 +355,12 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw, if (stencil_irb) brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb); + /* Gen7+ doesn't require the workarounds, since we always program the + * surface state at the start of the whole surface. + */ + if (brw->gen >= 7) + return; + /* Check if depth buffer is in depth/stencil format. If so, then it's only * safe to invalidate it if we're also clearing stencil, and both depth_irb * and stencil_irb point to the same miptree. diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index ebc67b1d9d2..e4e66b4219c 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -78,15 +78,7 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, if (format == MESA_FORMAT_S8) return 8; - /* The depth alignment requirements in the table above are for rendering to - * depth miplevels using the LOD control fields. We don't use LOD control - * fields, and instead use page offsets plus intra-tile x/y offsets, which - * require that the low 3 bits are zero. To reduce the number of x/y - * offset workaround blits we do, align the X to 8, which depth texturing - * can handle (sadly, it can't handle 8 in the Y direction). - */ - if (brw->gen >= 7 && - _mesa_get_format_base_format(format) == GL_DEPTH_COMPONENT) + if (brw->gen >= 7 && format == MESA_FORMAT_Z16) return 8; return 4; diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 920790a2b79..518d7f5b526 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -658,10 +658,6 @@ static void gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, const brw_blorp_params *params) { - struct gl_context *ctx = &brw->ctx; - uint32_t draw_x = params->depth.x_offset; - uint32_t draw_y = params->depth.y_offset; - uint32_t tile_mask_x, tile_mask_y; uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0; uint32_t surfwidth, surfheight; uint32_t surftype; @@ -670,11 +666,6 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, GLenum gl_target = params->depth.mt->target; unsigned int lod; - brw_get_depthstencil_tile_masks(params->depth.mt, - params->depth.level, - params->depth.layer, - NULL, - &tile_mask_x, &tile_mask_y); switch (gl_target) { case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP: @@ -713,34 +704,6 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER */ { - uint32_t tile_x = draw_x & tile_mask_x; - uint32_t tile_y = draw_y & tile_mask_y; - uint32_t offset = - intel_region_get_aligned_offset(params->depth.mt->region, - draw_x & ~tile_mask_x, - draw_y & ~tile_mask_y, false); - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - WARN_ONCE((tile_x & 7) || (tile_y & 7), - "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" - "Truncating offset, bad rendering may occur.\n"); - tile_x &= ~7; - tile_y &= ~7; - intel_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); @@ -749,26 +712,24 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, params->depth_format << 18 | 1 << 22 | /* hiz enable */ 1 << 28 | /* depth write */ - BRW_SURFACE_2D << 29); + surftype << 29); OUT_RELOC(params->depth.mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); - OUT_BATCH((params->depth.width + tile_x - 1) << 4 | - (params->depth.height + tile_y - 1) << 18); - OUT_BATCH(mocs); - OUT_BATCH(tile_x | - tile_y << 16); + 0); + OUT_BATCH((surfwidth - 1) << 4 | + (surfheight - 1) << 18 | + lod); + OUT_BATCH(((depth - 1) << 21) | + (min_array_element << 10) | + mocs); OUT_BATCH(0); + OUT_BATCH((depth - 1) << 21); ADVANCE_BATCH(); } /* 3DSTATE_HIER_DEPTH_BUFFER */ { struct intel_region *hiz_region = params->depth.mt->hiz_mt->region; - uint32_t hiz_offset = - intel_region_get_aligned_offset(hiz_region, - draw_x & ~tile_mask_x, - (draw_y & ~tile_mask_y) / 2, false); BEGIN_BATCH(3); OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); @@ -776,7 +737,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, (hiz_region->pitch - 1)); OUT_RELOC(hiz_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - hiz_offset); + 0); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index e9167b70239..51067b3762d 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -48,6 +48,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, unsigned int min_array_element; GLenum gl_target = GL_TEXTURE_2D; unsigned int lod; + const struct intel_mipmap_tree *mt = depth_mt ? depth_mt : stencil_mt; const struct intel_renderbuffer *irb = NULL; const struct gl_renderbuffer *rb = NULL; @@ -91,29 +92,48 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, lod = irb ? irb->mt_level - irb->mt->first_level : 0; + if (mt) { + width = mt->physical_width0; + height = mt->physical_height0; + } + /* _NEW_DEPTH, _NEW_STENCIL, _NEW_BUFFERS */ BEGIN_BATCH(7); + /* 3DSTATE_DEPTH_BUFFER dw0 */ OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + + /* 3DSTATE_DEPTH_BUFFER dw1 */ OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) | (depthbuffer_format << 18) | ((hiz ? 1 : 0) << 22) | ((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) | ((ctx->Depth.Mask != 0) << 28) | - (depth_surface_type << 29)); + (surftype << 29)); + /* 3DSTATE_DEPTH_BUFFER dw2 */ if (depth_mt) { OUT_RELOC(depth_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - depth_offset); + 0); } else { OUT_BATCH(0); } - OUT_BATCH(((width + tile_x - 1) << 4) | - ((height + tile_y - 1) << 18)); - OUT_BATCH(mocs); - OUT_BATCH(tile_x | (tile_y << 16)); + /* 3DSTATE_DEPTH_BUFFER dw3 */ + OUT_BATCH(((width - 1) << 4) | + ((height - 1) << 18) | + lod); + + /* 3DSTATE_DEPTH_BUFFER dw4 */ + OUT_BATCH(((depth - 1) << 21) | + (min_array_element << 10) | + mocs); + + /* 3DSTATE_DEPTH_BUFFER dw5 */ OUT_BATCH(0); + + /* 3DSTATE_DEPTH_BUFFER dw6 */ + OUT_BATCH((depth - 1) << 21); ADVANCE_BATCH(); if (!hiz) { @@ -131,7 +151,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - brw->depthstencil.hiz_offset); + 0); ADVANCE_BATCH(); } @@ -161,7 +181,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, (2 * stencil_mt->region->pitch - 1)); OUT_RELOC(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - brw->depthstencil.stencil_offset); + 0); ADVANCE_BATCH(); } -- 2.30.2