From 10903d228919085cdb160c563c481ed1cc09e34c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 May 2017 10:36:23 -0700 Subject: [PATCH] i965: Rework Sandy Bridge HiZ and stencil layouts Sandy Bridge does not technically support mipmapped depth/stencil. In order to work around this, we allocate what are effectively completely separate images for each miplevel, ensure that they are page-aligned, and manually offset to them. Prior to layered rendering, this was a simple matter of setting a large enough halign/valign. With the advent of layered rendering, however, things got more complicated. Now, things weren't as simple as just handing a surface off to the hardware. Any miplevel of a normally mipmapped surface can be considered as just an array surface given the right qpitch. However, the hardware gives us no capability to specify qpitch so this won't work. Instead, the chosen solution was to use a new "all slices at each LOD" layout which laid things out as a mipmap of arrays rather than an array of mipmaps. This way you can easily offset to any of the miplevels and each is a valid array. Unfortunately, the "all slices at each lod" concept missed one fundamental thing about SNB HiZ and stencil hardware: It doesn't just always act as if you're always working with a non-mipmapped surface, it acts as if you're always working on a non-mipmapped surface of the same size as LOD0. In other words, even though it may only write the upper-left corner of each array slice, the qpitch for the array is for a surface the size of LOD0 of the depth surface. This mistake causes us to under-allocate HiZ and stencil in some cases and also to accidentally allow different miplevels to overlap. Sadly, piglit test coverage didn't quite catch this until I started making changes to the resolve code that caused additional HiZ resolves in certain tests. This commit switches Sandy Bridge HiZ and stencil over to a new scheme that lays out the non-zero miplevels horizontally below LOD0. This way they can all have the same qpitch without interfering with each other. Technically, the miplevels still overlap, but things are spaced out enough that each page is only in the "written area" of one LOD. Cc: "17.0 17.1" Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.c | 11 +- src/mesa/drivers/dri/i965/brw_tex_layout.c | 100 +++++++++++++++--- src/mesa/drivers/dri/i965/gen6_depth_state.c | 4 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 11 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 37 ++++++- 5 files changed, 134 insertions(+), 29 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9030fe7e760..61c6bda56b5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -123,7 +123,7 @@ apply_gen6_stencil_hiz_offset(struct isl_surf *surf, uint32_t lod, uint32_t *offset) { - assert(mt->array_layout == ALL_SLICES_AT_EACH_LOD); + assert(mt->array_layout == GEN6_HIZ_STENCIL); if (mt->format == MESA_FORMAT_S_UINT8) { /* Note: we can't compute the stencil offset using @@ -182,12 +182,12 @@ blorp_surf_for_miptree(struct brw_context *brw, }; if (brw->gen == 6 && mt->format == MESA_FORMAT_S_UINT8 && - mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - /* Sandy bridge stencil and HiZ use this ALL_SLICES_AT_EACH_LOD hack in + mt->array_layout == GEN6_HIZ_STENCIL) { + /* Sandy bridge stencil and HiZ use this GEN6_HIZ_STENCIL hack in * order to allow for layered rendering. The hack makes each LOD of the * stencil or HiZ buffer a single tightly packed array surface at some * offset into the surface. Since ISL doesn't know how to deal with the - * crazy ALL_SLICES_AT_EACH_LOD layout and since we have to do a manual + * crazy GEN6_HIZ_STENCIL layout and since we have to do a manual * offset of it anyway, we might as well do the offset here and keep the * hacks inside the i965 driver. * @@ -261,8 +261,7 @@ blorp_surf_for_miptree(struct brw_context *brw, struct intel_mipmap_tree *hiz_mt = mt->hiz_buf->mt; if (hiz_mt) { - assert(brw->gen == 6 && - hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD); + assert(brw->gen == 6 && hiz_mt->array_layout == GEN6_HIZ_STENCIL); /* gen6 requires the HiZ buffer to be manually offset to the * right location. We could fixup the surf but it doesn't diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index bfa8afaa698..1f0a1e9a38d 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -216,6 +216,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) mt->total_height = MAX2(mt->total_height, y + img_height); /* Layout_below: step right after second mipmap. + * + * For Sandy Bridge HiZ and stencil, we always step down. */ if (level == mt->first_level + 1) { x += ALIGN_NPOT(width, mt->halign) / bw; @@ -231,6 +233,67 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) } } +static void +brw_miptree_layout_gen6_hiz_stencil(struct intel_mipmap_tree *mt) +{ + unsigned x = 0; + unsigned y = 0; + unsigned width = mt->physical_width0; + unsigned height = mt->physical_height0; + /* Number of layers of array texture. */ + unsigned depth = mt->physical_depth0; + unsigned tile_width, tile_height, bw, bh; + + if (mt->format == MESA_FORMAT_S_UINT8) { + bw = bh = 1; + /* W-tiled */ + tile_width = 64; + tile_height = 64; + } else { + assert(_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT || + _mesa_get_format_base_format(mt->format) == GL_DEPTH_STENCIL); + /* Each 128-bit HiZ block corresponds to a region of of 8x4 depth + * samples. Each cache line in the Y-Tiled HiZ image contains 2x2 HiZ + * blocks. Therefore, each Y-tiled cache line corresponds to an 16x8 + * region in the depth surface. Since we're representing it as + * RGBA_FLOAT32, the miptree calculations will think that each cache + * line is 1x4 pixels. Therefore, we need a scale-down factor of 16x2 + * and a vertical alignment of 2. + */ + mt->cpp = 16; + bw = 16; + bh = 2; + /* Y-tiled */ + tile_width = 128 / mt->cpp; + tile_height = 32; + } + + mt->total_width = 0; + mt->total_height = 0; + + for (unsigned level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_level_info(mt, level, x, y, depth); + + const unsigned img_width = ALIGN(DIV_ROUND_UP(width, bw), mt->halign); + const unsigned img_height = + ALIGN(DIV_ROUND_UP(height, bh), mt->valign) * depth; + + mt->total_width = MAX2(mt->total_width, x + img_width); + mt->total_height = MAX2(mt->total_height, y + img_height); + + if (level == mt->first_level) { + y += ALIGN(img_height, tile_height); + } else { + x += ALIGN(img_width, tile_width); + } + + /* We only minify the width. We want qpitch to match for all miplevels + * because the hardware doesn't know we aren't on LOD0. + */ + width = minify(width, 1); + } +} + unsigned brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, const struct intel_mipmap_tree *mt, @@ -249,6 +312,8 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, const struct intel_mipmap_tree *mt, unsigned level) { + assert(mt->array_layout != GEN6_HIZ_STENCIL || brw->gen == 6); + if (brw->gen >= 9) { /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will * effectively end up with a packed qpitch anyway whenever @@ -281,6 +346,15 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, mt->array_layout == ALL_SLICES_AT_EACH_LOD) { return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign); + } else if (mt->array_layout == GEN6_HIZ_STENCIL) { + /* For HiZ and stencil on Sandy Bridge, we don't minify the height. */ + if (mt->format == MESA_FORMAT_S_UINT8) { + return ALIGN(mt->physical_height0, mt->valign); + } else { + /* HiZ has a vertical scale factor of 2. */ + return ALIGN(DIV_ROUND_UP(mt->physical_height0, 2), mt->valign); + } + } else { const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign); const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign); @@ -333,6 +407,8 @@ brw_miptree_layout_texture_array(struct brw_context *brw, if (layout_1d) gen9_miptree_layout_1d(mt); + else if (mt->array_layout == GEN6_HIZ_STENCIL) + brw_miptree_layout_gen6_hiz_stencil(mt); else brw_miptree_layout_2d(mt); @@ -556,6 +632,8 @@ intel_miptree_set_total_width_height(struct brw_context *brw, case INTEL_MSAA_LAYOUT_IMS: if (gen9_use_linear_1d_layout(brw, mt)) gen9_miptree_layout_1d(mt); + else if (mt->array_layout == GEN6_HIZ_STENCIL) + brw_miptree_layout_gen6_hiz_stencil(mt); else brw_miptree_layout_2d(mt); break; @@ -579,15 +657,9 @@ intel_miptree_set_alignment(struct brw_context *brw, * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 * - BSpec (for Ivybridge and slight variations in separate stencil) */ - bool gen6_hiz_or_stencil = false; - if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - const GLenum base_format = _mesa_get_format_base_format(mt->format); - gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format); - } - - if (gen6_hiz_or_stencil) { - /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the + if (mt->array_layout == GEN6_HIZ_STENCIL) { + /* On gen6, we use GEN6_HIZ_STENCIL for stencil/hiz because the * hardware doesn't support multiple mip levels on stencil/hiz. * * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer: @@ -600,15 +672,13 @@ intel_miptree_set_alignment(struct brw_context *brw, /* Stencil uses W tiling, so we force W tiling alignment for the * ALL_SLICES_AT_EACH_LOD miptree layout. */ - mt->halign = 64; - mt->valign = 64; + mt->halign = 4; + mt->valign = 2; assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } else { - /* Depth uses Y tiling, so we force need Y tiling alignment for the - * ALL_SLICES_AT_EACH_LOD miptree layout. - */ - mt->halign = 128 / mt->cpp; - mt->valign = 32; + /* See brw_miptree_layout_gen6_hiz_stencil() */ + mt->halign = 1; + mt->valign = 2; } } else if (mt->compressed) { /* The hardware alignment requirements for compressed textures diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c index 0ff240753e3..a77e4616dc5 100644 --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c @@ -164,7 +164,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt; uint32_t offset = 0; - if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { + if (hiz_mt->array_layout == GEN6_HIZ_STENCIL) { offset = intel_miptree_get_aligned_offset( hiz_mt, hiz_mt->level[lod].level_x, @@ -190,7 +190,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, if (separate_stencil) { uint32_t offset = 0; - if (stencil_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { + if (stencil_mt->array_layout == GEN6_HIZ_STENCIL) { assert(stencil_mt->format == MESA_FORMAT_S_UINT8); /* Note: we can't compute the stencil offset using diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b73c0d0ad1d..f7a69a0e235 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -464,7 +464,7 @@ intel_miptree_create_layout(struct brw_context *brw, intel_miptree_wants_hiz_buffer(brw, mt)))) { uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; if (brw->gen == 6) { - stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD | + stencil_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL | MIPTREE_LAYOUT_TILING_ANY; } @@ -497,8 +497,8 @@ intel_miptree_create_layout(struct brw_context *brw, } } - if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD) - mt->array_layout = ALL_SLICES_AT_EACH_LOD; + if (layout_flags & MIPTREE_LAYOUT_GEN6_HIZ_STENCIL) + mt->array_layout = GEN6_HIZ_STENCIL; /* * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are @@ -1790,7 +1790,7 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; if (brw->gen == 6) - layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; + layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL; if (!buf) return NULL; @@ -2380,7 +2380,7 @@ intel_update_r8stencil(struct brw_context *brw, const uint32_t r8stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y | MIPTREE_LAYOUT_DISABLE_AUX; - assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD */ + assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */ mt->r8stencil_mt = intel_miptree_create(brw, src->target, MESA_FORMAT_R_UINT8, @@ -3287,6 +3287,7 @@ intel_miptree_get_isl_surf(struct brw_context *brw, surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL; break; case ALL_SLICES_AT_EACH_LOD: + case GEN6_HIZ_STENCIL: surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT; break; default: diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 7aabac006f3..be460f33eb8 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -251,6 +251,41 @@ enum miptree_array_layout { * +---+ */ ALL_SLICES_AT_EACH_LOD, + + /* On Sandy Bridge, HiZ and stencil buffers work the same as on Ivy Bridge + * except that they don't technically support mipmapping. That does not, + * however, stop us from doing it. As far as Sandy Bridge hardware is + * concerned, HiZ and stencil always operates on a single miplevel 2D + * (possibly array) image. The dimensions of that image are NOT minified. + * + * In order to implement HiZ and stencil on Sandy Bridge, we create one + * full-sized 2D (possibly array) image for every LOD with every image + * aligned to a page boundary. In order to save memory, we pretend that + * the width of each miplevel is minified and we place LOD1 and above below + * LOD0 but horizontally adjacent to each other. When considered as + * full-sized images, LOD1 and above technically overlap. However, since + * we only write to part of that image, the hardware will never notice the + * overlap. + * + * This layout looks something like this: + * + * +---------+ + * | | + * | | + * +---------+ + * | | + * | | + * +---------+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + */ + GEN6_HIZ_STENCIL, }; enum intel_aux_disable { @@ -672,7 +707,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, enum { MIPTREE_LAYOUT_ACCELERATED_UPLOAD = 1 << 0, - MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD = 1 << 1, + MIPTREE_LAYOUT_GEN6_HIZ_STENCIL = 1 << 1, MIPTREE_LAYOUT_FOR_BO = 1 << 2, MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3, MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4, -- 2.30.2