From 455ac562722f60ac9fb0c3d3c697fa339fa011ad Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 8 May 2012 13:39:10 -0700 Subject: [PATCH] i965/msaa: Properly handle sliced layout for Gen7. Starting in Gen7, there are two possible layouts for MSAA surfaces: - Interleaved, in which additional samples are accommodated by scaling up the width and height of the surface. This is the only layout available in Gen6. On Gen7 it is used for depth and stencil surfaces only. - Sliced, in which the surface is stored as a 2D array, with array slice n containing all pixel data for sample n. On Gen7 this layout is used for color surfaces. The "Sliced" layout has an additional requirement: it must be used in ARYSPC_LOD0 mode, which means that the surface doesn't leave any extra room between array slices for miplevels other than 0. This patch modifies the surface allocation functions to use the correct layout when allocating MSAA surfaces in Gen7, and to set the array offsets properly when using ARYSPC_LOD0 mode. It also modifies the code that populates SURFACE_STATE structures to ensure that ARYSPC_LOD0 mode is selected in the appropriate circumstances. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 + src/mesa/drivers/dri/i965/brw_blorp.h | 5 + src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 +- .../drivers/dri/i965/brw_wm_surface_state.c | 3 +- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 + .../drivers/dri/i965/gen7_wm_surface_state.c | 10 ++ .../drivers/dri/intel/intel_mipmap_tree.c | 160 ++++++++++++------ .../drivers/dri/intel/intel_mipmap_tree.h | 23 ++- src/mesa/drivers/dri/intel/intel_tex_image.c | 3 +- .../drivers/dri/intel/intel_tex_validate.c | 3 +- 10 files changed, 162 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index d80e4f15405..09b176b57ac 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -58,6 +58,7 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt, { brw_blorp_mip_info::set(mt, level, layer); this->num_samples = mt->num_samples; + this->array_spacing_lod0 = mt->array_spacing_lod0; if (mt->format == MESA_FORMAT_S8) { /* The miptree is a W-tiled stencil buffer. Surface states can't be set diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 6af5f67683d..f4aafd77825 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -85,6 +85,11 @@ public: bool map_stencil_as_y_tiled; unsigned num_samples; + + /* Setting this flag indicates that the surface should be set up in + * ARYSPC_LOD0 mode. Ignored prior to Gen7. + */ + bool array_spacing_lod0; }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 8bf1d3ddbcd..f7421318c21 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -49,7 +49,10 @@ brw_miptree_layout_texture_array(struct intel_context *intel, h0 = ALIGN(mt->height0, mt->align_h); h1 = ALIGN(minify(mt->height0), mt->align_h); - qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * mt->align_h); + if (mt->array_spacing_lod0) + qpitch = h0; + else + qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * mt->align_h); if (mt->compressed) qpitch /= 4; @@ -165,7 +168,10 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) break; default: - i945_miptree_layout_2d(mt); + if (mt->num_samples > 0 && !mt->msaa_is_interleaved) + brw_miptree_layout_texture_array(intel, mt); + else + i945_miptree_layout_2d(mt); break; } DBG("%s: %dx%dx%d\n", __FUNCTION__, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 849da852277..6e745cf71ab 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -955,7 +955,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, intel_image->base.Base.Level, width, height, depth, true, - 0 /* num_samples */); + 0 /* num_samples */, + false /* msaa_is_interleaved */); intel_miptree_copy_teximage(intel, intel_image, new_mt); intel_miptree_reference(&irb->mt, intel_image->mt); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 9e1aa4b8d94..58da3893d0a 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -169,6 +169,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, surf->ss0.surface_format = format; surf->ss0.surface_type = BRW_SURFACE_2D; + surf->ss0.surface_array_spacing = surface->array_spacing_lod0 ? + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL; /* reloc */ surf->ss1.base_addr = region->bo->offset; /* No tile offsets needed */ diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 5aa62bddb1a..9a01ddf68af 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -142,6 +142,10 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) return; } + /* We don't support MSAA for textures. */ + assert(!mt->array_spacing_lod0); + assert(mt->num_samples == 0); + intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, @@ -296,6 +300,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); + /* Render targets can't use MSAA interleaved layout */ + assert(!irb->mt->msaa_is_interleaved); + if (irb->mt->align_h == 4) surf->ss0.vertical_alignment = 1; if (irb->mt->align_w == 8) @@ -324,6 +331,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, } surf->ss0.surface_type = BRW_SURFACE_2D; + surf->ss0.surface_array_spacing = irb->mt->array_spacing_lod0 ? + GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL; + /* reloc */ surf->ss1.base_addr = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); surf->ss1.base_addr += region->bo->offset; /* reloc */ diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 13e73969e6a..7018c3732bf 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -73,7 +73,8 @@ intel_miptree_create_internal(struct intel_context *intel, GLuint height0, GLuint depth0, bool for_region, - GLuint num_samples) + GLuint num_samples, + bool msaa_is_interleaved) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); int compress_byte = 0; @@ -95,8 +96,14 @@ intel_miptree_create_internal(struct intel_context *intel, mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format); mt->num_samples = num_samples; mt->compressed = compress_byte ? 1 : 0; + mt->msaa_is_interleaved = msaa_is_interleaved; mt->refcount = 1; + /* array_spacing_lod0 is only used for non-interleaved MSAA surfaces. + * TODO: can we use it elsewhere? + */ + mt->array_spacing_lod0 = num_samples > 0 && !msaa_is_interleaved; + if (target == GL_TEXTURE_CUBE_MAP) { assert(depth0 == 1); mt->depth0 = 6; @@ -109,6 +116,8 @@ intel_miptree_create_internal(struct intel_context *intel, (intel->must_use_separate_stencil || (intel->has_separate_stencil && intel->vtbl.is_hiz_depth_format(intel, format)))) { + /* MSAA stencil surfaces are always interleaved. */ + bool msaa_is_interleaved = num_samples > 0; mt->stencil_mt = intel_miptree_create(intel, mt->target, MESA_FORMAT_S8, @@ -118,7 +127,8 @@ intel_miptree_create_internal(struct intel_context *intel, mt->height0, mt->depth0, true, - num_samples); + num_samples, + msaa_is_interleaved); if (!mt->stencil_mt) { intel_miptree_release(&mt); return NULL; @@ -165,7 +175,8 @@ intel_miptree_create(struct intel_context *intel, GLuint height0, GLuint depth0, bool expect_accelerated_upload, - GLuint num_samples) + GLuint num_samples, + bool msaa_is_interleaved) { struct intel_mipmap_tree *mt; uint32_t tiling = I915_TILING_NONE; @@ -207,7 +218,7 @@ intel_miptree_create(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, format, first_level, last_level, width0, height0, depth0, - false, num_samples); + false, num_samples, msaa_is_interleaved); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -243,7 +254,8 @@ intel_miptree_create_for_region(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, format, 0, 0, region->width, region->height, 1, - true, 0 /* num_samples */); + true, 0 /* num_samples */, + false /* msaa_is_interleaved */); if (!mt) return mt; @@ -252,6 +264,31 @@ intel_miptree_create_for_region(struct intel_context *intel, return mt; } +/** + * Determine whether the MSAA surface being created should use an interleaved + * layout or a sliced layout, based on the chip generation and the surface + * type. + */ +static bool +msaa_format_is_interleaved(struct intel_context *intel, gl_format format) +{ + /* Prior to Gen7, all surfaces used interleaved layout. */ + if (intel->gen < 7) + return true; + + /* In Gen7, interleaved layout is only used for depth and stencil + * buffers. + */ + switch (_mesa_get_format_base_format(format)) { + case GL_DEPTH_COMPONENT: + case GL_STENCIL_INDEX: + case GL_DEPTH_STENCIL: + return true; + default: + return false; + } +} + struct intel_mipmap_tree* intel_miptree_create_for_renderbuffer(struct intel_context *intel, gl_format format, @@ -260,55 +297,71 @@ intel_miptree_create_for_renderbuffer(struct intel_context *intel, uint32_t num_samples) { struct intel_mipmap_tree *mt; - - /* Adjust width/height for MSAA. - * - * In the Sandy Bridge PRM, volume 4, part 1, page 31, it says: - * - * "Any of the other messages (sample*, LOD, load4) used with a - * (4x) multisampled surface will in-effect sample a surface with - * double the height and width as that indicated in the surface - * state. Each pixel position on the original-sized surface is - * replaced with a 2x2 of samples with the following arrangement: - * - * sample 0 sample 2 - * sample 1 sample 3" - * - * Thus, when sampling from a multisampled texture, it behaves as though - * the layout in memory for (x,y,sample) is: - * - * (0,0,0) (0,0,2) (1,0,0) (1,0,2) - * (0,0,1) (0,0,3) (1,0,1) (1,0,3) - * - * (0,1,0) (0,1,2) (1,1,0) (1,1,2) - * (0,1,1) (0,1,3) (1,1,1) (1,1,3) - * - * However, the actual layout of multisampled data in memory is: - * - * (0,0,0) (1,0,0) (0,0,1) (1,0,1) - * (0,1,0) (1,1,0) (0,1,1) (1,1,1) - * - * (0,0,2) (1,0,2) (0,0,3) (1,0,3) - * (0,1,2) (1,1,2) (0,1,3) (1,1,3) - * - * This pattern repeats for each 2x2 pixel block. - * - * As a result, when calculating the size of our 4-sample buffer for - * an odd width or height, we have to align before scaling up because - * sample 3 is in that bottom right 2x2 block. - */ - if (num_samples > 4) { - num_samples = 8; - width = ALIGN(width, 2) * 4; - height = ALIGN(height, 2) * 2; - } else if (num_samples > 0) { - num_samples = 4; - width = ALIGN(width, 2) * 2; - height = ALIGN(height, 2) * 2; + uint32_t depth = 1; + bool msaa_is_interleaved = false; + + if (num_samples > 0) { + /* Adjust width/height/depth for MSAA */ + msaa_is_interleaved = msaa_format_is_interleaved(intel, format); + if (msaa_is_interleaved) { + /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says: + * + * "Any of the other messages (sample*, LOD, load4) used with a + * (4x) multisampled surface will in-effect sample a surface with + * double the height and width as that indicated in the surface + * state. Each pixel position on the original-sized surface is + * replaced with a 2x2 of samples with the following arrangement: + * + * sample 0 sample 2 + * sample 1 sample 3" + * + * Thus, when sampling from a multisampled texture, it behaves as + * though the layout in memory for (x,y,sample) is: + * + * (0,0,0) (0,0,2) (1,0,0) (1,0,2) + * (0,0,1) (0,0,3) (1,0,1) (1,0,3) + * + * (0,1,0) (0,1,2) (1,1,0) (1,1,2) + * (0,1,1) (0,1,3) (1,1,1) (1,1,3) + * + * However, the actual layout of multisampled data in memory is: + * + * (0,0,0) (1,0,0) (0,0,1) (1,0,1) + * (0,1,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * This pattern repeats for each 2x2 pixel block. + * + * As a result, when calculating the size of our 4-sample buffer for + * an odd width or height, we have to align before scaling up because + * sample 3 is in that bottom right 2x2 block. + */ + switch (num_samples) { + case 4: + width = ALIGN(width, 2) * 2; + height = ALIGN(height, 2) * 2; + break; + case 8: + width = ALIGN(width, 2) * 4; + height = ALIGN(height, 2) * 2; + break; + default: + /* num_samples should already have been quantized to 0, 4, or + * 8. + */ + assert(false); + } + } else { + /* Non-interleaved */ + depth = num_samples; + } } mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, - width, height, 1, true, num_samples); + width, height, depth, true, num_samples, + msaa_is_interleaved); return mt; } @@ -582,6 +635,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel, GLuint num_samples) { assert(mt->hiz_mt == NULL); + /* MSAA HiZ surfaces are always interleaved. */ + bool msaa_is_interleaved = num_samples > 0; mt->hiz_mt = intel_miptree_create(intel, mt->target, MESA_FORMAT_X8_Z24, @@ -591,7 +646,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel, mt->height0, mt->depth0, true, - num_samples); + num_samples, + msaa_is_interleaved); if (!mt->hiz_mt) return false; diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 7536065f227..5c57e02f031 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -172,6 +172,26 @@ struct intel_mipmap_tree GLuint num_samples; bool compressed; + /** + * For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true + * if the surface only contains LOD 0, and hence no space is for LOD's + * other than 0 in between array slices. + * + * Corresponds to the surface_array_spacing bit in gen7_surface_state. + */ + bool array_spacing_lod0; + + /** + * For MSAA buffers, there are two possible layouts: + * - Interleaved, in which the additional samples are accommodated + * by scaling up the width and height of the surface. + * - Sliced, in which the surface is stored as a 2D array, with + * array slice n containing all pixel data for sample n. + * + * This value is true if num_samples > 0 and the format is interleaved. + */ + bool msaa_is_interleaved; + /* Derived from the above: */ GLuint total_width; @@ -238,7 +258,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLuint height0, GLuint depth0, bool expect_accelerated_upload, - GLuint num_samples); + GLuint num_samples, + bool msaa_is_interleaved); struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 68f4ff4c9ed..abd75c51d29 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -100,7 +100,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel, height, depth, expect_accelerated_upload, - 0 /* num_samples */); + 0 /* num_samples */, + false /* msaa_is_interleaved */); } /* There are actually quite a few combinations this will work for, diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index cadba29ff41..c2d7d67df54 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -87,7 +87,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) height, depth, true, - 0 /* num_samples */); + 0 /* num_samples */, + false /* msaa_is_interleaved */); if (!intelObj->mt) return false; } -- 2.30.2