From 34a5f12e35dd4a5aff6683a8286d4582ba17df14 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 9 May 2012 08:29:33 -0700 Subject: [PATCH] i965/blorp: Use MSDISPMODE_PERSAMPLE rendering when necessary This patch modifies the "blorp" WM program so that it can be run in MSDISPMODE_PERSAMPLE (which means that every single sample of a multisampled render target is dispatched to the WM program, not just every pixel). Previously we were using the ugly hack of configuring multisampled destination surfaces as single-sampled, and generating sample indices other than zero by swizzling the pixel coordinates in the WM program. Reviewed-by: Kenneth Graunke Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_blorp.h | 12 +++ src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 87 ++++++++++++++------ src/mesa/drivers/dri/i965/gen6_blorp.cpp | 5 +- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 10 ++- 4 files changed, 87 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 9aa5efa4294..951492f8319 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -118,6 +118,12 @@ const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS = struct brw_blorp_prog_data { unsigned int first_curbe_grf; + + /** + * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more + * than one sample per pixel. + */ + bool persample_msaa_dispatch; }; class brw_blorp_params @@ -207,6 +213,12 @@ struct brw_blorp_blit_prog_key * pixels that are outside the destination rectangle. */ bool use_kill; + + /** + * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more + * than one sample per pixel. + */ + bool persample_msaa_dispatch; }; class brw_blorp_blit_params : public brw_blorp_params diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 26a3514c4e2..31e4556f374 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -437,13 +437,14 @@ brw_blorp_blit_program::compile(struct brw_context *brw, GLuint *program_size) { /* Sanity checks */ - if (key->dst_tiled_w) { - /* If the destination image is W tiled, then dst_samples must be 0. - * Otherwise, after conversion between W and Y tiling, there's no + if (key->dst_tiled_w && key->rt_samples > 0) { + /* If the destination image is W tiled and multisampled, then the thread + * must be dispatched once per sample, not once per pixel. This is + * necessary because after conversion between W and Y tiling, there's no * guarantee that all samples corresponding to a single pixel will still * be together. */ - assert(key->rt_samples == 0); + assert(key->persample_msaa_dispatch); } if (key->blend) { @@ -459,6 +460,17 @@ brw_blorp_blit_program::compile(struct brw_context *brw, assert(key->tex_samples > 0); } + if (key->persample_msaa_dispatch) { + /* It only makes sense to do persample dispatch if the render target is + * configured as multisampled. + */ + assert(key->rt_samples > 0); + } + + /* Set up prog_data */ + memset(&prog_data, 0, sizeof(prog_data)); + prog_data.persample_msaa_dispatch = key->persample_msaa_dispatch; + brw_set_compression_control(&func, BRW_COMPRESSION_NONE); alloc_regs(); @@ -658,11 +670,29 @@ brw_blorp_blit_program::compute_frag_coords() */ brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100)); - /* Since we always run the WM in a mode that causes a single fragment - * dispatch per pixel, it's not meaningful to compute a sample value. Just - * set it to 0. - */ - s_is_zero = true; + if (key->persample_msaa_dispatch) { + /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples > 0. + * Therefore, subspan 0 will represent sample 0, subspan 1 will + * represent sample 1, and so on. + * + * So we need to populate S with the sequence (0, 0, 0, 0, 1, 1, 1, 1, + * 2, 2, 2, 2, 3, 3, 3, 3). The easiest way to do this is to populate a + * temporary variable with the sequence (0, 1, 2, 3), and then copy from + * it using vstride=1, width=4, hstride=0. + * + * TODO: implement the necessary calculation for 8x multisampling. + */ + brw_MOV(&func, t1, brw_imm_v(0x3210)); + brw_MOV(&func, S, stride(t1, 1, 4, 0)); + s_is_zero = false; + } else { + /* Either the destination surface is single-sampled, or the WM will be + * run in MSDISPMODE_PERPIXEL (which causes a single fragment dispatch + * per pixel). In either case, it's not meaningful to compute a sample + * value. Just set it to 0. + */ + s_is_zero = true; + } } /** @@ -1071,22 +1101,23 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt, use_wm_prog = true; memset(&wm_prog_key, 0, sizeof(wm_prog_key)); - if (dst.map_stencil_as_y_tiled) { - /* If the destination surface is a W-tiled stencil buffer that we're - * mapping as Y tiled, then we need to set up the surface state as - * single-sampled, because the memory layout of related samples doesn't - * match between W and Y tiling. + if (dst.map_stencil_as_y_tiled && dst.num_samples > 0) { + /* If the destination surface is a W-tiled multisampled stencil buffer + * that we're mapping as Y tiled, then we need to arrange for the WM + * program to run once per sample rather than once per pixel, because + * the memory layout of related samples doesn't match between W and Y + * tiling. */ - dst.num_samples = 0; + wm_prog_key.persample_msaa_dispatch = true; } - if (src_mt->num_samples > 0 && dst_mt->num_samples > 0) { + if (src.num_samples > 0 && dst.num_samples > 0) { /* We are blitting from a multisample buffer to a multisample buffer, so * we must preserve samples within a pixel. This means we have to - * configure the render target as single-sampled, so that the WM program - * generate each sample separately. + * arrange for the WM program to run once per sample rather than once + * per pixel. */ - dst.num_samples = 0; + wm_prog_key.persample_msaa_dispatch = true; } /* The render path must be configured to use the same number of samples as @@ -1144,12 +1175,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt, * dimensions 64x64. We must also align it to a multiple of the tile * size, because the differences between W and Y tiling formats will * mean that pixels are scrambled within the tile. + * + * Note: if the destination surface configured as an MSAA surface, then + * the effective tile size we need to align it to is smaller, because + * each pixel covers a 2x2 or a 4x2 block of samples. + * * TODO: what if this makes the coordinates too large? */ - x0 = (x0 * 2) & ~127; - y0 = (y0 / 2) & ~31; - x1 = ALIGN(x1 * 2, 128); - y1 = ALIGN(y1 / 2, 32); + unsigned x_align = 64, y_align = 64; + if (dst_mt->num_samples > 0) { + x_align /= (dst_mt->num_samples == 4 ? 2 : 4); + y_align /= 2; + } + x0 = (x0 & ~(x_align - 1)) * 2; + y0 = (y0 & ~(y_align - 1)) / 2; + x1 = ALIGN(x1, x_align) * 2; + y1 = ALIGN(y1, y_align) / 2; wm_prog_key.use_kill = true; } } diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index d05f105cca7..b77a3b43fe1 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -741,7 +741,10 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, if (params->num_samples > 0) { dw6 |= GEN6_WM_MSRAST_ON_PATTERN; - dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; + if (prog_data && prog_data->persample_msaa_dispatch) + dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; + else + dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; } else { dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 2c440fd99af..1112e79c28e 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -419,7 +419,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, */ static void gen7_blorp_emit_wm_config(struct brw_context *brw, - const brw_blorp_params *params) + const brw_blorp_params *params, + brw_blorp_prog_data *prog_data) { struct intel_context *intel = &brw->intel; @@ -450,7 +451,10 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, if (params->num_samples > 0) { dw1 |= GEN7_WM_MSRAST_ON_PATTERN; - dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; + if (prog_data && prog_data->persample_msaa_dispatch) + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; + else + dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; } else { dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; @@ -776,7 +780,7 @@ gen7_blorp_exec(struct intel_context *intel, gen7_blorp_emit_streamout_disable(brw, params); gen6_blorp_emit_clip_disable(brw, params); gen7_blorp_emit_sf_config(brw, params); - gen7_blorp_emit_wm_config(brw, params); + gen7_blorp_emit_wm_config(brw, params, prog_data); if (params->use_wm_prog) { gen7_blorp_emit_binding_table_pointers_ps(brw, params, wm_bind_bo_offset); -- 2.30.2