From 19e9b24626c2b9d7abef054d57bb2a52106c545b Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sun, 29 Apr 2012 21:41:42 -0700 Subject: [PATCH] i965/gen6: Initial implementation of MSAA. This patch enables MSAA for Gen6, by modifying intel_mipmap_tree to understand multisampled buffers, adapting the rendering pipeline setup to enable multisampled rendering, and adding multisample resolve operations to brw_blorp_blit.cpp. Some preparation work is also included for Gen7, but it is not yet enabled. MSAA support is still fairly preliminary. In particular, the following are not yet supported: - Fully general blits between MSAA and non-MSAA buffers. - Formats other than RGBA8, DEPTH24, and STENCIL8. - Centroid interpolation. - Coverage parameters (glSampleCoverage, GL_SAMPLE_ALPHA_TO_COVERAGE, GL_SAMPLE_ALPHA_TO_ONE, GL_SAMPLE_COVERAGE, GL_SAMPLE_COVERAGE_VALUE, GL_SAMPLE_COVERAGE_INVERT). Fixes piglit tests "EXT_framebuffer_multisample/accuracy" on i965/Gen6. v2: - In intel_alloc_renderbuffer_storage(), quantize the requested number of samples to the next higher sample count supported by the hardware. This ensures that a query of GL_SAMPLES will return the correct value. It also ensures that MSAA is fully disabled on Gen7 for now (since Gen7 MSAA support doesn't work yet). - When reading from a non-MSAA surface, ensure that s_is_zero is true so that we won't try to read from a nonexistent sample. 
--- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_blorp.cpp | 10 +- src/mesa/drivers/dri/i965/brw_blorp.h | 30 +- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 332 ++++++++++++++++-- src/mesa/drivers/dri/i965/brw_context.h | 8 + src/mesa/drivers/dri/i965/brw_defines.h | 7 + src/mesa/drivers/dri/i965/brw_misc_state.c | 33 +- src/mesa/drivers/dri/i965/brw_state.h | 4 + src/mesa/drivers/dri/i965/brw_state_upload.c | 2 + .../drivers/dri/i965/brw_wm_surface_state.c | 16 +- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 44 +-- .../drivers/dri/i965/gen6_multisample_state.c | 102 ++++++ src/mesa/drivers/dri/i965/gen6_sf_state.c | 15 +- src/mesa/drivers/dri/i965/gen6_wm_state.c | 12 + src/mesa/drivers/dri/i965/gen7_blorp.cpp | 20 +- src/mesa/drivers/dri/i965/gen7_sf_state.c | 14 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 +- .../drivers/dri/i965/gen7_wm_surface_state.c | 16 + src/mesa/drivers/dri/intel/intel_fbo.c | 31 +- .../drivers/dri/intel/intel_mipmap_tree.c | 52 ++- .../drivers/dri/intel/intel_mipmap_tree.h | 10 +- src/mesa/drivers/dri/intel/intel_tex_image.c | 3 +- .../drivers/dri/intel/intel_tex_validate.c | 3 +- 23 files changed, 662 insertions(+), 121 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen6_multisample_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 182b432ed45..3847f9187d2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -88,6 +88,7 @@ i965_C_FILES = \ gen6_clip_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ + gen6_multisample_state.c \ gen6_sampler_state.c \ gen6_scissor_state.c \ gen6_sf_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 762d7350632..8e225117fab 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -36,7 +36,8 @@ brw_blorp_mip_info::brw_blorp_mip_info() 
} brw_blorp_surface_info::brw_blorp_surface_info() - : map_stencil_as_y_tiled(false) + : map_stencil_as_y_tiled(false), + num_samples(0) { } @@ -60,11 +61,15 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt, if (mt->format == MESA_FORMAT_S8) { /* The miptree is a W-tiled stencil buffer. Surface states can't be set * up for W tiling, so we'll need to use Y tiling and have the WM - * program swizzle the coordinates. + * program swizzle the coordinates. Furthermore, we need to set up the + * surface state as single-sampled, because the memory layout of related + * samples doesn't match between W and Y tiling. */ this->map_stencil_as_y_tiled = true; + this->num_samples = 0; } else { this->map_stencil_as_y_tiled = false; + this->num_samples = mt->num_samples; } } @@ -88,6 +93,7 @@ brw_blorp_params::brw_blorp_params() y1(0), depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), + num_samples(0), use_wm_prog(false) { } diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index b6b659dbdae..f14a5c7aae0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -97,6 +97,8 @@ public: * width and height of the buffer. */ bool map_stencil_as_y_tiled; + + unsigned num_samples; }; @@ -151,6 +153,7 @@ public: brw_blorp_surface_info src; brw_blorp_surface_info dst; enum gen6_hiz_op hiz_op; + unsigned num_samples; bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; }; @@ -177,16 +180,39 @@ public: struct brw_blorp_blit_prog_key { + /* Number of samples per pixel that have been configured in the surface + * state for texturing from. + */ + unsigned tex_samples; + + /* Actual number of samples per pixel in the source image. */ + unsigned src_samples; + + /* Number of samples per pixel that have been configured in the render + * target. + */ + unsigned rt_samples; + + /* Actual number of samples per pixel in the destination image. */ + unsigned dst_samples; + /* True if the source image is W tiled. 
If true, the surface state for the - * source image must be configured as Y tiled. + * source image must be configured as Y tiled, and tex_samples must be 0. */ bool src_tiled_w; /* True if the destination image is W tiled. If true, the surface state - * for the render target must be configured as Y tiled. + * for the render target must be configured as Y tiled, and rt_samples must + * be 0. */ bool dst_tiled_w; + /* True if all source samples should be blended together to produce each + * destination pixel. If true, src_tiled_w must be false, tex_samples must + * equal src_samples, and tex_samples must be nonzero. + */ + bool blend; + /* True if the rectangle being sent through the rendering pipeline might be * larger than the destination rectangle, so the WM program should kill any * pixels that are outside the destination rectangle. diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index cce5d1b560e..1f0c3185394 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -215,11 +215,29 @@ brw_blorp_framebuffer(struct intel_context *intel, * * The bulk of the work done by the WM program is to wrap and unwrap the * coordinate transformations used by the hardware to store surfaces in - * memory. The hardware transforms a pixel location (X, Y) to a memory offset - * by the following formulas: + * memory. 
The hardware transforms a pixel location (X, Y, S) (where S is the + * sample index for a multisampled surface) to a memory offset by the + * following formulas: * - * offset = tile(tiling_format, X, Y) - * (X, Y) = detile(tiling_format, offset) + * offset = tile(tiling_format, encode_msaa(num_samples, X, Y, S)) + * (X, Y, S) = decode_msaa(num_samples, detile(tiling_format, offset)) + * + * For a single-sampled surface, encode_msaa() and decode_msaa are the + * identity function: + * + * encode_msaa(1, X, Y, 0) = (X, Y) + * decode_msaa(1, X, Y) = (X, Y, 0) + * + * For a 4x multisampled surface, encode_msaa() embeds the sample number into + * bit 1 of the X and Y coordinates: + * + * encode_msaa(4, X, Y, S) = (X', Y') + * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) + * Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1) + * decode_msaa(4, X, Y) = (X', Y', S) + * where X' = (X & ~0b11) >> 1 | (X & 0b1) + * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) + * S = (Y & 0b10) | (X & 0b10) >> 1 * * For X tiling, tile() combines together the low-order bits of the X and Y * coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512 @@ -239,7 +257,7 @@ brw_blorp_framebuffer(struct intel_context *intel, * | (A & 0b111111111) * * (In all tiling formulas, cpp is the number of bytes occupied by a single - * pixel ("chars per pixel"), and tile_pitch is the number of 4k tiles + * sample ("chars per pixel"), and tile_pitch is the number of 4k tiles * required to fill the width of the surface). * * For Y tiling, tile() combines together the low-order bits of the X and Y @@ -301,7 +319,7 @@ brw_blorp_framebuffer(struct intel_context *intel, * X' = A % pitch * * (In these formulas, pitch is the number of bytes occupied by a single row - * of pixels). + * of samples). 
*/ class brw_blorp_blit_program { @@ -319,8 +337,12 @@ private: void alloc_push_const_regs(int base_reg); void compute_frag_coords(); void translate_tiling(bool old_tiled_w, bool new_tiled_w); + void encode_msaa(unsigned num_samples); + void decode_msaa(unsigned num_samples); void kill_if_outside_dst_rect(); void translate_dst_to_src(); + void single_to_blend(); + void sample(); void texel_fetch(); void texture_lookup(GLuint msg_type, struct brw_reg mrf_u, struct brw_reg mrf_v); @@ -364,6 +386,14 @@ private: */ int xy_coord_index; + /* True if, at the point in the program currently being compiled, the + * sample index is known to be zero. + */ + bool s_is_zero; + + /* Register storing the sample index when s_is_zero is false. */ + struct brw_reg sample_index; + /* Temporaries */ struct brw_reg t1; struct brw_reg t2; @@ -395,6 +425,37 @@ const GLuint * brw_blorp_blit_program::compile(struct brw_context *brw, GLuint *program_size) { + /* Sanity checks */ + if (key->src_tiled_w) { + /* If the source image is W tiled, then tex_samples must be 0. + * Otherwise, after conversion between W and Y tiling, there's no + * guarantee that the sample index will be 0. + */ + assert(key->tex_samples == 0); + } + + if (key->dst_tiled_w) { + /* If the destination image is W tiled, then rt_samples must be 0. + * Otherwise, after conversion between W and Y tiling, there's no + * guarantee that all samples corresponding to a single pixel will still + * be together. + */ + assert(key->rt_samples == 0); + } + + if (key->blend) { + /* We are blending, which means we'll be using a SAMPLE message, which + * causes the hardware to pick up all of the samples corresponding + * to this pixel and average them together. Since we'll be relying on + * the hardware to find all of the samples and combine them together, + * the surface state for the texture must be configured with the correct + * tiling and sample count. 
+ */ + assert(!key->src_tiled_w); + assert(key->tex_samples == key->src_samples); + assert(key->tex_samples > 0); + } + brw_set_compression_control(&func, BRW_COMPRESSION_NONE); alloc_regs(); @@ -405,22 +466,29 @@ brw_blorp_blit_program::compile(struct brw_context *brw, const bool tex_tiled_w = false; /* The address that data will be written to is determined by the - * coordinates supplied to the WM thread and the tiling of the render - * target, according to the formula: + * coordinates supplied to the WM thread and the tiling and sample count of + * the render target, according to the formula: * - * (X, Y) = detile(rt_tiling, offset) + * (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset)) * - * If the actual tiling of the destination surface is not the same as the - * configuration of the render target, then these coordinates are wrong and - * we have to adjust them to compensate for the difference. + * If the actual tiling and sample count of the destination surface are not + * the same as the configuration of the render target, then these + * coordinates are wrong and we have to adjust them to compensate for the + * difference. */ - if (rt_tiled_w != key->dst_tiled_w) + if (rt_tiled_w != key->dst_tiled_w || + key->rt_samples != key->dst_samples) { + encode_msaa(key->rt_samples); + /* Now (X, Y) = detile(rt_tiling, offset) */ translate_tiling(rt_tiled_w, key->dst_tiled_w); + /* Now (X, Y) = detile(dst_tiling, offset) */ + decode_msaa(key->dst_samples); + } - /* Now (X, Y) = detile(dst_tiling, offset). + /* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)). * - * That is: X and Y now contain the true coordinates of the data that the - * WM thread should output. + * That is: X, Y and S now contain the true coordinates and sample index of + * the data that the WM thread should output. * * If we need to kill pixels that are outside the destination rectangle, * now is the time to do it. 
@@ -432,31 +500,50 @@ brw_blorp_blit_program::compile(struct brw_context *brw, /* Next, apply a translation to obtain coordinates in the source image. */ translate_dst_to_src(); - /* X and Y are now the coordinates of the pixel in the source image that we - * want to texture from. - * - * The address that we want to fetch from is - * related to the X and Y values according to the formula: - * - * (X, Y) = detile(src_tiling, offset). - * - * If the actual tiling of the source surface is not the same as the - * configuration of the texture, then we need to adjust the coordinates to - * compensate for the difference. + /* If the source image is not multisampled, then we want to fetch sample + * number 0, because that's the only sample there is. */ - if (tex_tiled_w != key->src_tiled_w) - translate_tiling(key->src_tiled_w, tex_tiled_w); + if (key->src_samples == 0) + s_is_zero = true; - /* Now (X, Y) = detile(tex_tiling, offset). - * - * In other words: X and Y now contain values which, when passed to - * the texturing unit, will cause data to be read from the correct - * memory location. So we can fetch the texel now. + /* X, Y, and S are now the coordinates of the pixel in the source image + * that we want to texture from. Exception: if we are blending, then S is + * irrelevant, because we are going to fetch all samples. */ - texel_fetch(); + if (key->blend) { + single_to_blend(); + sample(); + } else { + /* We aren't blending, which means we just want to fetch a single sample + * from the source surface. The address that we want to fetch from is + * related to the X, Y and S values according to the formula: + * + * (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)). + * + * If the actual tiling and sample count of the source surface are not + * the same as the configuration of the texture, then we need to adjust + * the coordinates to compensate for the difference. 
+ */ + if (tex_tiled_w != key->src_tiled_w || + key->tex_samples != key->src_samples) { + encode_msaa(key->src_samples); + /* Now (X, Y) = detile(src_tiling, offset) */ + translate_tiling(key->src_tiled_w, tex_tiled_w); + /* Now (X, Y) = detile(tex_tiling, offset) */ + decode_msaa(key->tex_samples); + } - /* Finally, write the fetched value to the render target and terminate the - * thread. + /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)). + * + * In other words: X, Y, and S now contain values which, when passed to + * the texturing unit, will cause data to be read from the correct + * memory location. So we can fetch the texel now. + */ + texel_fetch(); + } + + /* Finally, write the fetched (or blended) value to the render target and + * terminate the thread. */ render_target_write(); return brw_get_program(&func, program_size); @@ -499,6 +586,8 @@ brw_blorp_blit_program::alloc_regs() = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW)); } this->xy_coord_index = 0; + this->sample_index + = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW)); this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW)); this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW)); @@ -511,11 +600,14 @@ brw_blorp_blit_program::alloc_regs() /* In the code that follows, X and Y can be used to quickly refer to the * active elements of x_coords and y_coords, and Xp and Yp ("X prime" and "Y * prime") to the inactive elements. + * + * S can be used to quickly refer to sample_index. */ #define X x_coords[xy_coord_index] #define Y y_coords[xy_coord_index] #define Xp x_coords[!xy_coord_index] #define Yp y_coords[!xy_coord_index] +#define S sample_index /* Quickly swap the roles of (X, Y) and (Xp, Yp). Saves us from having to do * MOVs to transfor (Xp, Yp) to (X, Y) after a coordinate transformation. @@ -564,6 +656,12 @@ brw_blorp_blit_program::compute_frag_coords() * pixels n+2 and n+3 are in the bottom half of the subspan. 
*/ brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100)); + + /* Since we always run the WM in a mode that causes a single fragment + * dispatch per pixel, it's not meaningful to compute a sample value. Just + * set it to 0. + */ + s_is_zero = true; } /** @@ -655,6 +753,86 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) } } +/** + * Emit code to compensate for the difference between MSAA and non-MSAA + * surfaces. + * + * This code modifies the X and Y coordinates according to the formula: + * + * (X', Y') = encode_msaa(num_samples, X, Y, S) + * + * (See brw_blorp_blit_program). + */ +void +brw_blorp_blit_program::encode_msaa(unsigned num_samples) +{ + if (num_samples == 0) { + /* No translation necessary. */ + } else { + /* encode_msaa_4x(X, Y, S) = (X', Y') + * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1) + * Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1) + */ + brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */ + if (!s_is_zero) { + brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */ + brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */ + } + brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1 + | (S & 0b1) << 1 */ + brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */ + brw_OR(&func, Xp, t1, t2); + brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */ + brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */ + if (!s_is_zero) { + brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */ + brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */ + } + brw_AND(&func, t2, Y, brw_imm_uw(1)); + brw_OR(&func, Yp, t1, t2); + SWAP_XY_AND_XPYP(); + } +} + +/** + * Emit code to compensate for the difference between MSAA and non-MSAA + * surfaces. + * + * This code modifies the X and Y coordinates according to the formula: + * + * (X', Y', S) = decode_msaa(num_samples, X, Y) + * + * (See brw_blorp_blit_program). 
+ */ +void +brw_blorp_blit_program::decode_msaa(unsigned num_samples) +{ + if (num_samples == 0) { + /* No translation necessary. */ + s_is_zero = true; + } else { + /* decode_msaa_4x(X, Y) = (X', Y', S) + * where X' = (X & ~0b11) >> 1 | (X & 0b1) + * Y' = (Y & ~0b11) >> 1 | (Y & 0b1) + * S = (Y & 0b10) | (X & 0b10) >> 1 + */ + brw_AND(&func, t1, X, brw_imm_uw(0xfffc)); /* X & ~0b11 */ + brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b11) >> 1 */ + brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */ + brw_OR(&func, Xp, t1, t2); + brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */ + brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */ + brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */ + brw_OR(&func, Yp, t1, t2); + brw_AND(&func, t1, Y, brw_imm_uw(2)); /* Y & 0b10 */ + brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */ + brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */ + brw_OR(&func, S, t1, t2); + s_is_zero = false; + SWAP_XY_AND_XPYP(); + } +} + /** * Emit code that kills pixels whose X and Y coordinates are outside the * boundary of the rectangle defined by the push constants (dst_x0, dst_y0, @@ -693,6 +871,36 @@ brw_blorp_blit_program::translate_dst_to_src() SWAP_XY_AND_XPYP(); } +/** + * Emit code to transform the X and Y coordinates as needed for blending + * together the different samples in an MSAA texture. + */ +void +brw_blorp_blit_program::single_to_blend() +{ + /* When looking up samples in an MSAA texture using the SAMPLE message, + * Gen6 requires the texture coordinates to be odd integers (so that they + * correspond to the center of a 2x2 block representing the four samples + * that make up a pixel). So we need to multiply our X and Y coordinates + * each by 2 and then add 1. 
+ */ + brw_SHL(&func, t1, X, brw_imm_w(1)); + brw_SHL(&func, t2, Y, brw_imm_w(1)); + brw_ADD(&func, Xp, t1, brw_imm_w(1)); + brw_ADD(&func, Yp, t2, brw_imm_w(1)); + SWAP_XY_AND_XPYP(); +} + +/** + * Emit code to look up a value in the texture using the SAMPLE message (which + * does blending of MSAA surfaces). + */ +void +brw_blorp_blit_program::sample() +{ + texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE, mrf_u_float, mrf_v_float); +} + /** * Emit code to look up a value in the texture using the SAMPLE_LD message * (which does a simple texel fetch). @@ -700,6 +908,7 @@ brw_blorp_blit_program::translate_dst_to_src() void brw_blorp_blit_program::texel_fetch() { + assert(s_is_zero); texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE_LD, retype(mrf_u_float, BRW_REGISTER_TYPE_UD), retype(mrf_v_float, BRW_REGISTER_TYPE_UD)); @@ -816,6 +1025,39 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt, use_wm_prog = true; memset(&wm_prog_key, 0, sizeof(wm_prog_key)); + if (src_mt->num_samples > 0 && dst_mt->num_samples > 0) { + /* We are blitting from a multisample buffer to a multisample buffer, so + * we must preserve samples within a pixel. This means we have to + * configure the render target and texture surface states as + * single-sampled, so that the WM program can access each sample + * individually. + */ + src.num_samples = dst.num_samples = 0; + } + + /* The render path must be configured to use the same number of samples as + * the destination buffer. + */ + num_samples = dst.num_samples; + + GLenum base_format = _mesa_get_format_base_format(src_mt->format); + if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about depth/stencil? */ + base_format != GL_STENCIL_INDEX && + src_mt->num_samples > 0 && dst_mt->num_samples == 0) { + /* We are downsampling a color buffer, so blend. 
*/ + wm_prog_key.blend = true; + } + + /* src_samples and dst_samples are the true sample counts */ + wm_prog_key.src_samples = src_mt->num_samples; + wm_prog_key.dst_samples = dst_mt->num_samples; + + /* tex_samples and rt_samples are the sample counts that are set up in + * SURFACE_STATE. + */ + wm_prog_key.tex_samples = src.num_samples; + wm_prog_key.rt_samples = dst.num_samples; + wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled; wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled; x0 = wm_push_consts.dst_x0 = dst_x0; @@ -825,6 +1067,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt, wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x); wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y); + if (dst.num_samples == 0 && dst_mt->num_samples > 0) { + /* We must expand the rectangle we send through the rendering pipeline, + * to account for the fact that we are mapping the destination region as + * single-sampled when it is in fact multisampled. We must also align + * it to a multiple of the multisampling pattern, because the + * differences between multisampled and single-sampled surface formats + * will mean that pixels are scrambled within the multisampling pattern. + * TODO: what if this makes the coordinates too large? 
+ */ + x0 = (x0 * 2) & ~3; + y0 = (y0 * 2) & ~3; + x1 = ALIGN(x1 * 2, 4); + y1 = ALIGN(y1 * 2, 4); + wm_prog_key.use_kill = true; + } + if (dst.map_stencil_as_y_tiled) { /* We must modify the rectangle we send through the rendering pipeline, * to account for the fact that we are mapping it as Y-tiled when it is diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8ffd208ef64..a7684166949 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1099,6 +1099,14 @@ brw_blorp_framebuffer(struct intel_context *intel, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +/* gen6_multisample_state.c */ +void +gen6_emit_3dstate_multisample(struct brw_context *brw, + unsigned num_samples); +void +gen6_emit_3dstate_sample_mask(struct brw_context *brw, + unsigned num_samples); + /*====================================================================== diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 01bad5c1892..aaab5a2158f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -456,6 +456,11 @@ /* Surface state DW4 */ #define BRW_SURFACE_MIN_LOD_SHIFT 28 #define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) +#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) +#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) +#define GEN7_SURFACE_MULTISAMPLECOUNT_1 0 +#define GEN7_SURFACE_MULTISAMPLECOUNT_4 2 +#define GEN7_SURFACE_MULTISAMPLECOUNT_8 3 /* Surface state DW5 */ #define BRW_SURFACE_X_OFFSET_SHIFT 25 @@ -1305,6 +1310,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) # define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) # define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0) # define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) /* DW7: kernel 1 pointer */ /* DW8: kernel 2 pointer */ @@ -1388,6 +1394,7 @@ enum 
brw_wm_barycentric_interp_mode { # define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) # define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) /* DW2 */ +# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) # define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) #define _3DSTATE_PS 0x7820 /* GEN7+ */ diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 0c0389f8bdf..b00278a233d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -782,33 +782,16 @@ static void upload_invariant_state( struct brw_context *brw ) ADVANCE_BATCH(); } - if (intel->gen >= 6) { + if (intel->gen == 6) { int i; - int len = intel->gen >= 7 ? 4 : 3; - - BEGIN_BATCH(len); - OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2)); - OUT_BATCH(MS_PIXEL_LOCATION_CENTER | - MS_NUMSAMPLES_1); - OUT_BATCH(0); /* positions for 4/8-sample */ - if (intel->gen >= 7) - OUT_BATCH(0); - ADVANCE_BATCH(); - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); - OUT_BATCH(1); - ADVANCE_BATCH(); - - if (intel->gen < 7) { - for (i = 0; i < 4; i++) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(i << SVB_INDEX_SHIFT); - OUT_BATCH(0); - OUT_BATCH(0xffffffff); - ADVANCE_BATCH(); - } + for (i = 0; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); } } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 9e3736170c7..89d09637081 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -95,6 +95,7 @@ extern const struct brw_tracked_state gen6_color_calc_state; extern const struct brw_tracked_state gen6_depth_stencil_state; extern const struct brw_tracked_state gen6_gs_state; extern const struct brw_tracked_state gen6_gs_binding_table; +extern const struct brw_tracked_state gen6_multisample_state; 
extern const struct brw_tracked_state gen6_renderbuffer_surfaces; extern const struct brw_tracked_state gen6_sampler_state; extern const struct brw_tracked_state gen6_scissor_state; @@ -181,6 +182,7 @@ void *brw_state_batch(struct brw_context *brw, /* brw_wm_surface_state.c */ void gen4_init_vtable_surface_functions(struct brw_context *brw); uint32_t brw_get_surface_tiling_bits(uint32_t tiling); +uint32_t brw_get_surface_num_multisamples(unsigned num_samples); void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, int width, @@ -197,6 +199,8 @@ GLuint translate_tex_format(gl_format mesa_format, /* gen7_wm_surface_state.c */ void gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling); +void gen7_set_surface_num_multisamples(struct gen7_surface_state *surf, + unsigned num_samples); void gen7_init_vtable_surface_functions(struct brw_context *brw); void gen7_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index b02e1600d62..551fa6a5df1 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -153,6 +153,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_samplers, &gen6_sampler_state, + &gen6_multisample_state, /* TODO: is this the right spot? */ &gen6_vs_state, &gen6_gs_state, @@ -221,6 +222,7 @@ const struct brw_tracked_state *gen7_atoms[] = &brw_wm_binding_table, &gen7_samplers, + &gen6_multisample_state, /* TODO: is this the right spot? 
*/ &gen7_disable_stages, &gen7_vs_state, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 104d475f3f9..849da852277 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -633,6 +633,17 @@ brw_get_surface_tiling_bits(uint32_t tiling) } } + +uint32_t +brw_get_surface_num_multisamples(unsigned num_samples) +{ + if (num_samples > 0) + return BRW_SURFACE_MULTISAMPLECOUNT_4; + else + return BRW_SURFACE_MULTISAMPLECOUNT_1; +} + + static void brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) { @@ -943,7 +954,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, intel_image->base.Base.Level, intel_image->base.Base.Level, width, height, depth, - true); + true, + 0 /* num_samples */); intel_miptree_copy_teximage(intel, intel_image, new_mt); intel_miptree_reference(&irb->mt, intel_image->mt); @@ -993,7 +1005,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf[3] = (brw_get_surface_tiling_bits(region->tiling) | ((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(mt->num_samples); assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); /* Note that the low bits of these fields are missing, so diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 00aeda63e60..6db8f40c33b 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -100,28 +100,8 @@ gen6_blorp_emit_batch_head(struct brw_context *brw, ADVANCE_BATCH(); } - /* 3DSTATE_MULTISAMPLE */ - { - int length = intel->gen == 7 ? 
4 : 3; - - BEGIN_BATCH(length); - OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2)); - OUT_BATCH(MS_PIXEL_LOCATION_CENTER | - MS_NUMSAMPLES_1); - OUT_BATCH(0); - if (length >= 4) - OUT_BATCH(0); - ADVANCE_BATCH(); - - } - - /* 3DSTATE_SAMPLE_MASK */ - { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); - OUT_BATCH(1); - ADVANCE_BATCH(); - } + gen6_emit_3dstate_multisample(brw, params->num_samples); + gen6_emit_3dstate_sample_mask(brw, params->num_samples); /* CMD_STATE_BASE_ADDRESS * @@ -426,6 +406,10 @@ gen6_blorp_emit_surface_state(struct brw_context *brw, uint32_t wm_surf_offset; uint32_t width, height; surface->get_miplevel_dims(&width, &height); + if (surface->num_samples > 0) { /* TODO: seems clumsy */ + width /= 2; + height /= 2; + } if (surface->map_stencil_as_y_tiled) { width *= 2; height /= 2; @@ -462,7 +446,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw, 0 << BRW_SURFACE_DEPTH_SHIFT | (pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(surface->num_samples); surf[5] = (0 << BRW_SURFACE_X_OFFSET_SHIFT | 0 << BRW_SURFACE_Y_OFFSET_SHIFT | @@ -695,7 +679,9 @@ gen6_blorp_emit_sf_config(struct brw_context *brw, OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 18; ++i) + OUT_BATCH(0); /* dw2 */ + OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0); + for (int i = 0; i < 16; ++i) OUT_BATCH(0); ADVANCE_BATCH(); } @@ -754,6 +740,14 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */ } + if (params->num_samples > 0) { + dw6 |= GEN6_WM_MSRAST_ON_PATTERN; + dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; + } else { + dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; + dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; + } + BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_BATCH(params->use_wm_prog ? 
prog_offset : 0); @@ -761,7 +755,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, OUT_BATCH(0); /* No scratch needed */ OUT_BATCH(dw4); OUT_BATCH(dw5); - OUT_BATCH(dw6); /* only position */ + OUT_BATCH(dw6); OUT_BATCH(0); /* No other programs */ OUT_BATCH(0); /* No other programs */ ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c new file mode 100644 index 00000000000..e01ead10522 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -0,0 +1,102 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "intel_batchbuffer.h" + +#include "brw_context.h" +#include "brw_defines.h" + + +/** + * 3DSTATE_MULTISAMPLE + */ +void +gen6_emit_3dstate_multisample(struct brw_context *brw, + unsigned num_samples) +{ + struct intel_context *intel = &brw->intel; + + /* TODO: MSAA only implemented on Gen6 */ + if (intel->gen != 6) { + assert(num_samples == 0); + } + + int len = intel->gen >= 7 ? 4 : 3; + BEGIN_BATCH(len); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + (num_samples > 0 ? MS_NUMSAMPLES_4 : MS_NUMSAMPLES_1)); + OUT_BATCH(num_samples > 0 ? 0xae2ae662 : 0); /* positions for 4/8-sample */ + if (intel->gen >= 7) + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/** + * 3DSTATE_SAMPLE_MASK + */ +void +gen6_emit_3dstate_sample_mask(struct brw_context *brw, + unsigned num_samples) +{ + struct intel_context *intel = &brw->intel; + + /* TODO: MSAA only implemented on Gen6 */ + if (intel->gen != 6) { + assert(num_samples == 0); + } + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(num_samples > 0 ? 15 : 1); + ADVANCE_BATCH(); +} + + +static void upload_multisample_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + unsigned num_samples = 0; + + /* _NEW_BUFFERS */ + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) + num_samples = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples; + + /* 3DSTATE_MULTISAMPLE is nonpipelined. 
*/ + intel_emit_post_sync_nonzero_flush(intel); + + gen6_emit_3dstate_multisample(brw, num_samples); + gen6_emit_3dstate_sample_mask(brw, num_samples); +} + + +const struct brw_tracked_state gen6_multisample_state = { + .dirty = { + .mesa = _NEW_BUFFERS, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_multisample_state +}; diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 5c4293ca91e..e0aaa9074f1 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -122,6 +122,10 @@ upload_sf_state(struct brw_context *brw) int i; /* _NEW_BUFFER */ bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer); + bool multisampled = false; + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) + multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0; + int attr = 0, input_index = 0; int urb_entry_read_offset = 1; float point_size; @@ -226,13 +230,20 @@ upload_sf_state(struct brw_context *brw) } /* _NEW_LINE */ - dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << - GEN6_SF_LINE_WIDTH_SHIFT; + { + uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7); + /* TODO: line width of 0 is not allowed when MSAA enabled */ + if (line_width_u3_7 == 0) + line_width_u3_7 = 1; + dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; + } if (ctx->Line.SmoothFlag) { dw3 |= GEN6_SF_LINE_AA_ENABLE; dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; } + if (multisampled) + dw3 |= GEN6_SF_MSRAST_ON_PATTERN; /* _NEW_PROGRAM | _NEW_POINT */ if (!(ctx->VertexProgram.PointSizeEnabled || diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index fd1eca45049..28b3c2989c3 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -98,6 +98,11 @@ upload_wm_state(struct brw_context *brw) const struct brw_fragment_program *fp = 
brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; + bool multisampled = false; + + /* _NEW_BUFFERS */ + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) + multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { @@ -185,6 +190,13 @@ upload_wm_state(struct brw_context *brw) dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; + if (multisampled) { + dw6 |= GEN6_WM_MSRAST_ON_PATTERN; + dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; + } else { + dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; + dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; + } BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index f10d0aab2f8..fbb94dfed56 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -143,6 +143,10 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, uint32_t wm_surf_offset; uint32_t width, height; surface->get_miplevel_dims(&width, &height); + if (surface->num_samples > 0) { /* TODO: wrong for 8x */ + width /= 2; + height /= 2; + } if (surface->map_stencil_as_y_tiled) { width *= 2; height /= 2; @@ -181,6 +185,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, pitch_bytes *= 2; surf->ss3.pitch = pitch_bytes - 1; + gen7_set_surface_num_multisamples(surf, surface->num_samples); + if (intel->is_haswell) { surf->ss7.shader_chanel_select_r = HSW_SCS_RED; surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN; @@ -366,7 +372,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); OUT_BATCH(params->depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(0); + OUT_BATCH(params->num_samples > 0 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -397,7 +403,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, { struct intel_context *intel = &brw->intel; - uint32_t dw1 = 0; + uint32_t dw1 = 0, dw2 = 0; switch (params->hiz_op) { case GEN6_HIZ_OP_DEPTH_CLEAR: @@ -423,10 +429,18 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ } + if (params->num_samples > 0) { + dw1 |= GEN7_WM_MSRAST_ON_PATTERN; + dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; + } else { + dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; + } + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); - OUT_BATCH(0); + OUT_BATCH(dw2); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 5c6fcedcedf..8a6c09bf8ac 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -161,6 +161,9 @@ upload_sf_state(struct brw_context *brw) float point_size; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer); + bool multisampled = false; + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) + multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0; dw1 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; @@ -243,8 +246,13 @@ upload_sf_state(struct brw_context *brw) dw2 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_LINE */ - dw2 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << - GEN6_SF_LINE_WIDTH_SHIFT; + { + uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7); + /* TODO: line width of 0 is not allowed when MSAA enabled */ + if (line_width_u3_7 == 0) + line_width_u3_7 = 1; + dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; + } if (ctx->Line.SmoothFlag) { dw2 |= GEN6_SF_LINE_AA_ENABLE; dw2 |= GEN6_SF_LINE_AA_MODE_TRUE; @@ -253,6 +261,8 @@ upload_sf_state(struct brw_context *brw) if (ctx->Line.StippleFlag && 
intel->is_haswell) { dw2 |= HSW_SF_LINE_STIPPLE_ENABLE; } + if (multisampled) + dw2 |= GEN6_SF_MSRAST_ON_PATTERN; /* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select? * FINISHME: AA Line Distance Mode? diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 024c855ab12..2a0462f3fc9 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -39,9 +39,14 @@ upload_wm_state(struct brw_context *brw) const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); bool writes_depth = false; - uint32_t dw1; + bool multisampled = false; + uint32_t dw1, dw2; - dw1 = 0; + /* _NEW_BUFFERS */ + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) + multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0; + + dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; @@ -74,11 +79,18 @@ upload_wm_state(struct brw_context *brw) dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } + if (multisampled) { + dw1 |= GEN7_WM_MSRAST_ON_PATTERN; + dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; + } else { + dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; + } BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); - OUT_BATCH(0); + OUT_BATCH(dw2); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index d84e07582a0..5aa62bddb1a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -54,6 +54,20 @@ gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling) } } + +void +gen7_set_surface_num_multisamples(struct gen7_surface_state *surf, + unsigned num_samples) +{ + if (num_samples > 4) + surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_8; + else if (num_samples > 0) + 
surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_4; + else + surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_1; +} + + static void gen7_update_buffer_texture_surface(struct gl_context *ctx, GLuint unit) { @@ -328,6 +342,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, gen7_set_surface_tiling(surf, region->tiling); surf->ss3.pitch = (region->pitch * region->cpp) - 1; + gen7_set_surface_num_multisamples(surf, irb->mt->num_samples); + if (intel->is_haswell) { surf->ss7.shader_chanel_select_r = HSW_SCS_RED; surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index f92d78f378a..bbd5f6652e9 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -188,6 +188,29 @@ intel_unmap_renderbuffer(struct gl_context *ctx, } +/** + * Round up the requested multisample count to the next supported sample size. + */ +static unsigned +quantize_num_samples(struct intel_context *intel, unsigned num_samples) +{ + switch (intel->gen) { + case 6: + /* Gen6 supports only 4x multisampling. */ + if (num_samples > 0) + return 4; + else + return 0; + case 7: + /* TODO: MSAA only implemented on Gen6 */ + return 0; + default: + /* MSAA unsupported */ + return 0; + } +} + + /** * Called via glRenderbufferStorageEXT() to set the format and allocate * storage for a user-created renderbuffer. 
@@ -199,6 +222,7 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer { struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); + rb->NumSamples = quantize_num_samples(intel, rb->NumSamples); ASSERT(rb->Name != 0); @@ -241,12 +265,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer return true; irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format, - width, height); + width, height, + rb->NumSamples); if (!irb->mt) return false; if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { - bool ok = intel_miptree_alloc_hiz(intel, irb->mt); + bool ok = intel_miptree_alloc_hiz(intel, irb->mt, rb->NumSamples); if (!ok) { intel_miptree_release(&irb->mt); return false; @@ -495,7 +520,7 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel, if (mt->hiz_mt == NULL && intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { - intel_miptree_alloc_hiz(intel, mt); + intel_miptree_alloc_hiz(intel, mt, 0 /* num_samples */); if (!mt->hiz_mt) return false; } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 91ebc8d4e4d..99f42303006 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -72,7 +72,8 @@ intel_miptree_create_internal(struct intel_context *intel, GLuint width0, GLuint height0, GLuint depth0, - bool for_region) + bool for_region, + GLuint num_samples) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); int compress_byte = 0; @@ -92,6 +93,7 @@ intel_miptree_create_internal(struct intel_context *intel, mt->width0 = width0; mt->height0 = height0; mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format); + mt->num_samples = num_samples; mt->compressed = compress_byte ? 
1 : 0; mt->refcount = 1; @@ -115,7 +117,8 @@ intel_miptree_create_internal(struct intel_context *intel, mt->width0, mt->height0, mt->depth0, - true); + true, + num_samples); if (!mt->stencil_mt) { intel_miptree_release(&mt); return NULL; @@ -161,7 +164,8 @@ intel_miptree_create(struct intel_context *intel, GLuint width0, GLuint height0, GLuint depth0, - bool expect_accelerated_upload) + bool expect_accelerated_upload, + GLuint num_samples) { struct intel_mipmap_tree *mt; uint32_t tiling = I915_TILING_NONE; @@ -172,7 +176,21 @@ intel_miptree_create(struct intel_context *intel, (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) tiling = I915_TILING_Y; - else if (width0 >= 64) + else if (num_samples > 0) { + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled + * Surface"): + * + * [DevSNB+]: For multi-sample render targets, this field must be + * 1. MSRTs can only be tiled. + * + * Our usual reason for preferring X tiling (fast blits using the + * blitting engine) doesn't apply to MSAA, since we'll generally be + * downsampling or upsampling when blitting between the MSAA buffer + * and another buffer, and the blitting engine doesn't support that. + * So use Y tiling, since it makes better use of the cache. 
+ */ + tiling = I915_TILING_Y; + } else if (width0 >= 64) tiling = I915_TILING_X; } @@ -189,7 +207,7 @@ intel_miptree_create(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, format, first_level, last_level, width0, height0, depth0, - false); + false, num_samples); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -225,7 +243,7 @@ intel_miptree_create_for_region(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, format, 0, 0, region->width, region->height, 1, - true); + true, 0 /* num_samples */); if (!mt) return mt; @@ -238,12 +256,24 @@ struct intel_mipmap_tree* intel_miptree_create_for_renderbuffer(struct intel_context *intel, gl_format format, uint32_t width, - uint32_t height) + uint32_t height, + uint32_t num_samples) { struct intel_mipmap_tree *mt; + /* Adjust width/height for MSAA */ + if (num_samples > 4) { + num_samples = 8; + width *= 4; + height *= 2; + } else if (num_samples > 0) { + num_samples = 4; + width *= 2; + height *= 2; + } + mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, - width, height, 1, true); + width, height, 1, true, num_samples); return mt; } @@ -513,7 +543,8 @@ intel_miptree_copy_teximage(struct intel_context *intel, bool intel_miptree_alloc_hiz(struct intel_context *intel, - struct intel_mipmap_tree *mt) + struct intel_mipmap_tree *mt, + GLuint num_samples) { assert(mt->hiz_mt == NULL); mt->hiz_mt = intel_miptree_create(intel, @@ -524,7 +555,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel, mt->width0, mt->height0, mt->depth0, - true); + true, + num_samples); if (!mt->hiz_mt) return false; diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 0886c95f234..ca1666da2f5 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -169,6 +169,7 @@ struct intel_mipmap_tree GLuint width0, height0, depth0; /**< Level zero image 
dimensions */ GLuint cpp; + GLuint num_samples; bool compressed; /* Derived from the above: @@ -231,7 +232,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLuint width0, GLuint height0, GLuint depth0, - bool expect_accelerated_upload); + bool expect_accelerated_upload, + GLuint num_samples); struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, @@ -250,7 +252,8 @@ struct intel_mipmap_tree* intel_miptree_create_for_renderbuffer(struct intel_context *intel, gl_format format, uint32_t width, - uint32_t height); + uint32_t height, + uint32_t num_samples); /** \brief Assert that the level and layer are valid for the miptree. */ static inline void @@ -341,7 +344,8 @@ intel_miptree_s8z24_gather(struct intel_context *intel, bool intel_miptree_alloc_hiz(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + GLuint num_samples); void intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 651095a27de..68f4ff4c9ed 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -99,7 +99,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel, width, height, depth, - expect_accelerated_upload); + expect_accelerated_upload, + 0 /* num_samples */); } /* There are actually quite a few combinations this will work for, diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 256c21ec826..cadba29ff41 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -86,7 +86,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) width, height, depth, - true); + true, + 0 /* num_samples */); if (!intelObj->mt) return false; } -- 2.30.2