From d23aa634e0d45bbeda0f48033cc42656259ce0ef Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 14 Apr 2015 14:57:51 -0700 Subject: [PATCH] i965/skl: Add fast color clear infrastructure Patch was originally called: i965/skl: Enable fast color clears on SKL Skylake introduces some differences in the way that fast clears are programmed and in the restrictions for using fast clears. Since some of these are non-obvious, and fast clears are currently disabled globally, we can enable the simple stuff here and leave the weirder stuff and separately reviewable work. Based on a patch originally from Kristian. Note that within this patch the change in scaling factors could be achieved with this hunk instead. I've opted to keep things more like how the docs describe it however. --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -150,9 +150,13 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw, /* In release builds, fall through */ case I915_TILING_Y: *width_px = 32 / mt->cpp; - *height = 4; + if (brw->gen >= 9) + *height = 2; + else + *height = 4; v2: Add braces for the multiline (Matt + Chad) Comment updates (requested by Chad) Modified commit message Commit message from Chad explaining the MCS height change (Chad) Signed-off-by: Ben Widawsky Reviewed-by: Neil Roberts Reviewed-by: Chad Versace --- .../drivers/dri/i965/brw_meta_fast_clear.c | 55 +++++++++++++------ .../drivers/dri/i965/gen8_surface_state.c | 16 +++++- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 ++++++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 13 ++++- 4 files changed, 81 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 211c0a44162..938e028f58d 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances) } static void -get_fast_clear_rect(struct gl_framebuffer *fb, +get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, struct intel_renderbuffer *irb, struct rect *rect) { unsigned int x_align, y_align; @@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb, */ intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align); x_align *= 16; - y_align *= 32; + + /* SKL+ line alignment requirement for Y-tiled are half those of the prior + * generations. + */ + if (brw->gen >= 9) + y_align *= 16; + else + y_align *= 32; /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): @@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb, * terms of (width,height) of the RT. * * MSAA Width of Clear Rect Height of Clear Rect + * 2X Ceil(1/8*width) Ceil(1/2*height) * 4X Ceil(1/8*width) Ceil(1/2*height) * 8X Ceil(1/2*width) Ceil(1/2*height) + * 16X width Ceil(1/2*height) * * The text "with upper left co-ordinate to coincide with actual * rectangle being cleared" is a little confusing--it seems to imply @@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb, case 8: x_scaledown = 2; break; + case 16: + x_scaledown = 1; + break; default: unreachable("Unexpected sample count for fast clear"); } @@ -357,18 +369,25 @@ is_color_fast_clear_compatible(struct brw_context *brw, /** * Convert the given color to a bitfield suitable for ORing into DWORD 7 of - * SURFACE_STATE. + * SURFACE_STATE (DWORD 12-15 on SKL+). */ -static uint32_t -compute_fast_clear_color_bits(const union gl_color_union *color) +static void +set_fast_clear_color(struct brw_context *brw, + struct intel_mipmap_tree *mt, + const union gl_color_union *color) { - uint32_t bits = 0; - for (int i = 0; i < 4; i++) { - /* Testing for non-0 works for integer and float colors */ - if (color->f[i] != 0.0f) - bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + if (brw->gen >= 9) { + mt->gen9_fast_clear_color = *color; + } else { + mt->fast_clear_color_value = 0; + for (int i = 0; i < 4; i++) { + /* Testing for non-0 works for integer and float colors */ + if (color->f[i] != 0.0f) { + mt->fast_clear_color_value |= + 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + } + } } - return bits; } static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 }; @@ -510,8 +529,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, switch (clear_type) { case FAST_CLEAR: - irb->mt->fast_clear_color_value = - compute_fast_clear_color_bits(&ctx->Color.ClearColor); + set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor); irb->need_downsample = true; /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the @@ -527,7 +545,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; irb->need_downsample = true; fast_clear_buffers |= 1 << index; - get_fast_clear_rect(fb, irb, &fast_clear_rect); + get_fast_clear_rect(brw, fb, irb, &fast_clear_rect); break; case REP_CLEAR: @@ -662,8 +680,9 @@ get_resolve_rect(struct brw_context *brw, * * The scaledown factors in the table that follows are related to the * alignment size returned by intel_get_non_msrt_mcs_alignment() by a - * multiplier. For IVB and HSW, we divide by two, for BDW we multiply - * by 8 and 16 and 8 and 8 for SKL. + * multiplier. For IVB and HSW, we divide by two, for BDW we multiply + * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling + * by a factor of 2. */ intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); @@ -709,6 +728,10 @@ brw_meta_resolve_color(struct brw_context *brw, brw_bind_rep_write_shader(brw, (float *) fast_clear_color); + /* SKL+ also has a resolve mode for compressed render targets and thus more + * bits to let us select the type of resolve. For fast clear resolves, it + * turns out we can use the same value as pre-SKL though. + */ set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 140a6544983..69098583357 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -187,7 +187,21 @@ gen8_emit_fast_clear_color(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t *surf) { - surf[7] |= mt->fast_clear_color_value; + if (brw->gen >= 9) { +#define check_fast_clear_val(x) \ + assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \ + mt->gen9_fast_clear_color.f[x] == 1.0) + check_fast_clear_val(0); + check_fast_clear_val(1); + check_fast_clear_val(2); + check_fast_clear_val(3); +#undef check_fast_clear_val + surf[12] = mt->gen9_fast_clear_color.ui[0]; + surf[13] = mt->gen9_fast_clear_color.ui[1]; + surf[14] = mt->gen9_fast_clear_color.ui[2]; + surf[15] = mt->gen9_fast_clear_color.ui[3]; + } else + surf[7] |= mt->fast_clear_color_value; } static void diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b1a7632d82f..4c3f2c00d6f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, * 64bpp, and 128bpp. + * + * From the Skylake documentation, it is made clear that X-tiling is no longer + * supported: + * + * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf + * non-MSRTs only. */ static bool intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, @@ -1495,6 +1501,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height); unsigned width_divisor = block_width_px * 4; unsigned height_divisor = block_height * 8; + + /* The Skylake MCS is twice as tall as the Broadwell MCS. + * + * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines + * in the main surface. In Skylake, it's two bits. The extra bit + * doubles the MCS height, not width, because in Skylake the MCS is always + * Y-tiled. + */ + if (brw->gen >= 9) + height_divisor /= 2; + unsigned mcs_width = ALIGN(mt->logical_width0, width_divisor) / width_divisor; unsigned mcs_height = diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 805cd714d88..64f73ea9ae5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -633,15 +633,22 @@ struct intel_mipmap_tree * The SURFACE_STATE bits associated with the last fast color clear to this * color mipmap tree, if any. * - * This value will only ever contain ones in bits 28-31, so it is safe to - * OR into dword 7 of SURFACE_STATE. + * Prior to GEN9 there is a single bit for RGBA clear values which gives you + * the option of 2^4 clear colors. Each bit determines if the color channel + * is fully saturated or unsaturated (Cherryview does add a 32b value per + * channel, but it is globally applied instead of being part of the render + * surface state). Starting with GEN9, the surface state accepts a 32b value + * for each color channel. * * @see RENDER_SURFACE_STATE.RedClearColor * @see RENDER_SURFACE_STATE.GreenClearColor * @see RENDER_SURFACE_STATE.BlueClearColor * @see RENDER_SURFACE_STATE.AlphaClearColor */ - uint32_t fast_clear_color_value; + union { + uint32_t fast_clear_color_value; + union gl_color_union gen9_fast_clear_color; + }; /** * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS -- 2.30.2