From 09fbc298283b41cf3f25d75842a6d8ab4952dca1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 2 Feb 2013 12:46:49 -0800 Subject: [PATCH] i965: Fix the SF Vertex URB Read Length calculation for Sandybridge. (This commit message was primarily written by Paul Berry, who explained what's going on far better than I would have.) Previous to this patch, we thought that the only restrictions on 3DSTATE_SF's URB read length were (a) it needs to be large enough to read all the VUE data that the SF needs, and (b) it can't be so large that it tries to read VUE data that doesn't exist. Since the VUE map already tells us how much VUE data exists, we didn't bother worrying about restriction (a); we just did the easy thing and programmed the read length to satisfy restriction (b). However, we didn't notice this erratum in the hardware docs: "[errata] Corruption/Hang possible if length programmed larger than recommended". Judging by the context surrounding this erratum, it's pretty clear that it means "URB read length must be exactly the size necessary to read all the VUE data that the SF needs, and no larger". Which means that we can't program the read length based on restriction (b)--we have to program it based on restriction (a). The URB read size needs to precisely match the amount of data that the SF consumes; it doesn't work to simply base it on the size of the VUE. Thankfully, the PRM contains the precise formula the hardware expects. Fixes random UI corruption in Steam's "Big Picture Mode", random terrain corruption in PlaneShift, and Piglit's fbo-5-varyings test. NOTE: This is a candidate for all stable branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56920 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60172 Tested-by: Jordan Justen (v1/Piglit) Tested-by: Martin Steigerwald (PlaneShift) Reviewed-by: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_sf_state.c | 34 ++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 5d7fd46bd86..7071acc8d27 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -118,7 +118,6 @@ upload_sf_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; - uint32_t urb_entry_read_length; /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); /* _NEW_LIGHT */ @@ -135,21 +134,7 @@ upload_sf_state(struct brw_context *brw) uint16_t attr_overrides[FRAG_ATTRIB_MAX]; uint32_t point_sprite_origin; - /* CACHE_NEW_VS_PROG */ - urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 - - urb_entry_read_offset); - if (urb_entry_read_length == 0) { - /* Setting the URB entry read length to 0 causes undefined behavior, so - * if we have no URB data to read, set it to 1. - */ - urb_entry_read_length = 1; - } - - dw1 = - GEN6_SF_SWIZZLE_ENABLE | - num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | - urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT; dw2 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; @@ -328,6 +313,23 @@ upload_sf_state(struct brw_context *brw) for (; input_index < FRAG_ATTRIB_MAX; input_index++) attr_overrides[input_index] = 0; + /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for + * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": + * + * "This field should be set to the minimum length required to read the + * maximum source attribute. The maximum source attribute is indicated + * by the maximum value of the enabled Attribute # Source Attribute if + * Attribute Swizzle Enable is set, Number of Output Attributes-1 if + * enable is not set. + * read_length = ceiling((max_source_attr + 1) / 2) + * + * [errata] Corruption/Hang possible if length programmed larger than + * recommended" + */ + uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2; + dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + BEGIN_BATCH(20); OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); OUT_BATCH(dw1); -- 2.30.2