From f3c068c5c89c8c3dce257ecc2b640f375d3f4836 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 21 Jan 2017 03:50:42 -0800 Subject: [PATCH] i965: Use a better guardband calculation. (Patch co-authored by Jason and Ken.) We scaled the guardband based on the viewport size, but failed to take into account the translation portion of the viewport transform. This meant the guardband was always centered around the origin. We want it to be centered around the screen-space drawing area, which is the intersection of the viewport and the render target. At best, getting this wrong would reduce the guardband's effectiveness in some cases. At worst, it might break things - objects outside of the guardband are trivially rejected, so getting the guardband in the wrong place and leaving guardband clipping enabled could cause problems. v2: drop clamping of positive maximums. Cc: "17.0" Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_state.h | 5 + .../drivers/dri/i965/gen6_viewport_state.c | 116 ++++++++++++++---- .../drivers/dri/i965/gen7_viewport_state.c | 39 +++--- .../drivers/dri/i965/gen8_viewport_state.c | 48 ++------ 4 files changed, 126 insertions(+), 82 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index ec6006c3fc6..36307c7ef90 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -454,6 +454,11 @@ use_state_point_size(const struct brw_context *brw) (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0; } +void brw_calculate_guardband_size(const struct gen_device_info *devinfo, + uint32_t fb_width, uint32_t fb_height, + float m00, float m11, float m30, float m31, + float *xmin, float *xmax, + float *ymin, float *ymax); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 2e08f1a1290..569e5bd4a15 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -33,18 +33,105 @@ #include "main/framebuffer.h" #include "main/viewport.h" +void +brw_calculate_guardband_size(const struct gen_device_info *devinfo, + uint32_t fb_width, uint32_t fb_height, + float m00, float m11, float m30, float m31, + float *xmin, float *xmax, + float *ymin, float *ymax) +{ + /* According to the "Vertex X,Y Clamping and Quantization" section of the + * Strips and Fans documentation: + * + * "The vertex X and Y screen-space coordinates are also /clamped/ to the + * fixed-point "guardband" range supported by the rasterization hardware" + * + * and + * + * "In almost all circumstances, if an object’s vertices are actually + * modified by this clamping (i.e., had X or Y coordinates outside of + * the guardband extent the rendered object will not match the intended + * result. Therefore software should take steps to ensure that this does + * not happen - e.g., by clipping objects such that they do not exceed + * these limits after the Drawing Rectangle is applied." + * + * I believe the fundamental restriction is that the rasterizer (in + * the SF/WM stages) have a limit on the number of pixels that can be + * rasterized. We need to ensure any coordinates beyond the rasterizer + * limit are handled by the clipper. So effectively that limit becomes + * the clipper's guardband size. + * + * It goes on to say: + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * This makes no sense. Gen7+ hardware supports 16K render targets, + * and you definitely need to be able to draw polygons that fill the + * surface. Our assumption is that the rasterizer was limited to 8K + * on Sandybridge, which only supports 8K surfaces, and it was actually + * increased to 16K on Ivybridge and later. + * + * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. + */ + const float gb_size = devinfo->gen >= 7 ? 16384.0f : 8192.0f; + + if (m00 != 0 && m11 != 0) { + /* First, we compute the screen-space render area */ + const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); + const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); + const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); + const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); + + /* We want the guardband to be centered on that */ + const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; + const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; + const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; + const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; + + /* Now we need it in native device coordinates */ + const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; + const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; + const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; + const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; + + /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be + * flipped upside-down. X should be fine though. + */ + assert(ndc_gb_xmin <= ndc_gb_xmax); + *xmin = ndc_gb_xmin; + *xmax = ndc_gb_xmax; + *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); + *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); + } else { + /* The viewport scales to 0, so nothing will be rendered. */ + *xmin = 0.0f; + *xmax = 0.0f; + *ymin = 0.0f; + *ymax = 0.0f; + } +} + static void gen6_upload_sf_and_clip_viewports(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gen6_sf_viewport *sfv; struct brw_clipper_viewport *clv; GLfloat y_scale, y_bias; - const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); /* BRW_NEW_VIEWPORT_COUNT */ const unsigned viewport_count = brw->clip.viewport_count; + /* _NEW_BUFFERS */ + struct gl_framebuffer *fb = ctx->DrawBuffer; + const bool render_to_fbo = _mesa_is_user_fbo(fb); + const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); + const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, sizeof(*sfv) * viewport_count, 32, &brw->sf.vp_offset); @@ -54,13 +141,12 @@ gen6_upload_sf_and_clip_viewports(struct brw_context *brw) sizeof(*clv) * viewport_count, 32, &brw->clip.vp_offset); - /* _NEW_BUFFERS */ if (render_to_fbo) { y_scale = 1.0; y_bias = 0.0; } else { y_scale = -1.0; - y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); + y_bias = (float)fb_height; } for (unsigned i = 0; i < viewport_count; i++) { @@ -75,25 +161,11 @@ gen6_upload_sf_and_clip_viewports(struct brw_context *brw) sfv[i].m31 = translate[1] * y_scale + y_bias; sfv[i].m32 = translate[2]; - /* According to the "Vertex X,Y Clamping and Quantization" section of the - * Strips and Fans documentation, objects must not have a screen-space - * extents of over 8192 pixels, or they may be mis-rasterized. The maximum - * screen space coordinates of a small object may larger, but we have no - * way to enforce the object size other than through clipping. - * - * If you're surprised that we set clip to -gbx to +gbx and it seems like - * we'll end up with 16384 wide, note that for a 8192-wide render target, - * we'll end up with a normal (-1, 1) clip volume that just covers the - * drawable. - */ - const float maximum_post_clamp_delta = 8192; - float gbx = maximum_post_clamp_delta / ctx->ViewportArray[i].Width; - float gby = maximum_post_clamp_delta / ctx->ViewportArray[i].Height; - - clv[i].xmin = -gbx; - clv[i].xmax = gbx; - clv[i].ymin = -gby; - clv[i].ymax = gby; + brw_calculate_guardband_size(devinfo, fb_width, fb_height, + sfv[i].m00, sfv[i].m11, + sfv[i].m30, sfv[i].m31, + &clv[i].xmin, &clv[i].xmax, + &clv[i].ymin, &clv[i].ymax); } brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index c447331a2e5..000f238f3fe 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -33,13 +33,19 @@ static void gen7_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct gen_device_info *devinfo = &brw->screen->devinfo; GLfloat y_scale, y_bias; - const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); struct gen7_sf_clip_viewport *vp; /* BRW_NEW_VIEWPORT_COUNT */ const unsigned viewport_count = brw->clip.viewport_count; + /* _NEW_BUFFERS */ + struct gl_framebuffer *fb = ctx->DrawBuffer; + const bool render_to_fbo = _mesa_is_user_fbo(fb); + const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); + const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, sizeof(*vp) * viewport_count, 64, &brw->sf.vp_offset); @@ -52,34 +58,13 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) y_bias = 0.0; } else { y_scale = -1.0; - y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer); + y_bias = (float)fb_height; } for (unsigned i = 0; i < viewport_count; i++) { float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, i, scale, translate); - /* According to the "Vertex X,Y Clamping and Quantization" section of - * the Strips and Fans documentation, objects must not have a - * screen-space extents of over 8192 pixels, or they may be - * mis-rasterized. The maximum screen space coordinates of a small - * object may larger, but we have no way to enforce the object size - * other than through clipping. - * - * If you're surprised that we set clip to -gbx to +gbx and it seems - * like we'll end up with 16384 wide, note that for a 8192-wide render - * target, we'll end up with a normal (-1, 1) clip volume that just - * covers the drawable. - */ - const float maximum_guardband_extent = 8192; - const float gbx = maximum_guardband_extent / ctx->ViewportArray[i].Width; - const float gby = maximum_guardband_extent / ctx->ViewportArray[i].Height; - - vp[i].guardband.xmin = -gbx; - vp[i].guardband.xmax = gbx; - vp[i].guardband.ymin = -gby; - vp[i].guardband.ymax = gby; - /* _NEW_VIEWPORT */ vp[i].viewport.m00 = scale[0]; vp[i].viewport.m11 = scale[1] * y_scale; @@ -87,6 +72,14 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) vp[i].viewport.m30 = translate[0]; vp[i].viewport.m31 = translate[1] * y_scale + y_bias; vp[i].viewport.m32 = translate[2]; + + brw_calculate_guardband_size(devinfo, fb_width, fb_height, + vp[i].viewport.m00, vp[i].viewport.m11, + vp[i].viewport.m30, vp[i].viewport.m31, + &vp[i].guardband.xmin, + &vp[i].guardband.xmax, + &vp[i].guardband.ymin, + &vp[i].guardband.ymax); } BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c index 84000e3a7e2..101ad2b110e 100644 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c @@ -33,13 +33,18 @@ static void gen8_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct gen_device_info *devinfo = &brw->screen->devinfo; float y_scale, y_bias; - const float fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); - const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); /* BRW_NEW_VIEWPORT_COUNT */ const unsigned viewport_count = brw->clip.viewport_count; + /* _NEW_BUFFERS */ + struct gl_framebuffer *fb = ctx->DrawBuffer; + const bool render_to_fbo = _mesa_is_user_fbo(fb); + const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); + const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); + float *vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, 16 * 4 * viewport_count, 64, &brw->sf.vp_offset); @@ -52,7 +57,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) y_bias = 0; } else { y_scale = -1.0; - y_bias = fb_height; + y_bias = (float)fb_height; } for (unsigned i = 0; i < viewport_count; i++) { @@ -71,40 +76,9 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) vp[6] = 0; vp[7] = 0; - /* According to the "Vertex X,Y Clamping and Quantization" section of the - * Strips and Fans documentation, objects must not have a screen-space - * extents of over 8192 pixels, or they may be mis-rasterized. The - * maximum screen space coordinates of a small object may larger, but we - * have no way to enforce the object size other than through clipping. - * - * The goal is to create the maximum sized guardband (8K x 8K) with the - * viewport rectangle in the center of the guardband. This looks weird - * because the hardware wants coordinates that are scaled to the viewport - * in NDC. In other words, an 8K x 8K viewport would have [-1,1] for X and Y. - * A 4K viewport would be [-2,2], 2K := [-4,4] etc. - * - * -------------------------------- - * |Guardband | - * | | - * | ------------ | - * | |viewport | | - * | | | | - * | | | | - * | |__________| | - * | | - * | | - * |______________________________| - * - */ - const float maximum_guardband_extent = 8192; - float gbx = maximum_guardband_extent / ctx->ViewportArray[i].Width; - float gby = maximum_guardband_extent / ctx->ViewportArray[i].Height; - - /* _NEW_VIEWPORT: Guardband Clipping */ - vp[8] = -gbx; /* x-min */ - vp[9] = gbx; /* x-max */ - vp[10] = -gby; /* y-min */ - vp[11] = gby; /* y-max */ + brw_calculate_guardband_size(devinfo, fb_width, fb_height, + vp[0], vp[1], vp[3], vp[4], + &vp[8], &vp[9], &vp[10], &vp[11]); /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport * The hardware will take the intersection of the drawing rectangle, -- 2.30.2