From 13f0c278c528167b14badf8172827412526d2160 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 19 Jun 2019 15:52:55 -0500 Subject: [PATCH] i965,iris: Move guardband calculations to a common location Reviewed-by: Kenneth Graunke Reviewed-by: Lionel Landwerlin --- src/gallium/drivers/iris/iris_state.c | 89 +------------ src/intel/Makefile.sources | 1 + src/intel/common/gen_guardband.h | 117 ++++++++++++++++++ src/intel/common/meson.build | 1 + src/mesa/drivers/dri/i965/genX_state_upload.c | 94 +------------- 5 files changed, 126 insertions(+), 176 deletions(-) create mode 100644 src/intel/common/gen_guardband.h diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 13942df5008..b8d76bc8e12 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -161,6 +161,7 @@ __gen_combine_address(struct iris_batch *batch, void *location, #include "genxml/genX_pack.h" #include "genxml/gen_macros.h" #include "genxml/genX_bits.h" +#include "intel/common/gen_guardband.h" #if GEN_GEN == 8 #define MOCS_PTE 0x18 @@ -2347,86 +2348,6 @@ viewport_extent(const struct pipe_viewport_state *state, int axis, float sign) return copysignf(state->scale[axis], sign) + state->translate[axis]; } -static void -calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, - float m00, float m11, float m30, float m31, - float *xmin, float *xmax, - float *ymin, float *ymax) -{ - /* According to the "Vertex X,Y Clamping and Quantization" section of the - * Strips and Fans documentation: - * - * "The vertex X and Y screen-space coordinates are also /clamped/ to the - * fixed-point "guardband" range supported by the rasterization hardware" - * - * and - * - * "In almost all circumstances, if an object’s vertices are actually - * modified by this clamping (i.e., had X or Y coordinates outside of - * the guardband extent the rendered object will not match the intended - * result. Therefore software should take steps to ensure that this does - * not happen - e.g., by clipping objects such that they do not exceed - * these limits after the Drawing Rectangle is applied." - * - * I believe the fundamental restriction is that the rasterizer (in - * the SF/WM stages) have a limit on the number of pixels that can be - * rasterized. We need to ensure any coordinates beyond the rasterizer - * limit are handled by the clipper. So effectively that limit becomes - * the clipper's guardband size. - * - * It goes on to say: - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * This makes no sense. Gen7+ hardware supports 16K render targets, - * and you definitely need to be able to draw polygons that fill the - * surface. Our assumption is that the rasterizer was limited to 8K - * on Sandybridge, which only supports 8K surfaces, and it was actually - * increased to 16K on Ivybridge and later. - * - * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. - */ - const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f; - - if (m00 != 0 && m11 != 0) { - /* First, we compute the screen-space render area */ - const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); - const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); - const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); - const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); - - /* We want the guardband to be centered on that */ - const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; - const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; - const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; - const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; - - /* Now we need it in native device coordinates */ - const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; - const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; - const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; - const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; - - /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be - * flipped upside-down. X should be fine though. - */ - assert(ndc_gb_xmin <= ndc_gb_xmax); - *xmin = ndc_gb_xmin; - *xmax = ndc_gb_xmax; - *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); - *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); - } else { - /* The viewport scales to 0, so nothing will be rendered. */ - *xmin = 0.0f; - *xmax = 0.0f; - *ymin = 0.0f; - *ymax = 0.0f; - } -} - /** * The pipe->set_viewport_states() driver hook. * @@ -4560,10 +4481,10 @@ iris_upload_dirty_render_state(struct iris_context *ice, float vp_ymin = viewport_extent(state, 1, -1.0f); float vp_ymax = viewport_extent(state, 1, 1.0f); - calculate_guardband_size(cso_fb->width, cso_fb->height, - state->scale[0], state->scale[1], - state->translate[0], state->translate[1], - &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); + gen_calculate_guardband_size(cso_fb->width, cso_fb->height, + state->scale[0], state->scale[1], + state->translate[0], state->translate[1], + &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) { vp.ViewportMatrixElementm00 = state->scale[0]; diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index c1f87e13e4e..7a44b72f449 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -16,6 +16,7 @@ COMMON_FILES = \ common/gen_disasm.h \ common/gen_defines.h \ common/gen_gem.h \ + common/gen_guardband.h \ common/gen_l3_config.c \ common/gen_l3_config.h \ common/gen_urb_config.c \ diff --git a/src/intel/common/gen_guardband.h b/src/intel/common/gen_guardband.h new file mode 100644 index 00000000000..757324fd2ee --- /dev/null +++ b/src/intel/common/gen_guardband.h @@ -0,0 +1,117 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef GEN_GUARDBAND_H +#define GEN_GUARDBAND_H + +static inline void +gen_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, + float m00, float m11, float m30, float m31, + float *xmin, float *xmax, + float *ymin, float *ymax) +{ + /* According to the "Vertex X,Y Clamping and Quantization" section of the + * Strips and Fans documentation: + * + * "The vertex X and Y screen-space coordinates are also /clamped/ to the + * fixed-point "guardband" range supported by the rasterization hardware" + * + * and + * + * "In almost all circumstances, if an object’s vertices are actually + * modified by this clamping (i.e., had X or Y coordinates outside of + * the guardband extent the rendered object will not match the intended + * result. Therefore software should take steps to ensure that this does + * not happen - e.g., by clipping objects such that they do not exceed + * these limits after the Drawing Rectangle is applied." + * + * I believe the fundamental restriction is that the rasterizer (in + * the SF/WM stages) have a limit on the number of pixels that can be + * rasterized. We need to ensure any coordinates beyond the rasterizer + * limit are handled by the clipper. So effectively that limit becomes + * the clipper's guardband size. + * + * It goes on to say: + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * This makes no sense. Gen7+ hardware supports 16K render targets, + * and you definitely need to be able to draw polygons that fill the + * surface. Our assumption is that the rasterizer was limited to 8K + * on Sandybridge, which only supports 8K surfaces, and it was actually + * increased to 16K on Ivybridge and later. + * + * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. + */ + const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f; + + /* Workaround: prevent gpu hangs on SandyBridge + * by disabling guardband clipping for odd dimensions. + */ + if (GEN_GEN == 6 && (fb_width & 1 || fb_height & 1)) { + *xmin = -1.0f; + *xmax = 1.0f; + *ymin = -1.0f; + *ymax = 1.0f; + return; + } + + if (m00 != 0 && m11 != 0) { + /* First, we compute the screen-space render area */ + const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); + const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); + const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); + const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); + + /* We want the guardband to be centered on that */ + const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; + const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; + const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; + const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; + + /* Now we need it in native device coordinates */ + const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; + const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; + const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; + const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; + + /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be + * flipped upside-down. X should be fine though. + */ + assert(ndc_gb_xmin <= ndc_gb_xmax); + *xmin = ndc_gb_xmin; + *xmax = ndc_gb_xmax; + *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); + *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); + } else { + /* The viewport scales to 0, so nothing will be rendered. */ + *xmin = 0.0f; + *xmax = 0.0f; + *ymin = 0.0f; + *ymax = 0.0f; + } +} + +#endif /* GEN_GUARDBAND_H */ diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build index bf8d6feab2a..540d78f90b5 100644 --- a/src/intel/common/meson.build +++ b/src/intel/common/meson.build @@ -28,6 +28,7 @@ files_libintel_common = files( 'gen_disasm.c', 'gen_disasm.h', 'gen_gem.h', + 'gen_guardband.h', 'gen_l3_config.c', 'gen_l3_config.h', 'gen_urb_config.c', diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 961306b04fd..f182683d8a8 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -26,6 +26,7 @@ #include "dev/gen_device_info.h" #include "common/gen_sample_positions.h" #include "genxml/gen_macros.h" +#include "common/gen_guardband.h" #include "main/bufferobj.h" #include "main/context.h" @@ -2421,97 +2422,6 @@ static const struct brw_tracked_state genX(scissor_state) = { /* ---------------------------------------------------------------------- */ -static void -brw_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, - float m00, float m11, float m30, float m31, - float *xmin, float *xmax, - float *ymin, float *ymax) -{ - /* According to the "Vertex X,Y Clamping and Quantization" section of the - * Strips and Fans documentation: - * - * "The vertex X and Y screen-space coordinates are also /clamped/ to the - * fixed-point "guardband" range supported by the rasterization hardware" - * - * and - * - * "In almost all circumstances, if an object’s vertices are actually - * modified by this clamping (i.e., had X or Y coordinates outside of - * the guardband extent the rendered object will not match the intended - * result. Therefore software should take steps to ensure that this does - * not happen - e.g., by clipping objects such that they do not exceed - * these limits after the Drawing Rectangle is applied." - * - * I believe the fundamental restriction is that the rasterizer (in - * the SF/WM stages) have a limit on the number of pixels that can be - * rasterized. We need to ensure any coordinates beyond the rasterizer - * limit are handled by the clipper. So effectively that limit becomes - * the clipper's guardband size. - * - * It goes on to say: - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * This makes no sense. Gen7+ hardware supports 16K render targets, - * and you definitely need to be able to draw polygons that fill the - * surface. Our assumption is that the rasterizer was limited to 8K - * on Sandybridge, which only supports 8K surfaces, and it was actually - * increased to 16K on Ivybridge and later. - * - * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. - */ - const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f; - - /* Workaround: prevent gpu hangs on SandyBridge - * by disabling guardband clipping for odd dimensions. - */ - if (GEN_GEN == 6 && (fb_width & 1 || fb_height & 1)) { - *xmin = -1.0f; - *xmax = 1.0f; - *ymin = -1.0f; - *ymax = 1.0f; - return; - } - - if (m00 != 0 && m11 != 0) { - /* First, we compute the screen-space render area */ - const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); - const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); - const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); - const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); - - /* We want the guardband to be centered on that */ - const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; - const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; - const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; - const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; - - /* Now we need it in native device coordinates */ - const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; - const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; - const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; - const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; - - /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be - * flipped upside-down. X should be fine though. - */ - assert(ndc_gb_xmin <= ndc_gb_xmax); - *xmin = ndc_gb_xmin; - *xmax = ndc_gb_xmax; - *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); - *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); - } else { - /* The viewport scales to 0, so nothing will be rendered. */ - *xmin = 0.0f; - *xmax = 0.0f; - *ymin = 0.0f; - *ymax = 0.0f; - } -} - static void genX(upload_sf_clip_viewport)(struct brw_context *brw) { @@ -2565,7 +2475,7 @@ genX(upload_sf_clip_viewport)(struct brw_context *brw) sfv.ViewportMatrixElementm30 = translate[0], sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias, sfv.ViewportMatrixElementm32 = translate[2], - brw_calculate_guardband_size(fb_width, fb_height, + gen_calculate_guardband_size(fb_width, fb_height, sfv.ViewportMatrixElementm00, sfv.ViewportMatrixElementm11, sfv.ViewportMatrixElementm30, -- 2.30.2