From: Marek Olšák Date: Fri, 28 Sep 2018 22:49:29 +0000 (-0400) Subject: radeonsi: center viewport to improve guardband clipping for high resolutions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9e182b8313c5ab952498a76495f57e8420f9e5ad;p=mesa.git radeonsi: center viewport to improve guardband clipping for high resolutions This will be more useful when we change the quant mode to increase subpixel precision and decrease the viewport range (which might not be possible if the viewport is not centered in the viewport range). --- diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 3ddd7864d1c..84f5e4c8722 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -348,6 +348,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a170d525ecf..babd171cbdb 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2730,6 +2730,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool unbound = false; int i; + /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. + * We could implement the full workaround here, but it's a useless case. 
+ */ + if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { + unreachable("the framebuffer shouldn't have zero area"); + return; + } + si_update_fb_dirtiness_after_rendering(sctx); for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { @@ -4879,8 +4888,6 @@ static void si_init_config(struct si_context *sctx) S_028230_ER_LINE_RL(0x26) | S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA)); - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ - si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fffc63680d0..173e210a5a0 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -275,6 +275,8 @@ enum si_tracked_reg { SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ, SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SC_CLIPRECT_RULE, SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 8dc68b126eb..335d63b1814 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -126,6 +126,18 @@ static void si_emit_one_scissor(struct si_context *ctx, if (scissor) si_clip_scissor(&final, scissor); + /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 
+ */ + if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + radeon_emit(cs, S_028250_TL_X(1) | + S_028250_TL_Y(1) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(1) | + S_028254_BR_Y(1)); + return; + } + radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); @@ -138,8 +150,7 @@ static void si_emit_one_scissor(struct si_context *ctx, static void si_emit_guardband(struct si_context *ctx) { - const struct si_signed_scissor *vp_as_scissor; - struct si_signed_scissor max_vp_scissor; + struct si_signed_scissor vp_as_scissor; struct pipe_viewport_state vp; float left, top, right, bottom, max_range, guardband_x, guardband_y; float discard_x, discard_y; @@ -147,26 +158,49 @@ static void si_emit_guardband(struct si_context *ctx) if (ctx->vs_writes_viewport_index) { /* Shaders can draw to any viewport. Make a union of all * viewports. */ - max_vp_scissor = ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) { - si_scissor_make_union(&max_vp_scissor, + si_scissor_make_union(&vp_as_scissor, &ctx->viewports.as_scissor[i]); } - vp_as_scissor = &max_vp_scissor; } else { - vp_as_scissor = &ctx->viewports.as_scissor[0]; + vp_as_scissor = ctx->viewports.as_scissor[0]; } + /* Determine the optimal hardware screen offset to center the viewport + * within the viewport range in order to maximize the guardband size. + */ + int hw_screen_offset_x = (vp_as_scissor.maxx - vp_as_scissor.minx) / 2; + int hw_screen_offset_y = (vp_as_scissor.maxy - vp_as_scissor.miny) / 2; + + const unsigned hw_screen_offset_max = 8176; + /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + const unsigned hw_screen_offset_alignment = + ctx->chip_class >= VI ? 
16 : MAX2(ctx->screen->se_tile_repeat, 16); + + hw_screen_offset_x = MIN2(hw_screen_offset_x, hw_screen_offset_max); + hw_screen_offset_y = MIN2(hw_screen_offset_y, hw_screen_offset_max); + + /* Align the screen offset by clearing the bits below hw_screen_offset_alignment (at least the low 4 bits). */ + hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1); + hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1); + + /* Apply the offset to center the viewport and maximize the guardband. */ + vp_as_scissor.minx -= hw_screen_offset_x; + vp_as_scissor.maxx -= hw_screen_offset_x; + vp_as_scissor.miny -= hw_screen_offset_y; + vp_as_scissor.maxy -= hw_screen_offset_y; + /* Reconstruct the viewport transformation from the scissor. */ - vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; - vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; - vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; - vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; + vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0; + vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0; + vp.scale[0] = vp_as_scissor.maxx - vp.translate[0]; + vp.scale[1] = vp_as_scissor.maxy - vp.translate[1]; /* Treat a 0x0 viewport as 1x1 to prevent division by zero. 
*/ - if (vp_as_scissor->minx == vp_as_scissor->maxx) + if (vp_as_scissor.minx == vp_as_scissor.maxx) vp.scale[0] = 0.5; - if (vp_as_scissor->miny == vp_as_scissor->maxy) + if (vp_as_scissor.miny == vp_as_scissor.maxy) vp.scale[1] = 0.5; /* Find the biggest guard band that is inside the supported viewport @@ -221,6 +255,10 @@ static void si_emit_guardband(struct si_context *ctx) SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), fui(guardband_x), fui(discard_x)); + radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, + SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, + S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) | + S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4)); } static void si_emit_scissors(struct si_context *ctx)