radeonsi: center viewport to improve guardband clipping for high resolutions
author Marek Olšák <marek.olsak@amd.com>
Fri, 28 Sep 2018 22:49:29 +0000 (18:49 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 16 Oct 2018 19:28:22 +0000 (15:28 -0400)
This will be more useful when we change the quant mode to increase subpixel
precision and decrease the viewport range (which might not be possible
if the viewport is not centered in the viewport range).

src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_viewport.c

index 3ddd7864d1c73281dd9ec7e896613f5959a2ab64..84f5e4c872225888aac19fecff16852da72ed879 100644 (file)
@@ -348,6 +348,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ]  = 0x3f800000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]  = 0x3f800000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]  = 0x3f800000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     = 0xffff;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1]  = 0x00000000;
index a170d525ecf253780df50a96d43d1332059c6087..babd171cbdbe6a7a791ca35787297f110d428671 100644 (file)
@@ -2730,6 +2730,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        bool unbound = false;
        int i;
 
+       /* Reject zero-sized framebuffers due to a hw bug on SI that occurs
+        * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
+        * We could implement the full workaround here, but it's a useless case.
+        */
+       if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) {
+               unreachable("the framebuffer shouldn't have zero area");
+               return;
+       }
+
        si_update_fb_dirtiness_after_rendering(sctx);
 
        for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
@@ -4879,8 +4888,6 @@ static void si_init_config(struct si_context *sctx)
                               S_028230_ER_LINE_RL(0x26) |
                               S_028230_ER_LINE_TB(0xA) |
                               S_028230_ER_LINE_BT(0xA));
-               /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
-               si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
                si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
                si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
                si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
index fffc63680d048b9a5cc6bf85e308eeb723c4de11..173e210a5a0f72555e325e356b66d993001d9749 100644 (file)
@@ -275,6 +275,8 @@ enum si_tracked_reg {
        SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ,
        SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ,
 
+       SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
+
        SI_TRACKED_PA_SC_CLIPRECT_RULE,
 
        SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
index 8dc68b126eb7e70f8335f3c67c0f2f79fc9e4b65..335d63b181467a74680e393f40d51d35580116d6 100644 (file)
@@ -126,6 +126,18 @@ static void si_emit_one_scissor(struct si_context *ctx,
        if (scissor)
                si_clip_scissor(&final, scissor);
 
+       /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_-
+        * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
+        */
+       if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) {
+               radeon_emit(cs, S_028250_TL_X(1) |
+                               S_028250_TL_Y(1) |
+                               S_028250_WINDOW_OFFSET_DISABLE(1));
+               radeon_emit(cs, S_028254_BR_X(1) |
+                               S_028254_BR_Y(1));
+               return;
+       }
+
        radeon_emit(cs, S_028250_TL_X(final.minx) |
                        S_028250_TL_Y(final.miny) |
                        S_028250_WINDOW_OFFSET_DISABLE(1));
@@ -138,8 +150,7 @@ static void si_emit_one_scissor(struct si_context *ctx,
 
 static void si_emit_guardband(struct si_context *ctx)
 {
-       const struct si_signed_scissor *vp_as_scissor;
-       struct si_signed_scissor max_vp_scissor;
+       struct si_signed_scissor vp_as_scissor;
        struct pipe_viewport_state vp;
        float left, top, right, bottom, max_range, guardband_x, guardband_y;
        float discard_x, discard_y;
@@ -147,26 +158,49 @@ static void si_emit_guardband(struct si_context *ctx)
        if (ctx->vs_writes_viewport_index) {
                /* Shaders can draw to any viewport. Make a union of all
                 * viewports. */
-               max_vp_scissor = ctx->viewports.as_scissor[0];
+               vp_as_scissor = ctx->viewports.as_scissor[0];
                for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
-                       si_scissor_make_union(&max_vp_scissor,
+                       si_scissor_make_union(&vp_as_scissor,
                                              &ctx->viewports.as_scissor[i]);
                }
-               vp_as_scissor = &max_vp_scissor;
        } else {
-               vp_as_scissor = &ctx->viewports.as_scissor[0];
+               vp_as_scissor = ctx->viewports.as_scissor[0];
        }
 
+       /* Determine the optimal hardware screen offset to center the viewport
+        * within the viewport range in order to maximize the guardband size.
+        * The center is the midpoint of the scissor: (min + max) / 2. */
+       int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
+       int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;
+
+       const unsigned hw_screen_offset_max = 8176;
+       /* SI-CI need to align the offset to an ubertile consisting of all SEs. */
+       const unsigned hw_screen_offset_alignment =
+               ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
+       /* Clamp to [0, max]; a negative center must not be compared as unsigned. */
+       hw_screen_offset_x = MIN2(MAX2(hw_screen_offset_x, 0), (int)hw_screen_offset_max);
+       hw_screen_offset_y = MIN2(MAX2(hw_screen_offset_y, 0), (int)hw_screen_offset_max);
+
+       /* Align the screen offset by dropping the low bits (alignment is >= 16). */
+       hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
+       hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);
+
+       /* Apply the offset to center the viewport and maximize the guardband. */
+       vp_as_scissor.minx -= hw_screen_offset_x;
+       vp_as_scissor.maxx -= hw_screen_offset_x;
+       vp_as_scissor.miny -= hw_screen_offset_y;
+       vp_as_scissor.maxy -= hw_screen_offset_y;
+
        /* Reconstruct the viewport transformation from the scissor. */
-       vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
-       vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
-       vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
-       vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+       vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0;
+       vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0;
+       vp.scale[0] = vp_as_scissor.maxx - vp.translate[0];
+       vp.scale[1] = vp_as_scissor.maxy - vp.translate[1];
 
        /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
-       if (vp_as_scissor->minx == vp_as_scissor->maxx)
+       if (vp_as_scissor.minx == vp_as_scissor.maxx)
                vp.scale[0] = 0.5;
-       if (vp_as_scissor->miny == vp_as_scissor->maxy)
+       if (vp_as_scissor.miny == vp_as_scissor.maxy)
                vp.scale[1] = 0.5;
 
        /* Find the biggest guard band that is inside the supported viewport
@@ -221,6 +255,10 @@ static void si_emit_guardband(struct si_context *ctx)
                                    SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
                                    fui(guardband_y), fui(discard_y),
                                    fui(guardband_x), fui(discard_x));
+       radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
+                                  SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
+                                  S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
+                                  S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
 }
 
 static void si_emit_scissors(struct si_context *ctx)