- struct radeon_winsys_cs *cs = ctx->gfx_cs;
- struct pipe_viewport_state vp;
- float left, top, right, bottom, max_range, guardband_x, guardband_y;
- float discard_x, discard_y;
-
- /* Reconstruct the viewport transformation from the scissor. */
- vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
- vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
- vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
- vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
-
- /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
- if (vp_as_scissor->minx == vp_as_scissor->maxx)
- vp.scale[0] = 0.5;
- if (vp_as_scissor->miny == vp_as_scissor->maxy)
- vp.scale[1] = 0.5;
-
- /* Find the biggest guard band that is inside the supported viewport
- * range. The guard band is specified as a horizontal and vertical
- * distance from (0,0) in clip space.
- *
- * This is done by applying the inverse viewport transformation
- * on the viewport limits to get those limits in clip space.
- *
- * Use a limit one pixel smaller to allow for some precision error.
- */
- max_range = SI_MAX_VIEWPORT_RANGE - 1;
- left = (-max_range - vp.translate[0]) / vp.scale[0];
- right = ( max_range - vp.translate[0]) / vp.scale[0];
- top = (-max_range - vp.translate[1]) / vp.scale[1];
- bottom = ( max_range - vp.translate[1]) / vp.scale[1];
-
- assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
-
- guardband_x = MIN2(-left, right);
- guardband_y = MIN2(-top, bottom);
-
- discard_x = 1.0;
- discard_y = 1.0;
-
- if (unlikely(ctx->current_rast_prim < PIPE_PRIM_TRIANGLES) &&
- ctx->queued.named.rasterizer) {
- /* When rendering wide points or lines, we need to be more
- * conservative about when to discard them entirely. */
- const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
- float pixels;
-
- if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
- pixels = rs->max_point_size;
- else
- pixels = rs->line_width;
-
- /* Add half the point size / line width */
- discard_x += pixels / (2.0 * vp.scale[0]);
- discard_y += pixels / (2.0 * vp.scale[1]);
-
- /* Discard primitives that would lie entirely outside the clip
- * region. */
- discard_x = MIN2(discard_x, guardband_x);
- discard_y = MIN2(discard_y, guardband_y);
- }
-
- /* If any of the GB registers is updated, all of them must be updated. */
- radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
-
- radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
- radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
- radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
- radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+ const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
+ struct si_signed_scissor vp_as_scissor;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+ float discard_x, discard_y;
+
+ if (ctx->vs_writes_viewport_index) {
+ /* Shaders can draw to any viewport. Make a union of all
+ * viewports. */
+ vp_as_scissor = ctx->viewports.as_scissor[0];
+ for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
+ si_scissor_make_union(&vp_as_scissor, &ctx->viewports.as_scissor[i]);
+ }
+ } else {
+ vp_as_scissor = ctx->viewports.as_scissor[0];
+ }
+
+ /* Blits don't set the viewport state. The vertex shader determines
+ * the viewport size by scaling the coordinates, so we don't know
+ * how large the viewport is. Assume the worst case.
+ */
+ if (ctx->vs_disables_clipping_viewport)
+ vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
+
+ /* Determine the optimal hardware screen offset to center the viewport
+ * within the viewport range in order to maximize the guardband size.
+ */
+ int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
+ int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;
+
+ /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
+ const unsigned hw_screen_offset_alignment =
+ ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
+
+ /* Indexed by quantization modes */
+ static int max_viewport_size[] = {65535, 16383, 4095};
+
+ /* Ensure that the whole viewport stays representable in
+ * absolute coordinates.
+ * See comment in si_set_viewport_states.
+ */
+ assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
+ vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);
+
+ hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
+ hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
+
+ /* Align the screen offset by dropping the low bits. */
+ hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
+ hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);
+
+ /* Apply the offset to center the viewport and maximize the guardband. */
+ vp_as_scissor.minx -= hw_screen_offset_x;
+ vp_as_scissor.maxx -= hw_screen_offset_x;
+ vp_as_scissor.miny -= hw_screen_offset_y;
+ vp_as_scissor.maxy -= hw_screen_offset_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor.maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor.maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor.minx == vp_as_scissor.maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor.miny == vp_as_scissor.maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * The viewport range is [-max_viewport_size/2, max_viewport_size/2].
+ */
+ assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
+ max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = (max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = (max_range - vp.translate[1]) / vp.scale[1];
+
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
+
+ guardband_x = MIN2(-left, right);
+ guardband_y = MIN2(-top, bottom);
+
+ discard_x = 1.0;
+ discard_y = 1.0;
+
+ if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim))) {
+ /* When rendering wide points or lines, we need to be more
+ * conservative about when to discard them entirely. */
+ float pixels;
+
+ if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
+ pixels = rs->max_point_size;
+ else
+ pixels = rs->line_width;
+
+ /* Add half the point size / line width */
+ discard_x += pixels / (2.0 * vp.scale[0]);
+ discard_y += pixels / (2.0 * vp.scale[1]);
+
+ /* Discard primitives that would lie entirely outside the clip
+ * region. */
+ discard_x = MIN2(discard_x, guardband_x);
+ discard_y = MIN2(discard_y, guardband_y);
+ }
+
+ /* If any of the GB registers is updated, all of them must be updated.
+ * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
+ * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
+ */
+ unsigned initial_cdw = ctx->gfx_cs->current.cdw;
+ radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
+ SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y),
+ fui(guardband_x), fui(discard_x));
+ radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
+ SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
+ S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
+ S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
+ radeon_opt_set_context_reg(
+ ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
+ S_028BE4_PIX_CENTER(rs->half_pixel_center) |
+ S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode));
+ if (initial_cdw != ctx->gfx_cs->current.cdw)
+ ctx->context_roll = true;
+
+ si_update_ngg_small_prim_precision(ctx);