radeonsi: separate code computing info for small primitive culling
[mesa.git] / src / gallium / drivers / radeonsi / si_state_viewport.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_build_pm4.h"
26 #include "util/u_viewport.h"
27
28 #define SI_MAX_SCISSOR 16384
29
30 void si_get_small_prim_cull_info(struct si_context *sctx,
31 struct si_small_prim_cull_info *out)
32 {
33 /* This is needed by the small primitive culling, because it's done
34 * in screen space.
35 */
36 struct si_small_prim_cull_info info;
37 unsigned num_samples = sctx->framebuffer.nr_samples;
38 assert(num_samples >= 1);
39
40 info.scale[0] = sctx->viewports.states[0].scale[0];
41 info.scale[1] = sctx->viewports.states[0].scale[1];
42 info.translate[0] = sctx->viewports.states[0].translate[0];
43 info.translate[1] = sctx->viewports.states[0].translate[1];
44
45 /* The viewport shouldn't flip the X axis for the small prim culling to work. */
46 assert(-info.scale[0] + info.translate[0] <= info.scale[0] + info.translate[0]);
47
48 /* If the Y axis is inverted (OpenGL default framebuffer), reverse it.
49 * This is because the viewport transformation inverts the clip space
50 * bounding box, so min becomes max, which breaks small primitive
51 * culling.
52 */
53 if (sctx->viewports.y_inverted) {
54 info.scale[1] = -info.scale[1];
55 info.translate[1] = -info.translate[1];
56 }
57
58 /* Scale the framebuffer up, so that samples become pixels and small
59 * primitive culling is the same for all sample counts.
60 * This only works with the standard DX sample positions, because
61 * the samples are evenly spaced on both X and Y axes.
62 */
63 for (unsigned i = 0; i < 2; i++) {
64 info.scale[i] *= num_samples;
65 info.translate[i] *= num_samples;
66 }
67 *out = info;
68 }
69
70 static void si_set_scissor_states(struct pipe_context *pctx,
71 unsigned start_slot,
72 unsigned num_scissors,
73 const struct pipe_scissor_state *state)
74 {
75 struct si_context *ctx = (struct si_context *)pctx;
76 int i;
77
78 for (i = 0; i < num_scissors; i++)
79 ctx->scissors[start_slot + i] = state[i];
80
81 if (!ctx->queued.named.rasterizer->scissor_enable)
82 return;
83
84 si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
85 }
86
87 /* Since the guard band disables clipping, we have to clip per-pixel
88 * using a scissor.
89 */
90 static void si_get_scissor_from_viewport(struct si_context *ctx,
91 const struct pipe_viewport_state *vp,
92 struct si_signed_scissor *scissor)
93 {
94 float tmp, minx, miny, maxx, maxy;
95
96 /* Convert (-1, -1) and (1, 1) from clip space into window space. */
97 minx = -vp->scale[0] + vp->translate[0];
98 miny = -vp->scale[1] + vp->translate[1];
99 maxx = vp->scale[0] + vp->translate[0];
100 maxy = vp->scale[1] + vp->translate[1];
101
102 /* Handle inverted viewports. */
103 if (minx > maxx) {
104 tmp = minx;
105 minx = maxx;
106 maxx = tmp;
107 }
108 if (miny > maxy) {
109 tmp = miny;
110 miny = maxy;
111 maxy = tmp;
112 }
113
114 /* Convert to integer and round up the max bounds. */
115 scissor->minx = minx;
116 scissor->miny = miny;
117 scissor->maxx = ceilf(maxx);
118 scissor->maxy = ceilf(maxy);
119 }
120
121 static void si_clamp_scissor(struct si_context *ctx,
122 struct pipe_scissor_state *out,
123 struct si_signed_scissor *scissor)
124 {
125 out->minx = CLAMP(scissor->minx, 0, SI_MAX_SCISSOR);
126 out->miny = CLAMP(scissor->miny, 0, SI_MAX_SCISSOR);
127 out->maxx = CLAMP(scissor->maxx, 0, SI_MAX_SCISSOR);
128 out->maxy = CLAMP(scissor->maxy, 0, SI_MAX_SCISSOR);
129 }
130
131 static void si_clip_scissor(struct pipe_scissor_state *out,
132 struct pipe_scissor_state *clip)
133 {
134 out->minx = MAX2(out->minx, clip->minx);
135 out->miny = MAX2(out->miny, clip->miny);
136 out->maxx = MIN2(out->maxx, clip->maxx);
137 out->maxy = MIN2(out->maxy, clip->maxy);
138 }
139
140 static void si_scissor_make_union(struct si_signed_scissor *out,
141 struct si_signed_scissor *in)
142 {
143 out->minx = MIN2(out->minx, in->minx);
144 out->miny = MIN2(out->miny, in->miny);
145 out->maxx = MAX2(out->maxx, in->maxx);
146 out->maxy = MAX2(out->maxy, in->maxy);
147 out->quant_mode = MIN2(out->quant_mode, in->quant_mode);
148 }
149
150 static void si_emit_one_scissor(struct si_context *ctx,
151 struct radeon_cmdbuf *cs,
152 struct si_signed_scissor *vp_scissor,
153 struct pipe_scissor_state *scissor)
154 {
155 struct pipe_scissor_state final;
156
157 if (ctx->vs_disables_clipping_viewport) {
158 final.minx = final.miny = 0;
159 final.maxx = final.maxy = SI_MAX_SCISSOR;
160 } else {
161 si_clamp_scissor(ctx, &final, vp_scissor);
162 }
163
164 if (scissor)
165 si_clip_scissor(&final, scissor);
166
167 /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_-
168 * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
169 */
170 if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
171 radeon_emit(cs, S_028250_TL_X(1) |
172 S_028250_TL_Y(1) |
173 S_028250_WINDOW_OFFSET_DISABLE(1));
174 radeon_emit(cs, S_028254_BR_X(1) |
175 S_028254_BR_Y(1));
176 return;
177 }
178
179 radeon_emit(cs, S_028250_TL_X(final.minx) |
180 S_028250_TL_Y(final.miny) |
181 S_028250_WINDOW_OFFSET_DISABLE(1));
182 radeon_emit(cs, S_028254_BR_X(final.maxx) |
183 S_028254_BR_Y(final.maxy));
184 }
185
186 #define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176
187
/* Compute and emit the guardband state: the clip/discard adjustments
 * (PA_CL_GB_*), the hardware screen offset that centers the viewport
 * (PA_SU_HARDWARE_SCREEN_OFFSET), and the vertex quantization mode
 * (PA_SU_VTX_CNTL). The guardband is how far primitives may extend
 * outside the viewport before the hardware must really clip them.
 */
static void si_emit_guardband(struct si_context *ctx)
{
   const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
   struct si_signed_scissor vp_as_scissor;
   struct pipe_viewport_state vp;
   float left, top, right, bottom, max_range, guardband_x, guardband_y;
   float discard_x, discard_y;

   if (ctx->vs_writes_viewport_index) {
      /* Shaders can draw to any viewport. Make a union of all
       * viewports. */
      vp_as_scissor = ctx->viewports.as_scissor[0];
      for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
         si_scissor_make_union(&vp_as_scissor,
                               &ctx->viewports.as_scissor[i]);
      }
   } else {
      vp_as_scissor = ctx->viewports.as_scissor[0];
   }

   /* Blits don't set the viewport state. The vertex shader determines
    * the viewport size by scaling the coordinates, so we don't know
    * how large the viewport is. Assume the worst case.
    */
   if (ctx->vs_disables_clipping_viewport)
      vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;

   /* Determine the optimal hardware screen offset to center the viewport
    * within the viewport range in order to maximize the guardband size.
    */
   int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
   int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;

   /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
   const unsigned hw_screen_offset_alignment =
      ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);

   /* Indexed by quantization modes */
   static int max_viewport_size[] = {65535, 16383, 4095};

   /* Ensure that the whole viewport stays representable in
    * absolute coordinates.
    * See comment in si_set_viewport_states.
    */
   assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
          vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);

   hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
   hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);

   /* Align the screen offset by dropping the low bits. */
   hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
   hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);

   /* Apply the offset to center the viewport and maximize the guardband. */
   vp_as_scissor.minx -= hw_screen_offset_x;
   vp_as_scissor.maxx -= hw_screen_offset_x;
   vp_as_scissor.miny -= hw_screen_offset_y;
   vp_as_scissor.maxy -= hw_screen_offset_y;

   /* Reconstruct the viewport transformation from the scissor. */
   vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0;
   vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0;
   vp.scale[0] = vp_as_scissor.maxx - vp.translate[0];
   vp.scale[1] = vp_as_scissor.maxy - vp.translate[1];

   /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
   if (vp_as_scissor.minx == vp_as_scissor.maxx)
      vp.scale[0] = 0.5;
   if (vp_as_scissor.miny == vp_as_scissor.maxy)
      vp.scale[1] = 0.5;

   /* Find the biggest guard band that is inside the supported viewport
    * range. The guard band is specified as a horizontal and vertical
    * distance from (0,0) in clip space.
    *
    * This is done by applying the inverse viewport transformation
    * on the viewport limits to get those limits in clip space.
    *
    * The viewport range is [-max_viewport_size/2, max_viewport_size/2].
    */
   assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
   max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
   left   = (-max_range - vp.translate[0]) / vp.scale[0];
   right  = ( max_range - vp.translate[0]) / vp.scale[0];
   top    = (-max_range - vp.translate[1]) / vp.scale[1];
   bottom = ( max_range - vp.translate[1]) / vp.scale[1];

   assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);

   guardband_x = MIN2(-left, right);
   guardband_y = MIN2(-top, bottom);

   /* Start the discard distance at the clip-space boundary (+-1). */
   discard_x = 1.0;
   discard_y = 1.0;

   if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim))) {
      /* When rendering wide points or lines, we need to be more
       * conservative about when to discard them entirely. */
      float pixels;

      if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
         pixels = rs->max_point_size;
      else
         pixels = rs->line_width;

      /* Add half the point size / line width */
      discard_x += pixels / (2.0 * vp.scale[0]);
      discard_y += pixels / (2.0 * vp.scale[1]);

      /* Discard primitives that would lie entirely outside the clip
       * region. */
      discard_x = MIN2(discard_x, guardband_x);
      discard_y = MIN2(discard_y, guardband_y);
   }

   /* If any of the GB registers is updated, all of them must be updated.
    * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
    * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
    */
   unsigned initial_cdw = ctx->gfx_cs->current.cdw;
   radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
                               SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
                               fui(guardband_y), fui(discard_y),
                               fui(guardband_x), fui(discard_x));
   radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
                              SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
                              S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
                              S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
   radeon_opt_set_context_reg(ctx, R_028BE4_PA_SU_VTX_CNTL,
                              SI_TRACKED_PA_SU_VTX_CNTL,
                              S_028BE4_PIX_CENTER(rs->half_pixel_center) |
                              S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
                                                  vp_as_scissor.quant_mode));
   /* If the tracked-register optimization actually wrote something, the
    * context registers rolled and dependent state must know about it. */
   if (initial_cdw != ctx->gfx_cs->current.cdw)
      ctx->context_roll = true;
}
325
326 static void si_emit_scissors(struct si_context *ctx)
327 {
328 struct radeon_cmdbuf *cs = ctx->gfx_cs;
329 struct pipe_scissor_state *states = ctx->scissors;
330 bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
331
332 /* The simple case: Only 1 viewport is active. */
333 if (!ctx->vs_writes_viewport_index) {
334 struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
335
336 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
337 si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
338 return;
339 }
340
341 /* All registers in the array need to be updated if any of them is changed.
342 * This is a hardware requirement.
343 */
344 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
345 SI_MAX_VIEWPORTS * 2);
346 for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
347 si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
348 scissor_enabled ? &states[i] : NULL);
349 }
350 }
351
/* pipe_context::set_viewport_states hook: store the viewport transforms,
 * derive a per-viewport scissor from each transform, and pick the vertex
 * quantization mode (subpixel precision) that keeps the whole viewport
 * representable while leaving room for the guardband.
 */
static void si_set_viewport_states(struct pipe_context *pctx,
                                   unsigned start_slot,
                                   unsigned num_viewports,
                                   const struct pipe_viewport_state *state)
{
   struct si_context *ctx = (struct si_context *)pctx;
   int i;

   for (i = 0; i < num_viewports; i++) {
      unsigned index = start_slot + i;
      struct si_signed_scissor *scissor = &ctx->viewports.as_scissor[index];

      ctx->viewports.states[index] = state[i];

      si_get_scissor_from_viewport(ctx, &state[i], scissor);

      /* Viewport extent and farthest corner, used to pick the
       * quantization mode below. */
      unsigned w = scissor->maxx - scissor->minx;
      unsigned h = scissor->maxy - scissor->miny;
      unsigned max_extent = MAX2(w, h);

      int max_corner = MAX2(scissor->maxx, scissor->maxy);

      unsigned center_x = (scissor->maxx + scissor->minx) / 2;
      unsigned center_y = (scissor->maxy + scissor->miny) / 2;
      unsigned max_center = MAX2(center_x, center_y);

      /* PA_SU_HARDWARE_SCREEN_OFFSET can't center viewports whose
       * center start farther than MAX_PA_SU_HARDWARE_SCREEN_OFFSET.
       * (for example, a 1x1 viewport in the lower right corner of
       * 16Kx16K) Such viewports need a greater guardband, so they
       * have to use a worse quantization mode.
       */
      unsigned distance_off_center =
         MAX2(0, (int)max_center - MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
      max_extent += distance_off_center;

      /* Determine the best quantization mode (subpixel precision),
       * but also leave enough space for the guardband.
       *
       * Note that primitive binning requires QUANT_MODE == 16_8 on Vega10
       * and Raven1 for line and rectangle primitive types to work correctly.
       * Always use 16_8 if primitive binning is possible to occur.
       */
      if ((ctx->family == CHIP_VEGA10 || ctx->family == CHIP_RAVEN) &&
          ctx->screen->dpbb_allowed)
         max_extent = 16384; /* Use QUANT_MODE == 16_8. */

      /* Another constraint is that all coordinates in the viewport
       * are representable in fixed point with respect to the
       * surface origin.
       *
       * It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given
       * an offset that would make the upper corner of the viewport
       * greater than the maximum representable number post
       * quantization, ie 2^quant_bits.
       *
       * This does not matter for 14.10 and 16.8 formats since the
       * offset is already limited at 8k, but it means we can't use
       * 12.12 if we are drawing to some pixels outside the lower
       * 4k x 4k of the render target.
       */

      if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
      else if (max_extent <= 4096) /* 16K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
      else /* 64K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
   }

   /* Viewport 0 decides the Y orientation; state[0] is viewport 0 here
    * because start_slot == 0. */
   if (start_slot == 0) {
      ctx->viewports.y_inverted =
         -state->scale[1] + state->translate[1] >
         state->scale[1] + state->translate[1];
   }

   si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
   si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
   si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
}
432
433 static void si_emit_one_viewport(struct si_context *ctx,
434 struct pipe_viewport_state *state)
435 {
436 struct radeon_cmdbuf *cs = ctx->gfx_cs;
437
438 radeon_emit(cs, fui(state->scale[0]));
439 radeon_emit(cs, fui(state->translate[0]));
440 radeon_emit(cs, fui(state->scale[1]));
441 radeon_emit(cs, fui(state->translate[1]));
442 radeon_emit(cs, fui(state->scale[2]));
443 radeon_emit(cs, fui(state->translate[2]));
444 }
445
446 static void si_emit_viewports(struct si_context *ctx)
447 {
448 struct radeon_cmdbuf *cs = ctx->gfx_cs;
449 struct pipe_viewport_state *states = ctx->viewports.states;
450
451 /* The simple case: Only 1 viewport is active. */
452 if (!ctx->vs_writes_viewport_index) {
453 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
454 si_emit_one_viewport(ctx, &states[0]);
455 return;
456 }
457
458 /* All registers in the array need to be updated if any of them is changed.
459 * This is a hardware requirement.
460 */
461 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
462 0, SI_MAX_VIEWPORTS * 6);
463 for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++)
464 si_emit_one_viewport(ctx, &states[i]);
465 }
466
467 static inline void
468 si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
469 bool window_space_position, float *zmin, float *zmax)
470 {
471 if (window_space_position) {
472 *zmin = 0;
473 *zmax = 1;
474 return;
475 }
476 util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
477 }
478
479 static void si_emit_depth_ranges(struct si_context *ctx)
480 {
481 struct radeon_cmdbuf *cs = ctx->gfx_cs;
482 struct pipe_viewport_state *states = ctx->viewports.states;
483 bool clip_halfz = ctx->queued.named.rasterizer->clip_halfz;
484 bool window_space = ctx->vs_disables_clipping_viewport;
485 float zmin, zmax;
486
487 /* The simple case: Only 1 viewport is active. */
488 if (!ctx->vs_writes_viewport_index) {
489 si_viewport_zmin_zmax(&states[0], clip_halfz, window_space,
490 &zmin, &zmax);
491
492 radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
493 radeon_emit(cs, fui(zmin));
494 radeon_emit(cs, fui(zmax));
495 return;
496 }
497
498 /* All registers in the array need to be updated if any of them is changed.
499 * This is a hardware requirement.
500 */
501 radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0,
502 SI_MAX_VIEWPORTS * 2);
503 for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
504 si_viewport_zmin_zmax(&states[i], clip_halfz, window_space,
505 &zmin, &zmax);
506 radeon_emit(cs, fui(zmin));
507 radeon_emit(cs, fui(zmax));
508 }
509 }
510
/* Atom callback: emit all viewport state — the viewport transforms and
 * their derived depth ranges.
 */
static void si_emit_viewport_states(struct si_context *ctx)
{
   si_emit_viewports(ctx);
   si_emit_depth_ranges(ctx);
}
516
517 /**
518 * This reacts to 2 state changes:
519 * - VS.writes_viewport_index
520 * - VS output position in window space (enable/disable)
521 *
522 * Normally, we only emit 1 viewport and 1 scissor if no shader is using
523 * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
524 * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
525 * called to emit the rest.
526 */
527 void si_update_vs_viewport_state(struct si_context *ctx)
528 {
529 struct si_shader_info *info = si_get_vs_info(ctx);
530 bool vs_window_space;
531
532 if (!info)
533 return;
534
535 /* When the VS disables clipping and viewport transformation. */
536 vs_window_space =
537 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
538
539 if (ctx->vs_disables_clipping_viewport != vs_window_space) {
540 ctx->vs_disables_clipping_viewport = vs_window_space;
541 si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
542 si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
543 }
544
545 /* Viewport index handling. */
546 if (ctx->vs_writes_viewport_index == info->writes_viewport_index)
547 return;
548
549 /* This changes how the guardband is computed. */
550 ctx->vs_writes_viewport_index = info->writes_viewport_index;
551 si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
552
553 /* Emit scissors and viewports that were enabled by having
554 * the ViewportIndex output.
555 */
556 if (info->writes_viewport_index) {
557 si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
558 si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
559 }
560 }
561
static void si_emit_window_rectangles(struct si_context *sctx)
{
   /* There are four clipping rectangles. Their corner coordinates are inclusive.
    * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending
    * on whether the pixel is inside cliprects 0-3, respectively. For example,
    * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
    * the number 3 (binary 0011).
    *
    * If CLIPRECT_RULE & (1 << number), the pixel is rasterized.
    */
   struct radeon_cmdbuf *cs = sctx->gfx_cs;
   /* outside[n-1] = rule bits that rasterize only pixels outside all of
    * the first n rectangles (the "exclude" mode for n active rects). */
   static const unsigned outside[4] = {
      /* outside rectangle 0 */
      V_02820C_OUT |
      V_02820C_IN_1 |
      V_02820C_IN_2 |
      V_02820C_IN_21 |
      V_02820C_IN_3 |
      V_02820C_IN_31 |
      V_02820C_IN_32 |
      V_02820C_IN_321,
      /* outside rectangles 0, 1 */
      V_02820C_OUT |
      V_02820C_IN_2 |
      V_02820C_IN_3 |
      V_02820C_IN_32,
      /* outside rectangles 0, 1, 2 */
      V_02820C_OUT |
      V_02820C_IN_3,
      /* outside rectangles 0, 1, 2, 3 */
      V_02820C_OUT,
   };
   const unsigned disabled = 0xffff; /* all inside and outside cases */
   unsigned num_rectangles = sctx->num_window_rectangles;
   struct pipe_scissor_state *rects = sctx->window_rectangles;
   unsigned rule;

   assert(num_rectangles <= 4);

   if (num_rectangles == 0)
      rule = disabled;
   else if (sctx->window_rectangles_include)
      /* Include mode rasterizes exactly the pixels that exclude mode
       * would discard, hence the bitwise complement. */
      rule = ~outside[num_rectangles - 1];
   else
      rule = outside[num_rectangles - 1];

   radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE,
                              SI_TRACKED_PA_SC_CLIPRECT_RULE, rule);
   if (num_rectangles == 0)
      return;

   /* Emit the inclusive TL/BR corners of each active rectangle. */
   radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL,
                              num_rectangles * 2);
   for (unsigned i = 0; i < num_rectangles; i++) {
      radeon_emit(cs, S_028210_TL_X(rects[i].minx) |
                      S_028210_TL_Y(rects[i].miny));
      radeon_emit(cs, S_028214_BR_X(rects[i].maxx) |
                      S_028214_BR_Y(rects[i].maxy));
   }
}
622
623 static void si_set_window_rectangles(struct pipe_context *ctx,
624 bool include,
625 unsigned num_rectangles,
626 const struct pipe_scissor_state *rects)
627 {
628 struct si_context *sctx = (struct si_context *)ctx;
629
630 sctx->num_window_rectangles = num_rectangles;
631 sctx->window_rectangles_include = include;
632 if (num_rectangles) {
633 memcpy(sctx->window_rectangles, rects,
634 sizeof(*rects) * num_rectangles);
635 }
636
637 si_mark_atom_dirty(sctx, &sctx->atoms.s.window_rectangles);
638 }
639
640 void si_init_viewport_functions(struct si_context *ctx)
641 {
642 ctx->atoms.s.guardband.emit = si_emit_guardband;
643 ctx->atoms.s.scissors.emit = si_emit_scissors;
644 ctx->atoms.s.viewports.emit = si_emit_viewport_states;
645 ctx->atoms.s.window_rectangles.emit = si_emit_window_rectangles;
646
647 ctx->b.set_scissor_states = si_set_scissor_states;
648 ctx->b.set_viewport_states = si_set_viewport_states;
649 ctx->b.set_window_rectangles = si_set_window_rectangles;
650
651 for (unsigned i = 0; i < 16; i++)
652 ctx->viewports.as_scissor[i].quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
653 }