radeonsi/gfx10: implement NGG culling for 4x wave32 subgroups
[mesa.git] / src / gallium / drivers / radeonsi / si_state_viewport.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_build_pm4.h"
#include "util/u_upload_mgr.h"
#include "util/u_viewport.h"

#define SI_MAX_SCISSOR 16384

void si_update_ngg_small_prim_precision(struct si_context *ctx)
{
   if (!ctx->screen->use_ngg_culling)
      return;

   /* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling. */
   unsigned num_samples = ctx->framebuffer.nr_samples;
   unsigned quant_mode = ctx->viewports.as_scissor[0].quant_mode;
   float precision;

   if (quant_mode == SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH)
      precision = num_samples / 4096.0;
   else if (quant_mode == SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH)
      precision = num_samples / 1024.0;
   else
      precision = num_samples / 256.0;

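   /* Only the sign and exponent bits of the float are passed on
    * (fui(precision) >> 23), which is exact for a power-of-two threshold.
    * A worked example, assuming standard IEEE-754 single precision:
    * with 1 sample in 16_8 mode, precision = 1/256 = 2^-8,
    * fui(1/256.0) = 0x3B800000, and 0x3B800000 >> 23 = 119
    * (the biased exponent 127 - 8).
    */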
   ctx->current_vs_state &= C_VS_STATE_SMALL_PRIM_PRECISION;
   ctx->current_vs_state |= S_VS_STATE_SMALL_PRIM_PRECISION(fui(precision) >> 23);
}

void si_get_small_prim_cull_info(struct si_context *sctx,
                                 struct si_small_prim_cull_info *out)
{
   /* This is needed for small primitive culling, because it's done
    * in screen space.
    */
   struct si_small_prim_cull_info info;
   unsigned num_samples = sctx->framebuffer.nr_samples;
   assert(num_samples >= 1);

   info.scale[0] = sctx->viewports.states[0].scale[0];
   info.scale[1] = sctx->viewports.states[0].scale[1];
   info.translate[0] = sctx->viewports.states[0].translate[0];
   info.translate[1] = sctx->viewports.states[0].translate[1];

   /* The viewport shouldn't flip the X axis for the small prim culling to work. */
   assert(-info.scale[0] + info.translate[0] <= info.scale[0] + info.translate[0]);

   /* If the Y axis is inverted (OpenGL default framebuffer), reverse it.
    * This is because the viewport transformation inverts the clip space
    * bounding box, so min becomes max, which breaks small primitive
    * culling.
    */
   if (sctx->viewports.y_inverted) {
      info.scale[1] = -info.scale[1];
      info.translate[1] = -info.translate[1];
   }

   /* Scale the framebuffer up, so that samples become pixels and small
    * primitive culling is the same for all sample counts.
    * This only works with the standard DX sample positions, because
    * the samples are evenly spaced on both X and Y axes.
    */
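   /* A worked example with hypothetical numbers: a 1280x720 viewport has
    * scale = (640, 360) and translate = (640, 360). With 4xMSAA the loop
    * below turns both into (2560, 1440), so the culling math operates on a
    * 5120x2880 grid where one unit is one sample and the "smaller than a
    * pixel" test is identical for every sample count.
    */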
   for (unsigned i = 0; i < 2; i++) {
      info.scale[i] *= num_samples;
      info.translate[i] *= num_samples;
   }
   *out = info;
}

static void si_set_scissor_states(struct pipe_context *pctx,
                                  unsigned start_slot,
                                  unsigned num_scissors,
                                  const struct pipe_scissor_state *state)
{
   struct si_context *ctx = (struct si_context *)pctx;
   int i;

   for (i = 0; i < num_scissors; i++)
      ctx->scissors[start_slot + i] = state[i];

   if (!ctx->queued.named.rasterizer->scissor_enable)
      return;

   si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
}

/* Since the guard band disables clipping, we have to clip per-pixel
 * using a scissor.
 */
static void si_get_scissor_from_viewport(struct si_context *ctx,
                                         const struct pipe_viewport_state *vp,
                                         struct si_signed_scissor *scissor)
{
   float tmp, minx, miny, maxx, maxy;

   /* Convert (-1, -1) and (1, 1) from clip space into window space. */
   minx = -vp->scale[0] + vp->translate[0];
   miny = -vp->scale[1] + vp->translate[1];
   maxx = vp->scale[0] + vp->translate[0];
   maxy = vp->scale[1] + vp->translate[1];
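   /* E.g. with the hypothetical scale = (640, 360) and
    * translate = (640, 360) from above, this yields (minx, miny) = (0, 0)
    * and (maxx, maxy) = (1280, 720). A GL viewport with a negative Y scale
    * produces miny > maxy, which the swaps below straighten out.
    */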

   /* Handle inverted viewports. */
   if (minx > maxx) {
      tmp = minx;
      minx = maxx;
      maxx = tmp;
   }
   if (miny > maxy) {
      tmp = miny;
      miny = maxy;
      maxy = tmp;
   }

   /* Convert to integer and round up the max bounds. */
   scissor->minx = minx;
   scissor->miny = miny;
   scissor->maxx = ceilf(maxx);
   scissor->maxy = ceilf(maxy);
}

static void si_clamp_scissor(struct si_context *ctx,
                             struct pipe_scissor_state *out,
                             struct si_signed_scissor *scissor)
{
   out->minx = CLAMP(scissor->minx, 0, SI_MAX_SCISSOR);
   out->miny = CLAMP(scissor->miny, 0, SI_MAX_SCISSOR);
   out->maxx = CLAMP(scissor->maxx, 0, SI_MAX_SCISSOR);
   out->maxy = CLAMP(scissor->maxy, 0, SI_MAX_SCISSOR);
}

static void si_clip_scissor(struct pipe_scissor_state *out,
                            struct pipe_scissor_state *clip)
{
   out->minx = MAX2(out->minx, clip->minx);
   out->miny = MAX2(out->miny, clip->miny);
   out->maxx = MIN2(out->maxx, clip->maxx);
   out->maxy = MIN2(out->maxy, clip->maxy);
}

static void si_scissor_make_union(struct si_signed_scissor *out,
                                  struct si_signed_scissor *in)
{
   out->minx = MIN2(out->minx, in->minx);
   out->miny = MIN2(out->miny, in->miny);
   out->maxx = MAX2(out->maxx, in->maxx);
   out->maxy = MAX2(out->maxy, in->maxy);
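   /* MIN2 of the quant modes picks the coarsest subpixel precision of the
    * two inputs (the enum is ordered 16_8 = 0, 14_10 = 1, 12_12 = 2), i.e.
    * a mode whose viewport range can still represent the whole union.
    */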
   out->quant_mode = MIN2(out->quant_mode, in->quant_mode);
}

static void si_emit_one_scissor(struct si_context *ctx,
                                struct radeon_cmdbuf *cs,
                                struct si_signed_scissor *vp_scissor,
                                struct pipe_scissor_state *scissor)
{
   struct pipe_scissor_state final;

   if (ctx->vs_disables_clipping_viewport) {
      final.minx = final.miny = 0;
      final.maxx = final.maxy = SI_MAX_SCISSOR;
   } else {
      si_clamp_scissor(ctx, &final, vp_scissor);
   }

   if (scissor)
      si_clip_scissor(&final, scissor);

   /* Workaround for a hw bug on GFX6 that occurs when
    * PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
    */
   if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
      radeon_emit(cs, S_028250_TL_X(1) |
                      S_028250_TL_Y(1) |
                      S_028250_WINDOW_OFFSET_DISABLE(1));
      radeon_emit(cs, S_028254_BR_X(1) |
                      S_028254_BR_Y(1));
      return;
   }

   radeon_emit(cs, S_028250_TL_X(final.minx) |
                   S_028250_TL_Y(final.miny) |
                   S_028250_WINDOW_OFFSET_DISABLE(1));
   radeon_emit(cs, S_028254_BR_X(final.maxx) |
                   S_028254_BR_Y(final.maxy));
}

#define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176
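/* 8176 = 511 * 16. The HW_SCREEN_OFFSET fields are programmed in units of
 * 16 pixels (note the >> 4 in si_emit_guardband), so this is presumably the
 * largest offset a 9-bit field can hold.
 */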

static void si_emit_guardband(struct si_context *ctx)
{
   const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
   struct si_signed_scissor vp_as_scissor;
   struct pipe_viewport_state vp;
   float left, top, right, bottom, max_range, guardband_x, guardband_y;
   float discard_x, discard_y;

   if (ctx->vs_writes_viewport_index) {
      /* Shaders can draw to any viewport. Make a union of all
       * viewports. */
      vp_as_scissor = ctx->viewports.as_scissor[0];
      for (unsigned i = 1; i < SI_MAX_VIEWPORTS; i++) {
         si_scissor_make_union(&vp_as_scissor,
                               &ctx->viewports.as_scissor[i]);
      }
   } else {
      vp_as_scissor = ctx->viewports.as_scissor[0];
   }

   /* Blits don't set the viewport state. The vertex shader determines
    * the viewport size by scaling the coordinates, so we don't know
    * how large the viewport is. Assume the worst case.
    */
   if (ctx->vs_disables_clipping_viewport)
      vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;

   /* Determine the optimal hardware screen offset to center the viewport
    * within the viewport range in order to maximize the guardband size.
    */
   int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
   int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;

   /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
   const unsigned hw_screen_offset_alignment =
      ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);

   /* Indexed by quantization modes */
   static int max_viewport_size[] = {65535, 16383, 4095};

   /* Ensure that the whole viewport stays representable in
    * absolute coordinates.
    * See comment in si_set_viewport_states.
    */
   assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
          vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);

   hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
   hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);

   /* Align the screen offset by dropping the low bits. */
   hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
   hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);

   /* Apply the offset to center the viewport and maximize the guardband. */
   vp_as_scissor.minx -= hw_screen_offset_x;
   vp_as_scissor.maxx -= hw_screen_offset_x;
   vp_as_scissor.miny -= hw_screen_offset_y;
   vp_as_scissor.maxy -= hw_screen_offset_y;

   /* Reconstruct the viewport transformation from the scissor. */
   vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0;
   vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0;
   vp.scale[0] = vp_as_scissor.maxx - vp.translate[0];
   vp.scale[1] = vp_as_scissor.maxy - vp.translate[1];

   /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
   if (vp_as_scissor.minx == vp_as_scissor.maxx)
      vp.scale[0] = 0.5;
   if (vp_as_scissor.miny == vp_as_scissor.maxy)
      vp.scale[1] = 0.5;

   /* Find the biggest guard band that is inside the supported viewport
    * range. The guard band is specified as a horizontal and vertical
    * distance from (0,0) in clip space.
    *
    * This is done by applying the inverse viewport transformation
    * on the viewport limits to get those limits in clip space.
    *
    * The viewport range is [-max_viewport_size/2, max_viewport_size/2].
    */
   assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
   max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
   left = (-max_range - vp.translate[0]) / vp.scale[0];
   right = ( max_range - vp.translate[0]) / vp.scale[0];
   top = (-max_range - vp.translate[1]) / vp.scale[1];
   bottom = ( max_range - vp.translate[1]) / vp.scale[1];

   assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);

   guardband_x = MIN2(-left, right);
   guardband_y = MIN2(-top, bottom);
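   /* A worked example with made-up numbers: a 1024x1024 viewport at the
    * origin in 12.12 mode (max_viewport_size = 4095) gets a screen offset
    * of 512, so the centered scissor is [-512, 512], vp.scale = 512 and
    * vp.translate = 0. Then max_range = 2047 and
    * guardband_x = 2047 / 512 ~= 4.0: primitives may extend to roughly 4x
    * the viewport in clip space before real clipping kicks in.
    */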

   discard_x = 1.0;
   discard_y = 1.0;

   if (unlikely(util_prim_is_points_or_lines(ctx->current_rast_prim))) {
      /* When rendering wide points or lines, we need to be more
       * conservative about when to discard them entirely. */
      float pixels;

      if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
         pixels = rs->max_point_size;
      else
         pixels = rs->line_width;

      /* Add half the point size / line width */
      discard_x += pixels / (2.0 * vp.scale[0]);
      discard_y += pixels / (2.0 * vp.scale[1]);
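      /* Continuing the made-up example above: 10-pixel-wide points in the
       * 1024-pixel viewport give discard_x = 1 + 10 / (2 * 512) ~= 1.01,
       * so a point is only discarded once its center is about 1% past the
       * clip-space edge.
       */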

      /* Discard primitives that would lie entirely outside the clip
       * region. */
      discard_x = MIN2(discard_x, guardband_x);
      discard_y = MIN2(discard_y, guardband_y);
   }

   /* If any of the GB registers is updated, all of them must be updated.
    * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
    * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
    */
   unsigned initial_cdw = ctx->gfx_cs->current.cdw;
   radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
                               SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
                               fui(guardband_y), fui(discard_y),
                               fui(guardband_x), fui(discard_x));
   radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
                              SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
                              S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
                              S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
   radeon_opt_set_context_reg(ctx, R_028BE4_PA_SU_VTX_CNTL,
                              SI_TRACKED_PA_SU_VTX_CNTL,
                              S_028BE4_PIX_CENTER(rs->half_pixel_center) |
                              S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
                                                  vp_as_scissor.quant_mode));
   if (initial_cdw != ctx->gfx_cs->current.cdw)
      ctx->context_roll = true;

   si_update_ngg_small_prim_precision(ctx);
}

static void si_emit_scissors(struct si_context *ctx)
{
   struct radeon_cmdbuf *cs = ctx->gfx_cs;
   struct pipe_scissor_state *states = ctx->scissors;
   bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;

   /* The simple case: Only 1 viewport is active. */
   if (!ctx->vs_writes_viewport_index) {
      struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];

      radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
      si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
      return;
   }

   /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
   radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
                              SI_MAX_VIEWPORTS * 2);
   for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
      si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
                          scissor_enabled ? &states[i] : NULL);
   }
}

static void si_set_viewport_states(struct pipe_context *pctx,
                                   unsigned start_slot,
                                   unsigned num_viewports,
                                   const struct pipe_viewport_state *state)
{
   struct si_context *ctx = (struct si_context *)pctx;
   int i;

   for (i = 0; i < num_viewports; i++) {
      unsigned index = start_slot + i;
      struct si_signed_scissor *scissor = &ctx->viewports.as_scissor[index];

      ctx->viewports.states[index] = state[i];

      si_get_scissor_from_viewport(ctx, &state[i], scissor);

      unsigned w = scissor->maxx - scissor->minx;
      unsigned h = scissor->maxy - scissor->miny;
      unsigned max_extent = MAX2(w, h);

      int max_corner = MAX2(scissor->maxx, scissor->maxy);

      unsigned center_x = (scissor->maxx + scissor->minx) / 2;
      unsigned center_y = (scissor->maxy + scissor->miny) / 2;
      unsigned max_center = MAX2(center_x, center_y);

      /* PA_SU_HARDWARE_SCREEN_OFFSET can't center viewports whose
       * center is farther than MAX_PA_SU_HARDWARE_SCREEN_OFFSET
       * (for example, a 1x1 viewport in the lower right corner of
       * 16Kx16K). Such viewports need a greater guardband, so they
       * have to use a worse quantization mode.
       */
      unsigned distance_off_center =
         MAX2(0, (int)max_center - MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
      max_extent += distance_off_center;

      /* Determine the best quantization mode (subpixel precision),
       * but also leave enough space for the guardband.
       *
       * Note that primitive binning requires QUANT_MODE == 16_8 on Vega10
       * and Raven1 for line and rectangle primitive types to work correctly.
       * Always use 16_8 if primitive binning may occur.
       */
      if ((ctx->family == CHIP_VEGA10 || ctx->family == CHIP_RAVEN) &&
          ctx->screen->dpbb_allowed)
         max_extent = 16384; /* Use QUANT_MODE == 16_8. */

      /* Another constraint is that all coordinates in the viewport
       * are representable in fixed point with respect to the
       * surface origin.
       *
       * It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given
       * an offset that would make the upper corner of the viewport
       * greater than the maximum representable number post
       * quantization, i.e. 2^quant_bits.
       *
       * This does not matter for 14.10 and 16.8 formats since the
       * offset is already limited at 8k, but it means we can't use
       * 12.12 if we are drawing to some pixels outside the lower
       * 4k x 4k of the render target.
       */

      if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
      else if (max_extent <= 4096) /* 16K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
      else /* 64K scanline area for guardband */
         scissor->quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
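      /* Worked examples with made-up viewports: 1920x1080 at the origin
       * has max_extent = 1920 and max_corner = 1920, so it gets 14_10.
       * A 1x1 viewport at (16000, 16000) has its center ~7824 pixels past
       * the maximum screen offset, so max_extent grows to ~7825 and it
       * falls back to 16_8.
       */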
   }

   if (start_slot == 0) {
      ctx->viewports.y_inverted =
         -state->scale[1] + state->translate[1] >
         state->scale[1] + state->translate[1];
   }

   si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
   si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
   si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
}

static void si_emit_one_viewport(struct si_context *ctx,
                                 struct pipe_viewport_state *state)
{
   struct radeon_cmdbuf *cs = ctx->gfx_cs;

   radeon_emit(cs, fui(state->scale[0]));
   radeon_emit(cs, fui(state->translate[0]));
   radeon_emit(cs, fui(state->scale[1]));
   radeon_emit(cs, fui(state->translate[1]));
   radeon_emit(cs, fui(state->scale[2]));
   radeon_emit(cs, fui(state->translate[2]));
}

static void si_emit_viewports(struct si_context *ctx)
{
   struct radeon_cmdbuf *cs = ctx->gfx_cs;
   struct pipe_viewport_state *states = ctx->viewports.states;

   if (ctx->screen->use_ngg_culling) {
      /* Set the viewport info for small primitive culling. */
      struct si_small_prim_cull_info info;
      si_get_small_prim_cull_info(ctx, &info);

      if (memcmp(&info, &ctx->last_small_prim_cull_info, sizeof(info))) {
         unsigned offset = 0;

         /* Align to 256, because the address is shifted by 8 bits. */
         u_upload_data(ctx->b.const_uploader, 0, sizeof(info), 256,
                       &info, &offset,
                       (struct pipe_resource**)&ctx->small_prim_cull_info_buf);

         ctx->small_prim_cull_info_address =
            ctx->small_prim_cull_info_buf->gpu_address + offset;
         ctx->last_small_prim_cull_info = info;
         ctx->small_prim_cull_info_dirty = true;
      }

      if (ctx->small_prim_cull_info_dirty) {
         /* This will end up in SGPR6 as (value << 8), shifted by the hw. */
         radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->small_prim_cull_info_buf,
                                   RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
         radeon_set_sh_reg(ctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS,
                           ctx->small_prim_cull_info_address >> 8);
         ctx->small_prim_cull_info_dirty = false;
      }
   }

   /* The simple case: Only 1 viewport is active. */
   if (!ctx->vs_writes_viewport_index) {
      radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
      si_emit_one_viewport(ctx, &states[0]);
      return;
   }

   /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
   radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE,
                              SI_MAX_VIEWPORTS * 6);
   for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++)
      si_emit_one_viewport(ctx, &states[i]);
}

static inline void
si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
                      bool window_space_position, float *zmin, float *zmax)
{
   if (window_space_position) {
      *zmin = 0;
      *zmax = 1;
      return;
   }
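   /* util_viewport_zmin_zmax derives the depth range from scale[2] and
    * translate[2]. E.g. (assuming the u_viewport.h semantics) the GL
    * defaults scale[2] = 0.5, translate[2] = 0.5 with halfz = false span
    * [translate - scale, translate + scale], giving zmin = 0, zmax = 1.
    */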
   util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
}

static void si_emit_depth_ranges(struct si_context *ctx)
{
   struct radeon_cmdbuf *cs = ctx->gfx_cs;
   struct pipe_viewport_state *states = ctx->viewports.states;
   bool clip_halfz = ctx->queued.named.rasterizer->clip_halfz;
   bool window_space = ctx->vs_disables_clipping_viewport;
   float zmin, zmax;

   /* The simple case: Only 1 viewport is active. */
   if (!ctx->vs_writes_viewport_index) {
      si_viewport_zmin_zmax(&states[0], clip_halfz, window_space,
                            &zmin, &zmax);

      radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
      radeon_emit(cs, fui(zmin));
      radeon_emit(cs, fui(zmax));
      return;
   }

   /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
   radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0,
                              SI_MAX_VIEWPORTS * 2);
   for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
      si_viewport_zmin_zmax(&states[i], clip_halfz, window_space,
                            &zmin, &zmax);
      radeon_emit(cs, fui(zmin));
      radeon_emit(cs, fui(zmax));
   }
}

static void si_emit_viewport_states(struct si_context *ctx)
{
   si_emit_viewports(ctx);
   si_emit_depth_ranges(ctx);
}

/**
 * This reacts to 2 state changes:
 * - VS.writes_viewport_index
 * - VS output position in window space (enable/disable)
 *
 * Normally, we only emit 1 viewport and 1 scissor if no shader is using
 * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
 * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
 * called to emit the rest.
 */
void si_update_vs_viewport_state(struct si_context *ctx)
{
   struct si_shader_info *info = si_get_vs_info(ctx);
   bool vs_window_space;

   if (!info)
      return;

   /* When the VS disables clipping and viewport transformation. */
   vs_window_space =
      info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];

   if (ctx->vs_disables_clipping_viewport != vs_window_space) {
      ctx->vs_disables_clipping_viewport = vs_window_space;
      si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
      si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
   }

   /* Viewport index handling. */
   if (ctx->vs_writes_viewport_index == info->writes_viewport_index)
      return;

   /* This changes how the guardband is computed. */
   ctx->vs_writes_viewport_index = info->writes_viewport_index;
   si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);

   /* Emit the scissors and viewports that become active once the shader
    * writes the ViewportIndex output.
    */
   if (info->writes_viewport_index) {
      si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
      si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
   }
}

static void si_emit_window_rectangles(struct si_context *sctx)
{
   /* There are four clipping rectangles. Their corner coordinates are inclusive.
    * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
    * on whether the pixel is inside cliprects 0-3, respectively. For example,
    * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
    * the number 3 (binary 0011).
    *
    * If CLIPRECT_RULE & (1 << number), the pixel is rasterized.
    */
   struct radeon_cmdbuf *cs = sctx->gfx_cs;
   static const unsigned outside[4] = {
      /* outside rectangle 0 */
      V_02820C_OUT |
      V_02820C_IN_1 |
      V_02820C_IN_2 |
      V_02820C_IN_21 |
      V_02820C_IN_3 |
      V_02820C_IN_31 |
      V_02820C_IN_32 |
      V_02820C_IN_321,
      /* outside rectangles 0, 1 */
      V_02820C_OUT |
      V_02820C_IN_2 |
      V_02820C_IN_3 |
      V_02820C_IN_32,
      /* outside rectangles 0, 1, 2 */
      V_02820C_OUT |
      V_02820C_IN_3,
      /* outside rectangles 0, 1, 2, 3 */
      V_02820C_OUT,
   };
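   /* Assuming each V_02820C_* field is one rule bit per 4-bit pixel code
    * (with the cliprect-0 bit clear in every code listed for entry 0),
    * these entries work out to 0x5555, 0x1111, 0x0101 and 0x0001: the rule
    * bits for all pixel codes that are outside the first 1, 2, 3 or 4
    * rectangles. The include case below simply inverts the mask.
    */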
   const unsigned disabled = 0xffff; /* all inside and outside cases */
   unsigned num_rectangles = sctx->num_window_rectangles;
   struct pipe_scissor_state *rects = sctx->window_rectangles;
   unsigned rule;

   assert(num_rectangles <= 4);

   if (num_rectangles == 0)
      rule = disabled;
   else if (sctx->window_rectangles_include)
      rule = ~outside[num_rectangles - 1];
   else
      rule = outside[num_rectangles - 1];

   radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE,
                              SI_TRACKED_PA_SC_CLIPRECT_RULE, rule);
   if (num_rectangles == 0)
      return;

   radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL,
                              num_rectangles * 2);
   for (unsigned i = 0; i < num_rectangles; i++) {
      radeon_emit(cs, S_028210_TL_X(rects[i].minx) |
                      S_028210_TL_Y(rects[i].miny));
      radeon_emit(cs, S_028214_BR_X(rects[i].maxx) |
                      S_028214_BR_Y(rects[i].maxy));
   }
}

static void si_set_window_rectangles(struct pipe_context *ctx,
                                     bool include,
                                     unsigned num_rectangles,
                                     const struct pipe_scissor_state *rects)
{
   struct si_context *sctx = (struct si_context *)ctx;

   sctx->num_window_rectangles = num_rectangles;
   sctx->window_rectangles_include = include;
   if (num_rectangles) {
      memcpy(sctx->window_rectangles, rects,
             sizeof(*rects) * num_rectangles);
   }

   si_mark_atom_dirty(sctx, &sctx->atoms.s.window_rectangles);
}

void si_init_viewport_functions(struct si_context *ctx)
{
   ctx->atoms.s.guardband.emit = si_emit_guardband;
   ctx->atoms.s.scissors.emit = si_emit_scissors;
   ctx->atoms.s.viewports.emit = si_emit_viewport_states;
   ctx->atoms.s.window_rectangles.emit = si_emit_window_rectangles;

   ctx->b.set_scissor_states = si_set_scissor_states;
   ctx->b.set_viewport_states = si_set_viewport_states;
   ctx->b.set_window_rectangles = si_set_window_rectangles;

   for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++)
      ctx->viewports.as_scissor[i].quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
}