radeonsi: simplify the signature of si_update_vs_writes_viewport_index
[mesa.git] src/gallium/drivers/radeonsi/si_state_viewport.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "sid.h"
#include "radeon/r600_cs.h"
#include "util/u_viewport.h"
#include "tgsi/tgsi_scan.h"

#define SI_MAX_SCISSOR 16384

static void si_set_scissor_states(struct pipe_context *pctx,
                                  unsigned start_slot,
                                  unsigned num_scissors,
                                  const struct pipe_scissor_state *state)
{
        struct si_context *ctx = (struct si_context *)pctx;
        int i;

        for (i = 0; i < num_scissors; i++)
                ctx->scissors.states[start_slot + i] = state[i];

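        /* If the scissor test is disabled, only the new states need to be
         * saved; si_viewport_set_rast_deps marks all scissors dirty when
         * the test gets enabled later.
         */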
        if (!ctx->scissor_enabled)
                return;

        ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
        si_mark_atom_dirty(ctx, &ctx->scissors.atom);
}

/* Since the guard band disables clipping, we have to clip per-pixel
 * using a scissor.
 */
static void si_get_scissor_from_viewport(struct si_context *ctx,
                                         const struct pipe_viewport_state *vp,
                                         struct si_signed_scissor *scissor)
{
        float tmp, minx, miny, maxx, maxy;

        /* Convert (-1, -1) and (1, 1) from clip space into window space. */
        minx = -vp->scale[0] + vp->translate[0];
        miny = -vp->scale[1] + vp->translate[1];
        maxx = vp->scale[0] + vp->translate[0];
        maxy = vp->scale[1] + vp->translate[1];
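        /* With a typical GL-style viewport of size WxH at (X, Y), where
         * scale = (W/2, H/2) and translate = (X + W/2, Y + H/2), this
         * yields (X, Y) and (X + W, Y + H).
         */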

        /* r600_draw_rectangle sets this. Disable the scissor. */
        if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
                scissor->minx = scissor->miny = 0;
                scissor->maxx = scissor->maxy = SI_MAX_SCISSOR;
                return;
        }

        /* Handle inverted viewports. */
        if (minx > maxx) {
                tmp = minx;
                minx = maxx;
                maxx = tmp;
        }
        if (miny > maxy) {
                tmp = miny;
                miny = maxy;
                maxy = tmp;
        }

        /* Convert to integer and round up the max bounds. */
        scissor->minx = minx;
        scissor->miny = miny;
        scissor->maxx = ceilf(maxx);
        scissor->maxy = ceilf(maxy);
}

static void si_clamp_scissor(struct si_context *ctx,
                             struct pipe_scissor_state *out,
                             struct si_signed_scissor *scissor)
{
        out->minx = CLAMP(scissor->minx, 0, SI_MAX_SCISSOR);
        out->miny = CLAMP(scissor->miny, 0, SI_MAX_SCISSOR);
        out->maxx = CLAMP(scissor->maxx, 0, SI_MAX_SCISSOR);
        out->maxy = CLAMP(scissor->maxy, 0, SI_MAX_SCISSOR);
}

static void si_clip_scissor(struct pipe_scissor_state *out,
                            struct pipe_scissor_state *clip)
{
        out->minx = MAX2(out->minx, clip->minx);
        out->miny = MAX2(out->miny, clip->miny);
        out->maxx = MIN2(out->maxx, clip->maxx);
        out->maxy = MIN2(out->maxy, clip->maxy);
}

static void si_scissor_make_union(struct si_signed_scissor *out,
                                  struct si_signed_scissor *in)
{
        out->minx = MIN2(out->minx, in->minx);
        out->miny = MIN2(out->miny, in->miny);
        out->maxx = MAX2(out->maxx, in->maxx);
        out->maxy = MAX2(out->maxy, in->maxy);
}

static void si_emit_one_scissor(struct si_context *ctx,
                                struct radeon_winsys_cs *cs,
                                struct si_signed_scissor *vp_scissor,
                                struct pipe_scissor_state *scissor)
{
        struct pipe_scissor_state final;

        if (ctx->vs_disables_clipping_viewport) {
                final.minx = final.miny = 0;
                final.maxx = final.maxy = SI_MAX_SCISSOR;
        } else {
                si_clamp_scissor(ctx, &final, vp_scissor);
        }

        if (scissor)
                si_clip_scissor(&final, scissor);

        radeon_emit(cs, S_028250_TL_X(final.minx) |
                        S_028250_TL_Y(final.miny) |
                        S_028250_WINDOW_OFFSET_DISABLE(1));
        radeon_emit(cs, S_028254_BR_X(final.maxx) |
                        S_028254_BR_Y(final.maxy));
}

/* the range is [-MAX, MAX] */
#define GET_MAX_VIEWPORT_RANGE(rctx) (32768)

static void si_emit_guardband(struct si_context *ctx,
                              struct si_signed_scissor *vp_as_scissor)
{
        struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
        struct pipe_viewport_state vp;
        float left, top, right, bottom, max_range, guardband_x, guardband_y;
        float discard_x, discard_y;

        /* Reconstruct the viewport transformation from the scissor. */
        vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
        vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
        vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
        vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
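        /* translate is the center of the scissor rectangle and scale is
         * its half-extent; this inverts si_get_scissor_from_viewport.
         */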

        /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
        if (vp_as_scissor->minx == vp_as_scissor->maxx)
                vp.scale[0] = 0.5;
        if (vp_as_scissor->miny == vp_as_scissor->maxy)
                vp.scale[1] = 0.5;

        /* Find the biggest guard band that is inside the supported viewport
         * range. The guard band is specified as a horizontal and vertical
         * distance from (0,0) in clip space.
         *
         * This is done by applying the inverse viewport transformation
         * on the viewport limits to get those limits in clip space.
         *
         * Use a limit one pixel smaller to allow for some precision error.
         */
        max_range = GET_MAX_VIEWPORT_RANGE(ctx) - 1;
        left   = (-max_range - vp.translate[0]) / vp.scale[0];
        right  = ( max_range - vp.translate[0]) / vp.scale[0];
        top    = (-max_range - vp.translate[1]) / vp.scale[1];
        bottom = ( max_range - vp.translate[1]) / vp.scale[1];

        assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);

        guardband_x = MIN2(-left, right);
        guardband_y = MIN2(-top, bottom);
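        /* The guard band must be symmetric around (0,0) in clip space,
         * so take the smaller of the two distances on each axis.
         */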

        discard_x = 1.0;
        discard_y = 1.0;

        if (ctx->current_rast_prim < PIPE_PRIM_TRIANGLES) {
                /* When rendering wide points or lines, we need to be more
                 * conservative about when to discard them entirely. Since
                 * point size can be determined by the VS output, we basically
                 * disable discard completely here.
                 *
                 * TODO: This can hurt performance when rendering lines and
                 * points with fixed size, and could be improved.
                 */
                discard_x = guardband_x;
                discard_y = guardband_y;
        }

        /* If any of the GB registers is updated, all of them must be updated. */
        radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);

        radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
        radeon_emit(cs, fui(discard_y));   /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
        radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
        radeon_emit(cs, fui(discard_x));   /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
}

static void si_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
{
        struct si_context *ctx = (struct si_context *)rctx;
        struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
        struct pipe_scissor_state *states = ctx->scissors.states;
        unsigned mask = ctx->scissors.dirty_mask;
        bool scissor_enabled = ctx->scissor_enabled;
        struct si_signed_scissor max_vp_scissor;
        int i;

        /* The simple case: Only 1 viewport is active. */
        if (!ctx->vs_writes_viewport_index) {
                struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];

                if (!(mask & 1))
                        return;

                radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
                si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
                si_emit_guardband(ctx, vp);
                ctx->scissors.dirty_mask &= ~1; /* clear one bit */
                return;
        }

        /* Shaders can draw to any viewport. Make a union of all viewports. */
        max_vp_scissor = ctx->viewports.as_scissor[0];
        for (i = 1; i < SI_MAX_VIEWPORTS; i++)
                si_scissor_make_union(&max_vp_scissor,
                                      &ctx->viewports.as_scissor[i]);

        while (mask) {
                int start, count, i;

                u_bit_scan_consecutive_range(&mask, &start, &count);

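                /* Each scissor is a TL/BR register pair, so consecutive
                 * scissors are 2 registers (4 bytes each) apart.
                 */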
                radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
                                               start * 4 * 2, count * 2);
                for (i = start; i < start+count; i++) {
                        si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
                                            scissor_enabled ? &states[i] : NULL);
                }
        }
        si_emit_guardband(ctx, &max_vp_scissor);
        ctx->scissors.dirty_mask = 0;
}

static void si_set_viewport_states(struct pipe_context *pctx,
                                   unsigned start_slot,
                                   unsigned num_viewports,
                                   const struct pipe_viewport_state *state)
{
        struct si_context *ctx = (struct si_context *)pctx;
        unsigned mask;
        int i;

        for (i = 0; i < num_viewports; i++) {
                unsigned index = start_slot + i;

                ctx->viewports.states[index] = state[i];
                si_get_scissor_from_viewport(ctx, &state[i],
                                             &ctx->viewports.as_scissor[index]);
        }

        mask = ((1 << num_viewports) - 1) << start_slot;
        ctx->viewports.dirty_mask |= mask;
        ctx->viewports.depth_range_dirty_mask |= mask;
        ctx->scissors.dirty_mask |= mask;
        si_mark_atom_dirty(ctx, &ctx->viewports.atom);
        si_mark_atom_dirty(ctx, &ctx->scissors.atom);
}

static void si_emit_one_viewport(struct si_context *ctx,
                                 struct pipe_viewport_state *state)
{
        struct radeon_winsys_cs *cs = ctx->b.gfx.cs;

        radeon_emit(cs, fui(state->scale[0]));
        radeon_emit(cs, fui(state->translate[0]));
        radeon_emit(cs, fui(state->scale[1]));
        radeon_emit(cs, fui(state->translate[1]));
        radeon_emit(cs, fui(state->scale[2]));
        radeon_emit(cs, fui(state->translate[2]));
}

static void si_emit_viewports(struct si_context *ctx)
{
        struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
        struct pipe_viewport_state *states = ctx->viewports.states;
        unsigned mask = ctx->viewports.dirty_mask;

        /* The simple case: Only 1 viewport is active. */
        if (!ctx->vs_writes_viewport_index) {
                if (!(mask & 1))
                        return;

                radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
                si_emit_one_viewport(ctx, &states[0]);
                ctx->viewports.dirty_mask &= ~1; /* clear one bit */
                return;
        }

        while (mask) {
                int start, count, i;

                u_bit_scan_consecutive_range(&mask, &start, &count);

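                /* Each viewport occupies 6 consecutive registers: X/Y/Z
                 * scale and offset, matching si_emit_one_viewport.
                 */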
                radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
                                               start * 4 * 6, count * 6);
                for (i = start; i < start+count; i++)
                        si_emit_one_viewport(ctx, &states[i]);
        }
        ctx->viewports.dirty_mask = 0;
}

static void si_emit_depth_ranges(struct si_context *ctx)
{
        struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
        struct pipe_viewport_state *states = ctx->viewports.states;
        unsigned mask = ctx->viewports.depth_range_dirty_mask;
        float zmin, zmax;

        /* The simple case: Only 1 viewport is active. */
        if (!ctx->vs_writes_viewport_index) {
                if (!(mask & 1))
                        return;

                util_viewport_zmin_zmax(&states[0], ctx->clip_halfz, &zmin, &zmax);

                radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
                radeon_emit(cs, fui(zmin));
                radeon_emit(cs, fui(zmax));
                ctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
                return;
        }

        while (mask) {
                int start, count, i;

                u_bit_scan_consecutive_range(&mask, &start, &count);

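                /* Each viewport has a ZMIN/ZMAX register pair. */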
                radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
                                               start * 4 * 2, count * 2);
                for (i = start; i < start+count; i++) {
                        util_viewport_zmin_zmax(&states[i], ctx->clip_halfz, &zmin, &zmax);
                        radeon_emit(cs, fui(zmin));
                        radeon_emit(cs, fui(zmax));
                }
        }
        ctx->viewports.depth_range_dirty_mask = 0;
}

static void si_emit_viewport_states(struct r600_common_context *rctx,
                                    struct r600_atom *atom)
{
        struct si_context *ctx = (struct si_context *)rctx;
        si_emit_viewports(ctx);
        si_emit_depth_ranges(ctx);
}

/* Set viewport dependencies on pipe_rasterizer_state. */
void si_viewport_set_rast_deps(struct si_context *ctx,
                               bool scissor_enable, bool clip_halfz)
{
        if (ctx->scissor_enabled != scissor_enable) {
                ctx->scissor_enabled = scissor_enable;
                ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
                si_mark_atom_dirty(ctx, &ctx->scissors.atom);
        }
        if (ctx->clip_halfz != clip_halfz) {
                ctx->clip_halfz = clip_halfz;
                ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
                si_mark_atom_dirty(ctx, &ctx->viewports.atom);
        }
}

/**
 * Normally, we only emit 1 viewport and 1 scissor if no shader is using
 * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
 * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
 * called to emit the rest.
 */
void si_update_vs_writes_viewport_index(struct si_context *ctx)
{
        struct tgsi_shader_info *info = si_get_vs_info(ctx);
        bool vs_window_space;

        if (!info)
                return;

        /* True if the VS disables clipping and the viewport transformation. */
        vs_window_space =
                info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];

        if (ctx->vs_disables_clipping_viewport != vs_window_space) {
                ctx->vs_disables_clipping_viewport = vs_window_space;
                ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
                si_mark_atom_dirty(ctx, &ctx->scissors.atom);
        }

        /* Viewport index handling. */
        ctx->vs_writes_viewport_index = info->writes_viewport_index;
        if (!ctx->vs_writes_viewport_index)
                return;

        if (ctx->scissors.dirty_mask)
                si_mark_atom_dirty(ctx, &ctx->scissors.atom);

        if (ctx->viewports.dirty_mask ||
            ctx->viewports.depth_range_dirty_mask)
                si_mark_atom_dirty(ctx, &ctx->viewports.atom);
}

void si_init_viewport_functions(struct si_context *ctx)
{
        ctx->scissors.atom.emit = si_emit_scissors;
        ctx->viewports.atom.emit = si_emit_viewport_states;

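        /* Scissors: 2-dword header + 16 scissors * 2 dwords each,
         * plus 6 dwords for the guard band (header + 4 registers).
         * Viewports: 2-dword header + 16 viewports * 6 dwords each.
         */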
        ctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
        ctx->viewports.atom.num_dw = 2 + 16 * 6;

        ctx->b.b.set_scissor_states = si_set_scissor_states;
        ctx->b.b.set_viewport_states = si_set_viewport_states;
}