Added a few more stubs so that control reaches DestroyDevice().
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_state_validate.c
1 #include "util/format/u_format.h"
2 #include "util/u_framebuffer.h"
3 #include "util/u_math.h"
4 #include "util/u_viewport.h"
5
6 #include "nvc0/nvc0_context.h"
7
#if 0
/* Disabled ZCULL setup.  The ZCULL address is the 128 KiB-aligned offset
 * past mt->total_size inside the depth miptree's buffer object, and the
 * limit lies another 128 KiB beyond it.
 */
static void
nvc0_validate_zcull(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct nv50_surface *sf = nv50_surface(fb->zsbuf);
   struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
   struct nouveau_bo *bo = mt->base.bo;
   uint32_t size;
   uint32_t zcull_start = align(mt->total_size, 1 << 17);
   uint32_t zcull_limit;
   unsigned width, height;
   unsigned remainder;

   assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);

   size = mt->total_size * 2;

   /* Height is padded to a multiple of 32, width to a multiple of 224. */
   height = align(fb->height, 32);
   remainder = fb->width % 224;
   width = remainder ? fb->width + (224 - remainder) : fb->width;

   BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0);

   BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, bo->offset + zcull_start);
   PUSH_DATA (push, bo->offset + zcull_start);

   zcull_limit = zcull_start + (1 << 17);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
   PUSH_DATAh(push, bo->offset + zcull_limit);
   PUSH_DATA (push, bo->offset + zcull_limit);

   BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
   PUSH_DATA (push, size);
   PUSH_DATA (push, size >> 16);

   BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
   PUSH_DATA (push, 2);

   BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
   PUSH_DATA (push, width);
   PUSH_DATA (push, height);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 0);

   BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);

   BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
   PUSH_DATA (push, 0);
}
#endif
58
/* Program render target slot i as an empty target: zero address, width 64,
 * height 0, format 0, with the requested layer count.
 */
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
   BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);

   /* address high / low */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);

   PUSH_DATA (push, 64);      /* width */
   PUSH_DATA (push, 0);       /* height */
   PUSH_DATA (push, 0);       /* format */
   PUSH_DATA (push, 0);       /* tile mode */
   PUSH_DATA (push, layers);  /* layers */
   PUSH_DATA (push, 0);       /* layer stride */
   PUSH_DATA (push, 0);       /* base layer */
}
73
/**
 * Pack one sample position into the 32-bit word stored in the driver
 * constant buffer.
 *
 * Only the low 4 bits of each coordinate are used.  Callers in this file
 * can pass y == 16 (computed as 16 - nibble), which previously indexed past
 * the end of the 16-entry table; all shifts are now done on unsigned values
 * so that setting bit 31 is well defined.
 *
 * \param x  horizontal sample coordinate
 * \param y  vertical sample coordinate
 * \return   bits 8..11 and 24..27 hold the table-remapped x and y,
 *           bits 12..15 and 28..31 hold the raw x and y nibbles
 */
static uint32_t
gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
{
   static const uint8_t lut[16] = {
      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
   const uint32_t xn = x & 0xf;
   const uint32_t yn = y & 0xf;
   uint32_t result = 0;

   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
   result |= (uint32_t)lut[xn] << 8 | (uint32_t)lut[yn] << 24;
   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
   result |= xn << 12 | yn << 28;
   return result;
}
87
88 static void
89 gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
90 {
91 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
92 struct nvc0_screen *screen = nvc0->screen;
93 unsigned grid_width, grid_height, hw_grid_width;
94 uint8_t sample_locations[16][2];
95 unsigned cb[64];
96 unsigned i, pixel, pixel_y, pixel_x, sample;
97 uint32_t packed_locations[4] = {};
98
99 screen->base.base.get_sample_pixel_grid(
100 &screen->base.base, ms, &grid_width, &grid_height);
101
102 hw_grid_width = grid_width;
103 if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
104 hw_grid_width = 4;
105
106 if (nvc0->sample_locations_enabled) {
107 uint8_t locations[2 * 4 * 8];
108 memcpy(locations, nvc0->sample_locations, sizeof(locations));
109 util_sample_locations_flip_y(
110 &screen->base.base, nvc0->framebuffer.height, ms, locations);
111
112 for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
113 for (sample = 0; sample < ms; sample++) {
114 unsigned pixel_x = pixel % hw_grid_width;
115 unsigned pixel_y = pixel / hw_grid_width;
116 unsigned wi = pixel * ms + sample;
117 unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
118 ri = ri * ms + sample;
119 sample_locations[wi][0] = locations[ri] & 0xf;
120 sample_locations[wi][1] = 16 - (locations[ri] >> 4);
121 }
122 }
123 } else {
124 const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
125 for (i = 0; i < 16; i++) {
126 sample_locations[i][0] = ptr[i % ms][0];
127 sample_locations[i][1] = ptr[i % ms][1];
128 }
129 }
130
131 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
132 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
133 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
134 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
135 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
136 PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
137 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
138 for (pixel_x = 0; pixel_x < 2; pixel_x++) {
139 for (sample = 0; sample < ms; sample++) {
140 unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
141 unsigned read_index = pixel_y % grid_height * hw_grid_width;
142 uint8_t x, y;
143 read_index += pixel_x % grid_width;
144 read_index = read_index * ms + sample;
145 x = sample_locations[read_index][0];
146 y = sample_locations[read_index][1];
147 cb[write_index] = gm200_encode_cb_sample_location(x, y);
148 }
149 }
150 }
151 PUSH_DATAp(push, cb, 64);
152
153 for (i = 0; i < 16; i++) {
154 packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
155 packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
156 }
157
158 BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
159 PUSH_DATAp(push, packed_locations, 4);
160 }
161
162 static void
163 nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
164 {
165 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
166 struct nvc0_screen *screen = nvc0->screen;
167 unsigned i;
168
169 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
170 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
171 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
172 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
173 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
174 PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
175 for (i = 0; i < ms; i++) {
176 float xy[2];
177 nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
178 PUSH_DATAf(push, xy[0]);
179 PUSH_DATAf(push, xy[1]);
180 }
181 }
182
183 static void
184 validate_sample_locations(struct nvc0_context *nvc0)
185 {
186 unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);
187
188 if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
189 gm200_validate_sample_locations(nvc0, ms);
190 else
191 nvc0_validate_sample_locations(nvc0, ms);
192 }
193
194 static void
195 nvc0_validate_fb(struct nvc0_context *nvc0)
196 {
197 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
198 struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
199 unsigned i;
200 unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
201 unsigned nr_cbufs = fb->nr_cbufs;
202 bool serialize = false;
203
204 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
205
206 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
207 PUSH_DATA (push, fb->width << 16);
208 PUSH_DATA (push, fb->height << 16);
209
210 for (i = 0; i < fb->nr_cbufs; ++i) {
211 struct nv50_surface *sf;
212 struct nv04_resource *res;
213 struct nouveau_bo *bo;
214
215 if (!fb->cbufs[i]) {
216 nvc0_fb_set_null_rt(push, i, 0);
217 continue;
218 }
219
220 sf = nv50_surface(fb->cbufs[i]);
221 res = nv04_resource(sf->base.texture);
222 bo = res->bo;
223
224 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
225 PUSH_DATAh(push, res->address + sf->offset);
226 PUSH_DATA (push, res->address + sf->offset);
227 if (likely(nouveau_bo_memtype(bo))) {
228 struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
229
230 assert(sf->base.texture->target != PIPE_BUFFER);
231
232 PUSH_DATA(push, sf->width);
233 PUSH_DATA(push, sf->height);
234 PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
235 PUSH_DATA(push, (mt->layout_3d << 16) |
236 mt->level[sf->base.u.tex.level].tile_mode);
237 PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
238 PUSH_DATA(push, mt->layer_stride >> 2);
239 PUSH_DATA(push, sf->base.u.tex.first_layer);
240
241 ms_mode = mt->ms_mode;
242 } else {
243 if (res->base.target == PIPE_BUFFER) {
244 PUSH_DATA(push, 262144);
245 PUSH_DATA(push, 1);
246 } else {
247 PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
248 PUSH_DATA(push, sf->height);
249 }
250 PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
251 PUSH_DATA(push, 1 << 12);
252 PUSH_DATA(push, 1);
253 PUSH_DATA(push, 0);
254 PUSH_DATA(push, 0);
255
256 nvc0_resource_fence(res, NOUVEAU_BO_WR);
257
258 assert(!fb->zsbuf);
259 }
260
261 if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
262 serialize = true;
263 res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
264 res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
265
266 /* only register for writing, otherwise we'd always serialize here */
267 BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
268 }
269
270 if (fb->zsbuf) {
271 struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
272 struct nv50_surface *sf = nv50_surface(fb->zsbuf);
273 int unk = mt->base.base.target == PIPE_TEXTURE_2D;
274
275 BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
276 PUSH_DATAh(push, mt->base.address + sf->offset);
277 PUSH_DATA (push, mt->base.address + sf->offset);
278 PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
279 PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
280 PUSH_DATA (push, mt->layer_stride >> 2);
281 BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
282 PUSH_DATA (push, 1);
283 BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
284 PUSH_DATA (push, sf->width);
285 PUSH_DATA (push, sf->height);
286 PUSH_DATA (push, (unk << 16) |
287 (sf->base.u.tex.first_layer + sf->depth));
288 BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
289 PUSH_DATA (push, sf->base.u.tex.first_layer);
290
291 ms_mode = mt->ms_mode;
292
293 if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
294 serialize = true;
295 mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
296 mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
297
298 BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
299 } else {
300 BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
301 PUSH_DATA (push, 0);
302 }
303
304 if (nr_cbufs == 0 && !fb->zsbuf) {
305 assert(util_is_power_of_two_or_zero(fb->samples));
306 assert(fb->samples <= 8);
307
308 nvc0_fb_set_null_rt(push, 0, fb->layers);
309
310 if (fb->samples > 1)
311 ms_mode = ffs(fb->samples) - 1;
312 nr_cbufs = 1;
313 }
314
315 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
316 PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
317 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
318
319 if (serialize)
320 IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
321
322 NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
323 }
324
325 static void
326 nvc0_validate_blend_colour(struct nvc0_context *nvc0)
327 {
328 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
329
330 BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
331 PUSH_DATAf(push, nvc0->blend_colour.color[0]);
332 PUSH_DATAf(push, nvc0->blend_colour.color[1]);
333 PUSH_DATAf(push, nvc0->blend_colour.color[2]);
334 PUSH_DATAf(push, nvc0->blend_colour.color[3]);
335 }
336
337 static void
338 nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
339 {
340 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
341 const ubyte *ref = &nvc0->stencil_ref.ref_value[0];
342
343 IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
344 IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
345 }
346
347 static void
348 nvc0_validate_stipple(struct nvc0_context *nvc0)
349 {
350 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
351 unsigned i;
352
353 BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
354 for (i = 0; i < 32; ++i)
355 PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
356 }
357
358 static void
359 nvc0_validate_scissor(struct nvc0_context *nvc0)
360 {
361 int i;
362 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
363
364 if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
365 nvc0->rast->pipe.scissor == nvc0->state.scissor)
366 return;
367
368 if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
369 nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;
370
371 nvc0->state.scissor = nvc0->rast->pipe.scissor;
372
373 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
374 struct pipe_scissor_state *s = &nvc0->scissors[i];
375 if (!(nvc0->scissors_dirty & (1 << i)))
376 continue;
377
378 BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
379 if (nvc0->rast->pipe.scissor) {
380 PUSH_DATA(push, (s->maxx << 16) | s->minx);
381 PUSH_DATA(push, (s->maxy << 16) | s->miny);
382 } else {
383 PUSH_DATA(push, (0xffff << 16) | 0);
384 PUSH_DATA(push, (0xffff << 16) | 0);
385 }
386 }
387 nvc0->scissors_dirty = 0;
388 }
389
390 static void
391 nvc0_validate_viewport(struct nvc0_context *nvc0)
392 {
393 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
394 uint16_t class_3d = nvc0->screen->base.class_3d;
395 int x, y, w, h, i;
396 float zmin, zmax;
397
398 for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
399 struct pipe_viewport_state *vp = &nvc0->viewports[i];
400
401 if (!(nvc0->viewports_dirty & (1 << i)))
402 continue;
403
404 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
405 PUSH_DATAf(push, vp->translate[0]);
406 PUSH_DATAf(push, vp->translate[1]);
407 PUSH_DATAf(push, vp->translate[2]);
408
409 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
410 PUSH_DATAf(push, vp->scale[0]);
411 PUSH_DATAf(push, vp->scale[1]);
412 PUSH_DATAf(push, vp->scale[2]);
413
414 /* now set the viewport rectangle to viewport dimensions for clipping */
415
416 x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
417 y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
418 w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
419 h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;
420
421 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
422 PUSH_DATA (push, (w << 16) | x);
423 PUSH_DATA (push, (h << 16) | y);
424
425 /* If the halfz setting ever changes, the viewports will also get
426 * updated. The rast will get updated before the validate function has a
427 * chance to hit, so we can just use it directly without an atom
428 * dependency.
429 */
430 util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);
431
432 BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
433 PUSH_DATAf(push, zmin);
434 PUSH_DATAf(push, zmax);
435
436 if (class_3d >= GM200_3D_CLASS) {
437 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SWIZZLE(i)), 1);
438 PUSH_DATA (push, vp->swizzle_x << 0 |
439 vp->swizzle_y << 4 |
440 vp->swizzle_z << 8 |
441 vp->swizzle_w << 12);
442 }
443 }
444 nvc0->viewports_dirty = 0;
445 }
446
447 static void
448 nvc0_validate_window_rects(struct nvc0_context *nvc0)
449 {
450 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
451 bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
452 int i;
453
454 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
455 if (!enable)
456 return;
457
458 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
459 BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
460 for (i = 0; i < nvc0->window_rect.rects; i++) {
461 struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
462 PUSH_DATA(push, (s->maxx << 16) | s->minx);
463 PUSH_DATA(push, (s->maxy << 16) | s->miny);
464 }
465 for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
466 PUSH_DATA(push, 0);
467 PUSH_DATA(push, 0);
468 }
469 }
470
471 static inline void
472 nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
473 {
474 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
475 struct nvc0_screen *screen = nvc0->screen;
476
477 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
478 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
479 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
480 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
481 BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
482 PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
483 PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
484 }
485
486 static inline void
487 nvc0_check_program_ucps(struct nvc0_context *nvc0,
488 struct nvc0_program *vp, uint8_t mask)
489 {
490 const unsigned n = util_logbase2(mask) + 1;
491
492 if (vp->vp.num_ucps >= n)
493 return;
494 nvc0_program_destroy(nvc0, vp);
495
496 vp->vp.num_ucps = n;
497 if (likely(vp == nvc0->vertprog))
498 nvc0_vertprog_validate(nvc0);
499 else
500 if (likely(vp == nvc0->gmtyprog))
501 nvc0_gmtyprog_validate(nvc0);
502 else
503 nvc0_tevlprog_validate(nvc0);
504 }
505
506 static void
507 nvc0_validate_clip(struct nvc0_context *nvc0)
508 {
509 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
510 struct nvc0_program *vp;
511 unsigned stage;
512 uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;
513
514 if (nvc0->gmtyprog) {
515 stage = 3;
516 vp = nvc0->gmtyprog;
517 } else
518 if (nvc0->tevlprog) {
519 stage = 2;
520 vp = nvc0->tevlprog;
521 } else {
522 stage = 0;
523 vp = nvc0->vertprog;
524 }
525
526 if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
527 nvc0_check_program_ucps(nvc0, vp, clip_enable);
528
529 if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
530 if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
531 nvc0_upload_uclip_planes(nvc0, stage);
532
533 clip_enable &= vp->vp.clip_enable;
534 clip_enable |= vp->vp.cull_enable;
535
536 if (nvc0->state.clip_enable != clip_enable) {
537 nvc0->state.clip_enable = clip_enable;
538 IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
539 }
540 if (nvc0->state.clip_mode != vp->vp.clip_mode) {
541 nvc0->state.clip_mode = vp->vp.clip_mode;
542 BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
543 PUSH_DATA (push, vp->vp.clip_mode);
544 }
545 }
546
547 static void
548 nvc0_validate_blend(struct nvc0_context *nvc0)
549 {
550 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
551
552 PUSH_SPACE(push, nvc0->blend->size);
553 PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
554 }
555
556 static void
557 nvc0_validate_zsa(struct nvc0_context *nvc0)
558 {
559 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
560
561 PUSH_SPACE(push, nvc0->zsa->size);
562 PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
563 }
564
565 static void
566 nvc0_validate_rasterizer(struct nvc0_context *nvc0)
567 {
568 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
569
570 PUSH_SPACE(push, nvc0->rast->size);
571 PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
572 }
573
574 static void
575 nvc0_constbufs_validate(struct nvc0_context *nvc0)
576 {
577 unsigned s;
578
579 bool can_serialize = true;
580
581 for (s = 0; s < 5; ++s) {
582 while (nvc0->constbuf_dirty[s]) {
583 int i = ffs(nvc0->constbuf_dirty[s]) - 1;
584 nvc0->constbuf_dirty[s] &= ~(1 << i);
585
586 if (nvc0->constbuf[s][i].user) {
587 struct nouveau_bo *bo = nvc0->screen->uniform_bo;
588 const unsigned base = NVC0_CB_USR_INFO(s);
589 const unsigned size = nvc0->constbuf[s][0].size;
590 assert(i == 0); /* we really only want OpenGL uniforms here */
591 assert(nvc0->constbuf[s][0].u.data);
592
593 if (!nvc0->state.uniform_buffer_bound[s]) {
594 nvc0->state.uniform_buffer_bound[s] = true;
595
596 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
597 NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
598 }
599 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
600 base, NVC0_MAX_CONSTBUF_SIZE,
601 0, (size + 3) / 4,
602 nvc0->constbuf[s][0].u.data);
603 } else {
604 struct nv04_resource *res =
605 nv04_resource(nvc0->constbuf[s][i].u.buf);
606 if (res) {
607 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
608 nvc0->constbuf[s][i].size,
609 res->address + nvc0->constbuf[s][i].offset);
610
611 BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
612
613 nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
614 res->cb_bindings[s] |= 1 << i;
615
616 if (i == 0)
617 nvc0->state.uniform_buffer_bound[s] = false;
618 } else if (i != 0) {
619 nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
620 }
621 }
622 }
623 }
624
625 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
626 /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
627 nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
628 nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
629 nvc0->state.uniform_buffer_bound[5] = false;
630 }
631 }
632
633 static void
634 nvc0_validate_buffers(struct nvc0_context *nvc0)
635 {
636 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
637 struct nvc0_screen *screen = nvc0->screen;
638 int i, s;
639
640 for (s = 0; s < 5; s++) {
641 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
642 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
643 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
644 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
645 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
646 PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
647 for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
648 if (nvc0->buffers[s][i].buffer) {
649 struct nv04_resource *res =
650 nv04_resource(nvc0->buffers[s][i].buffer);
651 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
652 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
653 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
654 PUSH_DATA (push, 0);
655 BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
656 util_range_add(&res->base, &res->valid_buffer_range,
657 nvc0->buffers[s][i].buffer_offset,
658 nvc0->buffers[s][i].buffer_offset +
659 nvc0->buffers[s][i].buffer_size);
660 } else {
661 PUSH_DATA (push, 0);
662 PUSH_DATA (push, 0);
663 PUSH_DATA (push, 0);
664 PUSH_DATA (push, 0);
665 }
666 }
667 }
668
669 }
670
671 static void
672 nvc0_validate_sample_mask(struct nvc0_context *nvc0)
673 {
674 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
675
676 unsigned mask[4] =
677 {
678 nvc0->sample_mask & 0xffff,
679 nvc0->sample_mask & 0xffff,
680 nvc0->sample_mask & 0xffff,
681 nvc0->sample_mask & 0xffff
682 };
683
684 BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
685 PUSH_DATA (push, mask[0]);
686 PUSH_DATA (push, mask[1]);
687 PUSH_DATA (push, mask[2]);
688 PUSH_DATA (push, mask[3]);
689 }
690
691 static void
692 nvc0_validate_min_samples(struct nvc0_context *nvc0)
693 {
694 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
695 int samples;
696
697 samples = util_next_power_of_two(nvc0->min_samples);
698 if (samples > 1) {
699 // If we're using the incoming sample mask and doing sample shading, we
700 // have to do sample shading "to the max", otherwise there's no way to
701 // tell which sets of samples are covered by the current invocation.
702 // Similarly for reading the framebuffer.
703 if (nvc0->fragprog && (
704 nvc0->fragprog->fp.sample_mask_in ||
705 nvc0->fragprog->fp.reads_framebuffer))
706 samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
707 samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
708 }
709
710 IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
711 }
712
713 static void
714 nvc0_validate_driverconst(struct nvc0_context *nvc0)
715 {
716 struct nvc0_screen *screen = nvc0->screen;
717 int i;
718
719 for (i = 0; i < 5; ++i)
720 nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
721 screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
722
723 nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
724 }
725
726 static void
727 nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
728 {
729 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
730 bool rasterizer_discard;
731
732 if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
733 rasterizer_discard = true;
734 } else {
735 bool zs = nvc0->zsa &&
736 (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
737 rasterizer_discard = !zs &&
738 (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
739 }
740
741 if (rasterizer_discard != nvc0->state.rasterizer_discard) {
742 nvc0->state.rasterizer_discard = rasterizer_discard;
743 IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
744 }
745 }
746
747 /* alpha test is disabled if there are no color RTs, so make sure we have at
748 * least one if alpha test is enabled. Note that this must run after
749 * nvc0_validate_fb, otherwise that will override the RT count setting.
750 */
751 static void
752 nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
753 {
754 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
755
756 if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
757 nvc0->framebuffer.zsbuf &&
758 nvc0->framebuffer.nr_cbufs == 0) {
759 nvc0_fb_set_null_rt(push, 0, 0);
760 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
761 PUSH_DATA (push, (076543210 << 4) | 1);
762 }
763 }
764
765 static void
766 nvc0_validate_rast_fb(struct nvc0_context *nvc0)
767 {
768 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
769 struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
770 struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
771
772 if (!rast)
773 return;
774
775 if (rast->offset_units_unscaled) {
776 BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
777 if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
778 PUSH_DATAf(push, rast->offset_units * (1 << 16));
779 else
780 PUSH_DATAf(push, rast->offset_units * (1 << 24));
781 }
782 }
783
784
785 static void
786 nvc0_validate_tess_state(struct nvc0_context *nvc0)
787 {
788 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
789
790 BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
791 PUSH_DATAp(push, nvc0->default_tess_outer, 4);
792 PUSH_DATAp(push, nvc0->default_tess_inner, 2);
793 }
794
795 /* If we have a frag shader bound which tries to read from the framebuffer, we
796 * have to make sure that the fb is bound as a texture in the expected
797 * location. For Fermi, that's in the special driver slot 16, while for Kepler
798 * it's a regular binding stored in the driver constbuf.
799 */
800 static void
801 nvc0_validate_fbread(struct nvc0_context *nvc0)
802 {
803 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
804 struct nvc0_screen *screen = nvc0->screen;
805 struct pipe_context *pipe = &nvc0->base.pipe;
806 struct pipe_sampler_view *old_view = nvc0->fbtexture;
807 struct pipe_sampler_view *new_view = NULL;
808
809 if (nvc0->fragprog &&
810 nvc0->fragprog->fp.reads_framebuffer &&
811 nvc0->framebuffer.nr_cbufs &&
812 nvc0->framebuffer.cbufs[0]) {
813 struct pipe_sampler_view tmpl;
814 struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
815
816 tmpl.target = PIPE_TEXTURE_2D_ARRAY;
817 tmpl.format = sf->format;
818 tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
819 tmpl.u.tex.first_layer = sf->u.tex.first_layer;
820 tmpl.u.tex.last_layer = sf->u.tex.last_layer;
821 tmpl.swizzle_r = PIPE_SWIZZLE_X;
822 tmpl.swizzle_g = PIPE_SWIZZLE_Y;
823 tmpl.swizzle_b = PIPE_SWIZZLE_Z;
824 tmpl.swizzle_a = PIPE_SWIZZLE_W;
825
826 /* Bail if it's the same parameters */
827 if (old_view && old_view->texture == sf->texture &&
828 old_view->format == sf->format &&
829 old_view->u.tex.first_level == sf->u.tex.level &&
830 old_view->u.tex.first_layer == sf->u.tex.first_layer &&
831 old_view->u.tex.last_layer == sf->u.tex.last_layer)
832 return;
833
834 new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
835 } else if (old_view == NULL) {
836 return;
837 }
838
839 if (old_view)
840 pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
841 nvc0->fbtexture = new_view;
842
843 if (new_view) {
844 struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
845 assert(tic->id < 0);
846 tic->id = nvc0_screen_tic_alloc(screen, tic);
847 nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
848 NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
849 screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
850
851 if (screen->base.class_3d >= NVE4_3D_CLASS) {
852 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
853 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
854 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
855 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
856 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
857 PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
858 PUSH_DATA (push, (0 << 20) | tic->id);
859 } else {
860 BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
861 PUSH_DATA (push, (tic->id << 9) | 1);
862 }
863
864 IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
865 }
866 }
867
868 static void
869 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
870 {
871 struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
872 unsigned s;
873
874 if (ctx_from)
875 ctx_to->state = ctx_from->state;
876 else
877 ctx_to->state = ctx_to->screen->save_state;
878
879 ctx_to->dirty_3d = ~0;
880 ctx_to->dirty_cp = ~0;
881 ctx_to->viewports_dirty = ~0;
882 ctx_to->scissors_dirty = ~0;
883
884 for (s = 0; s < 6; ++s) {
885 ctx_to->samplers_dirty[s] = ~0;
886 ctx_to->textures_dirty[s] = ~0;
887 ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
888 ctx_to->buffers_dirty[s] = ~0;
889 ctx_to->images_dirty[s] = ~0;
890 }
891
892 /* Reset tfb as the shader that owns it may have been deleted. */
893 ctx_to->state.tfb = NULL;
894
895 if (!ctx_to->vertex)
896 ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
897
898 if (!ctx_to->vertprog)
899 ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
900 if (!ctx_to->fragprog)
901 ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;
902
903 if (!ctx_to->blend)
904 ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
905 if (!ctx_to->rast)
906 ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
907 if (!ctx_to->zsa)
908 ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;
909
910 ctx_to->screen->cur_ctx = ctx_to;
911 }
912
913 static struct nvc0_state_validate
914 validate_list_3d[] = {
915 { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER },
916 { nvc0_validate_blend, NVC0_NEW_3D_BLEND },
917 { nvc0_validate_zsa, NVC0_NEW_3D_ZSA },
918 { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK },
919 { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER },
920 { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR },
921 { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF },
922 { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE },
923 { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
924 { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT },
925 { nvc0_validate_window_rects, NVC0_NEW_3D_WINDOW_RECTS },
926 { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG },
927 { nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG },
928 { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG },
929 { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR },
930 { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG },
931 { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES |
932 NVC0_NEW_3D_FRAGPROG |
933 NVC0_NEW_3D_FRAMEBUFFER },
934 { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
935 { nvc0_validate_fp_zsa_rast, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
936 NVC0_NEW_3D_RASTERIZER },
937 { nvc0_validate_zsa_fb, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
938 { nvc0_validate_rast_fb, NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
939 { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
940 NVC0_NEW_3D_VERTPROG |
941 NVC0_NEW_3D_TEVLPROG |
942 NVC0_NEW_3D_GMTYPROG },
943 { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF },
944 { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
945 { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
946 { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
947 { nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG |
948 NVC0_NEW_3D_FRAMEBUFFER },
949 { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
950 { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
951 { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },
952 { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
953 { nvc0_layer_validate, NVC0_NEW_3D_VERTPROG |
954 NVC0_NEW_3D_TEVLPROG |
955 NVC0_NEW_3D_GMTYPROG },
956 { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST },
957 { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS |
958 NVC0_NEW_3D_FRAMEBUFFER},
959 };
960
961 bool
962 nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
963 struct nvc0_state_validate *validate_list, int size,
964 uint32_t *dirty, struct nouveau_bufctx *bufctx)
965 {
966 uint32_t state_mask;
967 int ret;
968 unsigned i;
969
970 if (nvc0->screen->cur_ctx != nvc0)
971 nvc0_switch_pipe_context(nvc0);
972
973 state_mask = *dirty & mask;
974
975 if (state_mask) {
976 for (i = 0; i < size; ++i) {
977 struct nvc0_state_validate *validate = &validate_list[i];
978
979 if (state_mask & validate->states)
980 validate->func(nvc0);
981 }
982 *dirty &= ~state_mask;
983
984 nvc0_bufctx_fence(nvc0, bufctx, false);
985 }
986
987 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
988 ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
989
990 return !ret;
991 }
992
993 bool
994 nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
995 {
996 bool ret;
997
998 ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
999 ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
1000 nvc0->bufctx_3d);
1001
1002 if (unlikely(nvc0->state.flushed)) {
1003 nvc0->state.flushed = false;
1004 nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
1005 }
1006 return ret;
1007 }