gallium: remove pipe_index_buffer and set_index_buffer
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_state_validate.c
1
2 #include "util/u_framebuffer.h"
3 #include "util/u_math.h"
4 #include "util/u_viewport.h"
5
6 #include "nvc0/nvc0_context.h"
7
#if 0
/* Experimental zcull (early hierarchical depth culling) setup — compiled
 * out. Places the zcull region in the depth buffer's BO right after the
 * miptree data (128 KiB-aligned) and programs region, limit, dimensions
 * and window offset. Kept for reference only.
 * NOTE(review): 0x07e0 and 0x15c8 are undocumented methods here — confirm
 * against rnndb before re-enabling.
 */
static void
nvc0_validate_zcull(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct nv50_surface *sf = nv50_surface(fb->zsbuf);
   struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
   struct nouveau_bo *bo = mt->base.bo;
   uint32_t size;
   uint32_t offset = align(mt->total_size, 1 << 17);
   unsigned width, height;

   /* Only non-array 2D depth surfaces are handled. */
   assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);

   size = mt->total_size * 2;

   /* Height aligned to 32, width rounded up to a multiple of 224. */
   height = align(fb->height, 32);
   width = fb->width % 224;
   if (width)
      width = fb->width + (224 - width);
   else
      width = fb->width;

   BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   offset += 1 << 17;
   BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
   PUSH_DATA (push, size);
   PUSH_DATA (push, size >> 16);
   BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
   PUSH_DATA (push, 2);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
   PUSH_DATA (push, width);
   PUSH_DATA (push, height);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
   PUSH_DATA (push, 0);
}
#endif
58
59 static inline void
60 nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
61 {
62 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
63 PUSH_DATA (push, 0);
64 PUSH_DATA (push, 0);
65 PUSH_DATA (push, 64); // width
66 PUSH_DATA (push, 0); // height
67 PUSH_DATA (push, 0); // format
68 PUSH_DATA (push, 0); // tile mode
69 PUSH_DATA (push, layers); // layers
70 PUSH_DATA (push, 0); // layer stride
71 PUSH_DATA (push, 0); // base layer
72 }
73
/* Validate and emit the complete framebuffer state: screen scissor, all
 * color RTs, the ZS surface, RT control / multisample mode, and the
 * per-sample positions into the fragment-stage aux constbuf. Also tracks
 * GPU read/write status of the attached resources and emits a SERIALIZE
 * when switching a resource from reading to writing.
 */
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned i, ms;
   unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
   unsigned nr_cbufs = fb->nr_cbufs;
   bool serialize = false;

   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);

   BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
   PUSH_DATA (push, fb->width << 16);
   PUSH_DATA (push, fb->height << 16);

   for (i = 0; i < fb->nr_cbufs; ++i) {
      struct nv50_surface *sf;
      struct nv04_resource *res;
      struct nouveau_bo *bo;

      /* Holes in the attachment list still get a null RT programmed. */
      if (!fb->cbufs[i]) {
         nvc0_fb_set_null_rt(push, i, 0);
         continue;
      }

      sf = nv50_surface(fb->cbufs[i]);
      res = nv04_resource(sf->base.texture);
      bo = res->bo;

      BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
      PUSH_DATAh(push, res->address + sf->offset);
      PUSH_DATA (push, res->address + sf->offset);
      if (likely(nouveau_bo_memtype(bo))) {
         /* Tiled surface: program the full miptree layout. */
         struct nv50_miptree *mt = nv50_miptree(sf->base.texture);

         assert(sf->base.texture->target != PIPE_BUFFER);

         PUSH_DATA(push, sf->width);
         PUSH_DATA(push, sf->height);
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, (mt->layout_3d << 16) |
                   mt->level[sf->base.u.tex.level].tile_mode);
         PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
         PUSH_DATA(push, mt->layer_stride >> 2);
         PUSH_DATA(push, sf->base.u.tex.first_layer);

         ms_mode = mt->ms_mode;
      } else {
         /* Linear surface or buffer rendering: pitch-mode RT. */
         if (res->base.target == PIPE_BUFFER) {
            PUSH_DATA(push, 262144); /* fixed pitch used for buffer RTs */
            PUSH_DATA(push, 1);
         } else {
            PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
            PUSH_DATA(push, sf->height);
         }
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, 1 << 12); /* NOTE(review): presumably the linear
                                    * layout bit in the tile-mode word */
         PUSH_DATA(push, 1);       /* layers */
         PUSH_DATA(push, 0);       /* layer stride */
         PUSH_DATA(push, 0);       /* base layer */

         nvc0_resource_fence(res, NOUVEAU_BO_WR);

         /* Linear color + depth is not supported here. */
         assert(!fb->zsbuf);
      }

      /* Write-after-read hazard: force a serialize below. */
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* only register for writing, otherwise we'd always serialize here */
      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
   }

   if (fb->zsbuf) {
      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
      int unk = mt->base.base.target == PIPE_TEXTURE_2D;

      BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
      PUSH_DATAh(push, mt->base.address + sf->offset);
      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 1);
      BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
      PUSH_DATA (push, sf->width);
      PUSH_DATA (push, sf->height);
      PUSH_DATA (push, (unk << 16) |
                 (sf->base.u.tex.first_layer + sf->depth));
      BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
      PUSH_DATA (push, sf->base.u.tex.first_layer);

      ms_mode = mt->ms_mode;

      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
   } else {
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 0);
   }

   /* No attachments at all: bind a null RT so sample count / layer count
    * still have a source, and derive ms_mode from fb->samples.
    */
   if (nr_cbufs == 0 && !fb->zsbuf) {
      assert(util_is_power_of_two(fb->samples));
      assert(fb->samples <= 8);

      nvc0_fb_set_null_rt(push, 0, fb->layers);

      if (fb->samples > 1)
         ms_mode = ffs(fb->samples) - 1;
      nr_cbufs = 1;
   }

   /* 076543210 (octal) = identity RT mapping; low nibble = RT count. */
   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
   PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);

   /* Upload the sample positions for the current sample count into the
    * fragment-stage (s = 4) aux constbuf.
    */
   ms = 1 << ms_mode;
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (i = 0; i < ms; i++) {
      float xy[2];
      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
      PUSH_DATAf(push, xy[0]);
      PUSH_DATAf(push, xy[1]);
   }

   if (screen->base.class_3d >= GM200_3D_CLASS) {
      /* GM200+ additionally wants the 16 sample-location bytes packed as
       * 4 bits x / 4 bits y per byte, replicated over the sample count.
       */
      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
      uint32_t val[4] = {};

      for (i = 0; i < 16; i++) {
         val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
         val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
      }

      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
      PUSH_DATAp(push, val, 4);
   }

   if (serialize)
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

   NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
}
232
233 static void
234 nvc0_validate_blend_colour(struct nvc0_context *nvc0)
235 {
236 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
237
238 BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
239 PUSH_DATAf(push, nvc0->blend_colour.color[0]);
240 PUSH_DATAf(push, nvc0->blend_colour.color[1]);
241 PUSH_DATAf(push, nvc0->blend_colour.color[2]);
242 PUSH_DATAf(push, nvc0->blend_colour.color[3]);
243 }
244
245 static void
246 nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
247 {
248 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
249 const ubyte *ref = &nvc0->stencil_ref.ref_value[0];
250
251 IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
252 IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
253 }
254
255 static void
256 nvc0_validate_stipple(struct nvc0_context *nvc0)
257 {
258 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
259 unsigned i;
260
261 BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
262 for (i = 0; i < 32; ++i)
263 PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
264 }
265
/* Emit per-viewport scissor state. Runs for both the SCISSOR and the
 * RASTERIZER dirty bits because the rasterizer's scissor-enable flag
 * selects between the real rectangles and a wide-open window.
 */
static void
nvc0_validate_scissor(struct nvc0_context *nvc0)
{
   int i;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   /* Nothing to do when only the rasterizer changed and the scissor
    * enable bit is the same as last time.
    */
   if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
       nvc0->rast->pipe.scissor == nvc0->state.scissor)
      return;

   /* Toggling the enable bit invalidates every viewport's scissor. */
   if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
      nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;

   nvc0->state.scissor = nvc0->rast->pipe.scissor;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_scissor_state *s = &nvc0->scissors[i];
      if (!(nvc0->scissors_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
      if (nvc0->rast->pipe.scissor) {
         PUSH_DATA(push, (s->maxx << 16) | s->minx);
         PUSH_DATA(push, (s->maxy << 16) | s->miny);
      } else {
         /* Scissor test disabled: open the window up fully. */
         PUSH_DATA(push, (0xffff << 16) | 0);
         PUSH_DATA(push, (0xffff << 16) | 0);
      }
   }
   nvc0->scissors_dirty = 0;
}
297
/* Emit translate/scale, the clipping rectangle and the depth range for
 * every dirty viewport.
 */
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int x, y, w, h, i;
   float zmin, zmax;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_viewport_state *vp = &nvc0->viewports[i];

      if (!(nvc0->viewports_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
      PUSH_DATAf(push, vp->translate[0]);
      PUSH_DATAf(push, vp->translate[1]);
      PUSH_DATAf(push, vp->translate[2]);

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
      PUSH_DATAf(push, vp->scale[0]);
      PUSH_DATAf(push, vp->scale[1]);
      PUSH_DATAf(push, vp->scale[2]);

      /* now set the viewport rectangle to viewport dimensions for clipping */

      /* Rectangle derived from translate +/- |scale|, clamped to >= 0. */
      x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
      y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
      w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
      h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

      /* If the halfz setting ever changes, the viewports will also get
       * updated. The rast will get updated before the validate function has a
       * chance to hit, so we can just use it directly without an atom
       * dependency.
       */
      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
      PUSH_DATAf(push, zmax);
   }
   nvc0->viewports_dirty = 0;
}
345
346 static void
347 nvc0_validate_window_rects(struct nvc0_context *nvc0)
348 {
349 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
350 bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
351 int i;
352
353 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
354 if (!enable)
355 return;
356
357 IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
358 BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
359 for (i = 0; i < nvc0->window_rect.rects; i++) {
360 struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
361 PUSH_DATA(push, (s->maxx << 16) | s->minx);
362 PUSH_DATA(push, (s->maxy << 16) | s->miny);
363 }
364 for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
365 PUSH_DATA(push, 0);
366 PUSH_DATA(push, 0);
367 }
368 }
369
/* Upload all user clip planes into the aux constbuf of shader stage s. */
static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   /* All planes are uploaded unconditionally (4 floats each). */
   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
   PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
384
/* Ensure the clip-distance-producing shader handles at least as many user
 * clip planes as 'mask' enables (n = index of highest set bit + 1). If it
 * does not, bump vp.num_ucps and re-run the stage's validate function so
 * the program is rebuilt with the new plane count.
 */
static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
                        struct nvc0_program *vp, uint8_t mask)
{
   const unsigned n = util_logbase2(mask) + 1;

   if (vp->vp.num_ucps >= n)
      return;
   /* Drop the compiled code; the validate below recreates it. */
   nvc0_program_destroy(nvc0, vp);

   vp->vp.num_ucps = n;
   if (likely(vp == nvc0->vertprog))
      nvc0_vertprog_validate(nvc0);
   else
   if (likely(vp == nvc0->gmtyprog))
      nvc0_gmtyprog_validate(nvc0);
   else
      nvc0_tevlprog_validate(nvc0);
}
404
/* Validate clip state: select the last shader stage before rasterization,
 * make sure it emits enough clip distances, upload the user clip planes if
 * needed, and program the hardware clip/cull distance enables and mode.
 */
static void
nvc0_validate_clip(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp;
   unsigned stage;
   uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;

   /* 'stage' doubles as the aux constbuf index and as the shift applied to
    * NVC0_NEW_3D_VERTPROG to get the matching dirty bit.
    */
   if (nvc0->gmtyprog) {
      stage = 3;
      vp = nvc0->gmtyprog;
   } else
   if (nvc0->tevlprog) {
      stage = 2;
      vp = nvc0->tevlprog;
   } else {
      stage = 0;
      vp = nvc0->vertprog;
   }

   if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
      nvc0_check_program_ucps(nvc0, vp, clip_enable);

   /* Re-upload planes when they or the owning shader stage changed. */
   if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
      if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
         nvc0_upload_uclip_planes(nvc0, stage);

   /* Only planes the shader actually writes can be enabled; cull
    * distances are always enabled.
    */
   clip_enable &= vp->vp.clip_enable;
   clip_enable |= vp->vp.cull_enable;

   if (nvc0->state.clip_enable != clip_enable) {
      nvc0->state.clip_enable = clip_enable;
      IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
   }
   if (nvc0->state.clip_mode != vp->vp.clip_mode) {
      nvc0->state.clip_mode = vp->vp.clip_mode;
      BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
      PUSH_DATA (push, vp->vp.clip_mode);
   }
}
445
/* Emit the blend state object; its 'state' array is a pre-encoded pushbuf
 * command stream of 'size' words.
 */
static void
nvc0_validate_blend(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->blend->size);
   PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
}
454
/* Emit the depth/stencil/alpha state object (pre-encoded command stream). */
static void
nvc0_validate_zsa(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->zsa->size);
   PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
}
463
/* Emit the rasterizer state object (pre-encoded command stream). */
static void
nvc0_validate_rasterizer(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->rast->size);
   PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
}
472
/* (Re)bind all dirty constant buffers for the graphics stages (0..4).
 * User constbufs (OpenGL uniforms) are copied into the screen's uniform_bo
 * at the stage's USR slot; resource-backed constbufs are bound directly.
 */
static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned s;

   for (s = 0; s < 5; ++s) {
      while (nvc0->constbuf_dirty[s]) {
         int i = ffs(nvc0->constbuf_dirty[s]) - 1;
         nvc0->constbuf_dirty[s] &= ~(1 << i);

         if (nvc0->constbuf[s][i].user) {
            struct nouveau_bo *bo = nvc0->screen->uniform_bo;
            const unsigned base = NVC0_CB_USR_INFO(s);
            const unsigned size = nvc0->constbuf[s][0].size;
            assert(i == 0); /* we really only want OpenGL uniforms here */
            assert(nvc0->constbuf[s][0].u.data);

            /* Grow the bound size only when needed (rounded to 256). */
            if (nvc0->state.uniform_buffer_bound[s] < size) {
               nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);

               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
               PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
               PUSH_DATAh(push, bo->offset + base);
               PUSH_DATA (push, bo->offset + base);
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (0 << 4) | 1);
            }
            /* Copy the user data into the uniform buffer via the pushbuf. */
            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                            base, nvc0->state.uniform_buffer_bound[s],
                            0, (size + 3) / 4,
                            nvc0->constbuf[s][0].u.data);
         } else {
            struct nv04_resource *res =
               nv04_resource(nvc0->constbuf[s][i].u.buf);
            if (res) {
               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
               PUSH_DATA (push, nvc0->constbuf[s][i].size);
               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (i << 4) | 1);

               BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;
            } else {
               /* NULL buffer: unbind the slot. */
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (i << 4) | 0);
            }
            if (i == 0)
               nvc0->state.uniform_buffer_bound[s] = 0;
         }
      }
   }

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
      /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
      nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
      nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
      nvc0->state.uniform_buffer_bound[5] = 0;
   }
}
537
/* Upload shader buffer (SSBO) descriptors — address, size — into each
 * graphics stage's aux constbuf, and reference the buffers for read/write.
 * Unbound slots get a zeroed descriptor.
 */
static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, s;

   for (s = 0; s < 5; s++) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
      PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
      for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
         if (nvc0->buffers[s][i].buffer) {
            struct nv04_resource *res =
               nv04_resource(nvc0->buffers[s][i].buffer);
            /* Descriptor layout: address lo/hi, size, padding. */
            PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
            PUSH_DATA (push, 0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
            /* Shaders may write anywhere in the bound range. */
            util_range_add(&res->valid_buffer_range,
                           nvc0->buffers[s][i].buffer_offset,
                           nvc0->buffers[s][i].buffer_offset +
                           nvc0->buffers[s][i].buffer_size);
         } else {
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
         }
      }
   }

}
575
576 static void
577 nvc0_validate_sample_mask(struct nvc0_context *nvc0)
578 {
579 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
580
581 unsigned mask[4] =
582 {
583 nvc0->sample_mask & 0xffff,
584 nvc0->sample_mask & 0xffff,
585 nvc0->sample_mask & 0xffff,
586 nvc0->sample_mask & 0xffff
587 };
588
589 BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
590 PUSH_DATA (push, mask[0]);
591 PUSH_DATA (push, mask[1]);
592 PUSH_DATA (push, mask[2]);
593 PUSH_DATA (push, mask[3]);
594 }
595
/* Program the sample-shading rate from the minimum sample count requested
 * by the state tracker and the fragment shader's requirements.
 */
static void
nvc0_validate_min_samples(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int samples;

   samples = util_next_power_of_two(nvc0->min_samples);
   if (samples > 1) {
      // If we're using the incoming sample mask and doing sample shading, we
      // have to do sample shading "to the max", otherwise there's no way to
      // tell which sets of samples are covered by the current invocation.
      // Similarly for reading the framebuffer.
      if (nvc0->fragprog && (
            nvc0->fragprog->fp.sample_mask_in ||
            nvc0->fragprog->fp.reads_framebuffer))
         samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
      samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
   }

   IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}
617
/* Bind the driver's aux constbuf to slot 15 of every graphics stage, and
 * flag the compute side to rebind its copy as well.
 */
static void
nvc0_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i;

   for (i = 0; i < 5; ++i) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
      PUSH_DATA (push, (15 << 4) | 1);
   }

   nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
}
636
637 static void
638 nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
639 {
640 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
641 bool rasterizer_discard;
642
643 if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
644 rasterizer_discard = true;
645 } else {
646 bool zs = nvc0->zsa &&
647 (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
648 rasterizer_discard = !zs &&
649 (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
650 }
651
652 if (rasterizer_discard != nvc0->state.rasterizer_discard) {
653 nvc0->state.rasterizer_discard = rasterizer_discard;
654 IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
655 }
656 }
657
658 /* alpha test is disabled if there are no color RTs, so make sure we have at
659 * least one if alpha test is enabled. Note that this must run after
660 * nvc0_validate_fb, otherwise that will override the RT count setting.
661 */
662 static void
663 nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
664 {
665 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
666
667 if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
668 nvc0->framebuffer.zsbuf &&
669 nvc0->framebuffer.nr_cbufs == 0) {
670 nvc0_fb_set_null_rt(push, 0, 0);
671 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
672 PUSH_DATA (push, (076543210 << 4) | 1);
673 }
674 }
675
676 static void
677 nvc0_validate_rast_fb(struct nvc0_context *nvc0)
678 {
679 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
680 struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
681 struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
682
683 if (!rast)
684 return;
685
686 if (rast->offset_units_unscaled) {
687 BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
688 if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
689 PUSH_DATAf(push, rast->offset_units * (1 << 16));
690 else
691 PUSH_DATAf(push, rast->offset_units * (1 << 24));
692 }
693 }
694
695
/* Upload the default outer (4) and inner (2) tessellation levels, used
 * when no TCS is bound.
 */
static void
nvc0_validate_tess_state(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}
705
/* If we have a frag shader bound which tries to read from the framebuffer, we
 * have to make sure that the fb is bound as a texture in the expected
 * location. For Fermi, that's in the special driver slot 16, while for Kepler
 * it's a regular binding stored in the driver constbuf.
 */
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct pipe_sampler_view *old_view = nvc0->fbtexture;
   struct pipe_sampler_view *new_view = NULL;

   if (nvc0->fragprog &&
       nvc0->fragprog->fp.reads_framebuffer &&
       nvc0->framebuffer.nr_cbufs &&
       nvc0->framebuffer.cbufs[0]) {
      /* Build a 2D-array view of color attachment 0 with identity swizzle. */
      struct pipe_sampler_view tmpl;
      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];

      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.format = sf->format;
      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
      tmpl.swizzle_r = PIPE_SWIZZLE_X;
      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
      tmpl.swizzle_a = PIPE_SWIZZLE_W;

      /* Bail if it's the same parameters */
      if (old_view && old_view->texture == sf->texture &&
          old_view->format == sf->format &&
          old_view->u.tex.first_level == sf->u.tex.level &&
          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
          old_view->u.tex.last_layer == sf->u.tex.last_layer)
         return;

      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
   } else if (old_view == NULL) {
      /* FB read not needed and no stale view to clear: nothing to do. */
      return;
   }

   if (old_view)
      pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
   nvc0->fbtexture = new_view;

   /* Lazily allocate and upload the screen's default sampler (TSC). */
   if (screen->default_tsc->id < 0) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
      tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
      nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc);
      screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
      if (screen->base.class_3d < NVE4_3D_CLASS) {
         /* Fermi: bind the sampler in the special driver slot. */
         BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
         PUSH_DATA (push, (tsc->id << 12) | 1);
      }
   }

   if (new_view) {
      /* Allocate a TIC entry for the new view and upload its descriptor. */
      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
      assert(tic->id < 0);
      tic->id = nvc0_screen_tic_alloc(screen, tic);
      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         /* Kepler+: store the combined handle in the FP aux constbuf. */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
         PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id);
      } else {
         /* Fermi: bind the texture in the special driver slot. */
         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
         PUSH_DATA (push, (tic->id << 9) | 1);
      }

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }
}
792
/* Switch the screen's active context to ctx_to: inherit the hardware state
 * mirror from the previously active context (or the screen's saved state),
 * then mark everything dirty so it gets re-emitted — except atoms whose CSO
 * is not bound, which must not be validated.
 */
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
   struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
   unsigned s;

   if (ctx_from)
      ctx_to->state = ctx_from->state;
   else
      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty_3d = ~0;
   ctx_to->dirty_cp = ~0;
   ctx_to->viewports_dirty = ~0;
   ctx_to->scissors_dirty = ~0;

   /* Per-stage dirty masks, including the compute stage (s == 5). */
   for (s = 0; s < 6; ++s) {
      ctx_to->samplers_dirty[s] = ~0;
      ctx_to->textures_dirty[s] = ~0;
      ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
      ctx_to->buffers_dirty[s] = ~0;
      ctx_to->images_dirty[s] = ~0;
   }

   /* Reset tfb as the shader that owns it may have been deleted. */
   ctx_to->state.tfb = NULL;

   if (!ctx_to->vertex)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);

   if (!ctx_to->vertprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
   if (!ctx_to->fragprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;

   if (!ctx_to->blend)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
   if (!ctx_to->rast)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
   if (!ctx_to->zsa)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;

   ctx_to->screen->cur_ctx = ctx_to;
}
837
/* 3D validate table: each entry pairs a validate function with the dirty
 * bits that trigger it. ORDER MATTERS — e.g. nvc0_validate_zsa_fb and
 * nvc0_validate_rast_fb must run after nvc0_validate_fb, and the clip
 * validation must run after the shader-stage validations it depends on.
 */
static struct nvc0_state_validate
validate_list_3d[] = {
    { nvc0_validate_fb,            NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_blend,         NVC0_NEW_3D_BLEND },
    { nvc0_validate_zsa,           NVC0_NEW_3D_ZSA },
    { nvc0_validate_sample_mask,   NVC0_NEW_3D_SAMPLE_MASK },
    { nvc0_validate_rasterizer,    NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_blend_colour,  NVC0_NEW_3D_BLEND_COLOUR },
    { nvc0_validate_stencil_ref,   NVC0_NEW_3D_STENCIL_REF },
    { nvc0_validate_stipple,       NVC0_NEW_3D_STIPPLE },
    { nvc0_validate_scissor,       NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_viewport,      NVC0_NEW_3D_VIEWPORT },
    { nvc0_validate_window_rects,  NVC0_NEW_3D_WINDOW_RECTS },
    { nvc0_vertprog_validate,      NVC0_NEW_3D_VERTPROG },
    { nvc0_tctlprog_validate,      NVC0_NEW_3D_TCTLPROG },
    { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
    { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
    { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES |
                                   NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_fp_zsa_rast,   NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                   NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_zsa_fb,        NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_rast_fb,       NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
                                   NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_constbufs_validate,     NVC0_NEW_3D_CONSTBUF },
    { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
    { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
    { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
    { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
    { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
    { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
    { nvc0_layer_validate,         NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
};
883
/* Generic state validation: run every entry of validate_list whose dirty
 * bits intersect both 'mask' and '*dirty', clear those bits, fence the
 * touched resources, then validate the buffer context against the pushbuf.
 * Returns false if pushbuf validation failed.
 */
bool
nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
                    struct nvc0_state_validate *validate_list, int size,
                    uint32_t *dirty, struct nouveau_bufctx *bufctx)
{
   uint32_t state_mask;
   int ret;
   unsigned i;

   /* Hardware state is shared per screen; a context switch re-dirties
    * everything that needs re-emission.
    */
   if (nvc0->screen->cur_ctx != nvc0)
      nvc0_switch_pipe_context(nvc0);

   state_mask = *dirty & mask;

   if (state_mask) {
      for (i = 0; i < size; ++i) {
         struct nvc0_state_validate *validate = &validate_list[i];

         if (state_mask & validate->states)
            validate->func(nvc0);
      }
      *dirty &= ~state_mask;

      nvc0_bufctx_fence(nvc0, bufctx, false);
   }

   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
   ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);

   return !ret;
}
915
/* 3D entry point: run the 3D validate list against nvc0->dirty_3d. */
bool
nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
                             ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
                             nvc0->bufctx_3d);

   /* NOTE(review): if the pushbuf was flushed during validation, re-run
    * the fencing over the 3D bufctx — presumably to re-mark resources
    * against the new fence; confirm against nvc0_bufctx_fence.
    */
   if (unlikely(nvc0->state.flushed)) {
      nvc0->state.flushed = false;
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
   }
   return ret;
}