nv50,nvc0: set constbufs dirty on pipe context switch
[mesa.git] / src / gallium / drivers / nouveau / nv50 / nv50_state_validate.c
1
2 #include "nv50/nv50_context.h"
3 #include "nv50/nv50_defs.xml.h"
4
5 static INLINE void
6 nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
7 {
8 BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 4);
9 PUSH_DATA (push, 0);
10 PUSH_DATA (push, 0);
11 PUSH_DATA (push, NV50_SURFACE_FORMAT_NONE);
12 PUSH_DATA (push, 0);
13 BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
14 PUSH_DATA (push, 64);
15 PUSH_DATA (push, 0);
16 }
17
18 static void
19 nv50_validate_fb(struct nv50_context *nv50)
20 {
21 struct nouveau_pushbuf *push = nv50->base.pushbuf;
22 struct pipe_framebuffer_state *fb = &nv50->framebuffer;
23 unsigned i;
24 unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
25 uint32_t array_size = 0xffff, array_mode = 0;
26
27 nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
28
29 BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
30 PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
31 BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2);
32 PUSH_DATA (push, fb->width << 16);
33 PUSH_DATA (push, fb->height << 16);
34
35 for (i = 0; i < fb->nr_cbufs; ++i) {
36 struct nv50_miptree *mt;
37 struct nv50_surface *sf;
38 struct nouveau_bo *bo;
39
40 if (!fb->cbufs[i]) {
41 nv50_fb_set_null_rt(push, i);
42 continue;
43 }
44
45 mt = nv50_miptree(fb->cbufs[i]->texture);
46 sf = nv50_surface(fb->cbufs[i]);
47 bo = mt->base.bo;
48
49 array_size = MIN2(array_size, sf->depth);
50 if (mt->layout_3d)
51 array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */
52
53 /* can't mix 3D with ARRAY or have RTs of different depth/array_size */
54 assert(mt->layout_3d || !array_mode || array_size == 1);
55
56 BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
57 PUSH_DATAh(push, bo->offset + sf->offset);
58 PUSH_DATA (push, bo->offset + sf->offset);
59 PUSH_DATA (push, nv50_format_table[sf->base.format].rt);
60 if (likely(nouveau_bo_memtype(bo))) {
61 PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
62 PUSH_DATA (push, mt->layer_stride >> 2);
63 BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
64 PUSH_DATA (push, sf->width);
65 PUSH_DATA (push, sf->height);
66 BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
67 PUSH_DATA (push, array_mode | array_size);
68 nv50->rt_array_mode = array_mode | array_size;
69 } else {
70 PUSH_DATA (push, 0);
71 PUSH_DATA (push, 0);
72 BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
73 PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
74 PUSH_DATA (push, sf->height);
75 BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
76 PUSH_DATA (push, 0);
77
78 assert(!fb->zsbuf);
79 assert(!mt->ms_mode);
80 }
81
82 ms_mode = mt->ms_mode;
83
84 if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
85 nv50->state.rt_serialize = TRUE;
86 mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
87 mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
88
89 /* only register for writing, otherwise we'd always serialize here */
90 BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
91 }
92
93 if (fb->zsbuf) {
94 struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
95 struct nv50_surface *sf = nv50_surface(fb->zsbuf);
96 struct nouveau_bo *bo = mt->base.bo;
97 int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;
98
99 BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
100 PUSH_DATAh(push, bo->offset + sf->offset);
101 PUSH_DATA (push, bo->offset + sf->offset);
102 PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt);
103 PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
104 PUSH_DATA (push, mt->layer_stride >> 2);
105 BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
106 PUSH_DATA (push, 1);
107 BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
108 PUSH_DATA (push, sf->width);
109 PUSH_DATA (push, sf->height);
110 PUSH_DATA (push, (unk << 16) | sf->depth);
111
112 ms_mode = mt->ms_mode;
113
114 if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
115 nv50->state.rt_serialize = TRUE;
116 mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
117 mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
118
119 BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
120 } else {
121 BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
122 PUSH_DATA (push, 0);
123 }
124
125 BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
126 PUSH_DATA (push, ms_mode);
127
128 /* Only need to initialize the first viewport, which is used for clears */
129 BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
130 PUSH_DATA (push, fb->width << 16);
131 PUSH_DATA (push, fb->height << 16);
132
133 if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {
134 unsigned ms = 1 << ms_mode;
135 BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
136 PUSH_DATA (push, (NV50_CB_AUX_SAMPLE_OFFSET << (8 - 2)) | NV50_CB_AUX);
137 BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 2 * ms);
138 for (i = 0; i < ms; i++) {
139 float xy[2];
140 nv50->base.pipe.get_sample_position(&nv50->base.pipe, ms, i, xy);
141 PUSH_DATAf(push, xy[0]);
142 PUSH_DATAf(push, xy[1]);
143 }
144 }
145 }
146
147 static void
148 nv50_validate_blend_colour(struct nv50_context *nv50)
149 {
150 struct nouveau_pushbuf *push = nv50->base.pushbuf;
151
152 BEGIN_NV04(push, NV50_3D(BLEND_COLOR(0)), 4);
153 PUSH_DATAf(push, nv50->blend_colour.color[0]);
154 PUSH_DATAf(push, nv50->blend_colour.color[1]);
155 PUSH_DATAf(push, nv50->blend_colour.color[2]);
156 PUSH_DATAf(push, nv50->blend_colour.color[3]);
157 }
158
159 static void
160 nv50_validate_stencil_ref(struct nv50_context *nv50)
161 {
162 struct nouveau_pushbuf *push = nv50->base.pushbuf;
163
164 BEGIN_NV04(push, NV50_3D(STENCIL_FRONT_FUNC_REF), 1);
165 PUSH_DATA (push, nv50->stencil_ref.ref_value[0]);
166 BEGIN_NV04(push, NV50_3D(STENCIL_BACK_FUNC_REF), 1);
167 PUSH_DATA (push, nv50->stencil_ref.ref_value[1]);
168 }
169
170 static void
171 nv50_validate_stipple(struct nv50_context *nv50)
172 {
173 struct nouveau_pushbuf *push = nv50->base.pushbuf;
174 unsigned i;
175
176 BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
177 for (i = 0; i < 32; ++i)
178 PUSH_DATA(push, util_bswap32(nv50->stipple.stipple[i]));
179 }
180
181 static void
182 nv50_validate_scissor(struct nv50_context *nv50)
183 {
184 struct nouveau_pushbuf *push = nv50->base.pushbuf;
185 #ifdef NV50_SCISSORS_CLIPPING
186 int minx, maxx, miny, maxy, i;
187
188 if (!(nv50->dirty &
189 (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
190 nv50->state.scissor == nv50->rast->pipe.scissor)
191 return;
192
193 if (nv50->state.scissor != nv50->rast->pipe.scissor)
194 nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1;
195
196 nv50->state.scissor = nv50->rast->pipe.scissor;
197
198 if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor)
199 nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1;
200
201 for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
202 struct pipe_scissor_state *s = &nv50->scissors[i];
203 struct pipe_viewport_state *vp = &nv50->viewports[i];
204
205 if (!(nv50->scissors_dirty & (1 << i)) &&
206 !(nv50->viewports_dirty & (1 << i)))
207 continue;
208
209 if (nv50->state.scissor) {
210 minx = s->minx;
211 maxx = s->maxx;
212 miny = s->miny;
213 maxy = s->maxy;
214 } else {
215 minx = 0;
216 maxx = nv50->framebuffer.width;
217 miny = 0;
218 maxy = nv50->framebuffer.height;
219 }
220
221 minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
222 maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
223 miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
224 maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
225
226 minx = MIN2(minx, 8192);
227 maxx = MAX2(maxx, 0);
228 miny = MIN2(miny, 8192);
229 maxy = MAX2(maxy, 0);
230
231 BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(i)), 2);
232 PUSH_DATA (push, (maxx << 16) | minx);
233 PUSH_DATA (push, (maxy << 16) | miny);
234 #else
235 BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(i)), 2);
236 PUSH_DATA (push, (s->maxx << 16) | s->minx);
237 PUSH_DATA (push, (s->maxy << 16) | s->miny);
238 #endif
239 }
240
241 nv50->scissors_dirty = 0;
242 }
243
244 static void
245 nv50_validate_viewport(struct nv50_context *nv50)
246 {
247 struct nouveau_pushbuf *push = nv50->base.pushbuf;
248 float zmin, zmax;
249 int i;
250
251 for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
252 struct pipe_viewport_state *vpt = &nv50->viewports[i];
253
254 if (!(nv50->viewports_dirty & (1 << i)))
255 continue;
256
257 BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSLATE_X(i)), 3);
258 PUSH_DATAf(push, vpt->translate[0]);
259 PUSH_DATAf(push, vpt->translate[1]);
260 PUSH_DATAf(push, vpt->translate[2]);
261 BEGIN_NV04(push, NV50_3D(VIEWPORT_SCALE_X(i)), 3);
262 PUSH_DATAf(push, vpt->scale[0]);
263 PUSH_DATAf(push, vpt->scale[1]);
264 PUSH_DATAf(push, vpt->scale[2]);
265
266 zmin = vpt->translate[2] - fabsf(vpt->scale[2]);
267 zmax = vpt->translate[2] + fabsf(vpt->scale[2]);
268
269 #ifdef NV50_SCISSORS_CLIPPING
270 BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
271 PUSH_DATAf(push, zmin);
272 PUSH_DATAf(push, zmax);
273 #endif
274 }
275
276 nv50->viewports_dirty = 0;
277 }
278
279 static INLINE void
280 nv50_check_program_ucps(struct nv50_context *nv50,
281 struct nv50_program *vp, uint8_t mask)
282 {
283 const unsigned n = util_logbase2(mask) + 1;
284
285 if (vp->vp.clpd_nr >= n)
286 return;
287 nv50_program_destroy(nv50, vp);
288
289 vp->vp.clpd_nr = n;
290 if (likely(vp == nv50->vertprog)) {
291 nv50->dirty |= NV50_NEW_VERTPROG;
292 nv50_vertprog_validate(nv50);
293 } else {
294 nv50->dirty |= NV50_NEW_GMTYPROG;
295 nv50_gmtyprog_validate(nv50);
296 }
297 nv50_fp_linkage_validate(nv50);
298 }
299
300 static void
301 nv50_validate_clip(struct nv50_context *nv50)
302 {
303 struct nouveau_pushbuf *push = nv50->base.pushbuf;
304 struct nv50_program *vp;
305 uint8_t clip_enable;
306
307 if (nv50->dirty & NV50_NEW_CLIP) {
308 BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
309 PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX);
310 BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
311 PUSH_DATAp(push, &nv50->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
312 }
313
314 vp = nv50->gmtyprog;
315 if (likely(!vp))
316 vp = nv50->vertprog;
317
318 clip_enable = nv50->rast->pipe.clip_plane_enable;
319
320 BEGIN_NV04(push, NV50_3D(CLIP_DISTANCE_ENABLE), 1);
321 PUSH_DATA (push, clip_enable);
322
323 if (clip_enable)
324 nv50_check_program_ucps(nv50, vp, clip_enable);
325 }
326
327 static void
328 nv50_validate_blend(struct nv50_context *nv50)
329 {
330 struct nouveau_pushbuf *push = nv50->base.pushbuf;
331
332 PUSH_SPACE(push, nv50->blend->size);
333 PUSH_DATAp(push, nv50->blend->state, nv50->blend->size);
334 }
335
336 static void
337 nv50_validate_zsa(struct nv50_context *nv50)
338 {
339 struct nouveau_pushbuf *push = nv50->base.pushbuf;
340
341 PUSH_SPACE(push, nv50->zsa->size);
342 PUSH_DATAp(push, nv50->zsa->state, nv50->zsa->size);
343 }
344
345 static void
346 nv50_validate_rasterizer(struct nv50_context *nv50)
347 {
348 struct nouveau_pushbuf *push = nv50->base.pushbuf;
349
350 PUSH_SPACE(push, nv50->rast->size);
351 PUSH_DATAp(push, nv50->rast->state, nv50->rast->size);
352 }
353
354 static void
355 nv50_validate_sample_mask(struct nv50_context *nv50)
356 {
357 struct nouveau_pushbuf *push = nv50->base.pushbuf;
358
359 unsigned mask[4] =
360 {
361 nv50->sample_mask & 0xffff,
362 nv50->sample_mask & 0xffff,
363 nv50->sample_mask & 0xffff,
364 nv50->sample_mask & 0xffff
365 };
366
367 BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
368 PUSH_DATA (push, mask[0]);
369 PUSH_DATA (push, mask[1]);
370 PUSH_DATA (push, mask[2]);
371 PUSH_DATA (push, mask[3]);
372 }
373
374 static void
375 nv50_validate_min_samples(struct nv50_context *nv50)
376 {
377 struct nouveau_pushbuf *push = nv50->base.pushbuf;
378 int samples;
379
380 if (nv50->screen->tesla->oclass < NVA3_3D_CLASS)
381 return;
382
383 samples = util_next_power_of_two(nv50->min_samples);
384 if (samples > 1)
385 samples |= NVA3_3D_SAMPLE_SHADING_ENABLE;
386
387 BEGIN_NV04(push, SUBC_3D(NVA3_3D_SAMPLE_SHADING), 1);
388 PUSH_DATA (push, samples);
389 }
390
391 static void
392 nv50_switch_pipe_context(struct nv50_context *ctx_to)
393 {
394 struct nv50_context *ctx_from = ctx_to->screen->cur_ctx;
395
396 if (ctx_from)
397 ctx_to->state = ctx_from->state;
398
399 ctx_to->dirty = ~0;
400 ctx_to->viewports_dirty = ~0;
401 ctx_to->scissors_dirty = ~0;
402
403 ctx_to->constbuf_dirty[0] =
404 ctx_to->constbuf_dirty[1] =
405 ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1;
406
407 if (!ctx_to->vertex)
408 ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
409
410 if (!ctx_to->vertprog)
411 ctx_to->dirty &= ~NV50_NEW_VERTPROG;
412 if (!ctx_to->fragprog)
413 ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
414
415 if (!ctx_to->blend)
416 ctx_to->dirty &= ~NV50_NEW_BLEND;
417 if (!ctx_to->rast)
418 #ifdef NV50_SCISSORS_CLIPPING
419 ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
420 #else
421 ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
422 #endif
423 if (!ctx_to->zsa)
424 ctx_to->dirty &= ~NV50_NEW_ZSA;
425
426 ctx_to->screen->cur_ctx = ctx_to;
427 }
428
429 static struct state_validate {
430 void (*func)(struct nv50_context *);
431 uint32_t states;
432 } validate_list[] = {
433 { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
434 { nv50_validate_blend, NV50_NEW_BLEND },
435 { nv50_validate_zsa, NV50_NEW_ZSA },
436 { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
437 { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
438 { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
439 { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
440 { nv50_validate_stipple, NV50_NEW_STIPPLE },
441 #ifdef NV50_SCISSORS_CLIPPING
442 { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
443 NV50_NEW_RASTERIZER |
444 NV50_NEW_FRAMEBUFFER },
445 #else
446 { nv50_validate_scissor, NV50_NEW_SCISSOR },
447 #endif
448 { nv50_validate_viewport, NV50_NEW_VIEWPORT },
449 { nv50_vertprog_validate, NV50_NEW_VERTPROG },
450 { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
451 { nv50_fragprog_validate, NV50_NEW_FRAGPROG |
452 NV50_NEW_MIN_SAMPLES },
453 { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
454 NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
455 { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
456 { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
457 NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
458 { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
459 NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
460 { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
461 { nv50_validate_textures, NV50_NEW_TEXTURES },
462 { nv50_validate_samplers, NV50_NEW_SAMPLERS },
463 { nv50_stream_output_validate, NV50_NEW_STRMOUT |
464 NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
465 { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
466 { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
467 };
468 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
469
470 boolean
471 nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
472 {
473 uint32_t state_mask;
474 int ret;
475 unsigned i;
476
477 if (nv50->screen->cur_ctx != nv50)
478 nv50_switch_pipe_context(nv50);
479
480 state_mask = nv50->dirty & mask;
481
482 if (state_mask) {
483 for (i = 0; i < validate_list_len; ++i) {
484 struct state_validate *validate = &validate_list[i];
485
486 if (state_mask & validate->states)
487 validate->func(nv50);
488 }
489 nv50->dirty &= ~state_mask;
490
491 if (nv50->state.rt_serialize) {
492 nv50->state.rt_serialize = FALSE;
493 BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
494 PUSH_DATA (nv50->base.pushbuf, 0);
495 }
496
497 nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
498 }
499 nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
500 ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
501
502 if (unlikely(nv50->state.flushed)) {
503 nv50->state.flushed = FALSE;
504 nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
505 }
506 return !ret;
507 }