nv50: Silence uninitialized variable warning.
[mesa.git] / src / gallium / drivers / nv50 / nv50_shader_state.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
28
29 #include "nv50_context.h"
30
31 static void
32 nv50_transfer_constbuf(struct nv50_context *nv50,
33 struct pipe_resource *buf, unsigned size, unsigned cbi)
34 {
35 struct pipe_context *pipe = &nv50->pipe;
36 struct pipe_transfer *transfer;
37 struct nouveau_channel *chan = nv50->screen->base.channel;
38 struct nouveau_grobj *tesla = nv50->screen->tesla;
39 uint32_t *map;
40 unsigned count, start;
41
42 map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
43 if (!map)
44 return;
45
46 count = (buf->width0 + 3) / 4;
47 start = 0;
48
49 while (count) {
50 unsigned nr = count;
51 nr = MIN2(nr, 2047);
52
53 /* FIXME: emit relocs for unsuiTed MM */
54 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
55 OUT_RING (chan, (start << 8) | cbi);
56 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
57 OUT_RINGp (chan, map, nr);
58
59 count -= nr;
60 start += nr;
61 map += nr;
62 }
63
64 pipe_buffer_unmap(pipe, buf, transfer);
65 }
66
67 static void
68 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
69 {
70 struct nouveau_channel *chan = nv50->screen->base.channel;
71 struct nouveau_grobj *tesla = nv50->screen->tesla;
72 unsigned cbi;
73
74 if (p->immd_size) {
75 uint32_t *data = p->immd;
76 unsigned count = p->immd_size / 4;
77 unsigned start = 0;
78
79 while (count) {
80 unsigned nr = count;
81 nr = MIN2(nr, 2047);
82
83 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
84 OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
85 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
86 OUT_RINGp (chan, data, nr);
87
88 count -= nr;
89 start += nr;
90 data += nr;
91 }
92 }
93
94 /* If the state tracker doesn't change the constbuf, and it is first
95 * validated with a program that doesn't use it, this check prevents
96 * it from even being uploaded. */
97 /*
98 if (p->parm_size == 0)
99 return;
100 */
101
102 switch (p->type) {
103 case PIPE_SHADER_VERTEX:
104 cbi = NV50_CB_PVP;
105 break;
106 case PIPE_SHADER_FRAGMENT:
107 cbi = NV50_CB_PFP;
108 break;
109 case PIPE_SHADER_GEOMETRY:
110 cbi = NV50_CB_PGP;
111 break;
112 default:
113 assert(0);
114 cbi = 0;
115 break;
116 }
117
118 nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
119 }
120
121 static void
122 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
123 {
124 struct nouveau_channel *chan = nv50->screen->base.channel;
125 struct nouveau_grobj *tesla = nv50->screen->tesla;
126 struct nouveau_grobj *eng2d = nv50->screen->eng2d;
127 int ret;
128 unsigned offset;
129 unsigned size = p->code_size;
130 uint32_t *data = p->code;
131
132 assert(p->translated);
133
134 /* TODO: use a single bo (for each type) for shader code */
135 if (p->bo)
136 return;
137 ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
138 assert(!ret);
139
140 offset = p->code_start = 0;
141
142 BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
143 OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
144 OUT_RING (chan, 1);
145 BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
146 OUT_RING (chan, 0x40000);
147 BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
148 OUT_RING (chan, 0x10000);
149 OUT_RING (chan, 1);
150
151 while (size) {
152 unsigned nr = size / 4;
153
154 if (AVAIL_RING(chan) < 32)
155 FIRE_RING(chan);
156
157 nr = MIN2(nr, AVAIL_RING(chan) - 18);
158 nr = MIN2(nr, 1792);
159 if (nr < (size / 4))
160 nr &= ~0x3f;
161 assert(!(size & 3));
162
163 BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
164 OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
165 OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
166 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
167 OUT_RING (chan, 0);
168 OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
169 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
170 OUT_RING (chan, nr * 4);
171 OUT_RING (chan, 1);
172 OUT_RING (chan, 0);
173 OUT_RING (chan, 1);
174 OUT_RING (chan, 0);
175 OUT_RING (chan, 1);
176 OUT_RING (chan, 0);
177 OUT_RING (chan, 0);
178 OUT_RING (chan, 0);
179 OUT_RING (chan, 0);
180
181 BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
182 OUT_RINGp (chan, data, nr);
183
184 data += nr;
185 offset += nr * 4;
186 size -= nr * 4;
187 }
188
189 BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
190 OUT_RING (chan, 0);
191 }
192
193 static void
194 nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
195 {
196 struct nouveau_grobj *tesla = nv50->screen->tesla;
197 struct nouveau_stateobj *so = so_new(5, 7, 2);
198
199 nv50_program_validate_code(nv50, p);
200
201 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
202 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
203 NOUVEAU_BO_HIGH, 0, 0);
204 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
205 NOUVEAU_BO_LOW, 0, 0);
206 so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
207 so_data (so, p->vp.attrs[0]);
208 so_data (so, p->vp.attrs[1]);
209 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
210 so_data (so, p->max_out);
211 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
212 so_data (so, p->max_gpr);
213 so_method(so, tesla, NV50TCL_VP_START_ID, 1);
214 so_data (so, p->code_start);
215
216 so_ref(so, &p->so);
217 so_ref(NULL, &so);
218 }
219
220 static void
221 nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
222 {
223 struct nouveau_grobj *tesla = nv50->screen->tesla;
224 struct nouveau_stateobj *so = so_new(6, 7, 2);
225
226 nv50_program_validate_code(nv50, p);
227
228 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
229 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
230 NOUVEAU_BO_HIGH, 0, 0);
231 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
232 NOUVEAU_BO_LOW, 0, 0);
233 so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
234 so_data (so, p->max_gpr);
235 so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
236 so_data (so, p->max_out);
237 so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
238 so_data (so, p->fp.flags[0]);
239 so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
240 so_data (so, p->fp.flags[1]);
241 so_method(so, tesla, NV50TCL_FP_START_ID, 1);
242 so_data (so, p->code_start);
243
244 so_ref(so, &p->so);
245 so_ref(NULL, &so);
246 }
247
248 static void
249 nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
250 {
251 struct nouveau_grobj *tesla = nv50->screen->tesla;
252 struct nouveau_stateobj *so = so_new(6, 7, 2);
253
254 nv50_program_validate_code(nv50, p);
255
256 so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
257 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
258 NOUVEAU_BO_HIGH, 0, 0);
259 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
260 NOUVEAU_BO_LOW, 0, 0);
261 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
262 so_data (so, p->max_gpr);
263 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
264 so_data (so, p->max_out);
265 so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
266 so_data (so, p->gp.prim_type);
267 so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
268 so_data (so, p->gp.vert_count);
269 so_method(so, tesla, NV50TCL_GP_START_ID, 1);
270 so_data (so, p->code_start);
271
272 so_ref(so, &p->so);
273 so_ref(NULL, &so);
274 }
275
276 static boolean
277 nv50_program_validate(struct nv50_program *p)
278 {
279 p->translated = nv50_program_tx(p);
280 assert(p->translated);
281 return p->translated;
282 }
283
284 struct nouveau_stateobj *
285 nv50_vertprog_validate(struct nv50_context *nv50)
286 {
287 struct nv50_program *p = nv50->vertprog;
288 struct nouveau_stateobj *so = NULL;
289
290 if (!p->translated) {
291 if (nv50_program_validate(p))
292 nv50_vp_update_stateobj(nv50, p);
293 else
294 return NULL;
295 }
296
297 if (nv50->dirty & NV50_NEW_VERTPROG_CB)
298 nv50_program_validate_data(nv50, p);
299
300 if (!(nv50->dirty & NV50_NEW_VERTPROG))
301 return NULL;
302
303 nv50_program_validate_code(nv50, p);
304
305 so_ref(p->so, &so);
306 return so;
307 }
308
309 struct nouveau_stateobj *
310 nv50_fragprog_validate(struct nv50_context *nv50)
311 {
312 struct nv50_program *p = nv50->fragprog;
313 struct nouveau_stateobj *so = NULL;
314
315 if (!p->translated) {
316 if (nv50_program_validate(p))
317 nv50_fp_update_stateobj(nv50, p);
318 else
319 return NULL;
320 }
321
322 if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
323 nv50_program_validate_data(nv50, p);
324
325 if (!(nv50->dirty & NV50_NEW_FRAGPROG))
326 return NULL;
327
328 nv50_program_validate_code(nv50, p);
329
330 so_ref(p->so, &so);
331 return so;
332 }
333
334 struct nouveau_stateobj *
335 nv50_geomprog_validate(struct nv50_context *nv50)
336 {
337 struct nv50_program *p = nv50->geomprog;
338 struct nouveau_stateobj *so = NULL;
339
340 if (!p->translated) {
341 if (nv50_program_validate(p))
342 nv50_gp_update_stateobj(nv50, p);
343 else
344 return NULL;
345 }
346
347 if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
348 nv50_program_validate_data(nv50, p);
349
350 if (!(nv50->dirty & NV50_NEW_GEOMPROG))
351 return NULL;
352
353 nv50_program_validate_code(nv50, p);
354
355 so_ref(p->so, &so);
356 return so;
357 }
358
359 /* XXX: this might not work correctly in all cases yet: we assume that
360 * an FP generic input that is not written in the VP is gl_PointCoord.
361 */
362 static uint32_t
363 nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
364 {
365 struct nv50_program *vp = nv50->vertprog;
366 struct nv50_program *fp = nv50->fragprog;
367 unsigned i, c;
368
369 memset(pntc, 0, 8 * sizeof(uint32_t));
370
371 if (nv50->geomprog)
372 vp = nv50->geomprog;
373
374 for (i = 0; i < fp->in_nr; i++) {
375 unsigned j, n = util_bitcount(fp->in[i].mask);
376
377 if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
378 m += n;
379 continue;
380 }
381
382 for (j = 0; j < vp->out_nr; ++j)
383 if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
384 break;
385
386 if (j < vp->out_nr) {
387 uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable;
388
389 if (!(en & (1 << vp->out[j].si))) {
390 m += n;
391 continue;
392 }
393 }
394
395 /* this is either PointCoord or replaced by sprite coords */
396 for (c = 0; c < 4; c++) {
397 if (!(fp->in[i].mask & (1 << c)))
398 continue;
399 pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
400 ++m;
401 }
402 }
403 if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
404 return 0;
405 return (1 << 4);
406 }
407
408 static int
409 nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
410 struct nv50_varying *in, struct nv50_varying *out)
411 {
412 int c;
413 uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
414 uint8_t *map = (uint8_t *)map32;
415
416 for (c = 0; c < 4; ++c) {
417 if (mf & 1) {
418 if (in->linear)
419 lin[mid / 32] |= 1 << (mid % 32);
420 if (mv & 1)
421 map[mid] = oid;
422 else
423 if (c == 3)
424 map[mid] |= 1;
425 ++mid;
426 }
427
428 oid += mv & 1;
429 mf >>= 1;
430 mv >>= 1;
431 }
432
433 return mid;
434 }
435
436 struct nouveau_stateobj *
437 nv50_fp_linkage_validate(struct nv50_context *nv50)
438 {
439 struct nouveau_grobj *tesla = nv50->screen->tesla;
440 struct nv50_program *vp;
441 struct nv50_program *fp = nv50->fragprog;
442 struct nouveau_stateobj *so;
443 struct nv50_varying dummy;
444 int i, n, c, m;
445
446 uint32_t map[16], lin[4], pntc[8];
447
448 uint32_t interp = fp->fp.interp;
449 uint32_t colors = fp->fp.colors;
450 uint32_t clip = 0x04;
451 uint32_t psiz = 0x000;
452 uint32_t primid = 0;
453 uint32_t sysval = 0;
454
455 if (nv50->geomprog) {
456 vp = nv50->geomprog;
457 memset(map, 0x80, sizeof(map));
458 } else {
459 vp = nv50->vertprog;
460 memset(map, 0x40, sizeof(map));
461 }
462 memset(lin, 0, sizeof(lin));
463
464 dummy.linear = 0;
465 dummy.mask = 0xf; /* map all components of HPOS */
466 m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
467
468 if (vp->vp.clpd < 0x40) {
469 for (c = 0; c < vp->vp.clpd_nr; ++c) {
470 map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
471 ++m;
472 }
473 clip |= vp->vp.clpd_nr << 8;
474 }
475
476 colors |= m << 8; /* adjust BFC0 id */
477
478 /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
479 if (nv50->rasterizer->pipe.light_twoside) {
480 for (i = 0; i < 2; ++i)
481 m = nv50_vec4_map(map, m, lin,
482 &fp->in[fp->vp.bfc[i]],
483 &vp->out[vp->vp.bfc[i]]);
484 }
485
486 colors += m - 4; /* adjust FFC0 id */
487 interp |= m << 8; /* set mid where 'normal' FP inputs start */
488
489 dummy.mask = 0x0;
490 for (i = 0; i < fp->in_nr; i++) {
491 for (n = 0; n < vp->out_nr; ++n)
492 if (vp->out[n].sn == fp->in[i].sn &&
493 vp->out[n].si == fp->in[i].si)
494 break;
495
496 m = nv50_vec4_map(map, m, lin,
497 &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
498 }
499
500 /* PrimitiveID either is replaced by the system value, or
501 * written by the geometry shader into an output register
502 */
503 if (fp->gp.primid < 0x40) {
504 i = (m % 4) * 8;
505 map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
506 primid = m++;
507 }
508
509 if (nv50->rasterizer->pipe.point_size_per_vertex) {
510 i = (m % 4) * 8;
511 map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
512 psiz = (m++ << 4) | 1;
513 }
514
515 /* now fill the stateobj (at most 28 so_data) */
516 so = so_new(10, 54, 0);
517
518 n = (m + 3) / 4;
519 assert(m <= 64);
520 if (vp->type == PIPE_SHADER_GEOMETRY) {
521 so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
522 so_data (so, m);
523 so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
524 so_datap (so, map, n);
525 } else {
526 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
527 so_data (so, vp->vp.attrs[2]);
528
529 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
530 so_data (so, primid);
531
532 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
533 so_data (so, m);
534 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
535 so_datap (so, map, n);
536 }
537
538 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
539 so_data (so, colors);
540 so_data (so, clip);
541 so_data (so, sysval);
542 so_data (so, psiz);
543
544 so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
545 so_data (so, interp);
546
547 so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
548 so_datap (so, lin, 4);
549
550 if (nv50->rasterizer->pipe.point_quad_rasterization) {
551 so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
552 so_data (so,
553 nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
554
555 so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
556 so_datap (so, pntc, 8);
557 }
558
559 so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
560 so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
561
562 return so;
563 }
564
565 static int
566 nv50_vp_gp_mapping(uint32_t *map32, int m,
567 struct nv50_program *vp, struct nv50_program *gp)
568 {
569 uint8_t *map = (uint8_t *)map32;
570 int i, j, c;
571
572 for (i = 0; i < gp->in_nr; ++i) {
573 uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
574
575 for (j = 0; j < vp->out_nr; ++j) {
576 if (vp->out[j].sn == gp->in[i].sn &&
577 vp->out[j].si == gp->in[i].si) {
578 mv = vp->out[j].mask;
579 oid = vp->out[j].hw;
580 break;
581 }
582 }
583
584 for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
585 if (mg & mv & 1)
586 map[m++] = oid;
587 else
588 if (mg & 1)
589 map[m++] = (c == 3) ? 0x41 : 0x40;
590 oid += mv & 1;
591 }
592 }
593 return m;
594 }
595
596 struct nouveau_stateobj *
597 nv50_gp_linkage_validate(struct nv50_context *nv50)
598 {
599 struct nouveau_grobj *tesla = nv50->screen->tesla;
600 struct nouveau_stateobj *so;
601 struct nv50_program *vp = nv50->vertprog;
602 struct nv50_program *gp = nv50->geomprog;
603 uint32_t map[16];
604 int m = 0;
605
606 if (!gp)
607 return NULL;
608 memset(map, 0, sizeof(map));
609
610 m = nv50_vp_gp_mapping(map, m, vp, gp);
611
612 so = so_new(3, 24 - 3, 0);
613
614 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
615 so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
616
617 assert(m <= 32);
618 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
619 so_data (so, m);
620
621 m = (m + 3) / 4;
622 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
623 so_datap (so, map, m);
624
625 return so;
626 }