nv50: import new compiler
[mesa.git] / src / gallium / drivers / nv50 / nv50_shader_state.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
28
29 #include "nv50_context.h"
30 #include "nv50_transfer.h"
31
32 static void
33 nv50_transfer_constbuf(struct nv50_context *nv50,
34 struct pipe_resource *buf, unsigned size, unsigned cbi)
35 {
36 struct pipe_context *pipe = &nv50->pipe;
37 struct pipe_transfer *transfer;
38 struct nouveau_channel *chan = nv50->screen->base.channel;
39 struct nouveau_grobj *tesla = nv50->screen->tesla;
40 uint32_t *map;
41 unsigned count, start;
42
43 map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
44 if (!map)
45 return;
46
47 count = MIN2(buf->width0, size);
48 start = 0;
49
50 while (count) {
51 unsigned nr = count;
52 nr = MIN2(nr, 2047);
53
54 /* FIXME: emit relocs for unsuiTed MM */
55 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
56 OUT_RING (chan, (start << 8) | cbi);
57 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
58 OUT_RINGp (chan, map, nr);
59
60 count -= nr;
61 start += nr;
62 map += nr;
63 }
64
65 pipe_buffer_unmap(pipe, buf, transfer);
66 }
67
68 static void
69 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
70 {
71 struct nouveau_channel *chan = nv50->screen->base.channel;
72 struct nouveau_grobj *tesla = nv50->screen->tesla;
73 unsigned cbi;
74
75 if (p->immd_size) {
76 uint32_t *data = p->immd;
77 unsigned count = p->immd_size / 4;
78 unsigned start = 0;
79
80 while (count) {
81 unsigned nr = count;
82 nr = MIN2(nr, 2047);
83
84 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
85 OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
86 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
87 OUT_RINGp (chan, data, nr);
88
89 count -= nr;
90 start += nr;
91 data += nr;
92 }
93 }
94
95 if (p->parm_size == 0)
96 return;
97
98 switch (p->type) {
99 case PIPE_SHADER_VERTEX:
100 cbi = NV50_CB_PVP;
101 break;
102 case PIPE_SHADER_FRAGMENT:
103 cbi = NV50_CB_PFP;
104 break;
105 case PIPE_SHADER_GEOMETRY:
106 cbi = NV50_CB_PGP;
107 break;
108 default:
109 assert(0);
110 break;
111 }
112
113 nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
114 }
115
116 static void
117 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
118 {
119 struct nouveau_channel *chan = nv50->screen->base.channel;
120 struct nouveau_grobj *tesla = nv50->screen->tesla;
121 struct nouveau_grobj *eng2d = nv50->screen->eng2d;
122 int ret;
123 unsigned offset;
124 unsigned size = p->code_size;
125 uint32_t *data = p->code;
126
127 assert(p->translated);
128
129 /* TODO: use a single bo (for each type) for shader code */
130 if (p->bo)
131 return;
132 ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
133 assert(!ret);
134
135 offset = p->code_start = 0;
136
137 BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
138 OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
139 OUT_RING (chan, 1);
140 BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
141 OUT_RING (chan, 0x40000);
142 BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
143 OUT_RING (chan, 0x10000);
144 OUT_RING (chan, 1);
145
146 while (size) {
147 unsigned nr = size / 4;
148
149 if (AVAIL_RING(chan) < 32)
150 FIRE_RING(chan);
151
152 nr = MIN2(nr, AVAIL_RING(chan) - 18);
153 nr = MIN2(nr, 1792);
154 if (nr < (size / 4))
155 nr &= ~0x3f;
156 assert(!(size & 3));
157
158 BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
159 OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
160 OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
161 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
162 OUT_RING (chan, 0);
163 OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
164 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
165 OUT_RING (chan, nr * 4);
166 OUT_RING (chan, 1);
167 OUT_RING (chan, 0);
168 OUT_RING (chan, 1);
169 OUT_RING (chan, 0);
170 OUT_RING (chan, 1);
171 OUT_RING (chan, 0);
172 OUT_RING (chan, 0);
173 OUT_RING (chan, 0);
174 OUT_RING (chan, 0);
175
176 BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
177 OUT_RINGp (chan, data, nr);
178
179 data += nr;
180 offset += nr * 4;
181 size -= nr * 4;
182 }
183
184 BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
185 OUT_RING (chan, 0);
186 }
187
188 static void
189 nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
190 {
191 struct nouveau_grobj *tesla = nv50->screen->tesla;
192 struct nouveau_stateobj *so = so_new(5, 7, 2);
193
194 nv50_program_validate_code(nv50, p);
195
196 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
197 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
198 NOUVEAU_BO_HIGH, 0, 0);
199 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
200 NOUVEAU_BO_LOW, 0, 0);
201 so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
202 so_data (so, p->vp.attrs[0]);
203 so_data (so, p->vp.attrs[1]);
204 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
205 so_data (so, p->max_out);
206 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
207 so_data (so, p->max_gpr);
208 so_method(so, tesla, NV50TCL_VP_START_ID, 1);
209 so_data (so, p->code_start);
210
211 so_ref(so, &p->so);
212 so_ref(NULL, &so);
213 }
214
215 static void
216 nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
217 {
218 struct nouveau_grobj *tesla = nv50->screen->tesla;
219 struct nouveau_stateobj *so = so_new(6, 7, 2);
220
221 nv50_program_validate_code(nv50, p);
222
223 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
224 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
225 NOUVEAU_BO_HIGH, 0, 0);
226 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
227 NOUVEAU_BO_LOW, 0, 0);
228 so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
229 so_data (so, p->max_gpr);
230 so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
231 so_data (so, p->max_out);
232 so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
233 so_data (so, p->fp.flags[0]);
234 so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
235 so_data (so, p->fp.flags[1]);
236 so_method(so, tesla, NV50TCL_FP_START_ID, 1);
237 so_data (so, p->code_start);
238
239 so_ref(so, &p->so);
240 so_ref(NULL, &so);
241 }
242
243 static void
244 nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
245 {
246 struct nouveau_grobj *tesla = nv50->screen->tesla;
247 struct nouveau_stateobj *so = so_new(6, 7, 2);
248
249 nv50_program_validate_code(nv50, p);
250
251 so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
252 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
253 NOUVEAU_BO_HIGH, 0, 0);
254 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
255 NOUVEAU_BO_LOW, 0, 0);
256 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
257 so_data (so, p->max_gpr);
258 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
259 so_data (so, p->max_out);
260 so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
261 so_data (so, p->gp.prim_type);
262 so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
263 so_data (so, p->gp.vert_count);
264 so_method(so, tesla, NV50TCL_GP_START_ID, 1);
265 so_data (so, p->code_start);
266
267 so_ref(so, &p->so);
268 so_ref(NULL, &so);
269 }
270
271 static boolean
272 nv50_program_validate(struct nv50_program *p)
273 {
274 p->translated = nv50_program_tx(p);
275 assert(p->translated);
276 return p->translated;
277 }
278
279 struct nouveau_stateobj *
280 nv50_vertprog_validate(struct nv50_context *nv50)
281 {
282 struct nv50_program *p = nv50->vertprog;
283 struct nouveau_stateobj *so = NULL;
284
285 if (!p->translated) {
286 if (nv50_program_validate(p))
287 nv50_vp_update_stateobj(nv50, p);
288 else
289 return NULL;
290 }
291
292 if (nv50->dirty & NV50_NEW_VERTPROG_CB)
293 nv50_program_validate_data(nv50, p);
294
295 if (!(nv50->dirty & NV50_NEW_VERTPROG))
296 return NULL;
297
298 nv50_program_validate_code(nv50, p);
299
300 so_ref(p->so, &so);
301 return so;
302 }
303
304 struct nouveau_stateobj *
305 nv50_fragprog_validate(struct nv50_context *nv50)
306 {
307 struct nv50_program *p = nv50->fragprog;
308 struct nouveau_stateobj *so = NULL;
309
310 if (!p->translated) {
311 if (nv50_program_validate(p))
312 nv50_fp_update_stateobj(nv50, p);
313 else
314 return NULL;
315 }
316
317 if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
318 nv50_program_validate_data(nv50, p);
319
320 if (!(nv50->dirty & NV50_NEW_FRAGPROG))
321 return NULL;
322
323 nv50_program_validate_code(nv50, p);
324
325 so_ref(p->so, &so);
326 return so;
327 }
328
329 struct nouveau_stateobj *
330 nv50_geomprog_validate(struct nv50_context *nv50)
331 {
332 struct nv50_program *p = nv50->geomprog;
333 struct nouveau_stateobj *so = NULL;
334
335 if (!p->translated) {
336 if (nv50_program_validate(p))
337 nv50_gp_update_stateobj(nv50, p);
338 else
339 return NULL;
340 }
341
342 if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
343 nv50_program_validate_data(nv50, p);
344
345 if (!(nv50->dirty & NV50_NEW_GEOMPROG))
346 return NULL;
347
348 nv50_program_validate_code(nv50, p);
349
350 so_ref(p->so, &so);
351 return so;
352 }
353
354 /* XXX: this might not work correctly in all cases yet: we assume that
355 * an FP generic input that is not written in the VP is gl_PointCoord.
356 */
357 static uint32_t
358 nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
359 {
360 struct nv50_program *vp = nv50->vertprog;
361 struct nv50_program *fp = nv50->fragprog;
362 unsigned i, c;
363
364 memset(pntc, 0, 8 * sizeof(uint32_t));
365
366 if (nv50->geomprog)
367 vp = nv50->geomprog;
368
369 for (i = 0; i < fp->in_nr; i++) {
370 unsigned j, n = util_bitcount(fp->in[i].mask);
371
372 if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
373 m += n;
374 continue;
375 }
376
377 for (j = 0; j < vp->out_nr; ++j)
378 if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
379 break;
380
381 if (j < vp->out_nr) {
382 ubyte en = nv50->rasterizer->pipe.sprite_coord_enable;
383
384 if (!(en & (1 << vp->out[j].si))) {
385 m += n;
386 continue;
387 }
388 }
389
390 /* this is either PointCoord or replaced by sprite coords */
391 for (c = 0; c < 4; c++) {
392 if (!(fp->in[i].mask & (1 << c)))
393 continue;
394 pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
395 ++m;
396 }
397 }
398 if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
399 return 0;
400 return (1 << 4);
401 }
402
403 static int
404 nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
405 struct nv50_varying *in, struct nv50_varying *out)
406 {
407 int c;
408 uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
409 uint8_t *map = (uint8_t *)map32;
410
411 for (c = 0; c < 4; ++c) {
412 if (mf & 1) {
413 if (in->linear)
414 lin[mid / 32] |= 1 << (mid % 32);
415 if (mv & 1)
416 map[mid] = oid;
417 else
418 if (c == 3)
419 map[mid] |= 1;
420 ++mid;
421 }
422
423 oid += mv & 1;
424 mf >>= 1;
425 mv >>= 1;
426 }
427
428 return mid;
429 }
430
431 struct nouveau_stateobj *
432 nv50_fp_linkage_validate(struct nv50_context *nv50)
433 {
434 struct nouveau_grobj *tesla = nv50->screen->tesla;
435 struct nv50_program *vp;
436 struct nv50_program *fp = nv50->fragprog;
437 struct nouveau_stateobj *so;
438 struct nv50_varying dummy;
439 int i, n, c, m;
440
441 uint32_t map[16], lin[4], pntc[8];
442
443 uint32_t interp = fp->fp.interp;
444 uint32_t colors = fp->fp.colors;
445 uint32_t clip = 0x04;
446 uint32_t psiz = 0x000;
447 uint32_t primid = 0;
448 uint32_t sysval = 0;
449
450 if (nv50->geomprog) {
451 vp = nv50->geomprog;
452 memset(map, 0x80, sizeof(map));
453 } else {
454 vp = nv50->vertprog;
455 memset(map, 0x40, sizeof(map));
456 }
457 memset(lin, 0, sizeof(lin));
458
459 dummy.linear = 0;
460 dummy.mask = 0xf; /* map all components of HPOS */
461 m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
462
463 if (vp->vp.clpd < 0x40) {
464 for (c = 0; c < vp->vp.clpd_nr; ++c) {
465 map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
466 ++m;
467 }
468 clip |= vp->vp.clpd_nr << 8;
469 }
470
471 colors |= m << 8; /* adjust BFC0 id */
472
473 /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
474 if (nv50->rasterizer->pipe.light_twoside) {
475 for (i = 0; i < 2; ++i)
476 m = nv50_vec4_map(map, m, lin,
477 &fp->in[fp->vp.bfc[i]],
478 &vp->out[vp->vp.bfc[i]]);
479 }
480
481 colors += m - 4; /* adjust FFC0 id */
482 interp |= m << 8; /* set mid where 'normal' FP inputs start */
483
484 dummy.mask = 0x0;
485 for (i = 0; i < fp->in_nr; i++) {
486 for (n = 0; n < vp->out_nr; ++n)
487 if (vp->out[n].sn == fp->in[i].sn &&
488 vp->out[n].si == fp->in[i].si)
489 break;
490
491 m = nv50_vec4_map(map, m, lin,
492 &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
493 }
494 /* PrimitiveID either is replaced by the system value, or
495 * written by the geometry shader into an output register
496 */
497 if (fp->gp.primid < 0x40) {
498 map[m / 4] |= vp->gp.primid << ((m % 4) * 8);
499 primid = m++;
500 }
501
502 if (nv50->rasterizer->pipe.point_size_per_vertex) {
503 map[m / 4] |= vp->vp.psiz << ((m % 4) * 8);
504 psiz = (m++ << 4) | 1;
505 }
506
507 /* now fill the stateobj (at most 28 so_data) */
508 so = so_new(10, 54, 0);
509
510 n = (m + 3) / 4;
511 assert(m <= 64);
512 if (vp->type == PIPE_SHADER_GEOMETRY) {
513 so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
514 so_data (so, m);
515 so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
516 so_datap (so, map, n);
517 } else {
518 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
519 so_data (so, vp->vp.attrs[2]);
520
521 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
522 so_data (so, primid);
523
524 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
525 so_data (so, m);
526 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
527 so_datap (so, map, n);
528 }
529
530 //colors = 0x01000404;
531 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
532 so_data (so, colors);
533 so_data (so, clip);
534 so_data (so, sysval);
535 so_data (so, psiz);
536
537 so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
538 so_data (so, interp);
539
540 so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
541 so_datap (so, lin, 4);
542
543 if (nv50->rasterizer->pipe.sprite_coord_enable) {
544 so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
545 so_data (so,
546 nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
547
548 so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
549 so_datap (so, pntc, 8);
550 }
551
552 so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
553 so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
554
555 return so;
556 }
557
558 static int
559 nv50_vp_gp_mapping(uint32_t *map32, int m,
560 struct nv50_program *vp, struct nv50_program *gp)
561 {
562 uint8_t *map = (uint8_t *)map32;
563 int i, j, c;
564
565 for (i = 0; i < gp->in_nr; ++i) {
566 uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
567
568 for (j = 0; j < vp->out_nr; ++j) {
569 if (vp->out[j].sn == gp->in[i].sn &&
570 vp->out[j].si == gp->in[i].si) {
571 mv = vp->out[j].mask;
572 oid = vp->out[j].hw;
573 break;
574 }
575 }
576
577 for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
578 if (mg & mv & 1)
579 map[m++] = oid;
580 else
581 if (mg & 1)
582 map[m++] = (c == 3) ? 0x41 : 0x40;
583 oid += mv & 1;
584 }
585 }
586 return m;
587 }
588
589 struct nouveau_stateobj *
590 nv50_gp_linkage_validate(struct nv50_context *nv50)
591 {
592 struct nouveau_grobj *tesla = nv50->screen->tesla;
593 struct nouveau_stateobj *so;
594 struct nv50_program *vp = nv50->vertprog;
595 struct nv50_program *gp = nv50->geomprog;
596 uint32_t map[16];
597 int m = 0;
598
599 if (!gp)
600 return NULL;
601 memset(map, 0, sizeof(map));
602
603 m = nv50_vp_gp_mapping(map, m, vp, gp);
604
605 so = so_new(3, 24 - 3, 0);
606
607 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
608 so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
609
610 assert(m <= 32);
611 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
612 so_data (so, m);
613
614 m = (m + 3) / 4;
615 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
616 so_datap (so, map, m);
617
618 return so;
619 }