nv50: Remove unnecessary headers.
[mesa.git] / src / gallium / drivers / nv50 / nv50_shader_state.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
28
29 #include "nv50_context.h"
30
31 static void
32 nv50_transfer_constbuf(struct nv50_context *nv50,
33 struct pipe_resource *buf, unsigned size, unsigned cbi)
34 {
35 struct pipe_context *pipe = &nv50->pipe;
36 struct pipe_transfer *transfer;
37 struct nouveau_channel *chan = nv50->screen->base.channel;
38 struct nouveau_grobj *tesla = nv50->screen->tesla;
39 uint32_t *map;
40 unsigned count, start;
41
42 map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
43 if (!map)
44 return;
45
46 count = (buf->width0 + 3) / 4;
47 start = 0;
48
49 while (count) {
50 unsigned nr = count;
51 nr = MIN2(nr, 2047);
52
53 /* FIXME: emit relocs for unsuiTed MM */
54 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
55 OUT_RING (chan, (start << 8) | cbi);
56 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
57 OUT_RINGp (chan, map, nr);
58
59 count -= nr;
60 start += nr;
61 map += nr;
62 }
63
64 pipe_buffer_unmap(pipe, buf, transfer);
65 }
66
67 static void
68 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
69 {
70 struct nouveau_channel *chan = nv50->screen->base.channel;
71 struct nouveau_grobj *tesla = nv50->screen->tesla;
72 unsigned cbi;
73
74 if (p->immd_size) {
75 uint32_t *data = p->immd;
76 unsigned count = p->immd_size / 4;
77 unsigned start = 0;
78
79 while (count) {
80 unsigned nr = count;
81 nr = MIN2(nr, 2047);
82
83 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
84 OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
85 BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
86 OUT_RINGp (chan, data, nr);
87
88 count -= nr;
89 start += nr;
90 data += nr;
91 }
92 }
93
94 /* If the state tracker doesn't change the constbuf, and it is first
95 * validated with a program that doesn't use it, this check prevents
96 * it from even being uploaded. */
97 /*
98 if (p->parm_size == 0)
99 return;
100 */
101
102 switch (p->type) {
103 case PIPE_SHADER_VERTEX:
104 cbi = NV50_CB_PVP;
105 break;
106 case PIPE_SHADER_FRAGMENT:
107 cbi = NV50_CB_PFP;
108 break;
109 case PIPE_SHADER_GEOMETRY:
110 cbi = NV50_CB_PGP;
111 break;
112 default:
113 assert(0);
114 break;
115 }
116
117 nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
118 }
119
120 static void
121 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
122 {
123 struct nouveau_channel *chan = nv50->screen->base.channel;
124 struct nouveau_grobj *tesla = nv50->screen->tesla;
125 struct nouveau_grobj *eng2d = nv50->screen->eng2d;
126 int ret;
127 unsigned offset;
128 unsigned size = p->code_size;
129 uint32_t *data = p->code;
130
131 assert(p->translated);
132
133 /* TODO: use a single bo (for each type) for shader code */
134 if (p->bo)
135 return;
136 ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
137 assert(!ret);
138
139 offset = p->code_start = 0;
140
141 BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
142 OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
143 OUT_RING (chan, 1);
144 BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
145 OUT_RING (chan, 0x40000);
146 BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
147 OUT_RING (chan, 0x10000);
148 OUT_RING (chan, 1);
149
150 while (size) {
151 unsigned nr = size / 4;
152
153 if (AVAIL_RING(chan) < 32)
154 FIRE_RING(chan);
155
156 nr = MIN2(nr, AVAIL_RING(chan) - 18);
157 nr = MIN2(nr, 1792);
158 if (nr < (size / 4))
159 nr &= ~0x3f;
160 assert(!(size & 3));
161
162 BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
163 OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
164 OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
165 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
166 OUT_RING (chan, 0);
167 OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
168 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
169 OUT_RING (chan, nr * 4);
170 OUT_RING (chan, 1);
171 OUT_RING (chan, 0);
172 OUT_RING (chan, 1);
173 OUT_RING (chan, 0);
174 OUT_RING (chan, 1);
175 OUT_RING (chan, 0);
176 OUT_RING (chan, 0);
177 OUT_RING (chan, 0);
178 OUT_RING (chan, 0);
179
180 BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
181 OUT_RINGp (chan, data, nr);
182
183 data += nr;
184 offset += nr * 4;
185 size -= nr * 4;
186 }
187
188 BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
189 OUT_RING (chan, 0);
190 }
191
192 static void
193 nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
194 {
195 struct nouveau_grobj *tesla = nv50->screen->tesla;
196 struct nouveau_stateobj *so = so_new(5, 7, 2);
197
198 nv50_program_validate_code(nv50, p);
199
200 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
201 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
202 NOUVEAU_BO_HIGH, 0, 0);
203 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
204 NOUVEAU_BO_LOW, 0, 0);
205 so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
206 so_data (so, p->vp.attrs[0]);
207 so_data (so, p->vp.attrs[1]);
208 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
209 so_data (so, p->max_out);
210 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
211 so_data (so, p->max_gpr);
212 so_method(so, tesla, NV50TCL_VP_START_ID, 1);
213 so_data (so, p->code_start);
214
215 so_ref(so, &p->so);
216 so_ref(NULL, &so);
217 }
218
219 static void
220 nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
221 {
222 struct nouveau_grobj *tesla = nv50->screen->tesla;
223 struct nouveau_stateobj *so = so_new(6, 7, 2);
224
225 nv50_program_validate_code(nv50, p);
226
227 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
228 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
229 NOUVEAU_BO_HIGH, 0, 0);
230 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
231 NOUVEAU_BO_LOW, 0, 0);
232 so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
233 so_data (so, p->max_gpr);
234 so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
235 so_data (so, p->max_out);
236 so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
237 so_data (so, p->fp.flags[0]);
238 so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
239 so_data (so, p->fp.flags[1]);
240 so_method(so, tesla, NV50TCL_FP_START_ID, 1);
241 so_data (so, p->code_start);
242
243 so_ref(so, &p->so);
244 so_ref(NULL, &so);
245 }
246
247 static void
248 nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
249 {
250 struct nouveau_grobj *tesla = nv50->screen->tesla;
251 struct nouveau_stateobj *so = so_new(6, 7, 2);
252
253 nv50_program_validate_code(nv50, p);
254
255 so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
256 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
257 NOUVEAU_BO_HIGH, 0, 0);
258 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
259 NOUVEAU_BO_LOW, 0, 0);
260 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
261 so_data (so, p->max_gpr);
262 so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
263 so_data (so, p->max_out);
264 so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
265 so_data (so, p->gp.prim_type);
266 so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
267 so_data (so, p->gp.vert_count);
268 so_method(so, tesla, NV50TCL_GP_START_ID, 1);
269 so_data (so, p->code_start);
270
271 so_ref(so, &p->so);
272 so_ref(NULL, &so);
273 }
274
275 static boolean
276 nv50_program_validate(struct nv50_program *p)
277 {
278 p->translated = nv50_program_tx(p);
279 assert(p->translated);
280 return p->translated;
281 }
282
283 struct nouveau_stateobj *
284 nv50_vertprog_validate(struct nv50_context *nv50)
285 {
286 struct nv50_program *p = nv50->vertprog;
287 struct nouveau_stateobj *so = NULL;
288
289 if (!p->translated) {
290 if (nv50_program_validate(p))
291 nv50_vp_update_stateobj(nv50, p);
292 else
293 return NULL;
294 }
295
296 if (nv50->dirty & NV50_NEW_VERTPROG_CB)
297 nv50_program_validate_data(nv50, p);
298
299 if (!(nv50->dirty & NV50_NEW_VERTPROG))
300 return NULL;
301
302 nv50_program_validate_code(nv50, p);
303
304 so_ref(p->so, &so);
305 return so;
306 }
307
308 struct nouveau_stateobj *
309 nv50_fragprog_validate(struct nv50_context *nv50)
310 {
311 struct nv50_program *p = nv50->fragprog;
312 struct nouveau_stateobj *so = NULL;
313
314 if (!p->translated) {
315 if (nv50_program_validate(p))
316 nv50_fp_update_stateobj(nv50, p);
317 else
318 return NULL;
319 }
320
321 if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
322 nv50_program_validate_data(nv50, p);
323
324 if (!(nv50->dirty & NV50_NEW_FRAGPROG))
325 return NULL;
326
327 nv50_program_validate_code(nv50, p);
328
329 so_ref(p->so, &so);
330 return so;
331 }
332
333 struct nouveau_stateobj *
334 nv50_geomprog_validate(struct nv50_context *nv50)
335 {
336 struct nv50_program *p = nv50->geomprog;
337 struct nouveau_stateobj *so = NULL;
338
339 if (!p->translated) {
340 if (nv50_program_validate(p))
341 nv50_gp_update_stateobj(nv50, p);
342 else
343 return NULL;
344 }
345
346 if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
347 nv50_program_validate_data(nv50, p);
348
349 if (!(nv50->dirty & NV50_NEW_GEOMPROG))
350 return NULL;
351
352 nv50_program_validate_code(nv50, p);
353
354 so_ref(p->so, &so);
355 return so;
356 }
357
358 /* XXX: this might not work correctly in all cases yet: we assume that
359 * an FP generic input that is not written in the VP is gl_PointCoord.
360 */
361 static uint32_t
362 nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
363 {
364 struct nv50_program *vp = nv50->vertprog;
365 struct nv50_program *fp = nv50->fragprog;
366 unsigned i, c;
367
368 memset(pntc, 0, 8 * sizeof(uint32_t));
369
370 if (nv50->geomprog)
371 vp = nv50->geomprog;
372
373 for (i = 0; i < fp->in_nr; i++) {
374 unsigned j, n = util_bitcount(fp->in[i].mask);
375
376 if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
377 m += n;
378 continue;
379 }
380
381 for (j = 0; j < vp->out_nr; ++j)
382 if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
383 break;
384
385 if (j < vp->out_nr) {
386 uint32_t en = nv50->rasterizer->pipe.sprite_coord_enable;
387
388 if (!(en & (1 << vp->out[j].si))) {
389 m += n;
390 continue;
391 }
392 }
393
394 /* this is either PointCoord or replaced by sprite coords */
395 for (c = 0; c < 4; c++) {
396 if (!(fp->in[i].mask & (1 << c)))
397 continue;
398 pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
399 ++m;
400 }
401 }
402 if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
403 return 0;
404 return (1 << 4);
405 }
406
407 static int
408 nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
409 struct nv50_varying *in, struct nv50_varying *out)
410 {
411 int c;
412 uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
413 uint8_t *map = (uint8_t *)map32;
414
415 for (c = 0; c < 4; ++c) {
416 if (mf & 1) {
417 if (in->linear)
418 lin[mid / 32] |= 1 << (mid % 32);
419 if (mv & 1)
420 map[mid] = oid;
421 else
422 if (c == 3)
423 map[mid] |= 1;
424 ++mid;
425 }
426
427 oid += mv & 1;
428 mf >>= 1;
429 mv >>= 1;
430 }
431
432 return mid;
433 }
434
435 struct nouveau_stateobj *
436 nv50_fp_linkage_validate(struct nv50_context *nv50)
437 {
438 struct nouveau_grobj *tesla = nv50->screen->tesla;
439 struct nv50_program *vp;
440 struct nv50_program *fp = nv50->fragprog;
441 struct nouveau_stateobj *so;
442 struct nv50_varying dummy;
443 int i, n, c, m;
444
445 uint32_t map[16], lin[4], pntc[8];
446
447 uint32_t interp = fp->fp.interp;
448 uint32_t colors = fp->fp.colors;
449 uint32_t clip = 0x04;
450 uint32_t psiz = 0x000;
451 uint32_t primid = 0;
452 uint32_t sysval = 0;
453
454 if (nv50->geomprog) {
455 vp = nv50->geomprog;
456 memset(map, 0x80, sizeof(map));
457 } else {
458 vp = nv50->vertprog;
459 memset(map, 0x40, sizeof(map));
460 }
461 memset(lin, 0, sizeof(lin));
462
463 dummy.linear = 0;
464 dummy.mask = 0xf; /* map all components of HPOS */
465 m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
466
467 if (vp->vp.clpd < 0x40) {
468 for (c = 0; c < vp->vp.clpd_nr; ++c) {
469 map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
470 ++m;
471 }
472 clip |= vp->vp.clpd_nr << 8;
473 }
474
475 colors |= m << 8; /* adjust BFC0 id */
476
477 /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
478 if (nv50->rasterizer->pipe.light_twoside) {
479 for (i = 0; i < 2; ++i)
480 m = nv50_vec4_map(map, m, lin,
481 &fp->in[fp->vp.bfc[i]],
482 &vp->out[vp->vp.bfc[i]]);
483 }
484
485 colors += m - 4; /* adjust FFC0 id */
486 interp |= m << 8; /* set mid where 'normal' FP inputs start */
487
488 dummy.mask = 0x0;
489 for (i = 0; i < fp->in_nr; i++) {
490 for (n = 0; n < vp->out_nr; ++n)
491 if (vp->out[n].sn == fp->in[i].sn &&
492 vp->out[n].si == fp->in[i].si)
493 break;
494
495 m = nv50_vec4_map(map, m, lin,
496 &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
497 }
498
499 /* PrimitiveID either is replaced by the system value, or
500 * written by the geometry shader into an output register
501 */
502 if (fp->gp.primid < 0x40) {
503 i = (m % 4) * 8;
504 map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
505 primid = m++;
506 }
507
508 if (nv50->rasterizer->pipe.point_size_per_vertex) {
509 i = (m % 4) * 8;
510 map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
511 psiz = (m++ << 4) | 1;
512 }
513
514 /* now fill the stateobj (at most 28 so_data) */
515 so = so_new(10, 54, 0);
516
517 n = (m + 3) / 4;
518 assert(m <= 64);
519 if (vp->type == PIPE_SHADER_GEOMETRY) {
520 so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
521 so_data (so, m);
522 so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
523 so_datap (so, map, n);
524 } else {
525 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
526 so_data (so, vp->vp.attrs[2]);
527
528 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
529 so_data (so, primid);
530
531 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
532 so_data (so, m);
533 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
534 so_datap (so, map, n);
535 }
536
537 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
538 so_data (so, colors);
539 so_data (so, clip);
540 so_data (so, sysval);
541 so_data (so, psiz);
542
543 so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
544 so_data (so, interp);
545
546 so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
547 so_datap (so, lin, 4);
548
549 if (nv50->rasterizer->pipe.point_quad_rasterization) {
550 so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
551 so_data (so,
552 nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
553
554 so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
555 so_datap (so, pntc, 8);
556 }
557
558 so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
559 so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
560
561 return so;
562 }
563
564 static int
565 nv50_vp_gp_mapping(uint32_t *map32, int m,
566 struct nv50_program *vp, struct nv50_program *gp)
567 {
568 uint8_t *map = (uint8_t *)map32;
569 int i, j, c;
570
571 for (i = 0; i < gp->in_nr; ++i) {
572 uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
573
574 for (j = 0; j < vp->out_nr; ++j) {
575 if (vp->out[j].sn == gp->in[i].sn &&
576 vp->out[j].si == gp->in[i].si) {
577 mv = vp->out[j].mask;
578 oid = vp->out[j].hw;
579 break;
580 }
581 }
582
583 for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
584 if (mg & mv & 1)
585 map[m++] = oid;
586 else
587 if (mg & 1)
588 map[m++] = (c == 3) ? 0x41 : 0x40;
589 oid += mv & 1;
590 }
591 }
592 return m;
593 }
594
595 struct nouveau_stateobj *
596 nv50_gp_linkage_validate(struct nv50_context *nv50)
597 {
598 struct nouveau_grobj *tesla = nv50->screen->tesla;
599 struct nouveau_stateobj *so;
600 struct nv50_program *vp = nv50->vertprog;
601 struct nv50_program *gp = nv50->geomprog;
602 uint32_t map[16];
603 int m = 0;
604
605 if (!gp)
606 return NULL;
607 memset(map, 0, sizeof(map));
608
609 m = nv50_vp_gp_mapping(map, m, vp, gp);
610
611 so = so_new(3, 24 - 3, 0);
612
613 so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
614 so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
615
616 assert(m <= 32);
617 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
618 so_data (so, m);
619
620 m = (m + 3) / 4;
621 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
622 so_datap (so, map, m);
623
624 return so;
625 }