nv50: abuse constbuf upload for program upload
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11
/* Number of hardware temporary-register slots tracked by the allocator
 * (size of nv50_pc::r_temp and bound of the slot searches). */
#define NV50_SU_MAX_TEMP 64
/* NOTE(review): presumably enables dumping of generated programs; the code
 * that tests this flag is not visible in this part of the file. */
#define NV50_PROGRAM_DUMP
14
15 /* ARL - gallium fails on progs/vp/arl.txt
16 *
17 * MSB - Like MAD, but MUL+SUB
18 * - Drop it; instead introduce a way to negate args for ops that
19 * support it.
20 *
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
24 *
25 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
26 * case, if the emit_src() causes the inst to suddenly become long.
27 *
28 * Verify half-insns work where expected - and force disable them where they
29 * don't work - MUL has it forcibly disabled atm as it fixes POW..
30 *
31 * WARNING: watch dst==src vectors; a write can overwrite components that are still needed.
32 * ie. SUB R0, R0.yzxw, R0
33 *
34 * Things to check with renouveau:
35 * FP attr/result assignment - how?
36 * attrib
37 * - 0x16bc maps vp output onto fp hpos
38 * - 0x16c0 maps vp output onto fp col0
39 * result
40 * - colr always 0-3
41 * - depr always 4
42 * 0x16bc->0x16e8 --> some binding between vp/fp regs
43 * 0x16b8 --> VP output count
44 *
45 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
46 * "MOV rcol.x, fcol.y" = 0x00000004
47 * 0x19a8 --> as above but 0x00000100 and 0x00000000
48 * - 0x00100000 used when KIL used
49 * 0x196c --> as above but 0x00000011 and 0x00000000
50 *
51 * 0x1988 --> 0xXXNNNNNN
52 * - XX == FP high something
53 */
/*
 * One scalar register operand handled by the code generator.
 */
struct nv50_reg {
	/* Register file this operand lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* TGSI register number; -1 for registers created by alloc_temp()
	 * and alloc_immd(). */
	int index;

	/* Hardware slot / constant index; a P_TEMP holds -1 until
	 * alloc_reg() assigns it a slot. */
	int hw;

	/* Not referenced by the code visible in this part of the file. */
	int neg;
};
67
68 struct nv50_pc {
69 struct nv50_program *p;
70
71 /* hw resources */
72 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
73
74 /* tgsi resources */
75 struct nv50_reg *temp;
76 int temp_nr;
77 struct nv50_reg *attr;
78 int attr_nr;
79 struct nv50_reg *result;
80 int result_nr;
81 struct nv50_reg *param;
82 int param_nr;
83 struct nv50_reg *immd;
84 float *immd_buf;
85 int immd_nr;
86
87 struct nv50_reg *temp_temp[16];
88 unsigned temp_temp_nr;
89 };
90
91 static void
92 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
93 {
94 int i;
95
96 if (reg->type == P_RESULT) {
97 if (pc->p->cfg.high_result < (reg->hw + 1))
98 pc->p->cfg.high_result = reg->hw + 1;
99 }
100
101 if (reg->type != P_TEMP)
102 return;
103
104 if (reg->hw >= 0) {
105 /*XXX: do this here too to catch FP temp-as-attr usage..
106 * not clean, but works */
107 if (pc->p->cfg.high_temp < (reg->hw + 1))
108 pc->p->cfg.high_temp = reg->hw + 1;
109 return;
110 }
111
112 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
113 if (!(pc->r_temp[i])) {
114 pc->r_temp[i] = reg;
115 reg->hw = i;
116 if (pc->p->cfg.high_temp < (i + 1))
117 pc->p->cfg.high_temp = i + 1;
118 return;
119 }
120 }
121
122 assert(0);
123 }
124
125 static struct nv50_reg *
126 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
127 {
128 struct nv50_reg *r;
129 int i;
130
131 if (dst && dst->type == P_TEMP && dst->hw == -1)
132 return dst;
133
134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
135 if (!pc->r_temp[i]) {
136 r = CALLOC_STRUCT(nv50_reg);
137 r->type = P_TEMP;
138 r->index = -1;
139 r->hw = i;
140 pc->r_temp[i] = r;
141 return r;
142 }
143 }
144
145 assert(0);
146 return NULL;
147 }
148
149 static void
150 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
151 {
152 if (r->index == -1) {
153 unsigned hw = r->hw;
154
155 FREE(pc->r_temp[hw]);
156 pc->r_temp[hw] = NULL;
157 }
158 }
159
160 static struct nv50_reg *
161 temp_temp(struct nv50_pc *pc)
162 {
163 if (pc->temp_temp_nr >= 16)
164 assert(0);
165
166 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
167 return pc->temp_temp[pc->temp_temp_nr++];
168 }
169
170 static void
171 kill_temp_temp(struct nv50_pc *pc)
172 {
173 int i;
174
175 for (i = 0; i < pc->temp_temp_nr; i++)
176 free_temp(pc, pc->temp_temp[i]);
177 pc->temp_temp_nr = 0;
178 }
179
180 static int
181 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
182 {
183 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
184 sizeof(float));
185 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
186 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
187 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
188 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
189
190 return pc->immd_nr++;
191 }
192
193 static struct nv50_reg *
194 alloc_immd(struct nv50_pc *pc, float f)
195 {
196 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
197 unsigned hw;
198
199 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
200 r->type = P_IMMD;
201 r->hw = hw;
202 r->index = -1;
203 return r;
204 }
205
206 static struct nv50_program_exec *
207 exec(struct nv50_pc *pc)
208 {
209 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
210
211 e->param.index = -1;
212 return e;
213 }
214
215 static void
216 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
217 {
218 struct nv50_program *p = pc->p;
219
220 if (p->exec_tail)
221 p->exec_tail->next = e;
222 if (!p->exec_head)
223 p->exec_head = e;
224 p->exec_tail = e;
225 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
226 }
227
228 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
229
230 static boolean
231 is_long(struct nv50_program_exec *e)
232 {
233 if (e->inst[0] & 1)
234 return TRUE;
235 return FALSE;
236 }
237
238 static boolean
239 is_immd(struct nv50_program_exec *e)
240 {
241 if (is_long(e) && (e->inst[1] & 3) == 3)
242 return TRUE;
243 return FALSE;
244 }
245
246 static INLINE void
247 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
248 struct nv50_program_exec *e)
249 {
250 set_long(pc, e);
251 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
252 e->inst[1] |= (pred << 7) | (idx << 12);
253 }
254
255 static INLINE void
256 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
257 struct nv50_program_exec *e)
258 {
259 set_long(pc, e);
260 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
261 e->inst[1] |= (idx << 4) | (on << 6);
262 }
263
264 static INLINE void
265 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
266 {
267 if (is_long(e))
268 return;
269
270 e->inst[0] |= 1;
271 set_pred(pc, 0xf, 0, e);
272 set_pred_wr(pc, 0, 0, e);
273 }
274
275 static INLINE void
276 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
277 {
278 if (dst->type == P_RESULT) {
279 set_long(pc, e);
280 e->inst[1] |= 0x00000008;
281 }
282
283 alloc_reg(pc, dst);
284 e->inst[0] |= (dst->hw << 2);
285 }
286
287 static INLINE void
288 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
289 {
290 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
291
292 set_long(pc, e);
293 /*XXX: can't be predicated - bits overlap.. catch cases where both
294 * are required and avoid them. */
295 set_pred(pc, 0, 0, e);
296 set_pred_wr(pc, 0, 0, e);
297
298 e->inst[1] |= 0x00000002 | 0x00000001;
299 e->inst[0] |= (val & 0x3f) << 16;
300 e->inst[1] |= (val >> 6) << 2;
301 }
302
303 static void
304 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
305 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
306 {
307 struct nv50_program_exec *e = exec(pc);
308
309 e->inst[0] |= 0x80000000;
310 set_dst(pc, dst, e);
311 alloc_reg(pc, iv);
312 e->inst[0] |= (iv->hw << 9);
313 alloc_reg(pc, src);
314 e->inst[0] |= (src->hw << 16);
315 if (noperspective)
316 e->inst[0] |= (1 << 25);
317
318 emit(pc, e);
319 }
320
321 static void
322 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
323 struct nv50_program_exec *e)
324 {
325 set_long(pc, e);
326 #if 1
327 e->inst[1] |= (1 << 22);
328 #else
329 if (src->type == P_IMMD) {
330 e->inst[1] |= (NV50_CB_PMISC << 22);
331 } else {
332 if (pc->p->type == PIPE_SHADER_VERTEX)
333 e->inst[1] |= (NV50_CB_PVP << 22);
334 else
335 e->inst[1] |= (NV50_CB_PFP << 22);
336 }
337 #endif
338
339 e->param.index = src->hw;
340 e->param.shift = s;
341 e->param.mask = m << (s % 32);
342 }
343
344 static void
345 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
346 {
347 struct nv50_program_exec *e = exec(pc);
348
349 e->inst[0] |= 0x10000000;
350
351 set_dst(pc, dst, e);
352
353 if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
354 set_immd(pc, src, e);
355 /*XXX: 32-bit, but steals part of "half" reg space - need to
356 * catch and handle this case if/when we do half-regs
357 */
358 e->inst[0] |= 0x00008000;
359 } else
360 if (src->type == P_IMMD || src->type == P_CONST) {
361 set_long(pc, e);
362 set_data(pc, src, 0x7f, 9, e);
363 e->inst[1] |= 0x20000000; /* src0 const? */
364 } else {
365 if (src->type == P_ATTR) {
366 set_long(pc, e);
367 e->inst[1] |= 0x00200000;
368 }
369
370 alloc_reg(pc, src);
371 e->inst[0] |= (src->hw << 9);
372 }
373
374 /* We really should support "half" instructions here at some point,
375 * but I don't feel confident enough about them yet.
376 */
377 set_long(pc, e);
378 if (is_long(e) && !is_immd(e)) {
379 e->inst[1] |= 0x04000000; /* 32-bit */
380 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
381 }
382
383 emit(pc, e);
384 }
385
386 static boolean
387 check_swap_src_0_1(struct nv50_pc *pc,
388 struct nv50_reg **s0, struct nv50_reg **s1)
389 {
390 struct nv50_reg *src0 = *s0, *src1 = *s1;
391
392 if (src0->type == P_CONST) {
393 if (src1->type != P_CONST) {
394 *s0 = src1;
395 *s1 = src0;
396 return TRUE;
397 }
398 } else
399 if (src1->type == P_ATTR) {
400 if (src0->type != P_ATTR) {
401 *s0 = src1;
402 *s1 = src0;
403 return TRUE;
404 }
405 }
406
407 return FALSE;
408 }
409
410 static void
411 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
412 {
413 if (src->type == P_ATTR) {
414 set_long(pc, e);
415 e->inst[1] |= 0x00200000;
416 } else
417 if (src->type == P_CONST || src->type == P_IMMD) {
418 struct nv50_reg *temp = temp_temp(pc);
419
420 emit_mov(pc, temp, src);
421 src = temp;
422 }
423
424 alloc_reg(pc, src);
425 e->inst[0] |= (src->hw << 9);
426 }
427
428 static void
429 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
430 {
431 if (src->type == P_ATTR) {
432 struct nv50_reg *temp = temp_temp(pc);
433
434 emit_mov(pc, temp, src);
435 src = temp;
436 } else
437 if (src->type == P_CONST || src->type == P_IMMD) {
438 assert(!(e->inst[0] & 0x00800000));
439 if (e->inst[0] & 0x01000000) {
440 struct nv50_reg *temp = temp_temp(pc);
441
442 emit_mov(pc, temp, src);
443 src = temp;
444 } else {
445 set_data(pc, src, 0x7f, 16, e);
446 e->inst[0] |= 0x00800000;
447 }
448 }
449
450 alloc_reg(pc, src);
451 e->inst[0] |= (src->hw << 16);
452 }
453
454 static void
455 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
456 {
457 set_long(pc, e);
458
459 if (src->type == P_ATTR) {
460 struct nv50_reg *temp = temp_temp(pc);
461
462 emit_mov(pc, temp, src);
463 src = temp;
464 } else
465 if (src->type == P_CONST || src->type == P_IMMD) {
466 assert(!(e->inst[0] & 0x01000000));
467 if (e->inst[0] & 0x00800000) {
468 struct nv50_reg *temp = temp_temp(pc);
469
470 emit_mov(pc, temp, src);
471 src = temp;
472 } else {
473 set_data(pc, src, 0x7f, 32+14, e);
474 e->inst[0] |= 0x01000000;
475 }
476 }
477
478 alloc_reg(pc, src);
479 e->inst[1] |= (src->hw << 14);
480 }
481
482 static void
483 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
484 struct nv50_reg *src1)
485 {
486 struct nv50_program_exec *e = exec(pc);
487
488 e->inst[0] |= 0xc0000000;
489 set_long(pc, e);
490
491 check_swap_src_0_1(pc, &src0, &src1);
492 set_dst(pc, dst, e);
493 set_src_0(pc, src0, e);
494 set_src_1(pc, src1, e);
495
496 emit(pc, e);
497 }
498
499 static void
500 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
501 struct nv50_reg *src0, struct nv50_reg *src1)
502 {
503 struct nv50_program_exec *e = exec(pc);
504
505 e->inst[0] |= 0xb0000000;
506
507 check_swap_src_0_1(pc, &src0, &src1);
508 set_dst(pc, dst, e);
509 set_src_0(pc, src0, e);
510 if (is_long(e))
511 set_src_2(pc, src1, e);
512 else
513 set_src_1(pc, src1, e);
514
515 emit(pc, e);
516 }
517
518 static void
519 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
520 struct nv50_reg *src0, struct nv50_reg *src1)
521 {
522 struct nv50_program_exec *e = exec(pc);
523
524 set_long(pc, e);
525 e->inst[0] |= 0xb0000000;
526 e->inst[1] |= (sub << 29);
527
528 check_swap_src_0_1(pc, &src0, &src1);
529 set_dst(pc, dst, e);
530 set_src_0(pc, src0, e);
531 set_src_1(pc, src1, e);
532
533 emit(pc, e);
534 }
535
536 static void
537 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
538 struct nv50_reg *src1)
539 {
540 struct nv50_program_exec *e = exec(pc);
541
542 e->inst[0] |= 0xb0000000;
543
544 set_long(pc, e);
545 if (check_swap_src_0_1(pc, &src0, &src1))
546 e->inst[1] |= 0x04000000;
547 else
548 e->inst[1] |= 0x08000000;
549
550 set_dst(pc, dst, e);
551 set_src_0(pc, src0, e);
552 set_src_2(pc, src1, e);
553
554 emit(pc, e);
555 }
556
557 static void
558 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
559 struct nv50_reg *src1, struct nv50_reg *src2)
560 {
561 struct nv50_program_exec *e = exec(pc);
562
563 e->inst[0] |= 0xe0000000;
564
565 check_swap_src_0_1(pc, &src0, &src1);
566 set_dst(pc, dst, e);
567 set_src_0(pc, src0, e);
568 set_src_1(pc, src1, e);
569 set_src_2(pc, src2, e);
570
571 emit(pc, e);
572 }
573
574 static void
575 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
576 struct nv50_reg *src1, struct nv50_reg *src2)
577 {
578 struct nv50_program_exec *e = exec(pc);
579
580 e->inst[0] |= 0xe0000000;
581 set_long(pc, e);
582 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
583
584 check_swap_src_0_1(pc, &src0, &src1);
585 set_dst(pc, dst, e);
586 set_src_0(pc, src0, e);
587 set_src_1(pc, src1, e);
588 set_src_2(pc, src2, e);
589
590 emit(pc, e);
591 }
592
593 static void
594 emit_flop(struct nv50_pc *pc, unsigned sub,
595 struct nv50_reg *dst, struct nv50_reg *src)
596 {
597 struct nv50_program_exec *e = exec(pc);
598
599 e->inst[0] |= 0x90000000;
600 if (sub) {
601 set_long(pc, e);
602 e->inst[1] |= (sub << 29);
603 }
604
605 set_dst(pc, dst, e);
606 set_src_0(pc, src, e);
607
608 emit(pc, e);
609 }
610
611 static void
612 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
613 {
614 struct nv50_program_exec *e = exec(pc);
615
616 e->inst[0] |= 0xb0000000;
617
618 set_dst(pc, dst, e);
619 set_src_0(pc, src, e);
620 set_long(pc, e);
621 e->inst[1] |= (6 << 29) | 0x00004000;
622
623 emit(pc, e);
624 }
625
626 static void
627 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
628 {
629 struct nv50_program_exec *e = exec(pc);
630
631 e->inst[0] |= 0xb0000000;
632
633 set_dst(pc, dst, e);
634 set_src_0(pc, src, e);
635 set_long(pc, e);
636 e->inst[1] |= (6 << 29);
637
638 emit(pc, e);
639 }
640
641 static void
642 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
643 struct nv50_reg *src0, struct nv50_reg *src1)
644 {
645 struct nv50_program_exec *e = exec(pc);
646 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
647 struct nv50_reg *rdst;
648
649 assert(c_op <= 7);
650 if (check_swap_src_0_1(pc, &src0, &src1))
651 c_op = inv_cop[c_op];
652
653 rdst = dst;
654 if (dst->type != P_TEMP)
655 dst = alloc_temp(pc, NULL);
656
657 /* set.u32 */
658 set_long(pc, e);
659 e->inst[0] |= 0xb0000000;
660 e->inst[1] |= (3 << 29);
661 e->inst[1] |= (c_op << 14);
662 /*XXX: breaks things, .u32 by default?
663 * decuda will disasm as .u16 and use .lo/.hi regs, but this
664 * doesn't seem to match what the hw actually does.
665 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
666 */
667 set_dst(pc, dst, e);
668 set_src_0(pc, src0, e);
669 set_src_1(pc, src1, e);
670 emit(pc, e);
671
672 /* cvt.f32.u32 */
673 e = exec(pc);
674 e->inst[0] = 0xa0000001;
675 e->inst[1] = 0x64014780;
676 set_dst(pc, rdst, e);
677 set_src_0(pc, dst, e);
678 emit(pc, e);
679
680 if (dst != rdst)
681 free_temp(pc, dst);
682 }
683
684 static void
685 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
686 {
687 struct nv50_program_exec *e = exec(pc);
688
689 e->inst[0] = 0xa0000000; /* cvt */
690 set_long(pc, e);
691 e->inst[1] |= (6 << 29); /* cvt */
692 e->inst[1] |= 0x08000000; /* integer mode */
693 e->inst[1] |= 0x04000000; /* 32 bit */
694 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
695 e->inst[1] |= (1 << 14); /* src .f32 */
696 set_dst(pc, dst, e);
697 set_src_0(pc, src, e);
698
699 emit(pc, e);
700 }
701
/*
 * Emit "dst = pow(v, e)" as flop 3 on v, a multiply by e, the pre-EX2
 * step and flop 6 (the same flop sub-ops the LG2 and EX2 opcodes use),
 * working in a private temporary that is released before returning.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);
	emit_mul(pc, acc, acc, e);
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);

	free_temp(pc, acc);
}
715
716 static void
717 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
718 {
719 struct nv50_program_exec *e = exec(pc);
720
721 e->inst[0] = 0xa0000000; /* cvt */
722 set_long(pc, e);
723 e->inst[1] |= (6 << 29); /* cvt */
724 e->inst[1] |= 0x04000000; /* 32 bit */
725 e->inst[1] |= (1 << 14); /* src .f32 */
726 e->inst[1] |= ((1 << 6) << 14); /* .abs */
727 set_dst(pc, dst, e);
728 set_src_0(pc, src, e);
729
730 emit(pc, e);
731 }
732
733 static void
734 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
735 struct nv50_reg **src)
736 {
737 struct nv50_reg *one = alloc_immd(pc, 1.0);
738 struct nv50_reg *zero = alloc_immd(pc, 0.0);
739 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
740 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
741 struct nv50_reg *tmp[4];
742
743 if (mask & (1 << 0))
744 emit_mov(pc, dst[0], one);
745
746 if (mask & (1 << 3))
747 emit_mov(pc, dst[3], one);
748
749 if (mask & (3 << 1)) {
750 if (mask & (1 << 1))
751 tmp[0] = dst[1];
752 else
753 tmp[0] = temp_temp(pc);
754 emit_minmax(pc, 4, tmp[0], src[0], zero);
755 }
756
757 if (mask & (1 << 2)) {
758 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
759
760 tmp[1] = temp_temp(pc);
761 emit_minmax(pc, 4, tmp[1], src[1], zero);
762
763 tmp[3] = temp_temp(pc);
764 emit_minmax(pc, 4, tmp[3], src[3], neg128);
765 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
766
767 emit_pow(pc, dst[2], tmp[1], tmp[3]);
768 emit_mov(pc, dst[2], zero);
769 set_pred(pc, 3, 0, pc->p->exec_tail);
770 }
771 }
772
773 static void
774 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
775 {
776 struct nv50_program_exec *e = exec(pc);
777
778 set_long(pc, e);
779 e->inst[0] |= 0xa0000000; /* delta */
780 e->inst[1] |= (7 << 29); /* delta */
781 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
782 e->inst[1] |= (1 << 14); /* src .f32 */
783 set_dst(pc, dst, e);
784 set_src_0(pc, src, e);
785
786 emit(pc, e);
787 }
788
789 static struct nv50_reg *
790 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
791 {
792 switch (dst->DstRegister.File) {
793 case TGSI_FILE_TEMPORARY:
794 return &pc->temp[dst->DstRegister.Index * 4 + c];
795 case TGSI_FILE_OUTPUT:
796 return &pc->result[dst->DstRegister.Index * 4 + c];
797 case TGSI_FILE_NULL:
798 return NULL;
799 default:
800 break;
801 }
802
803 return NULL;
804 }
805
806 static struct nv50_reg *
807 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
808 {
809 struct nv50_reg *r = NULL;
810 struct nv50_reg *temp;
811 unsigned c;
812
813 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
814 switch (c) {
815 case TGSI_EXTSWIZZLE_X:
816 case TGSI_EXTSWIZZLE_Y:
817 case TGSI_EXTSWIZZLE_Z:
818 case TGSI_EXTSWIZZLE_W:
819 switch (src->SrcRegister.File) {
820 case TGSI_FILE_INPUT:
821 r = &pc->attr[src->SrcRegister.Index * 4 + c];
822 break;
823 case TGSI_FILE_TEMPORARY:
824 r = &pc->temp[src->SrcRegister.Index * 4 + c];
825 break;
826 case TGSI_FILE_CONSTANT:
827 r = &pc->param[src->SrcRegister.Index * 4 + c];
828 break;
829 case TGSI_FILE_IMMEDIATE:
830 r = &pc->immd[src->SrcRegister.Index * 4 + c];
831 break;
832 case TGSI_FILE_SAMPLER:
833 break;
834 default:
835 assert(0);
836 break;
837 }
838 break;
839 case TGSI_EXTSWIZZLE_ZERO:
840 r = alloc_immd(pc, 0.0);
841 break;
842 case TGSI_EXTSWIZZLE_ONE:
843 r = alloc_immd(pc, 1.0);
844 break;
845 default:
846 assert(0);
847 break;
848 }
849
850 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
851 case TGSI_UTIL_SIGN_KEEP:
852 break;
853 case TGSI_UTIL_SIGN_CLEAR:
854 temp = temp_temp(pc);
855 emit_abs(pc, temp, r);
856 r = temp;
857 break;
858 case TGSI_UTIL_SIGN_TOGGLE:
859 temp = temp_temp(pc);
860 emit_neg(pc, temp, r);
861 r = temp;
862 break;
863 case TGSI_UTIL_SIGN_SET:
864 temp = temp_temp(pc);
865 emit_abs(pc, temp, r);
866 emit_neg(pc, temp, r);
867 r = temp;
868 break;
869 default:
870 assert(0);
871 break;
872 }
873
874 return r;
875 }
876
877 static boolean
878 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
879 {
880 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
881 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
882 unsigned mask, sat;
883 int i, c;
884
885 NOUVEAU_ERR("insn %p\n", tok);
886
887 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
888 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
889
890 for (c = 0; c < 4; c++) {
891 if (mask & (1 << c))
892 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
893 else
894 dst[c] = NULL;
895 }
896
897 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
898 for (c = 0; c < 4; c++)
899 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
900 }
901
902 if (sat) {
903 for (c = 0; c < 4; c++) {
904 rdst[c] = dst[c];
905 dst[c] = temp_temp(pc);
906 }
907 }
908
909 switch (inst->Instruction.Opcode) {
910 case TGSI_OPCODE_ABS:
911 for (c = 0; c < 4; c++) {
912 if (!(mask & (1 << c)))
913 continue;
914 emit_abs(pc, dst[c], src[0][c]);
915 }
916 break;
917 case TGSI_OPCODE_ADD:
918 for (c = 0; c < 4; c++) {
919 if (!(mask & (1 << c)))
920 continue;
921 emit_add(pc, dst[c], src[0][c], src[1][c]);
922 }
923 break;
924 case TGSI_OPCODE_COS:
925 temp = alloc_temp(pc, NULL);
926 emit_precossin(pc, temp, src[0][0]);
927 emit_flop(pc, 5, temp, temp);
928 for (c = 0; c < 4; c++) {
929 if (!(mask & (1 << c)))
930 continue;
931 emit_mov(pc, dst[c], temp);
932 }
933 break;
934 case TGSI_OPCODE_DP3:
935 temp = alloc_temp(pc, NULL);
936 emit_mul(pc, temp, src[0][0], src[1][0]);
937 emit_mad(pc, temp, src[0][1], src[1][1], temp);
938 emit_mad(pc, temp, src[0][2], src[1][2], temp);
939 for (c = 0; c < 4; c++) {
940 if (!(mask & (1 << c)))
941 continue;
942 emit_mov(pc, dst[c], temp);
943 }
944 free_temp(pc, temp);
945 break;
946 case TGSI_OPCODE_DP4:
947 temp = alloc_temp(pc, NULL);
948 emit_mul(pc, temp, src[0][0], src[1][0]);
949 emit_mad(pc, temp, src[0][1], src[1][1], temp);
950 emit_mad(pc, temp, src[0][2], src[1][2], temp);
951 emit_mad(pc, temp, src[0][3], src[1][3], temp);
952 for (c = 0; c < 4; c++) {
953 if (!(mask & (1 << c)))
954 continue;
955 emit_mov(pc, dst[c], temp);
956 }
957 free_temp(pc, temp);
958 break;
959 case TGSI_OPCODE_DPH:
960 temp = alloc_temp(pc, NULL);
961 emit_mul(pc, temp, src[0][0], src[1][0]);
962 emit_mad(pc, temp, src[0][1], src[1][1], temp);
963 emit_mad(pc, temp, src[0][2], src[1][2], temp);
964 emit_add(pc, temp, src[1][3], temp);
965 for (c = 0; c < 4; c++) {
966 if (!(mask & (1 << c)))
967 continue;
968 emit_mov(pc, dst[c], temp);
969 }
970 free_temp(pc, temp);
971 break;
972 case TGSI_OPCODE_DST:
973 {
974 struct nv50_reg *one = alloc_immd(pc, 1.0);
975 if (mask & (1 << 0))
976 emit_mov(pc, dst[0], one);
977 if (mask & (1 << 1))
978 emit_mul(pc, dst[1], src[0][1], src[1][1]);
979 if (mask & (1 << 2))
980 emit_mov(pc, dst[2], src[0][2]);
981 if (mask & (1 << 3))
982 emit_mov(pc, dst[3], src[1][3]);
983 FREE(one);
984 }
985 break;
986 case TGSI_OPCODE_EX2:
987 temp = alloc_temp(pc, NULL);
988 emit_preex2(pc, temp, src[0][0]);
989 emit_flop(pc, 6, temp, temp);
990 for (c = 0; c < 4; c++) {
991 if (!(mask & (1 << c)))
992 continue;
993 emit_mov(pc, dst[c], temp);
994 }
995 free_temp(pc, temp);
996 break;
997 case TGSI_OPCODE_FLR:
998 for (c = 0; c < 4; c++) {
999 if (!(mask & (1 << c)))
1000 continue;
1001 emit_flr(pc, dst[c], src[0][c]);
1002 }
1003 break;
1004 case TGSI_OPCODE_FRC:
1005 temp = alloc_temp(pc, NULL);
1006 for (c = 0; c < 4; c++) {
1007 if (!(mask & (1 << c)))
1008 continue;
1009 emit_flr(pc, temp, src[0][c]);
1010 emit_sub(pc, dst[c], src[0][c], temp);
1011 }
1012 free_temp(pc, temp);
1013 break;
1014 case TGSI_OPCODE_LIT:
1015 emit_lit(pc, &dst[0], mask, &src[0][0]);
1016 break;
1017 case TGSI_OPCODE_LG2:
1018 temp = alloc_temp(pc, NULL);
1019 emit_flop(pc, 3, temp, src[0][0]);
1020 for (c = 0; c < 4; c++) {
1021 if (!(mask & (1 << c)))
1022 continue;
1023 emit_mov(pc, dst[c], temp);
1024 }
1025 break;
1026 case TGSI_OPCODE_LRP:
1027 for (c = 0; c < 4; c++) {
1028 if (!(mask & (1 << c)))
1029 continue;
1030 /*XXX: we can do better than this */
1031 temp = alloc_temp(pc, NULL);
1032 emit_neg(pc, temp, src[0][c]);
1033 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1034 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1035 free_temp(pc, temp);
1036 }
1037 break;
1038 case TGSI_OPCODE_MAD:
1039 for (c = 0; c < 4; c++) {
1040 if (!(mask & (1 << c)))
1041 continue;
1042 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1043 }
1044 break;
1045 case TGSI_OPCODE_MAX:
1046 for (c = 0; c < 4; c++) {
1047 if (!(mask & (1 << c)))
1048 continue;
1049 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1050 }
1051 break;
1052 case TGSI_OPCODE_MIN:
1053 for (c = 0; c < 4; c++) {
1054 if (!(mask & (1 << c)))
1055 continue;
1056 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1057 }
1058 break;
1059 case TGSI_OPCODE_MOV:
1060 for (c = 0; c < 4; c++) {
1061 if (!(mask & (1 << c)))
1062 continue;
1063 emit_mov(pc, dst[c], src[0][c]);
1064 }
1065 break;
1066 case TGSI_OPCODE_MUL:
1067 for (c = 0; c < 4; c++) {
1068 if (!(mask & (1 << c)))
1069 continue;
1070 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1071 }
1072 break;
1073 case TGSI_OPCODE_POW:
1074 temp = alloc_temp(pc, NULL);
1075 emit_pow(pc, temp, src[0][0], src[1][0]);
1076 for (c = 0; c < 4; c++) {
1077 if (!(mask & (1 << c)))
1078 continue;
1079 emit_mov(pc, dst[c], temp);
1080 }
1081 free_temp(pc, temp);
1082 break;
1083 case TGSI_OPCODE_RCP:
1084 for (c = 0; c < 4; c++) {
1085 if (!(mask & (1 << c)))
1086 continue;
1087 emit_flop(pc, 0, dst[c], src[0][0]);
1088 }
1089 break;
1090 case TGSI_OPCODE_RSQ:
1091 for (c = 0; c < 4; c++) {
1092 if (!(mask & (1 << c)))
1093 continue;
1094 emit_flop(pc, 2, dst[c], src[0][0]);
1095 }
1096 break;
1097 case TGSI_OPCODE_SCS:
1098 temp = alloc_temp(pc, NULL);
1099 emit_precossin(pc, temp, src[0][0]);
1100 if (mask & (1 << 0))
1101 emit_flop(pc, 5, dst[0], temp);
1102 if (mask & (1 << 1))
1103 emit_flop(pc, 4, dst[1], temp);
1104 break;
1105 case TGSI_OPCODE_SGE:
1106 for (c = 0; c < 4; c++) {
1107 if (!(mask & (1 << c)))
1108 continue;
1109 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1110 }
1111 break;
1112 case TGSI_OPCODE_SIN:
1113 temp = alloc_temp(pc, NULL);
1114 emit_precossin(pc, temp, src[0][0]);
1115 emit_flop(pc, 4, temp, temp);
1116 for (c = 0; c < 4; c++) {
1117 if (!(mask & (1 << c)))
1118 continue;
1119 emit_mov(pc, dst[c], temp);
1120 }
1121 break;
1122 case TGSI_OPCODE_SLT:
1123 for (c = 0; c < 4; c++) {
1124 if (!(mask & (1 << c)))
1125 continue;
1126 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1127 }
1128 break;
1129 case TGSI_OPCODE_SUB:
1130 for (c = 0; c < 4; c++) {
1131 if (!(mask & (1 << c)))
1132 continue;
1133 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1134 }
1135 break;
1136 case TGSI_OPCODE_TEX:
1137 break;
1138 case TGSI_OPCODE_XPD:
1139 temp = alloc_temp(pc, NULL);
1140 if (mask & (1 << 0)) {
1141 emit_mul(pc, temp, src[0][2], src[1][1]);
1142 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1143 }
1144 if (mask & (1 << 1)) {
1145 emit_mul(pc, temp, src[0][0], src[1][2]);
1146 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1147 }
1148 if (mask & (1 << 2)) {
1149 emit_mul(pc, temp, src[0][1], src[1][0]);
1150 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1151 }
1152 free_temp(pc, temp);
1153 break;
1154 case TGSI_OPCODE_END:
1155 break;
1156 default:
1157 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1158 return FALSE;
1159 }
1160
1161 if (sat) {
1162 for (c = 0; c < 4; c++) {
1163 struct nv50_program_exec *e;
1164
1165 if (!(mask & (1 << c)))
1166 continue;
1167 e = exec(pc);
1168
1169 e->inst[0] = 0xa0000000; /* cvt */
1170 set_long(pc, e);
1171 e->inst[1] |= (6 << 29); /* cvt */
1172 e->inst[1] |= 0x04000000; /* 32 bit */
1173 e->inst[1] |= (1 << 14); /* src .f32 */
1174 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1175 set_dst(pc, rdst[c], e);
1176 set_src_0(pc, dst[c], e);
1177 emit(pc, e);
1178 }
1179 }
1180
1181 kill_temp_temp(pc);
1182 return TRUE;
1183 }
1184
1185 static boolean
1186 nv50_program_tx_prep(struct nv50_pc *pc)
1187 {
1188 struct tgsi_parse_context p;
1189 boolean ret = FALSE;
1190 unsigned i, c;
1191
1192 tgsi_parse_init(&p, pc->p->pipe.tokens);
1193 while (!tgsi_parse_end_of_tokens(&p)) {
1194 const union tgsi_full_token *tok = &p.FullToken;
1195
1196 tgsi_parse_token(&p);
1197 switch (tok->Token.Type) {
1198 case TGSI_TOKEN_TYPE_IMMEDIATE:
1199 {
1200 const struct tgsi_full_immediate *imm =
1201 &p.FullToken.FullImmediate;
1202
1203 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1204 imm->u.ImmediateFloat32[1].Float,
1205 imm->u.ImmediateFloat32[2].Float,
1206 imm->u.ImmediateFloat32[3].Float);
1207 }
1208 break;
1209 case TGSI_TOKEN_TYPE_DECLARATION:
1210 {
1211 const struct tgsi_full_declaration *d;
1212 unsigned last;
1213
1214 d = &p.FullToken.FullDeclaration;
1215 last = d->u.DeclarationRange.Last;
1216
1217 switch (d->Declaration.File) {
1218 case TGSI_FILE_TEMPORARY:
1219 if (pc->temp_nr < (last + 1))
1220 pc->temp_nr = last + 1;
1221 break;
1222 case TGSI_FILE_OUTPUT:
1223 if (pc->result_nr < (last + 1))
1224 pc->result_nr = last + 1;
1225 break;
1226 case TGSI_FILE_INPUT:
1227 if (pc->attr_nr < (last + 1))
1228 pc->attr_nr = last + 1;
1229 break;
1230 case TGSI_FILE_CONSTANT:
1231 if (pc->param_nr < (last + 1))
1232 pc->param_nr = last + 1;
1233 break;
1234 case TGSI_FILE_SAMPLER:
1235 break;
1236 default:
1237 NOUVEAU_ERR("bad decl file %d\n",
1238 d->Declaration.File);
1239 goto out_err;
1240 }
1241 }
1242 break;
1243 case TGSI_TOKEN_TYPE_INSTRUCTION:
1244 break;
1245 default:
1246 break;
1247 }
1248 }
1249
1250 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1251 if (pc->temp_nr) {
1252 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1253 if (!pc->temp)
1254 goto out_err;
1255
1256 for (i = 0; i < pc->temp_nr; i++) {
1257 for (c = 0; c < 4; c++) {
1258 pc->temp[i*4+c].type = P_TEMP;
1259 pc->temp[i*4+c].hw = -1;
1260 pc->temp[i*4+c].index = i;
1261 }
1262 }
1263 }
1264
1265 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1266 if (pc->attr_nr) {
1267 struct nv50_reg *iv = NULL;
1268 int aid = 0;
1269
1270 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1271 if (!pc->attr)
1272 goto out_err;
1273
1274 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1275 iv = alloc_temp(pc, NULL);
1276 emit_interp(pc, iv, iv, iv, FALSE);
1277 emit_flop(pc, 0, iv, iv);
1278 aid++;
1279 }
1280
1281 for (i = 0; i < pc->attr_nr; i++) {
1282 struct nv50_reg *a = &pc->attr[i*4];
1283
1284 for (c = 0; c < 4; c++) {
1285 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1286 struct nv50_reg *at =
1287 alloc_temp(pc, NULL);
1288 pc->attr[i*4+c].type = at->type;
1289 pc->attr[i*4+c].hw = at->hw;
1290 pc->attr[i*4+c].index = at->index;
1291 } else {
1292 pc->p->cfg.vp.attr[aid/32] |=
1293 (1 << (aid % 32));
1294 pc->attr[i*4+c].type = P_ATTR;
1295 pc->attr[i*4+c].hw = aid++;
1296 pc->attr[i*4+c].index = i;
1297 }
1298 }
1299
1300 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1301 continue;
1302
1303 emit_interp(pc, &a[0], &a[0], iv, TRUE);
1304 emit_interp(pc, &a[1], &a[1], iv, TRUE);
1305 emit_interp(pc, &a[2], &a[2], iv, TRUE);
1306 emit_interp(pc, &a[3], &a[3], iv, TRUE);
1307 }
1308
1309 if (iv)
1310 free_temp(pc, iv);
1311 }
1312
1313 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1314 if (pc->result_nr) {
1315 int rid = 0;
1316
1317 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1318 if (!pc->result)
1319 goto out_err;
1320
1321 for (i = 0; i < pc->result_nr; i++) {
1322 for (c = 0; c < 4; c++) {
1323 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1324 pc->result[i*4+c].type = P_TEMP;
1325 pc->result[i*4+c].hw = -1;
1326 } else {
1327 pc->result[i*4+c].type = P_RESULT;
1328 pc->result[i*4+c].hw = rid++;
1329 }
1330 pc->result[i*4+c].index = i;
1331 }
1332 }
1333 }
1334
1335 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1336 if (pc->param_nr) {
1337 int rid = 0;
1338
1339 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1340 if (!pc->param)
1341 goto out_err;
1342
1343 for (i = 0; i < pc->param_nr; i++) {
1344 for (c = 0; c < 4; c++) {
1345 pc->param[i*4+c].type = P_CONST;
1346 pc->param[i*4+c].hw = rid++;
1347 pc->param[i*4+c].index = i;
1348 }
1349 }
1350 }
1351
1352 if (pc->immd_nr) {
1353 int rid = pc->param_nr * 4;
1354
1355 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1356 if (!pc->immd)
1357 goto out_err;
1358
1359 for (i = 0; i < pc->immd_nr; i++) {
1360 for (c = 0; c < 4; c++) {
1361 pc->immd[i*4+c].type = P_IMMD;
1362 pc->immd[i*4+c].hw = rid++;
1363 pc->immd[i*4+c].index = i;
1364 }
1365 }
1366 }
1367
1368 ret = TRUE;
1369 out_err:
1370 tgsi_parse_free(&p);
1371 return ret;
1372 }
1373
1374 static boolean
1375 nv50_program_tx(struct nv50_program *p)
1376 {
1377 struct tgsi_parse_context parse;
1378 struct nv50_pc *pc;
1379 boolean ret;
1380
1381 pc = CALLOC_STRUCT(nv50_pc);
1382 if (!pc)
1383 return FALSE;
1384 pc->p = p;
1385 pc->p->cfg.high_temp = 4;
1386
1387 ret = nv50_program_tx_prep(pc);
1388 if (ret == FALSE)
1389 goto out_cleanup;
1390
1391 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1392 while (!tgsi_parse_end_of_tokens(&parse)) {
1393 const union tgsi_full_token *tok = &parse.FullToken;
1394
1395 tgsi_parse_token(&parse);
1396
1397 switch (tok->Token.Type) {
1398 case TGSI_TOKEN_TYPE_INSTRUCTION:
1399 ret = nv50_program_tx_insn(pc, tok);
1400 if (ret == FALSE)
1401 goto out_err;
1402 break;
1403 default:
1404 break;
1405 }
1406 }
1407
1408 if (p->type == PIPE_SHADER_FRAGMENT) {
1409 struct nv50_reg out;
1410
1411 out.type = P_TEMP;
1412 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1413 emit_mov(pc, &out, &pc->result[out.hw]);
1414 }
1415
1416 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1417 pc->p->exec_tail->inst[1] |= 0x00000001;
1418
1419 p->param_nr = pc->param_nr * 4;
1420 p->immd_nr = pc->immd_nr * 4;
1421 p->immd = pc->immd_buf;
1422
1423 out_err:
1424 tgsi_parse_free(&parse);
1425
1426 out_cleanup:
1427 return ret;
1428 }
1429
1430 static void
1431 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1432 {
1433 if (nv50_program_tx(p) == FALSE)
1434 assert(0);
1435 p->translated = TRUE;
1436 }
1437
1438 static void
1439 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1440 unsigned start, unsigned count)
1441 {
1442 while (count) {
1443 unsigned nr = count > 2047 ? 2047 : count;
1444
1445 BEGIN_RING(tesla, 0x00000f00, 1);
1446 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
1447 BEGIN_RING(tesla, 0x40000f04, nr);
1448 OUT_RINGp (map, nr);
1449
1450 map += nr;
1451 start += nr;
1452 count -= nr;
1453 }
1454 }
1455
1456 static void
1457 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1458 {
1459 struct nouveau_winsys *nvws = nv50->screen->nvws;
1460 struct pipe_winsys *ws = nv50->pipe.winsys;
1461 unsigned nr = p->param_nr + p->immd_nr;
1462
1463 if (!p->data && nr) {
1464 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1465
1466 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1467 while (heap->next && heap->size < nr) {
1468 struct nv50_program *evict = heap->next->priv;
1469 nvws->res_free(&evict->data);
1470 }
1471
1472 if (nvws->res_alloc(heap, nr, p, &p->data))
1473 assert(0);
1474 }
1475 }
1476
1477 if (p->param_nr) {
1478 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1479 PIPE_BUFFER_USAGE_CPU_READ);
1480 nv50_program_upload_data(nv50, map, p->data->start,
1481 p->param_nr);
1482 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1483 }
1484
1485 if (p->immd_nr) {
1486 nv50_program_upload_data(nv50, p->immd,
1487 p->data->start + p->param_nr,
1488 p->immd_nr);
1489 }
1490 }
1491
1492 static void
1493 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1494 {
1495 struct pipe_winsys *ws = nv50->pipe.winsys;
1496 struct nv50_program_exec *e;
1497 struct nouveau_stateobj *so;
1498 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1499 unsigned start, count, *up, *ptr;
1500 boolean upload = FALSE;
1501
1502 if (!p->buffer) {
1503 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1504 upload = TRUE;
1505 }
1506
1507 if (p->data && p->data->start != p->data_start) {
1508 for (e = p->exec_head; e; e = e->next) {
1509 unsigned ei, ci;
1510
1511 if (e->param.index < 0)
1512 continue;
1513 ei = e->param.shift >> 5;
1514 ci = e->param.index + p->data->start;
1515
1516 e->inst[ei] &= ~e->param.mask;
1517 e->inst[ei] |= (ci << e->param.shift);
1518 }
1519
1520 p->data_start = p->data->start;
1521 upload = TRUE;
1522 }
1523
1524 if (!upload)
1525 return FALSE;
1526
1527 up = ptr = MALLOC(p->exec_size * 4);
1528 for (e = p->exec_head; e; e = e->next) {
1529 *(ptr++) = e->inst[0];
1530 if (is_long(e))
1531 *(ptr++) = e->inst[1];
1532 }
1533
1534 so = so_new(3,2);
1535 so_method(so, nv50->screen->tesla, 0x1280, 3);
1536 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1537 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1538 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1539
1540 start = 0; count = p->exec_size;
1541 while (count) {
1542 struct nouveau_winsys *nvws = nv50->screen->nvws;
1543 unsigned nr;
1544
1545 so_emit(nvws, so);
1546
1547 nr = MIN2(count, 2047);
1548 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1549 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1550 FIRE_RING(NULL);
1551 continue;
1552 }
1553
1554 BEGIN_RING(tesla, 0x0f00, 1);
1555 OUT_RING ((start << 8) | NV50_CB_PUPLOAD);
1556 BEGIN_RING(tesla, 0x40000f04, nr);
1557 OUT_RINGp (up + start, nr);
1558
1559 start += nr;
1560 count -= nr;
1561 }
1562
1563 FREE(up);
1564 so_ref(NULL, &so);
1565 }
1566
1567 void
1568 nv50_vertprog_validate(struct nv50_context *nv50)
1569 {
1570 struct nouveau_grobj *tesla = nv50->screen->tesla;
1571 struct nv50_program *p = nv50->vertprog;
1572 struct nouveau_stateobj *so;
1573
1574 if (!p->translated) {
1575 nv50_program_validate(nv50, p);
1576 if (!p->translated)
1577 assert(0);
1578 }
1579
1580 nv50_program_validate_data(nv50, p);
1581 nv50_program_validate_code(nv50, p);
1582
1583 so = so_new(11, 2);
1584 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1585 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1586 NOUVEAU_BO_HIGH, 0, 0);
1587 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1588 NOUVEAU_BO_LOW, 0, 0);
1589 so_method(so, tesla, 0x1650, 2);
1590 so_data (so, p->cfg.vp.attr[0]);
1591 so_data (so, p->cfg.vp.attr[1]);
1592 so_method(so, tesla, 0x16b8, 1);
1593 so_data (so, p->cfg.high_result);
1594 so_method(so, tesla, 0x16ac, 2);
1595 so_data (so, 8);
1596 so_data (so, p->cfg.high_temp);
1597 so_method(so, tesla, 0x140c, 1);
1598 so_data (so, 0); /* program start offset */
1599 so_emit(nv50->screen->nvws, so);
1600 so_ref(NULL, &so);
1601 }
1602
1603 void
1604 nv50_fragprog_validate(struct nv50_context *nv50)
1605 {
1606 struct nouveau_grobj *tesla = nv50->screen->tesla;
1607 struct nv50_program *p = nv50->fragprog;
1608 struct nouveau_stateobj *so;
1609
1610 if (!p->translated) {
1611 nv50_program_validate(nv50, p);
1612 if (!p->translated)
1613 assert(0);
1614 }
1615
1616 nv50_program_validate_data(nv50, p);
1617 nv50_program_validate_code(nv50, p);
1618
1619 so = so_new(64, 2);
1620 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1621 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1622 NOUVEAU_BO_HIGH, 0, 0);
1623 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1624 NOUVEAU_BO_LOW, 0, 0);
1625 so_method(so, tesla, 0x1904, 4);
1626 so_data (so, 0x01040404); /* p: 0x01000404 */
1627 so_data (so, 0x00000004);
1628 so_data (so, 0x00000000);
1629 so_data (so, 0x00000000);
1630 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
1631 so_data (so, 0x03020100);
1632 so_data (so, 0x07060504);
1633 so_data (so, 0x0b0a0908);
1634 so_method(so, tesla, 0x1988, 2);
1635 so_data (so, 0x08040404); /* p: 0x0f000401 */
1636 so_data (so, p->cfg.high_temp);
1637 so_method(so, tesla, 0x1414, 1);
1638 so_data (so, 0); /* program start offset */
1639 so_emit(nv50->screen->nvws, so);
1640 so_ref(NULL, &so);
1641 }
1642
1643 void
1644 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1645 {
1646 struct pipe_winsys *ws = nv50->pipe.winsys;
1647
1648 while (p->exec_head) {
1649 struct nv50_program_exec *e = p->exec_head;
1650
1651 p->exec_head = e->next;
1652 FREE(e);
1653 }
1654 p->exec_tail = NULL;
1655 p->exec_size = 0;
1656
1657 if (p->buffer)
1658 pipe_buffer_reference(ws, &p->buffer, NULL);
1659
1660 p->translated = 0;
1661 }
1662