bc0a834aee6d249f3942576bba843fa70e319227
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11 #include "nv50_state.h"
12
/* Number of hardware temporary register slots tracked in nv50_pc.r_temp[]. */
#define NV50_SU_MAX_TEMP 64
14
/* TODO:
 *
 * ARL
 * LIT - other quirks still to be sorted out
 *
 * MSB - Like MAD, but MUL+SUB
 *	- Drop it, and instead introduce a way to negate args for ops that
 *	  support it.
 *
 * Look into inlining IMMD for ops other than MOV (make it general?)
 *	- Maybe even relax restrictions a bit: can't do P_RESULT + P_IMMD,
 *	  but can emit to P_TEMP first - then MOV later. NVIDIA does this.
 *
 * Verify half-insns work where expected - and force disable them where they
 * don't work - MUL has it forcibly disabled atm as it fixes POW.
 */
/* One scalar register operand as handled by the code emitter. */
struct nv50_reg {
	/* Register file the operand lives in. */
	enum {
		P_TEMP = 0,
		P_ATTR = 1,
		P_RESULT = 2,
		P_CONST = 3,
		P_IMMD = 4
	} type;
	/* TGSI register index; -1 for registers created by alloc_temp()
	 * and alloc_immd(). */
	int index;

	/* Number encoded into instructions; -1 on a P_TEMP that has not
	 * been given a hardware slot yet. */
	int hw;
	/* Not referenced by the code visible in this file. */
	int neg;
};
42
43 struct nv50_pc {
44 struct nv50_program *p;
45
46 /* hw resources */
47 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
48
49 /* tgsi resources */
50 struct nv50_reg *temp;
51 int temp_nr;
52 struct nv50_reg *attr;
53 int attr_nr;
54 struct nv50_reg *result;
55 int result_nr;
56 struct nv50_reg *param;
57 int param_nr;
58 struct nv50_reg *immd;
59 float *immd_buf;
60 int immd_nr;
61
62 struct nv50_reg *temp_temp[8];
63 unsigned temp_temp_nr;
64 };
65
66 static void
67 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
68 {
69 int i;
70
71 if (reg->type != P_TEMP)
72 return;
73
74 if (reg->hw >= 0) {
75 /*XXX: do this here too to catch FP temp-as-attr usage..
76 * not clean, but works */
77 if (pc->p->cfg.high_temp < (reg->hw + 1))
78 pc->p->cfg.high_temp = reg->hw + 1;
79 return;
80 }
81
82 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
83 if (!(pc->r_temp[i])) {
84 pc->r_temp[i] = reg;
85 reg->hw = i;
86 if (pc->p->cfg.high_temp < (i + 1))
87 pc->p->cfg.high_temp = i + 1;
88 return;
89 }
90 }
91
92 assert(0);
93 }
94
95 static struct nv50_reg *
96 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
97 {
98 struct nv50_reg *r;
99 int i;
100
101 if (dst && dst->type == P_TEMP && dst->hw == -1)
102 return dst;
103
104 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
105 if (!pc->r_temp[i]) {
106 r = CALLOC_STRUCT(nv50_reg);
107 r->type = P_TEMP;
108 r->index = -1;
109 r->hw = i;
110 pc->r_temp[i] = r;
111 return r;
112 }
113 }
114
115 assert(0);
116 return NULL;
117 }
118
119 static void
120 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
121 {
122 if (r->index == -1) {
123 FREE(pc->r_temp[r->hw]);
124 pc->r_temp[r->hw] = NULL;
125 }
126 }
127
128 static struct nv50_reg *
129 temp_temp(struct nv50_pc *pc)
130 {
131 if (pc->temp_temp_nr >= 8)
132 assert(0);
133
134 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
135 return pc->temp_temp[pc->temp_temp_nr++];
136 }
137
138 static void
139 kill_temp_temp(struct nv50_pc *pc)
140 {
141 int i;
142
143 for (i = 0; i < pc->temp_temp_nr; i++)
144 free_temp(pc, pc->temp_temp[i]);
145 pc->temp_temp_nr = 0;
146 }
147
148 static int
149 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
150 {
151 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
152 sizeof(float));
153 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
154 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
155 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
156 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
157
158 return pc->immd_nr++;
159 }
160
161 static struct nv50_reg *
162 alloc_immd(struct nv50_pc *pc, float f)
163 {
164 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
165 unsigned hw;
166
167 hw = ctor_immd(pc, f, 0, 0, 0);
168 r->type = P_IMMD;
169 r->hw = hw;
170 r->index = -1;
171 return r;
172 }
173
174 static struct nv50_reg *
175 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
176 {
177 switch (dst->DstRegister.File) {
178 case TGSI_FILE_TEMPORARY:
179 return &pc->temp[dst->DstRegister.Index * 4 + c];
180 case TGSI_FILE_OUTPUT:
181 return &pc->result[dst->DstRegister.Index * 4 + c];
182 case TGSI_FILE_NULL:
183 return NULL;
184 default:
185 break;
186 }
187
188 return NULL;
189 }
190
191 static struct nv50_reg *
192 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
193 {
194 struct nv50_reg *r = NULL;
195 unsigned c;
196
197 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
198 switch (c) {
199 case TGSI_EXTSWIZZLE_X:
200 case TGSI_EXTSWIZZLE_Y:
201 case TGSI_EXTSWIZZLE_Z:
202 case TGSI_EXTSWIZZLE_W:
203 switch (src->SrcRegister.File) {
204 case TGSI_FILE_INPUT:
205 r = &pc->attr[src->SrcRegister.Index * 4 + c];
206 break;
207 case TGSI_FILE_TEMPORARY:
208 r = &pc->temp[src->SrcRegister.Index * 4 + c];
209 break;
210 case TGSI_FILE_CONSTANT:
211 r = &pc->param[src->SrcRegister.Index * 4 + c];
212 break;
213 case TGSI_FILE_IMMEDIATE:
214 r = &pc->immd[src->SrcRegister.Index * 4 + c];
215 break;
216 default:
217 assert(0);
218 break;
219 }
220 break;
221 case TGSI_EXTSWIZZLE_ZERO:
222 r = alloc_immd(pc, 0.0);
223 break;
224 case TGSI_EXTSWIZZLE_ONE:
225 r = alloc_immd(pc, 1.0);
226 break;
227 default:
228 assert(0);
229 break;
230 }
231
232 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
233 case TGSI_UTIL_SIGN_KEEP:
234 break;
235 default:
236 assert(0);
237 break;
238 }
239
240 return r;
241 }
242
243 static void
244 emit(struct nv50_pc *pc, unsigned *inst)
245 {
246 struct nv50_program *p = pc->p;
247
248 if (inst[0] & 1) {
249 p->insns_nr += 2;
250 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
251 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
252 } else {
253 p->insns_nr += 1;
254 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
255 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
256 }
257 }
258
259 static INLINE void set_long(struct nv50_pc *, unsigned *);
260
261 static boolean
262 is_long(unsigned *inst)
263 {
264 if (inst[0] & 1)
265 return TRUE;
266 return FALSE;
267 }
268
269 static boolean
270 is_immd(unsigned *inst)
271 {
272 if (is_long(inst) && (inst[1] & 3) == 3)
273 return TRUE;
274 return FALSE;
275 }
276
277 static INLINE void
278 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
279 {
280 set_long(pc, inst);
281 inst[1] &= ~((0x1f << 7) | (0x3 << 12));
282 inst[1] |= (pred << 7) | (idx << 12);
283 }
284
285 static INLINE void
286 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
287 {
288 set_long(pc, inst);
289 inst[1] &= ~((0x3 << 4) | (1 << 6));
290 inst[1] |= (idx << 4) | (on << 6);
291 }
292
293 static INLINE void
294 set_long(struct nv50_pc *pc, unsigned *inst)
295 {
296 if (is_long(inst))
297 return;
298
299 inst[0] |= 1;
300 set_pred(pc, 0xf, 0, inst);
301 set_pred_wr(pc, 0, 0, inst);
302 }
303
304 static INLINE void
305 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
306 {
307 if (dst->type == P_RESULT) {
308 set_long(pc, inst);
309 inst[1] |= 0x00000008;
310 }
311
312 alloc_reg(pc, dst);
313 inst[0] |= (dst->hw << 2);
314 }
315
316 static INLINE void
317 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
318 {
319 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
320
321 set_long(pc, inst);
322 /*XXX: can't be predicated - bits overlap.. catch cases where both
323 * are required and avoid them. */
324 set_pred(pc, 0, 0, inst);
325 set_pred_wr(pc, 0, 0, inst);
326
327 inst[1] |= 0x00000002 | 0x00000001;
328 inst[0] |= (val & 0x3f) << 16;
329 inst[1] |= (val >> 6) << 2;
330 }
331
332 static void
333 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
334 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
335 {
336 unsigned inst[2] = { 0, 0 };
337
338 inst[0] |= 0x80000000;
339 set_dst(pc, dst, inst);
340 alloc_reg(pc, iv);
341 inst[0] |= (iv->hw << 9);
342 alloc_reg(pc, src);
343 inst[0] |= (src->hw << 16);
344 if (noperspective)
345 inst[0] |= (1 << 25);
346
347 emit(pc, inst);
348 }
349
350 static void
351 set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
352 {
353 set_long(pc, inst);
354 if (src->type == P_IMMD) {
355 inst[1] |= (NV50_CB_PMISC << 22);
356 } else {
357 if (pc->p->type == NV50_PROG_VERTEX)
358 inst[1] |= (NV50_CB_PVP << 22);
359 else
360 inst[1] |= (NV50_CB_PFP << 22);
361 }
362 }
363
364 static void
365 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
366 {
367 unsigned inst[2] = { 0, 0 };
368
369 inst[0] |= 0x10000000;
370
371 set_dst(pc, dst, inst);
372
373 if (dst->type != P_RESULT && src->type == P_IMMD) {
374 set_immd(pc, src, inst);
375 /*XXX: 32-bit, but steals part of "half" reg space - need to
376 * catch and handle this case if/when we do half-regs
377 */
378 inst[0] |= 0x00008000;
379 } else
380 if (src->type == P_IMMD || src->type == P_CONST) {
381 set_long(pc, inst);
382 set_cseg(pc, src, inst);
383 inst[0] |= (src->hw << 9);
384 inst[1] |= 0x20000000; /* src0 const? */
385 } else {
386 if (src->type == P_ATTR) {
387 set_long(pc, inst);
388 inst[1] |= 0x00200000;
389 }
390
391 alloc_reg(pc, src);
392 inst[0] |= (src->hw << 9);
393 }
394
395 /* We really should support "half" instructions here at some point,
396 * but I don't feel confident enough about them yet.
397 */
398 set_long(pc, inst);
399 if (is_long(inst) && !is_immd(inst)) {
400 inst[1] |= 0x04000000; /* 32-bit */
401 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
402 }
403
404 emit(pc, inst);
405 }
406
407 static boolean
408 check_swap_src_0_1(struct nv50_pc *pc,
409 struct nv50_reg **s0, struct nv50_reg **s1)
410 {
411 struct nv50_reg *src0 = *s0, *src1 = *s1;
412
413 if (src0->type == P_CONST) {
414 if (src1->type != P_CONST) {
415 *s0 = src1;
416 *s1 = src0;
417 return TRUE;
418 }
419 } else
420 if (src1->type == P_ATTR) {
421 if (src0->type != P_ATTR) {
422 *s0 = src1;
423 *s1 = src0;
424 return TRUE;
425 }
426 }
427
428 return FALSE;
429 }
430
431 static void
432 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
433 {
434 if (src->type == P_ATTR) {
435 set_long(pc, inst);
436 inst[1] |= 0x00200000;
437 } else
438 if (src->type == P_CONST || src->type == P_IMMD) {
439 struct nv50_reg *temp = temp_temp(pc);
440
441 emit_mov(pc, temp, src);
442 src = temp;
443 }
444
445 alloc_reg(pc, src);
446 inst[0] |= (src->hw << 9);
447 }
448
449 static void
450 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
451 {
452 if (src->type == P_ATTR) {
453 struct nv50_reg *temp = temp_temp(pc);
454
455 emit_mov(pc, temp, src);
456 src = temp;
457 } else
458 if (src->type == P_CONST || src->type == P_IMMD) {
459 set_cseg(pc, src, inst);
460 inst[0] |= 0x00800000;
461 }
462
463 alloc_reg(pc, src);
464 inst[0] |= (src->hw << 16);
465 }
466
467 static void
468 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
469 {
470 set_long(pc, inst);
471
472 if (src->type == P_ATTR) {
473 struct nv50_reg *temp = temp_temp(pc);
474
475 emit_mov(pc, temp, src);
476 src = temp;
477 } else
478 if (src->type == P_CONST || src->type == P_IMMD) {
479 set_cseg(pc, src, inst);
480 inst[0] |= 0x01000000;
481 }
482
483 alloc_reg(pc, src);
484 inst[1] |= (src->hw << 14);
485 }
486
/* Emit dst = src0 * src1 (opcode 0xc0000000, always in long form). */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	set_long(pc, inst);

	/* The result of the swap check is irrelevant for a multiply. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
503
504 static void
505 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
506 struct nv50_reg *src0, struct nv50_reg *src1)
507 {
508 unsigned inst[2] = { 0, 0 };
509
510 inst[0] |= 0xb0000000;
511
512 check_swap_src_0_1(pc, &src0, &src1);
513 set_dst(pc, dst, inst);
514 set_src_0(pc, src0, inst);
515 if (is_long(inst))
516 set_src_2(pc, src1, inst);
517 else
518 set_src_1(pc, src1, inst);
519
520 emit(pc, inst);
521 }
522
/*
 * Emit a two-operand 0xb0000000 instruction with sub-opcode sub stored
 * at bit 29 of inst[1].  The callers in this file pass 4 for MAX and 5
 * for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
540
541 static void
542 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
543 struct nv50_reg *src1)
544 {
545 unsigned inst[2] = { 0, 0 };
546
547 inst[0] |= 0xb0000000;
548
549 set_long(pc, inst);
550 if (check_swap_src_0_1(pc, &src0, &src1))
551 inst[1] |= 0x04000000;
552 else
553 inst[1] |= 0x08000000;
554
555 set_dst(pc, dst, inst);
556 set_src_0(pc, src0, inst);
557 set_src_2(pc, src1, inst);
558
559 emit(pc, inst);
560 }
561
/* Emit dst = src0 * src1 + src2 (opcode 0xe0000000). */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	/* Only the two multiplicands may trade places. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
578
/* Emit dst = src0 * src1 - src2 (opcode 0xe0000000 plus the subtract bit). */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	/* Only the two multiplicands may trade places. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
597
/*
 * Emit a one-operand 0x90000000 instruction.  A non-zero sub selects the
 * operation and is stored at bit 29 of inst[1], which needs the long form.
 * The callers in this file use 0 (RCP), 2 (RSQ), 3 (LG2), 4 (SIN),
 * 5 (COS) and 6 (EX2).
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
615
/*
 * Emit the instruction that the EX2 and POW paths in this file issue
 * before emit_flop(pc, 6, ...).
 */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	set_long(pc, inst);
	inst[1] |= (6 << 29) | 0x00004000;

	emit(pc, inst);
}
630
631 /*XXX: inaccurate results.. why? */
632 #define ALLOW_SET_SWAP 0
633
634 static void
635 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
636 struct nv50_reg *src0, struct nv50_reg *src1)
637 {
638 unsigned inst[2] = { 0, 0 };
639 #if ALLOW_SET_SWAP
640 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
641 #endif
642 struct nv50_reg *rdst;
643
644 #if ALLOW_SET_SWAP
645 assert(c_op <= 7);
646 if (check_swap_src_0_1(pc, &src0, &src1))
647 c_op = inv_cop[c_op];
648 #endif
649
650 rdst = dst;
651 if (dst->type != P_TEMP)
652 dst = alloc_temp(pc, NULL);
653
654 /* set.u32 */
655 set_long(pc, inst);
656 inst[0] |= 0xb0000000;
657 inst[1] |= (3 << 29);
658 inst[1] |= (c_op << 14);
659 /*XXX: breaks things, .u32 by default?
660 * decuda will disasm as .u16 and use .lo/.hi regs, but this
661 * doesn't seem to match what the hw actually does.
662 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
663 */
664 set_dst(pc, dst, inst);
665 set_src_0(pc, src0, inst);
666 set_src_1(pc, src1, inst);
667 emit(pc, inst);
668
669 /* cvt.f32.u32 */
670 inst[0] = 0xa0000001;
671 inst[1] = 0x64014780;
672 set_dst(pc, rdst, inst);
673 set_src_0(pc, dst, inst);
674 emit(pc, inst);
675
676 if (dst != rdst)
677 free_temp(pc, dst);
678 }
679
/* Emit the cvt instruction used to implement TGSI FLR: dst = floor(src). */
static void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000 /* cvt */, 0 };

	set_long(pc, inst);

	inst[1] |= (6 << 29);		/* cvt */
	inst[1] |= 0x08000000;		/* integer mode */
	inst[1] |= 0x04000000;		/* 32 bit */
	inst[1] |= ((0x1 << 3)) << 14;	/* .rn */
	inst[1] |= (1 << 14);		/* src .f32 */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
697
/*
 * Emit dst = v ^ e.  The sequence is the LG2 flop on v, a multiply by e,
 * then the same pre-EX2/EX2 pair the TGSI EX2 opcode uses.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);	/* same sub-op as TGSI LG2 */
	emit_mul(pc, acc, acc, e);
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);	/* same sub-op as TGSI EX2 */

	free_temp(pc, acc);
}
711
712 static boolean
713 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
714 {
715 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
716 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
717 unsigned mask, sat;
718 int i, c;
719
720 NOUVEAU_ERR("insn %p\n", tok);
721
722 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
723 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
724
725 for (c = 0; c < 4; c++) {
726 if (mask & (1 << c))
727 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
728 else
729 dst[c] = NULL;
730 }
731
732 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
733 for (c = 0; c < 4; c++)
734 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
735 }
736
737 if (sat) {
738 for (c = 0; c < 4; c++) {
739 rdst[c] = dst[c];
740 dst[c] = temp_temp(pc);
741 }
742 }
743
744 switch (inst->Instruction.Opcode) {
745 case TGSI_OPCODE_ABS:
746 for (c = 0; c < 4; c++) {
747 unsigned inst[2] = { 0, 0 };
748
749 inst[0] = 0xa0000000; /* cvt */
750 set_long(pc, inst);
751 inst[1] |= (6 << 29); /* cvt */
752 inst[1] |= 0x04000000; /* 32 bit */
753 inst[1] |= (1 << 14); /* src .f32 */
754 inst[1] |= ((1 << 6) << 14); /* .abs */
755 set_dst(pc, dst[c], inst);
756 set_src_0(pc, src[0][c], inst);
757 emit(pc, inst);
758 }
759 break;
760 case TGSI_OPCODE_ADD:
761 for (c = 0; c < 4; c++) {
762 if (!(mask & (1 << c)))
763 continue;
764 emit_add(pc, dst[c], src[0][c], src[1][c]);
765 }
766 break;
767 case TGSI_OPCODE_COS:
768 for (c = 0; c < 4; c++) {
769 if (!(mask & (1 << c)))
770 continue;
771 emit_flop(pc, 5, dst[c], src[0][c]);
772 }
773 break;
774 case TGSI_OPCODE_DP3:
775 temp = alloc_temp(pc, NULL);
776 emit_mul(pc, temp, src[0][0], src[1][0]);
777 emit_mad(pc, temp, src[0][1], src[1][1], temp);
778 emit_mad(pc, temp, src[0][2], src[1][2], temp);
779 for (c = 0; c < 4; c++) {
780 if (!(mask & (1 << c)))
781 continue;
782 emit_mov(pc, dst[c], temp);
783 }
784 free_temp(pc, temp);
785 break;
786 case TGSI_OPCODE_DP4:
787 temp = alloc_temp(pc, NULL);
788 emit_mul(pc, temp, src[0][0], src[1][0]);
789 emit_mad(pc, temp, src[0][1], src[1][1], temp);
790 emit_mad(pc, temp, src[0][2], src[1][2], temp);
791 emit_mad(pc, temp, src[0][3], src[1][3], temp);
792 for (c = 0; c < 4; c++) {
793 if (!(mask & (1 << c)))
794 continue;
795 emit_mov(pc, dst[c], temp);
796 }
797 free_temp(pc, temp);
798 break;
799 case TGSI_OPCODE_DPH:
800 temp = alloc_temp(pc, NULL);
801 emit_mul(pc, temp, src[0][0], src[1][0]);
802 emit_mad(pc, temp, src[0][1], src[1][1], temp);
803 emit_mad(pc, temp, src[0][2], src[1][2], temp);
804 emit_add(pc, temp, src[1][3], temp);
805 for (c = 0; c < 4; c++) {
806 if (!(mask & (1 << c)))
807 continue;
808 emit_mov(pc, dst[c], temp);
809 }
810 free_temp(pc, temp);
811 break;
812 case TGSI_OPCODE_DST:
813 {
814 struct nv50_reg *one = alloc_immd(pc, 1.0);
815 emit_mov(pc, dst[0], one);
816 emit_mul(pc, dst[1], src[0][1], src[1][1]);
817 emit_mov(pc, dst[2], src[0][2]);
818 emit_mov(pc, dst[3], src[1][3]);
819 FREE(one);
820 }
821 break;
822 case TGSI_OPCODE_EX2:
823 temp = alloc_temp(pc, NULL);
824 for (c = 0; c < 4; c++) {
825 if (!(mask & (1 << c)))
826 continue;
827 emit_preex2(pc, temp, src[0][c]);
828 emit_flop(pc, 6, dst[c], temp);
829 }
830 free_temp(pc, temp);
831 break;
832 case TGSI_OPCODE_FLR:
833 for (c = 0; c < 4; c++) {
834 if (!(mask & (1 << c)))
835 continue;
836 emit_flr(pc, dst[c], src[0][c]);
837 }
838 break;
839 case TGSI_OPCODE_FRC:
840 temp = alloc_temp(pc, NULL);
841 for (c = 0; c < 4; c++) {
842 if (!(mask & (1 << c)))
843 continue;
844 emit_flr(pc, temp, src[0][c]);
845 emit_sub(pc, dst[c], src[0][c], temp);
846 }
847 free_temp(pc, temp);
848 break;
849 case TGSI_OPCODE_LG2:
850 for (c = 0; c < 4; c++) {
851 if (!(mask & (1 << c)))
852 continue;
853 emit_flop(pc, 3, dst[c], src[0][c]);
854 }
855 break;
856 case TGSI_OPCODE_MAD:
857 for (c = 0; c < 4; c++) {
858 if (!(mask & (1 << c)))
859 continue;
860 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
861 }
862 break;
863 case TGSI_OPCODE_MAX:
864 for (c = 0; c < 4; c++) {
865 if (!(mask & (1 << c)))
866 continue;
867 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
868 }
869 break;
870 case TGSI_OPCODE_MIN:
871 for (c = 0; c < 4; c++) {
872 if (!(mask & (1 << c)))
873 continue;
874 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
875 }
876 break;
877 case TGSI_OPCODE_MOV:
878 for (c = 0; c < 4; c++) {
879 if (!(mask & (1 << c)))
880 continue;
881 emit_mov(pc, dst[c], src[0][c]);
882 }
883 break;
884 case TGSI_OPCODE_MUL:
885 for (c = 0; c < 4; c++) {
886 if (!(mask & (1 << c)))
887 continue;
888 emit_mul(pc, dst[c], src[0][c], src[1][c]);
889 }
890 break;
891 case TGSI_OPCODE_POW:
892 temp = alloc_temp(pc, NULL);
893 emit_pow(pc, temp, src[0][0], src[1][0]);
894 for (c = 0; c < 4; c++) {
895 if (!(mask & (1 << c)))
896 continue;
897 emit_mov(pc, dst[c], temp);
898 }
899 free_temp(pc, temp);
900 break;
901 case TGSI_OPCODE_RCP:
902 for (c = 0; c < 4; c++) {
903 if (!(mask & (1 << c)))
904 continue;
905 emit_flop(pc, 0, dst[c], src[0][c]);
906 }
907 break;
908 case TGSI_OPCODE_RSQ:
909 for (c = 0; c < 4; c++) {
910 if (!(mask & (1 << c)))
911 continue;
912 emit_flop(pc, 2, dst[c], src[0][c]);
913 }
914 break;
915 case TGSI_OPCODE_SGE:
916 for (c = 0; c < 4; c++) {
917 if (!(mask & (1 << c)))
918 continue;
919 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
920 }
921 break;
922 case TGSI_OPCODE_SIN:
923 for (c = 0; c < 4; c++) {
924 if (!(mask & (1 << c)))
925 continue;
926 emit_flop(pc, 4, dst[c], src[0][c]);
927 }
928 break;
929 case TGSI_OPCODE_SLT:
930 for (c = 0; c < 4; c++) {
931 if (!(mask & (1 << c)))
932 continue;
933 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
934 }
935 break;
936 case TGSI_OPCODE_SUB:
937 for (c = 0; c < 4; c++) {
938 if (!(mask & (1 << c)))
939 continue;
940 emit_sub(pc, dst[c], src[0][c], src[1][c]);
941 }
942 break;
943 case TGSI_OPCODE_XPD:
944 temp = alloc_temp(pc, NULL);
945 emit_mul(pc, temp, src[0][2], src[1][1]);
946 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
947 emit_mul(pc, temp, src[0][0], src[1][2]);
948 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
949 emit_mul(pc, temp, src[0][1], src[1][0]);
950 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
951 free_temp(pc, temp);
952 break;
953 case TGSI_OPCODE_END:
954 break;
955 default:
956 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
957 return FALSE;
958 }
959
960 if (sat) {
961 for (c = 0; c < 4; c++) {
962 unsigned inst[2] = { 0, 0 };
963
964 if (!(mask & (1 << c)))
965 continue;
966
967 inst[0] = 0xa0000000; /* cvt */
968 set_long(pc, inst);
969 inst[1] |= (6 << 29); /* cvt */
970 inst[1] |= 0x04000000; /* 32 bit */
971 inst[1] |= (1 << 14); /* src .f32 */
972 inst[1] |= ((1 << 5) << 14); /* .sat */
973 set_dst(pc, rdst[c], inst);
974 set_src_0(pc, dst[c], inst);
975 emit(pc, inst);
976 }
977 }
978
979 kill_temp_temp(pc);
980 return TRUE;
981 }
982
983 static boolean
984 nv50_program_tx_prep(struct nv50_pc *pc)
985 {
986 struct tgsi_parse_context p;
987 boolean ret = FALSE;
988 unsigned i, c;
989
990 tgsi_parse_init(&p, pc->p->pipe.tokens);
991 while (!tgsi_parse_end_of_tokens(&p)) {
992 const union tgsi_full_token *tok = &p.FullToken;
993
994 tgsi_parse_token(&p);
995 switch (tok->Token.Type) {
996 case TGSI_TOKEN_TYPE_IMMEDIATE:
997 {
998 const struct tgsi_full_immediate *imm =
999 &p.FullToken.FullImmediate;
1000
1001 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1002 imm->u.ImmediateFloat32[1].Float,
1003 imm->u.ImmediateFloat32[2].Float,
1004 imm->u.ImmediateFloat32[3].Float);
1005 }
1006 break;
1007 case TGSI_TOKEN_TYPE_DECLARATION:
1008 {
1009 const struct tgsi_full_declaration *d;
1010 unsigned last;
1011
1012 d = &p.FullToken.FullDeclaration;
1013 last = d->u.DeclarationRange.Last;
1014
1015 switch (d->Declaration.File) {
1016 case TGSI_FILE_TEMPORARY:
1017 if (pc->temp_nr < (last + 1))
1018 pc->temp_nr = last + 1;
1019 break;
1020 case TGSI_FILE_OUTPUT:
1021 if (pc->result_nr < (last + 1))
1022 pc->result_nr = last + 1;
1023 break;
1024 case TGSI_FILE_INPUT:
1025 if (pc->attr_nr < (last + 1))
1026 pc->attr_nr = last + 1;
1027 break;
1028 case TGSI_FILE_CONSTANT:
1029 if (pc->param_nr < (last + 1))
1030 pc->param_nr = last + 1;
1031 break;
1032 default:
1033 NOUVEAU_ERR("bad decl file %d\n",
1034 d->Declaration.File);
1035 goto out_err;
1036 }
1037 }
1038 break;
1039 case TGSI_TOKEN_TYPE_INSTRUCTION:
1040 break;
1041 default:
1042 break;
1043 }
1044 }
1045
1046 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1047 if (pc->temp_nr) {
1048 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1049 if (!pc->temp)
1050 goto out_err;
1051
1052 for (i = 0; i < pc->temp_nr; i++) {
1053 for (c = 0; c < 4; c++) {
1054 pc->temp[i*4+c].type = P_TEMP;
1055 pc->temp[i*4+c].hw = -1;
1056 pc->temp[i*4+c].index = i;
1057 }
1058 }
1059 }
1060
1061 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1062 if (pc->attr_nr) {
1063 struct nv50_reg *iv = NULL, *tmp = NULL;
1064 int aid = 0;
1065
1066 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1067 if (!pc->attr)
1068 goto out_err;
1069
1070 if (pc->p->type == NV50_PROG_FRAGMENT) {
1071 iv = alloc_temp(pc, NULL);
1072 aid++;
1073 }
1074
1075 for (i = 0; i < pc->attr_nr; i++) {
1076 struct nv50_reg *a = &pc->attr[i*4];
1077
1078 for (c = 0; c < 4; c++) {
1079 if (pc->p->type == NV50_PROG_FRAGMENT) {
1080 struct nv50_reg *at =
1081 alloc_temp(pc, NULL);
1082 pc->attr[i*4+c].type = at->type;
1083 pc->attr[i*4+c].hw = at->hw;
1084 pc->attr[i*4+c].index = at->index;
1085 } else {
1086 pc->p->cfg.vp.attr[aid/32] |=
1087 (1 << (aid % 32));
1088 pc->attr[i*4+c].type = P_ATTR;
1089 pc->attr[i*4+c].hw = aid++;
1090 pc->attr[i*4+c].index = i;
1091 }
1092 }
1093
1094 if (pc->p->type != NV50_PROG_FRAGMENT)
1095 continue;
1096
1097 emit_interp(pc, iv, iv, iv, FALSE);
1098 tmp = alloc_temp(pc, NULL);
1099 {
1100 unsigned inst[2] = { 0, 0 };
1101 inst[0] = 0x90000000;
1102 inst[0] |= (tmp->hw << 2);
1103 emit(pc, inst);
1104 }
1105 emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1106 emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1107 emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1108 emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1109 free_temp(pc, tmp);
1110 }
1111
1112 if (iv)
1113 free_temp(pc, iv);
1114 }
1115
1116 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1117 if (pc->result_nr) {
1118 int rid = 0;
1119
1120 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1121 if (!pc->result)
1122 goto out_err;
1123
1124 for (i = 0; i < pc->result_nr; i++) {
1125 for (c = 0; c < 4; c++) {
1126 if (pc->p->type == NV50_PROG_FRAGMENT)
1127 pc->result[i*4+c].type = P_TEMP;
1128 else
1129 pc->result[i*4+c].type = P_RESULT;
1130 pc->result[i*4+c].hw = rid++;
1131 pc->result[i*4+c].index = i;
1132 }
1133 }
1134 }
1135
1136 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1137 if (pc->param_nr) {
1138 int rid = 0;
1139
1140 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1141 if (!pc->param)
1142 goto out_err;
1143
1144 for (i = 0; i < pc->param_nr; i++) {
1145 for (c = 0; c < 4; c++) {
1146 pc->param[i*4+c].type = P_CONST;
1147 pc->param[i*4+c].hw = rid++;
1148 pc->param[i*4+c].index = i;
1149 }
1150 }
1151 }
1152
1153 if (pc->immd_nr) {
1154 int rid = 0;
1155
1156 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1157 if (!pc->immd)
1158 goto out_err;
1159
1160 for (i = 0; i < pc->immd_nr; i++) {
1161 for (c = 0; c < 4; c++) {
1162 pc->immd[i*4+c].type = P_IMMD;
1163 pc->immd[i*4+c].hw = rid++;
1164 pc->immd[i*4+c].index = i;
1165 }
1166 }
1167 }
1168
1169 ret = TRUE;
1170 out_err:
1171 tgsi_parse_free(&p);
1172 return ret;
1173 }
1174
1175 static boolean
1176 nv50_program_tx(struct nv50_program *p)
1177 {
1178 struct tgsi_parse_context parse;
1179 struct nv50_pc *pc;
1180 boolean ret;
1181
1182 pc = CALLOC_STRUCT(nv50_pc);
1183 if (!pc)
1184 return FALSE;
1185 pc->p = p;
1186 pc->p->cfg.high_temp = 4;
1187
1188 ret = nv50_program_tx_prep(pc);
1189 if (ret == FALSE)
1190 goto out_cleanup;
1191
1192 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1193 while (!tgsi_parse_end_of_tokens(&parse)) {
1194 const union tgsi_full_token *tok = &parse.FullToken;
1195
1196 tgsi_parse_token(&parse);
1197
1198 switch (tok->Token.Type) {
1199 case TGSI_TOKEN_TYPE_INSTRUCTION:
1200 ret = nv50_program_tx_insn(pc, tok);
1201 if (ret == FALSE)
1202 goto out_err;
1203 break;
1204 default:
1205 break;
1206 }
1207 }
1208
1209 p->immd_nr = pc->immd_nr * 4;
1210 p->immd = pc->immd_buf;
1211
1212 out_err:
1213 tgsi_parse_free(&parse);
1214
1215 out_cleanup:
1216 return ret;
1217 }
1218
1219 static void
1220 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1221 {
1222 int i;
1223
1224 if (nv50_program_tx(p) == FALSE)
1225 assert(0);
1226 /* *not* sufficient, it's fine if last inst is long and
1227 * NOT immd - otherwise it's fucked fucked fucked */
1228 p->insns[p->insns_nr - 1] |= 0x00000001;
1229
1230 if (p->type == NV50_PROG_VERTEX) {
1231 for (i = 0; i < p->insns_nr; i++)
1232 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1233 } else {
1234 for (i = 0; i < p->insns_nr; i++)
1235 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1236 }
1237
1238 p->translated = TRUE;
1239 }
1240
1241 static void
1242 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1243 {
1244 int i;
1245
1246 for (i = 0; i < p->immd_nr; i++) {
1247 BEGIN_RING(tesla, 0x0f00, 2);
1248 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8));
1249 OUT_RING (fui(p->immd[i]));
1250 }
1251 }
1252
1253 static void
1254 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1255 {
1256 struct pipe_winsys *ws = nv50->pipe.winsys;
1257 void *map;
1258
1259 if (!p->buffer)
1260 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1261 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1262 memcpy(map, p->insns, p->insns_nr * 4);
1263 ws->buffer_unmap(ws, p->buffer);
1264 }
1265
1266 void
1267 nv50_vertprog_validate(struct nv50_context *nv50)
1268 {
1269 struct nouveau_grobj *tesla = nv50->screen->tesla;
1270 struct nv50_program *p = nv50->vertprog;
1271 struct nouveau_stateobj *so;
1272
1273 if (!p->translated) {
1274 nv50_program_validate(nv50, p);
1275 if (!p->translated)
1276 assert(0);
1277 }
1278
1279 nv50_program_validate_data(nv50, p);
1280 nv50_program_validate_code(nv50, p);
1281
1282 so = so_new(11, 2);
1283 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1284 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1285 NOUVEAU_BO_HIGH, 0, 0);
1286 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1287 NOUVEAU_BO_LOW, 0, 0);
1288 so_method(so, tesla, 0x1650, 2);
1289 so_data (so, p->cfg.vp.attr[0]);
1290 so_data (so, p->cfg.vp.attr[1]);
1291 so_method(so, tesla, 0x16ac, 2);
1292 so_data (so, 8);
1293 so_data (so, p->cfg.high_temp);
1294 so_method(so, tesla, 0x140c, 1);
1295 so_data (so, 0); /* program start offset */
1296 so_emit(nv50->screen->nvws, so);
1297 so_ref(NULL, &so);
1298 }
1299
1300 void
1301 nv50_fragprog_validate(struct nv50_context *nv50)
1302 {
1303 struct nouveau_grobj *tesla = nv50->screen->tesla;
1304 struct nv50_program *p = nv50->fragprog;
1305 struct nouveau_stateobj *so;
1306
1307 if (!p->translated) {
1308 nv50_program_validate(nv50, p);
1309 if (!p->translated)
1310 assert(0);
1311 }
1312
1313 nv50_program_validate_data(nv50, p);
1314 nv50_program_validate_code(nv50, p);
1315
1316 so = so_new(7, 2);
1317 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1318 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1319 NOUVEAU_BO_HIGH, 0, 0);
1320 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1321 NOUVEAU_BO_LOW, 0, 0);
1322 so_method(so, tesla, 0x198c, 1);
1323 so_data (so, p->cfg.high_temp);
1324 so_method(so, tesla, 0x1414, 1);
1325 so_data (so, 0); /* program start offset */
1326 so_emit(nv50->screen->nvws, so);
1327 so_ref(NULL, &so);
1328 }
1329
1330 void
1331 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1332 {
1333 struct pipe_winsys *ws = nv50->pipe.winsys;
1334
1335 if (p->insns_nr) {
1336 if (p->insns)
1337 FREE(p->insns);
1338 p->insns_nr = 0;
1339 }
1340
1341 if (p->buffer)
1342 pipe_buffer_reference(ws, &p->buffer, NULL);
1343
1344 p->translated = 0;
1345 }
1346