nv50: obey per-source abs (TGSI_UTIL_SIGN_CLEAR)
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11 #include "nv50_state.h"
12
13 #define NV50_SU_MAX_TEMP 64
14
15 /* ARL
16 * LIT - other buggery
17 *
 * MSB - Like MAD, but MUL+SUB
 *     - Get rid of it: instead introduce a way to negate args for the
 *       ops that support it.
21 *
22 * Look into inlining IMMD for ops other than MOV (make it general?)
23 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
24 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
25 *
26 * Verify half-insns work where expected - and force disable them where they
27 * don't work - MUL has it forcibly disabled atm as it fixes POW..
28 */
/* One scalar (single channel) register as tracked by the translator. */
struct nv50_reg {
	/* Register file the value lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* Index of the TGSI register this channel belongs to; -1 marks
	 * registers created by the translator itself. */
	int index;

	/* Hardware register / constant slot; -1 while a P_TEMP has not been
	 * given a hardware register yet. */
	int hw;

	/* Negation flag; not read by any code in this file yet. */
	int neg;
};
42
43 struct nv50_pc {
44 struct nv50_program *p;
45
46 /* hw resources */
47 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
48
49 /* tgsi resources */
50 struct nv50_reg *temp;
51 int temp_nr;
52 struct nv50_reg *attr;
53 int attr_nr;
54 struct nv50_reg *result;
55 int result_nr;
56 struct nv50_reg *param;
57 int param_nr;
58 struct nv50_reg *immd;
59 float *immd_buf;
60 int immd_nr;
61
62 struct nv50_reg *temp_temp[8];
63 unsigned temp_temp_nr;
64 };
65
66 static void
67 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
68 {
69 int i;
70
71 if (reg->type != P_TEMP)
72 return;
73
74 if (reg->hw >= 0) {
75 /*XXX: do this here too to catch FP temp-as-attr usage..
76 * not clean, but works */
77 if (pc->p->cfg.high_temp < (reg->hw + 1))
78 pc->p->cfg.high_temp = reg->hw + 1;
79 return;
80 }
81
82 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
83 if (!(pc->r_temp[i])) {
84 pc->r_temp[i] = reg;
85 reg->hw = i;
86 if (pc->p->cfg.high_temp < (i + 1))
87 pc->p->cfg.high_temp = i + 1;
88 return;
89 }
90 }
91
92 assert(0);
93 }
94
95 static struct nv50_reg *
96 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
97 {
98 struct nv50_reg *r;
99 int i;
100
101 if (dst && dst->type == P_TEMP && dst->hw == -1)
102 return dst;
103
104 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
105 if (!pc->r_temp[i]) {
106 r = CALLOC_STRUCT(nv50_reg);
107 r->type = P_TEMP;
108 r->index = -1;
109 r->hw = i;
110 pc->r_temp[i] = r;
111 return r;
112 }
113 }
114
115 assert(0);
116 return NULL;
117 }
118
119 static void
120 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
121 {
122 if (r->index == -1) {
123 FREE(pc->r_temp[r->hw]);
124 pc->r_temp[r->hw] = NULL;
125 }
126 }
127
128 static struct nv50_reg *
129 temp_temp(struct nv50_pc *pc)
130 {
131 if (pc->temp_temp_nr >= 8)
132 assert(0);
133
134 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
135 return pc->temp_temp[pc->temp_temp_nr++];
136 }
137
138 static void
139 kill_temp_temp(struct nv50_pc *pc)
140 {
141 int i;
142
143 for (i = 0; i < pc->temp_temp_nr; i++)
144 free_temp(pc, pc->temp_temp[i]);
145 pc->temp_temp_nr = 0;
146 }
147
148 static int
149 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
150 {
151 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
152 sizeof(float));
153 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
154 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
155 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
156 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
157
158 return pc->immd_nr++;
159 }
160
161 static struct nv50_reg *
162 alloc_immd(struct nv50_pc *pc, float f)
163 {
164 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
165 unsigned hw;
166
167 hw = ctor_immd(pc, f, 0, 0, 0);
168 r->type = P_IMMD;
169 r->hw = hw;
170 r->index = -1;
171 return r;
172 }
173
174 static void
175 emit(struct nv50_pc *pc, unsigned *inst)
176 {
177 struct nv50_program *p = pc->p;
178
179 if (inst[0] & 1) {
180 p->insns_nr += 2;
181 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
182 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
183 } else {
184 p->insns_nr += 1;
185 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
186 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
187 }
188 }
189
190 static INLINE void set_long(struct nv50_pc *, unsigned *);
191
192 static boolean
193 is_long(unsigned *inst)
194 {
195 if (inst[0] & 1)
196 return TRUE;
197 return FALSE;
198 }
199
200 static boolean
201 is_immd(unsigned *inst)
202 {
203 if (is_long(inst) && (inst[1] & 3) == 3)
204 return TRUE;
205 return FALSE;
206 }
207
208 static INLINE void
209 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
210 {
211 set_long(pc, inst);
212 inst[1] &= ~((0x1f << 7) | (0x3 << 12));
213 inst[1] |= (pred << 7) | (idx << 12);
214 }
215
216 static INLINE void
217 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
218 {
219 set_long(pc, inst);
220 inst[1] &= ~((0x3 << 4) | (1 << 6));
221 inst[1] |= (idx << 4) | (on << 6);
222 }
223
224 static INLINE void
225 set_long(struct nv50_pc *pc, unsigned *inst)
226 {
227 if (is_long(inst))
228 return;
229
230 inst[0] |= 1;
231 set_pred(pc, 0xf, 0, inst);
232 set_pred_wr(pc, 0, 0, inst);
233 }
234
235 static INLINE void
236 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
237 {
238 if (dst->type == P_RESULT) {
239 set_long(pc, inst);
240 inst[1] |= 0x00000008;
241 }
242
243 alloc_reg(pc, dst);
244 inst[0] |= (dst->hw << 2);
245 }
246
247 static INLINE void
248 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
249 {
250 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
251
252 set_long(pc, inst);
253 /*XXX: can't be predicated - bits overlap.. catch cases where both
254 * are required and avoid them. */
255 set_pred(pc, 0, 0, inst);
256 set_pred_wr(pc, 0, 0, inst);
257
258 inst[1] |= 0x00000002 | 0x00000001;
259 inst[0] |= (val & 0x3f) << 16;
260 inst[1] |= (val >> 6) << 2;
261 }
262
263 static void
264 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
265 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
266 {
267 unsigned inst[2] = { 0, 0 };
268
269 inst[0] |= 0x80000000;
270 set_dst(pc, dst, inst);
271 alloc_reg(pc, iv);
272 inst[0] |= (iv->hw << 9);
273 alloc_reg(pc, src);
274 inst[0] |= (src->hw << 16);
275 if (noperspective)
276 inst[0] |= (1 << 25);
277
278 emit(pc, inst);
279 }
280
281 static void
282 set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
283 {
284 set_long(pc, inst);
285 if (src->type == P_IMMD) {
286 inst[1] |= (NV50_CB_PMISC << 22);
287 } else {
288 if (pc->p->type == NV50_PROG_VERTEX)
289 inst[1] |= (NV50_CB_PVP << 22);
290 else
291 inst[1] |= (NV50_CB_PFP << 22);
292 }
293 }
294
295 static void
296 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
297 {
298 unsigned inst[2] = { 0, 0 };
299
300 inst[0] |= 0x10000000;
301
302 set_dst(pc, dst, inst);
303
304 if (dst->type != P_RESULT && src->type == P_IMMD) {
305 set_immd(pc, src, inst);
306 /*XXX: 32-bit, but steals part of "half" reg space - need to
307 * catch and handle this case if/when we do half-regs
308 */
309 inst[0] |= 0x00008000;
310 } else
311 if (src->type == P_IMMD || src->type == P_CONST) {
312 set_long(pc, inst);
313 set_cseg(pc, src, inst);
314 inst[0] |= (src->hw << 9);
315 inst[1] |= 0x20000000; /* src0 const? */
316 } else {
317 if (src->type == P_ATTR) {
318 set_long(pc, inst);
319 inst[1] |= 0x00200000;
320 }
321
322 alloc_reg(pc, src);
323 inst[0] |= (src->hw << 9);
324 }
325
326 /* We really should support "half" instructions here at some point,
327 * but I don't feel confident enough about them yet.
328 */
329 set_long(pc, inst);
330 if (is_long(inst) && !is_immd(inst)) {
331 inst[1] |= 0x04000000; /* 32-bit */
332 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
333 }
334
335 emit(pc, inst);
336 }
337
338 static boolean
339 check_swap_src_0_1(struct nv50_pc *pc,
340 struct nv50_reg **s0, struct nv50_reg **s1)
341 {
342 struct nv50_reg *src0 = *s0, *src1 = *s1;
343
344 if (src0->type == P_CONST) {
345 if (src1->type != P_CONST) {
346 *s0 = src1;
347 *s1 = src0;
348 return TRUE;
349 }
350 } else
351 if (src1->type == P_ATTR) {
352 if (src0->type != P_ATTR) {
353 *s0 = src1;
354 *s1 = src0;
355 return TRUE;
356 }
357 }
358
359 return FALSE;
360 }
361
362 static void
363 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
364 {
365 if (src->type == P_ATTR) {
366 set_long(pc, inst);
367 inst[1] |= 0x00200000;
368 } else
369 if (src->type == P_CONST || src->type == P_IMMD) {
370 struct nv50_reg *temp = temp_temp(pc);
371
372 emit_mov(pc, temp, src);
373 src = temp;
374 }
375
376 alloc_reg(pc, src);
377 inst[0] |= (src->hw << 9);
378 }
379
380 static void
381 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
382 {
383 if (src->type == P_ATTR) {
384 struct nv50_reg *temp = temp_temp(pc);
385
386 emit_mov(pc, temp, src);
387 src = temp;
388 } else
389 if (src->type == P_CONST || src->type == P_IMMD) {
390 set_cseg(pc, src, inst);
391 inst[0] |= 0x00800000;
392 }
393
394 alloc_reg(pc, src);
395 inst[0] |= (src->hw << 16);
396 }
397
398 static void
399 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
400 {
401 set_long(pc, inst);
402
403 if (src->type == P_ATTR) {
404 struct nv50_reg *temp = temp_temp(pc);
405
406 emit_mov(pc, temp, src);
407 src = temp;
408 } else
409 if (src->type == P_CONST || src->type == P_IMMD) {
410 set_cseg(pc, src, inst);
411 inst[0] |= 0x01000000;
412 }
413
414 alloc_reg(pc, src);
415 inst[1] |= (src->hw << 14);
416 }
417
/* Emit dst = src0 * src1.  Always uses the long instruction form. */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	set_long(pc, inst);

	/* multiplication commutes, so the operands may be reordered freely */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
434
/*
 * Emit dst = src0 + src1.  If encoding dst and src0 turned the instruction
 * into the long form, the second operand goes into the third source slot;
 * otherwise it goes into the second source slot.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
453
/*
 * Emit dst = max(src0, src1) or dst = min(src0, src1).  The sub-opcode is
 * chosen by the caller: the translator passes 4 for MAX and 5 for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
471
472 static void
473 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
474 struct nv50_reg *src1)
475 {
476 unsigned inst[2] = { 0, 0 };
477
478 inst[0] |= 0xb0000000;
479
480 set_long(pc, inst);
481 if (check_swap_src_0_1(pc, &src0, &src1))
482 inst[1] |= 0x04000000;
483 else
484 inst[1] |= 0x08000000;
485
486 set_dst(pc, dst, inst);
487 set_src_0(pc, src0, inst);
488 set_src_2(pc, src1, inst);
489
490 emit(pc, inst);
491 }
492
/* Emit dst = src0 * src1 + src2. */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	/* only the two factors may trade places */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
509
/* Emit dst = src0 * src1 - src2 (the mad opcode with a modifier bit). */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	/* only the two factors may trade places */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
528
/*
 * Emit a single-operand float operation selected by sub.  The translator
 * uses 0 for RCP, 2 for RSQ, 3 for LG2, 4 for SIN, 5 for COS and 6 for the
 * final step of EX2.  Sub-opcode 0 can stay in the short form; every other
 * one needs the long form to hold the sub-opcode field.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
546
/*
 * Emit the preparation step that has to precede the EX2 float operation
 * (see the EX2 and POW handling, which run it before emit_flop(pc, 6, ...)).
 */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	/* sub-opcode and modifier live in the second word */
	set_long(pc, inst);
	inst[1] |= (6u << 29) | 0x00004000;

	emit(pc, inst);
}
561
562 /*XXX: inaccurate results.. why? */
563 #define ALLOW_SET_SWAP 0
564
565 static void
566 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
567 struct nv50_reg *src0, struct nv50_reg *src1)
568 {
569 unsigned inst[2] = { 0, 0 };
570 #if ALLOW_SET_SWAP
571 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
572 #endif
573 struct nv50_reg *rdst;
574
575 #if ALLOW_SET_SWAP
576 assert(c_op <= 7);
577 if (check_swap_src_0_1(pc, &src0, &src1))
578 c_op = inv_cop[c_op];
579 #endif
580
581 rdst = dst;
582 if (dst->type != P_TEMP)
583 dst = alloc_temp(pc, NULL);
584
585 /* set.u32 */
586 set_long(pc, inst);
587 inst[0] |= 0xb0000000;
588 inst[1] |= (3 << 29);
589 inst[1] |= (c_op << 14);
590 /*XXX: breaks things, .u32 by default?
591 * decuda will disasm as .u16 and use .lo/.hi regs, but this
592 * doesn't seem to match what the hw actually does.
593 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
594 */
595 set_dst(pc, dst, inst);
596 set_src_0(pc, src0, inst);
597 set_src_1(pc, src1, inst);
598 emit(pc, inst);
599
600 /* cvt.f32.u32 */
601 inst[0] = 0xa0000001;
602 inst[1] = 0x64014780;
603 set_dst(pc, rdst, inst);
604 set_src_0(pc, dst, inst);
605 emit(pc, inst);
606
607 if (dst != rdst)
608 free_temp(pc, dst);
609 }
610
/* Emit dst = floor(src), implemented with the cvt opcode in integer mode. */
static void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);
	inst[1] |= (6u << 29);      /* cvt */
	inst[1] |= 0x08000000;      /* integer mode */
	inst[1] |= 0x04000000;      /* 32 bit */
	inst[1] |= (0x1 << 3) << 14; /* .rn */
	inst[1] |= (1 << 14);       /* src .f32 */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
628
629 static void
630 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
631 struct nv50_reg *v, struct nv50_reg *e)
632 {
633 struct nv50_reg *temp = alloc_temp(pc, NULL);
634
635 emit_flop(pc, 3, temp, v);
636 emit_mul(pc, temp, temp, e);
637 emit_preex2(pc, temp, temp);
638 emit_flop(pc, 6, dst, temp);
639
640 free_temp(pc, temp);
641 }
642
/* Emit dst = |src|, implemented with the cvt opcode and its .abs modifier. */
static void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);
	inst[1] |= (6u << 29);       /* cvt */
	inst[1] |= 0x04000000;       /* 32 bit */
	inst[1] |= (1 << 14);        /* src .f32 */
	inst[1] |= ((1 << 6) << 14); /* .abs */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
659
660 static struct nv50_reg *
661 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
662 {
663 switch (dst->DstRegister.File) {
664 case TGSI_FILE_TEMPORARY:
665 return &pc->temp[dst->DstRegister.Index * 4 + c];
666 case TGSI_FILE_OUTPUT:
667 return &pc->result[dst->DstRegister.Index * 4 + c];
668 case TGSI_FILE_NULL:
669 return NULL;
670 default:
671 break;
672 }
673
674 return NULL;
675 }
676
677 static struct nv50_reg *
678 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
679 {
680 struct nv50_reg *r = NULL;
681 struct nv50_reg *temp;
682 unsigned c;
683
684 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
685 switch (c) {
686 case TGSI_EXTSWIZZLE_X:
687 case TGSI_EXTSWIZZLE_Y:
688 case TGSI_EXTSWIZZLE_Z:
689 case TGSI_EXTSWIZZLE_W:
690 switch (src->SrcRegister.File) {
691 case TGSI_FILE_INPUT:
692 r = &pc->attr[src->SrcRegister.Index * 4 + c];
693 break;
694 case TGSI_FILE_TEMPORARY:
695 r = &pc->temp[src->SrcRegister.Index * 4 + c];
696 break;
697 case TGSI_FILE_CONSTANT:
698 r = &pc->param[src->SrcRegister.Index * 4 + c];
699 break;
700 case TGSI_FILE_IMMEDIATE:
701 r = &pc->immd[src->SrcRegister.Index * 4 + c];
702 break;
703 default:
704 assert(0);
705 break;
706 }
707 break;
708 case TGSI_EXTSWIZZLE_ZERO:
709 r = alloc_immd(pc, 0.0);
710 break;
711 case TGSI_EXTSWIZZLE_ONE:
712 r = alloc_immd(pc, 1.0);
713 break;
714 default:
715 assert(0);
716 break;
717 }
718
719 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
720 case TGSI_UTIL_SIGN_KEEP:
721 break;
722 case TGSI_UTIL_SIGN_CLEAR:
723 temp = temp_temp(pc);
724 emit_abs(pc, temp, r);
725 r = temp;
726 break;
727 default:
728 assert(0);
729 break;
730 }
731
732 return r;
733 }
734
735 static boolean
736 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
737 {
738 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
739 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
740 unsigned mask, sat;
741 int i, c;
742
743 NOUVEAU_ERR("insn %p\n", tok);
744
745 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
746 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
747
748 for (c = 0; c < 4; c++) {
749 if (mask & (1 << c))
750 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
751 else
752 dst[c] = NULL;
753 }
754
755 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
756 for (c = 0; c < 4; c++)
757 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
758 }
759
760 if (sat) {
761 for (c = 0; c < 4; c++) {
762 rdst[c] = dst[c];
763 dst[c] = temp_temp(pc);
764 }
765 }
766
767 switch (inst->Instruction.Opcode) {
768 case TGSI_OPCODE_ABS:
769 for (c = 0; c < 4; c++) {
770 if (!(mask & (1 << c)))
771 continue;
772 emit_abs(pc, dst[c], src[0][c]);
773 }
774 break;
775 case TGSI_OPCODE_ADD:
776 for (c = 0; c < 4; c++) {
777 if (!(mask & (1 << c)))
778 continue;
779 emit_add(pc, dst[c], src[0][c], src[1][c]);
780 }
781 break;
782 case TGSI_OPCODE_COS:
783 for (c = 0; c < 4; c++) {
784 if (!(mask & (1 << c)))
785 continue;
786 emit_flop(pc, 5, dst[c], src[0][c]);
787 }
788 break;
789 case TGSI_OPCODE_DP3:
790 temp = alloc_temp(pc, NULL);
791 emit_mul(pc, temp, src[0][0], src[1][0]);
792 emit_mad(pc, temp, src[0][1], src[1][1], temp);
793 emit_mad(pc, temp, src[0][2], src[1][2], temp);
794 for (c = 0; c < 4; c++) {
795 if (!(mask & (1 << c)))
796 continue;
797 emit_mov(pc, dst[c], temp);
798 }
799 free_temp(pc, temp);
800 break;
801 case TGSI_OPCODE_DP4:
802 temp = alloc_temp(pc, NULL);
803 emit_mul(pc, temp, src[0][0], src[1][0]);
804 emit_mad(pc, temp, src[0][1], src[1][1], temp);
805 emit_mad(pc, temp, src[0][2], src[1][2], temp);
806 emit_mad(pc, temp, src[0][3], src[1][3], temp);
807 for (c = 0; c < 4; c++) {
808 if (!(mask & (1 << c)))
809 continue;
810 emit_mov(pc, dst[c], temp);
811 }
812 free_temp(pc, temp);
813 break;
814 case TGSI_OPCODE_DPH:
815 temp = alloc_temp(pc, NULL);
816 emit_mul(pc, temp, src[0][0], src[1][0]);
817 emit_mad(pc, temp, src[0][1], src[1][1], temp);
818 emit_mad(pc, temp, src[0][2], src[1][2], temp);
819 emit_add(pc, temp, src[1][3], temp);
820 for (c = 0; c < 4; c++) {
821 if (!(mask & (1 << c)))
822 continue;
823 emit_mov(pc, dst[c], temp);
824 }
825 free_temp(pc, temp);
826 break;
827 case TGSI_OPCODE_DST:
828 {
829 struct nv50_reg *one = alloc_immd(pc, 1.0);
830 emit_mov(pc, dst[0], one);
831 emit_mul(pc, dst[1], src[0][1], src[1][1]);
832 emit_mov(pc, dst[2], src[0][2]);
833 emit_mov(pc, dst[3], src[1][3]);
834 FREE(one);
835 }
836 break;
837 case TGSI_OPCODE_EX2:
838 temp = alloc_temp(pc, NULL);
839 for (c = 0; c < 4; c++) {
840 if (!(mask & (1 << c)))
841 continue;
842 emit_preex2(pc, temp, src[0][c]);
843 emit_flop(pc, 6, dst[c], temp);
844 }
845 free_temp(pc, temp);
846 break;
847 case TGSI_OPCODE_FLR:
848 for (c = 0; c < 4; c++) {
849 if (!(mask & (1 << c)))
850 continue;
851 emit_flr(pc, dst[c], src[0][c]);
852 }
853 break;
854 case TGSI_OPCODE_FRC:
855 temp = alloc_temp(pc, NULL);
856 for (c = 0; c < 4; c++) {
857 if (!(mask & (1 << c)))
858 continue;
859 emit_flr(pc, temp, src[0][c]);
860 emit_sub(pc, dst[c], src[0][c], temp);
861 }
862 free_temp(pc, temp);
863 break;
864 case TGSI_OPCODE_LG2:
865 for (c = 0; c < 4; c++) {
866 if (!(mask & (1 << c)))
867 continue;
868 emit_flop(pc, 3, dst[c], src[0][c]);
869 }
870 break;
871 case TGSI_OPCODE_MAD:
872 for (c = 0; c < 4; c++) {
873 if (!(mask & (1 << c)))
874 continue;
875 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
876 }
877 break;
878 case TGSI_OPCODE_MAX:
879 for (c = 0; c < 4; c++) {
880 if (!(mask & (1 << c)))
881 continue;
882 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
883 }
884 break;
885 case TGSI_OPCODE_MIN:
886 for (c = 0; c < 4; c++) {
887 if (!(mask & (1 << c)))
888 continue;
889 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
890 }
891 break;
892 case TGSI_OPCODE_MOV:
893 for (c = 0; c < 4; c++) {
894 if (!(mask & (1 << c)))
895 continue;
896 emit_mov(pc, dst[c], src[0][c]);
897 }
898 break;
899 case TGSI_OPCODE_MUL:
900 for (c = 0; c < 4; c++) {
901 if (!(mask & (1 << c)))
902 continue;
903 emit_mul(pc, dst[c], src[0][c], src[1][c]);
904 }
905 break;
906 case TGSI_OPCODE_POW:
907 temp = alloc_temp(pc, NULL);
908 emit_pow(pc, temp, src[0][0], src[1][0]);
909 for (c = 0; c < 4; c++) {
910 if (!(mask & (1 << c)))
911 continue;
912 emit_mov(pc, dst[c], temp);
913 }
914 free_temp(pc, temp);
915 break;
916 case TGSI_OPCODE_RCP:
917 for (c = 0; c < 4; c++) {
918 if (!(mask & (1 << c)))
919 continue;
920 emit_flop(pc, 0, dst[c], src[0][c]);
921 }
922 break;
923 case TGSI_OPCODE_RSQ:
924 for (c = 0; c < 4; c++) {
925 if (!(mask & (1 << c)))
926 continue;
927 emit_flop(pc, 2, dst[c], src[0][c]);
928 }
929 break;
930 case TGSI_OPCODE_SGE:
931 for (c = 0; c < 4; c++) {
932 if (!(mask & (1 << c)))
933 continue;
934 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
935 }
936 break;
937 case TGSI_OPCODE_SIN:
938 for (c = 0; c < 4; c++) {
939 if (!(mask & (1 << c)))
940 continue;
941 emit_flop(pc, 4, dst[c], src[0][c]);
942 }
943 break;
944 case TGSI_OPCODE_SLT:
945 for (c = 0; c < 4; c++) {
946 if (!(mask & (1 << c)))
947 continue;
948 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
949 }
950 break;
951 case TGSI_OPCODE_SUB:
952 for (c = 0; c < 4; c++) {
953 if (!(mask & (1 << c)))
954 continue;
955 emit_sub(pc, dst[c], src[0][c], src[1][c]);
956 }
957 break;
958 case TGSI_OPCODE_XPD:
959 temp = alloc_temp(pc, NULL);
960 emit_mul(pc, temp, src[0][2], src[1][1]);
961 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
962 emit_mul(pc, temp, src[0][0], src[1][2]);
963 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
964 emit_mul(pc, temp, src[0][1], src[1][0]);
965 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
966 free_temp(pc, temp);
967 break;
968 case TGSI_OPCODE_END:
969 break;
970 default:
971 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
972 return FALSE;
973 }
974
975 if (sat) {
976 for (c = 0; c < 4; c++) {
977 unsigned inst[2] = { 0, 0 };
978
979 if (!(mask & (1 << c)))
980 continue;
981
982 inst[0] = 0xa0000000; /* cvt */
983 set_long(pc, inst);
984 inst[1] |= (6 << 29); /* cvt */
985 inst[1] |= 0x04000000; /* 32 bit */
986 inst[1] |= (1 << 14); /* src .f32 */
987 inst[1] |= ((1 << 5) << 14); /* .sat */
988 set_dst(pc, rdst[c], inst);
989 set_src_0(pc, dst[c], inst);
990 emit(pc, inst);
991 }
992 }
993
994 kill_temp_temp(pc);
995 return TRUE;
996 }
997
998 static boolean
999 nv50_program_tx_prep(struct nv50_pc *pc)
1000 {
1001 struct tgsi_parse_context p;
1002 boolean ret = FALSE;
1003 unsigned i, c;
1004
1005 tgsi_parse_init(&p, pc->p->pipe.tokens);
1006 while (!tgsi_parse_end_of_tokens(&p)) {
1007 const union tgsi_full_token *tok = &p.FullToken;
1008
1009 tgsi_parse_token(&p);
1010 switch (tok->Token.Type) {
1011 case TGSI_TOKEN_TYPE_IMMEDIATE:
1012 {
1013 const struct tgsi_full_immediate *imm =
1014 &p.FullToken.FullImmediate;
1015
1016 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1017 imm->u.ImmediateFloat32[1].Float,
1018 imm->u.ImmediateFloat32[2].Float,
1019 imm->u.ImmediateFloat32[3].Float);
1020 }
1021 break;
1022 case TGSI_TOKEN_TYPE_DECLARATION:
1023 {
1024 const struct tgsi_full_declaration *d;
1025 unsigned last;
1026
1027 d = &p.FullToken.FullDeclaration;
1028 last = d->u.DeclarationRange.Last;
1029
1030 switch (d->Declaration.File) {
1031 case TGSI_FILE_TEMPORARY:
1032 if (pc->temp_nr < (last + 1))
1033 pc->temp_nr = last + 1;
1034 break;
1035 case TGSI_FILE_OUTPUT:
1036 if (pc->result_nr < (last + 1))
1037 pc->result_nr = last + 1;
1038 break;
1039 case TGSI_FILE_INPUT:
1040 if (pc->attr_nr < (last + 1))
1041 pc->attr_nr = last + 1;
1042 break;
1043 case TGSI_FILE_CONSTANT:
1044 if (pc->param_nr < (last + 1))
1045 pc->param_nr = last + 1;
1046 break;
1047 default:
1048 NOUVEAU_ERR("bad decl file %d\n",
1049 d->Declaration.File);
1050 goto out_err;
1051 }
1052 }
1053 break;
1054 case TGSI_TOKEN_TYPE_INSTRUCTION:
1055 break;
1056 default:
1057 break;
1058 }
1059 }
1060
1061 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1062 if (pc->temp_nr) {
1063 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1064 if (!pc->temp)
1065 goto out_err;
1066
1067 for (i = 0; i < pc->temp_nr; i++) {
1068 for (c = 0; c < 4; c++) {
1069 pc->temp[i*4+c].type = P_TEMP;
1070 pc->temp[i*4+c].hw = -1;
1071 pc->temp[i*4+c].index = i;
1072 }
1073 }
1074 }
1075
1076 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1077 if (pc->attr_nr) {
1078 struct nv50_reg *iv = NULL, *tmp = NULL;
1079 int aid = 0;
1080
1081 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1082 if (!pc->attr)
1083 goto out_err;
1084
1085 if (pc->p->type == NV50_PROG_FRAGMENT) {
1086 iv = alloc_temp(pc, NULL);
1087 aid++;
1088 }
1089
1090 for (i = 0; i < pc->attr_nr; i++) {
1091 struct nv50_reg *a = &pc->attr[i*4];
1092
1093 for (c = 0; c < 4; c++) {
1094 if (pc->p->type == NV50_PROG_FRAGMENT) {
1095 struct nv50_reg *at =
1096 alloc_temp(pc, NULL);
1097 pc->attr[i*4+c].type = at->type;
1098 pc->attr[i*4+c].hw = at->hw;
1099 pc->attr[i*4+c].index = at->index;
1100 } else {
1101 pc->p->cfg.vp.attr[aid/32] |=
1102 (1 << (aid % 32));
1103 pc->attr[i*4+c].type = P_ATTR;
1104 pc->attr[i*4+c].hw = aid++;
1105 pc->attr[i*4+c].index = i;
1106 }
1107 }
1108
1109 if (pc->p->type != NV50_PROG_FRAGMENT)
1110 continue;
1111
1112 emit_interp(pc, iv, iv, iv, FALSE);
1113 tmp = alloc_temp(pc, NULL);
1114 {
1115 unsigned inst[2] = { 0, 0 };
1116 inst[0] = 0x90000000;
1117 inst[0] |= (tmp->hw << 2);
1118 emit(pc, inst);
1119 }
1120 emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1121 emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1122 emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1123 emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1124 free_temp(pc, tmp);
1125 }
1126
1127 if (iv)
1128 free_temp(pc, iv);
1129 }
1130
1131 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1132 if (pc->result_nr) {
1133 int rid = 0;
1134
1135 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1136 if (!pc->result)
1137 goto out_err;
1138
1139 for (i = 0; i < pc->result_nr; i++) {
1140 for (c = 0; c < 4; c++) {
1141 if (pc->p->type == NV50_PROG_FRAGMENT)
1142 pc->result[i*4+c].type = P_TEMP;
1143 else
1144 pc->result[i*4+c].type = P_RESULT;
1145 pc->result[i*4+c].hw = rid++;
1146 pc->result[i*4+c].index = i;
1147 }
1148 }
1149 }
1150
1151 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1152 if (pc->param_nr) {
1153 int rid = 0;
1154
1155 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1156 if (!pc->param)
1157 goto out_err;
1158
1159 for (i = 0; i < pc->param_nr; i++) {
1160 for (c = 0; c < 4; c++) {
1161 pc->param[i*4+c].type = P_CONST;
1162 pc->param[i*4+c].hw = rid++;
1163 pc->param[i*4+c].index = i;
1164 }
1165 }
1166 }
1167
1168 if (pc->immd_nr) {
1169 int rid = 0;
1170
1171 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1172 if (!pc->immd)
1173 goto out_err;
1174
1175 for (i = 0; i < pc->immd_nr; i++) {
1176 for (c = 0; c < 4; c++) {
1177 pc->immd[i*4+c].type = P_IMMD;
1178 pc->immd[i*4+c].hw = rid++;
1179 pc->immd[i*4+c].index = i;
1180 }
1181 }
1182 }
1183
1184 ret = TRUE;
1185 out_err:
1186 tgsi_parse_free(&p);
1187 return ret;
1188 }
1189
1190 static boolean
1191 nv50_program_tx(struct nv50_program *p)
1192 {
1193 struct tgsi_parse_context parse;
1194 struct nv50_pc *pc;
1195 boolean ret;
1196
1197 pc = CALLOC_STRUCT(nv50_pc);
1198 if (!pc)
1199 return FALSE;
1200 pc->p = p;
1201 pc->p->cfg.high_temp = 4;
1202
1203 ret = nv50_program_tx_prep(pc);
1204 if (ret == FALSE)
1205 goto out_cleanup;
1206
1207 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1208 while (!tgsi_parse_end_of_tokens(&parse)) {
1209 const union tgsi_full_token *tok = &parse.FullToken;
1210
1211 tgsi_parse_token(&parse);
1212
1213 switch (tok->Token.Type) {
1214 case TGSI_TOKEN_TYPE_INSTRUCTION:
1215 ret = nv50_program_tx_insn(pc, tok);
1216 if (ret == FALSE)
1217 goto out_err;
1218 break;
1219 default:
1220 break;
1221 }
1222 }
1223
1224 p->immd_nr = pc->immd_nr * 4;
1225 p->immd = pc->immd_buf;
1226
1227 out_err:
1228 tgsi_parse_free(&parse);
1229
1230 out_cleanup:
1231 return ret;
1232 }
1233
1234 static void
1235 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1236 {
1237 int i;
1238
1239 if (nv50_program_tx(p) == FALSE)
1240 assert(0);
1241 /* *not* sufficient, it's fine if last inst is long and
1242 * NOT immd - otherwise it's fucked fucked fucked */
1243 p->insns[p->insns_nr - 1] |= 0x00000001;
1244
1245 if (p->type == NV50_PROG_VERTEX) {
1246 for (i = 0; i < p->insns_nr; i++)
1247 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1248 } else {
1249 for (i = 0; i < p->insns_nr; i++)
1250 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1251 }
1252
1253 p->translated = TRUE;
1254 }
1255
1256 static void
1257 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1258 {
1259 int i;
1260
1261 for (i = 0; i < p->immd_nr; i++) {
1262 BEGIN_RING(tesla, 0x0f00, 2);
1263 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8));
1264 OUT_RING (fui(p->immd[i]));
1265 }
1266 }
1267
1268 static void
1269 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1270 {
1271 struct pipe_winsys *ws = nv50->pipe.winsys;
1272 void *map;
1273
1274 if (!p->buffer)
1275 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1276 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1277 memcpy(map, p->insns, p->insns_nr * 4);
1278 ws->buffer_unmap(ws, p->buffer);
1279 }
1280
1281 void
1282 nv50_vertprog_validate(struct nv50_context *nv50)
1283 {
1284 struct nouveau_grobj *tesla = nv50->screen->tesla;
1285 struct nv50_program *p = nv50->vertprog;
1286 struct nouveau_stateobj *so;
1287
1288 if (!p->translated) {
1289 nv50_program_validate(nv50, p);
1290 if (!p->translated)
1291 assert(0);
1292 }
1293
1294 nv50_program_validate_data(nv50, p);
1295 nv50_program_validate_code(nv50, p);
1296
1297 so = so_new(11, 2);
1298 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1299 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1300 NOUVEAU_BO_HIGH, 0, 0);
1301 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1302 NOUVEAU_BO_LOW, 0, 0);
1303 so_method(so, tesla, 0x1650, 2);
1304 so_data (so, p->cfg.vp.attr[0]);
1305 so_data (so, p->cfg.vp.attr[1]);
1306 so_method(so, tesla, 0x16ac, 2);
1307 so_data (so, 8);
1308 so_data (so, p->cfg.high_temp);
1309 so_method(so, tesla, 0x140c, 1);
1310 so_data (so, 0); /* program start offset */
1311 so_emit(nv50->screen->nvws, so);
1312 so_ref(NULL, &so);
1313 }
1314
1315 void
1316 nv50_fragprog_validate(struct nv50_context *nv50)
1317 {
1318 struct nouveau_grobj *tesla = nv50->screen->tesla;
1319 struct nv50_program *p = nv50->fragprog;
1320 struct nouveau_stateobj *so;
1321
1322 if (!p->translated) {
1323 nv50_program_validate(nv50, p);
1324 if (!p->translated)
1325 assert(0);
1326 }
1327
1328 nv50_program_validate_data(nv50, p);
1329 nv50_program_validate_code(nv50, p);
1330
1331 so = so_new(7, 2);
1332 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1333 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1334 NOUVEAU_BO_HIGH, 0, 0);
1335 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1336 NOUVEAU_BO_LOW, 0, 0);
1337 so_method(so, tesla, 0x198c, 1);
1338 so_data (so, p->cfg.high_temp);
1339 so_method(so, tesla, 0x1414, 1);
1340 so_data (so, 0); /* program start offset */
1341 so_emit(nv50->screen->nvws, so);
1342 so_ref(NULL, &so);
1343 }
1344
1345 void
1346 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1347 {
1348 struct pipe_winsys *ws = nv50->pipe.winsys;
1349
1350 if (p->insns_nr) {
1351 if (p->insns)
1352 FREE(p->insns);
1353 p->insns_nr = 0;
1354 }
1355
1356 if (p->buffer)
1357 pipe_buffer_reference(ws, &p->buffer, NULL);
1358
1359 p->translated = 0;
1360 }
1361