nv50: SGE/SLT
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11 #include "nv50_state.h"
12
/* Number of hardware temporary slots tracked by the register allocator;
 * this is the size of nv50_pc::r_temp and the bound of its search loops. */
#define NV50_SU_MAX_TEMP 64
14
15 /* ABS
16 * ARL
17 * DST - const(1.0)
18 * FLR
19 * FRC
20 * LIT
21 * POW
22 * SWZ
23 *
24 * MSB - Like MAD, but MUL+SUB
25 * - Fuck it off, introduce a way to negate args for ops that
26 * support it.
27 *
28 * Need ability to specify driver IMMD values, like nv40 constant()
29 *
30 * Look into inlining IMMD for ops other than MOV
31 */
/* A single scalar register component as handled by the translator. */
struct nv50_reg {
	/* Register file the component lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* Index of the owning TGSI register; -1 marks a scratch register
	 * created by alloc_temp(), which free_temp() is allowed to release. */
	int index;

	/* Hardware slot; -1 while a P_TEMP has not been assigned one yet. */
	int hw;
	/* Not referenced by any code in this file yet. */
	int neg;
};
45
46 struct nv50_pc {
47 struct nv50_program *p;
48
49 /* hw resources */
50 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
51
52 /* tgsi resources */
53 struct nv50_reg *temp;
54 int temp_nr;
55 struct nv50_reg *attr;
56 int attr_nr;
57 struct nv50_reg *result;
58 int result_nr;
59 struct nv50_reg *param;
60 int param_nr;
61 struct nv50_reg *immd;
62 float *immd_buf;
63 int immd_nr;
64
65 struct nv50_reg *temp_temp[8];
66 unsigned temp_temp_nr;
67 };
68
69 static void
70 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
71 {
72 int i;
73
74 if (reg->type != P_TEMP)
75 return;
76
77 if (reg->hw >= 0) {
78 /*XXX: do this here too to catch FP temp-as-attr usage..
79 * not clean, but works */
80 if (pc->p->cfg.high_temp < (reg->hw + 1))
81 pc->p->cfg.high_temp = reg->hw + 1;
82 return;
83 }
84
85 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
86 if (!(pc->r_temp[i])) {
87 pc->r_temp[i] = reg;
88 reg->hw = i;
89 if (pc->p->cfg.high_temp < (i + 1))
90 pc->p->cfg.high_temp = i + 1;
91 return;
92 }
93 }
94
95 assert(0);
96 }
97
98 static struct nv50_reg *
99 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
100 {
101 struct nv50_reg *r;
102 int i;
103
104 if (dst && dst->type == P_TEMP && dst->hw == -1)
105 return dst;
106
107 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
108 if (!pc->r_temp[i]) {
109 r = CALLOC_STRUCT(nv50_reg);
110 r->type = P_TEMP;
111 r->index = -1;
112 r->hw = i;
113 pc->r_temp[i] = r;
114 return r;
115 }
116 }
117
118 assert(0);
119 return NULL;
120 }
121
122 static void
123 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
124 {
125 if (r->index == -1) {
126 FREE(pc->r_temp[r->hw]);
127 pc->r_temp[r->hw] = NULL;
128 }
129 }
130
131 static struct nv50_reg *
132 temp_temp(struct nv50_pc *pc)
133 {
134 if (pc->temp_temp_nr >= 8)
135 assert(0);
136
137 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
138 return pc->temp_temp[pc->temp_temp_nr++];
139 }
140
141 static void
142 kill_temp_temp(struct nv50_pc *pc)
143 {
144 int i;
145
146 for (i = 0; i < pc->temp_temp_nr; i++)
147 free_temp(pc, pc->temp_temp[i]);
148 pc->temp_temp_nr = 0;
149 }
150
151 static struct nv50_reg *
152 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
153 {
154 switch (dst->DstRegister.File) {
155 case TGSI_FILE_TEMPORARY:
156 return &pc->temp[dst->DstRegister.Index * 4 + c];
157 case TGSI_FILE_OUTPUT:
158 return &pc->result[dst->DstRegister.Index * 4 + c];
159 case TGSI_FILE_NULL:
160 return NULL;
161 default:
162 break;
163 }
164
165 return NULL;
166 }
167
168 static struct nv50_reg *
169 tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src)
170 {
171 /* Handle swizzling */
172 switch (c) {
173 case 0: c = src->SrcRegister.SwizzleX; break;
174 case 1: c = src->SrcRegister.SwizzleY; break;
175 case 2: c = src->SrcRegister.SwizzleZ; break;
176 case 3: c = src->SrcRegister.SwizzleW; break;
177 default:
178 assert(0);
179 }
180
181 switch (src->SrcRegister.File) {
182 case TGSI_FILE_INPUT:
183 return &pc->attr[src->SrcRegister.Index * 4 + c];
184 case TGSI_FILE_TEMPORARY:
185 return &pc->temp[src->SrcRegister.Index * 4 + c];
186 case TGSI_FILE_CONSTANT:
187 return &pc->param[src->SrcRegister.Index * 4 + c];
188 case TGSI_FILE_IMMEDIATE:
189 return &pc->immd[src->SrcRegister.Index * 4 + c];
190 default:
191 break;
192 }
193
194 return NULL;
195 }
196
197 static void
198 emit(struct nv50_pc *pc, unsigned *inst)
199 {
200 struct nv50_program *p = pc->p;
201
202 if (inst[0] & 1) {
203 p->insns_nr += 2;
204 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
205 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
206 } else {
207 p->insns_nr += 1;
208 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
209 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
210 }
211 }
212
213 static INLINE void set_long(struct nv50_pc *, unsigned *);
214
215 static boolean
216 is_long(unsigned *inst)
217 {
218 if (inst[0] & 1)
219 return TRUE;
220 return FALSE;
221 }
222
223 static boolean
224 is_immd(unsigned *inst)
225 {
226 if (is_long(inst) && (inst[1] & 3) == 3)
227 return TRUE;
228 return FALSE;
229 }
230
231 static INLINE void
232 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
233 {
234 set_long(pc, inst);
235 inst[1] &= ~((0x1f << 7) | (0x3 << 12));
236 inst[1] |= (pred << 7) | (idx << 12);
237 }
238
239 static INLINE void
240 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
241 {
242 set_long(pc, inst);
243 inst[1] &= ~((0x3 << 4) | (1 << 6));
244 inst[1] |= (idx << 4) | (on << 6);
245 }
246
247 static INLINE void
248 set_long(struct nv50_pc *pc, unsigned *inst)
249 {
250 if (is_long(inst))
251 return;
252
253 inst[0] |= 1;
254 set_pred(pc, 0xf, 0, inst);
255 set_pred_wr(pc, 0, 0, inst);
256 }
257
258 static INLINE void
259 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
260 {
261 if (dst->type == P_RESULT) {
262 set_long(pc, inst);
263 inst[1] |= 0x00000008;
264 }
265
266 alloc_reg(pc, dst);
267 inst[0] |= (dst->hw << 2);
268 }
269
270 static INLINE void
271 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
272 {
273 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
274
275 set_long(pc, inst);
276 /*XXX: can't be predicated - bits overlap.. catch cases where both
277 * are required and avoid them. */
278 set_pred(pc, 0, 0, inst);
279 set_pred_wr(pc, 0, 0, inst);
280
281 inst[1] |= 0x00000002 | 0x00000001;
282 inst[0] |= (val & 0x3f) << 16;
283 inst[1] |= (val >> 6) << 2;
284 }
285
286 static void
287 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
288 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
289 {
290 unsigned inst[2] = { 0, 0 };
291
292 inst[0] |= 0x80000000;
293 set_dst(pc, dst, inst);
294 alloc_reg(pc, iv);
295 inst[0] |= (iv->hw << 9);
296 alloc_reg(pc, src);
297 inst[0] |= (src->hw << 16);
298 if (noperspective)
299 inst[0] |= (1 << 25);
300
301 emit(pc, inst);
302 }
303
304 static void
305 set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
306 {
307 set_long(pc, inst);
308 if (src->type == P_IMMD) {
309 inst[1] |= (NV50_CB_PMISC << 22);
310 } else {
311 if (pc->p->type == NV50_PROG_VERTEX)
312 inst[1] |= (NV50_CB_PVP << 22);
313 else
314 inst[1] |= (NV50_CB_PFP << 22);
315 }
316 }
317
318 static void
319 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
320 {
321 unsigned inst[2] = { 0, 0 };
322
323 inst[0] |= 0x10000000;
324
325 set_dst(pc, dst, inst);
326
327 if (dst->type != P_RESULT && src->type == P_IMMD) {
328 set_immd(pc, src, inst);
329 /*XXX: 32-bit, but steals part of "half" reg space - need to
330 * catch and handle this case if/when we do half-regs
331 */
332 inst[0] |= 0x00008000;
333 } else
334 if (src->type == P_IMMD || src->type == P_CONST) {
335 set_long(pc, inst);
336 set_cseg(pc, src, inst);
337 inst[0] |= (src->hw << 9);
338 inst[1] |= 0x20000000; /* src0 const? */
339 } else {
340 if (src->type == P_ATTR) {
341 set_long(pc, inst);
342 inst[1] |= 0x00200000;
343 }
344
345 alloc_reg(pc, src);
346 inst[0] |= (src->hw << 9);
347 }
348
349 /* We really should support "half" instructions here at some point,
350 * but I don't feel confident enough about them yet.
351 */
352 set_long(pc, inst);
353 if (is_long(inst) && !is_immd(inst)) {
354 inst[1] |= 0x04000000; /* 32-bit */
355 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
356 }
357
358 emit(pc, inst);
359 }
360
361 static boolean
362 check_swap_src_0_1(struct nv50_pc *pc,
363 struct nv50_reg **s0, struct nv50_reg **s1)
364 {
365 struct nv50_reg *src0 = *s0, *src1 = *s1;
366
367 if (src0->type == P_CONST) {
368 if (src1->type != P_CONST) {
369 *s0 = src1;
370 *s1 = src0;
371 return TRUE;
372 }
373 } else
374 if (src1->type == P_ATTR) {
375 if (src0->type != P_ATTR) {
376 *s0 = src1;
377 *s1 = src0;
378 return TRUE;
379 }
380 }
381
382 return FALSE;
383 }
384
385 static void
386 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
387 {
388 if (src->type == P_ATTR) {
389 set_long(pc, inst);
390 inst[1] |= 0x00200000;
391 } else
392 if (src->type == P_CONST || src->type == P_IMMD) {
393 struct nv50_reg *temp = temp_temp(pc);
394
395 emit_mov(pc, temp, src);
396 src = temp;
397 }
398
399 alloc_reg(pc, src);
400 inst[0] |= (src->hw << 9);
401 }
402
403 static void
404 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
405 {
406 if (src->type == P_ATTR) {
407 struct nv50_reg *temp = temp_temp(pc);
408
409 emit_mov(pc, temp, src);
410 src = temp;
411 } else
412 if (src->type == P_CONST || src->type == P_IMMD) {
413 set_cseg(pc, src, inst);
414 inst[0] |= 0x00800000;
415 }
416
417 alloc_reg(pc, src);
418 inst[0] |= (src->hw << 16);
419 }
420
421 static void
422 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
423 {
424 set_long(pc, inst);
425
426 if (src->type == P_ATTR) {
427 struct nv50_reg *temp = temp_temp(pc);
428
429 emit_mov(pc, temp, src);
430 src = temp;
431 } else
432 if (src->type == P_CONST || src->type == P_IMMD) {
433 set_cseg(pc, src, inst);
434 inst[0] |= 0x01000000;
435 }
436
437 alloc_reg(pc, src);
438 inst[1] |= (src->hw << 14);
439 }
440
/* Emit dst = src0 * src1 (opcode bits 0xc0000000).  The operands may be
 * exchanged to suit the operand slots.
 */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
456
/* Emit dst = src0 + src1 (opcode bits 0xb0000000).  The second operand
 * is encoded in the src2 slot when the instruction has become long by the
 * time it is encoded, and in the src1 slot otherwise.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
475
/* Emit a min/max style operation: a long instruction with opcode bits
 * 0xb0000000 and sub stored at bit 29 of word 1.  In this file MAX passes
 * sub == 4 and MIN passes sub == 5.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0, 0 };

	set_long(pc, inst);
	inst[0] |= 0xb0000000;
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
493
494 static void
495 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
496 struct nv50_reg *src1)
497 {
498 unsigned inst[2] = { 0, 0 };
499
500 inst[0] |= 0xb0000000;
501
502 set_long(pc, inst);
503 if (check_swap_src_0_1(pc, &src0, &src1))
504 inst[1] |= 0x04000000;
505 else
506 inst[1] |= 0x08000000;
507
508 set_dst(pc, dst, inst);
509 set_src_0(pc, src0, inst);
510 set_src_2(pc, src1, inst);
511
512 emit(pc, inst);
513 }
514
/* Emit dst = src0 * src1 + src2 (opcode bits 0xe0000000).  The two
 * multiplicands may be exchanged to suit the operand slots.
 */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
531
/* Emit dst = src0 * src1 - src2: the mad opcode (0xe0000000) in long form
 * with bit 0x08000000 of word 1 set.
 */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
550
/* Emit a single-operand function (opcode bits 0x90000000).  A non-zero
 * sub selects the function and forces the long form, where it is stored
 * at bit 29 of word 1.  Callers in this file use 0 for RCP, 2 for RSQ,
 * 3 for LG2, 4 for SIN, 5 for COS and 6 for EX2.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
568
/* Emit the instruction that the EX2 translation runs on its operand
 * before the EX2 flop itself: opcode bits 0xb0000000 in long form with
 * (6 << 29) | 0x00004000 in word 1.
 */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	set_long(pc, inst);
	inst[1] |= (6 << 29) | 0x00004000;

	emit(pc, inst);
}
583 /*XXX: inaccurate results.. why? */
584 #define ALLOW_SET_SWAP 0
585
586 static void
587 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
588 struct nv50_reg *src0, struct nv50_reg *src1)
589 {
590 unsigned inst[2] = { 0, 0 };
591 #if ALLOW_SET_SWAP
592 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
593 #endif
594 struct nv50_reg *rdst;
595
596 #if ALLOW_SET_SWAP
597 assert(c_op <= 7);
598 if (check_swap_src_0_1(pc, &src0, &src1))
599 c_op = inv_cop[c_op];
600 #endif
601
602 rdst = dst;
603 if (dst->type != P_TEMP)
604 dst = alloc_temp(pc, NULL);
605
606 /* set.u32 */
607 set_long(pc, inst);
608 inst[0] |= 0xb0000000;
609 inst[1] |= (3 << 29);
610 inst[1] |= (c_op << 14);
611 /*XXX: breaks things, .u32 by default?
612 * decuda will disasm as .u16 and use .lo/.hi regs, but this
613 * doesn't seem to match what the hw actually does.
614 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
615 */
616 set_dst(pc, dst, inst);
617 set_src_0(pc, src0, inst);
618 set_src_1(pc, src1, inst);
619 emit(pc, inst);
620
621 /* cvt.f32.u32 */
622 inst[0] = 0xa0000001;
623 inst[1] = 0x64014780;
624 set_dst(pc, rdst, inst);
625 set_src_0(pc, dst, inst);
626 emit(pc, inst);
627
628 if (dst != rdst)
629 free_temp(pc, dst);
630 }
631
632 static boolean
633 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
634 {
635 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
636 struct nv50_reg *dst[4], *src[3][4], *temp;
637 unsigned mask;
638 int i, c;
639
640 NOUVEAU_ERR("insn %p\n", tok);
641
642 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
643
644 for (c = 0; c < 4; c++) {
645 if (mask & (1 << c))
646 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
647 else
648 dst[c] = NULL;
649 }
650
651 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
652 for (c = 0; c < 4; c++)
653 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
654 }
655
656 switch (inst->Instruction.Opcode) {
657 case TGSI_OPCODE_ADD:
658 for (c = 0; c < 4; c++) {
659 if (!(mask & (1 << c)))
660 continue;
661 emit_add(pc, dst[c], src[0][c], src[1][c]);
662 }
663 break;
664 case TGSI_OPCODE_COS:
665 for (c = 0; c < 4; c++) {
666 if (!(mask & (1 << c)))
667 continue;
668 emit_flop(pc, 5, dst[c], src[0][c]);
669 }
670 break;
671 case TGSI_OPCODE_DP3:
672 temp = alloc_temp(pc, NULL);
673 emit_mul(pc, temp, src[0][0], src[1][0]);
674 emit_mad(pc, temp, src[0][1], src[1][1], temp);
675 emit_mad(pc, temp, src[0][2], src[1][2], temp);
676 for (c = 0; c < 4; c++) {
677 if (!(mask & (1 << c)))
678 continue;
679 emit_mov(pc, dst[c], temp);
680 }
681 free_temp(pc, temp);
682 break;
683 case TGSI_OPCODE_DP4:
684 temp = alloc_temp(pc, NULL);
685 emit_mul(pc, temp, src[0][0], src[1][0]);
686 emit_mad(pc, temp, src[0][1], src[1][1], temp);
687 emit_mad(pc, temp, src[0][2], src[1][2], temp);
688 emit_mad(pc, temp, src[0][3], src[1][3], temp);
689 for (c = 0; c < 4; c++) {
690 if (!(mask & (1 << c)))
691 continue;
692 emit_mov(pc, dst[c], temp);
693 }
694 free_temp(pc, temp);
695 break;
696 case TGSI_OPCODE_DPH:
697 temp = alloc_temp(pc, NULL);
698 emit_mul(pc, temp, src[0][0], src[1][0]);
699 emit_mad(pc, temp, src[0][1], src[1][1], temp);
700 emit_mad(pc, temp, src[0][2], src[1][2], temp);
701 emit_add(pc, temp, src[1][3], temp);
702 for (c = 0; c < 4; c++) {
703 if (!(mask & (1 << c)))
704 continue;
705 emit_mov(pc, dst[c], temp);
706 }
707 free_temp(pc, temp);
708 break;
709 case TGSI_OPCODE_EX2:
710 temp = alloc_temp(pc, NULL);
711 for (c = 0; c < 4; c++) {
712 if (!(mask & (1 << c)))
713 continue;
714 emit_preex2(pc, temp, src[0][c]);
715 emit_flop(pc, 6, dst[c], temp);
716 }
717 free_temp(pc, temp);
718 break;
719 case TGSI_OPCODE_LG2:
720 for (c = 0; c < 4; c++) {
721 if (!(mask & (1 << c)))
722 continue;
723 emit_flop(pc, 3, dst[c], src[0][c]);
724 }
725 break;
726 case TGSI_OPCODE_MAD:
727 for (c = 0; c < 4; c++) {
728 if (!(mask & (1 << c)))
729 continue;
730 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
731 }
732 break;
733 case TGSI_OPCODE_MAX:
734 for (c = 0; c < 4; c++) {
735 if (!(mask & (1 << c)))
736 continue;
737 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
738 }
739 break;
740 case TGSI_OPCODE_MIN:
741 for (c = 0; c < 4; c++) {
742 if (!(mask & (1 << c)))
743 continue;
744 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
745 }
746 break;
747 case TGSI_OPCODE_MOV:
748 for (c = 0; c < 4; c++) {
749 if (!(mask & (1 << c)))
750 continue;
751 emit_mov(pc, dst[c], src[0][c]);
752 }
753 break;
754 case TGSI_OPCODE_MUL:
755 for (c = 0; c < 4; c++) {
756 if (!(mask & (1 << c)))
757 continue;
758 emit_mul(pc, dst[c], src[0][c], src[1][c]);
759 }
760 break;
761 case TGSI_OPCODE_RCP:
762 for (c = 0; c < 4; c++) {
763 if (!(mask & (1 << c)))
764 continue;
765 emit_flop(pc, 0, dst[c], src[0][c]);
766 }
767 break;
768 case TGSI_OPCODE_RSQ:
769 for (c = 0; c < 4; c++) {
770 if (!(mask & (1 << c)))
771 continue;
772 emit_flop(pc, 2, dst[c], src[0][c]);
773 }
774 break;
775 case TGSI_OPCODE_SGE:
776 for (c = 0; c < 4; c++) {
777 if (!(mask & (1 << c)))
778 continue;
779 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
780 }
781 break;
782 case TGSI_OPCODE_SIN:
783 for (c = 0; c < 4; c++) {
784 if (!(mask & (1 << c)))
785 continue;
786 emit_flop(pc, 4, dst[c], src[0][c]);
787 }
788 break;
789 case TGSI_OPCODE_SLT:
790 for (c = 0; c < 4; c++) {
791 if (!(mask & (1 << c)))
792 continue;
793 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
794 }
795 break;
796 case TGSI_OPCODE_SUB:
797 for (c = 0; c < 4; c++) {
798 if (!(mask & (1 << c)))
799 continue;
800 emit_sub(pc, dst[c], src[0][c], src[1][c]);
801 }
802 break;
803 case TGSI_OPCODE_XPD:
804 temp = alloc_temp(pc, NULL);
805 emit_mul(pc, temp, src[0][2], src[1][1]);
806 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
807 emit_mul(pc, temp, src[0][0], src[1][2]);
808 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
809 emit_mul(pc, temp, src[0][1], src[1][0]);
810 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
811 free_temp(pc, temp);
812 break;
813 case TGSI_OPCODE_END:
814 break;
815 default:
816 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
817 return FALSE;
818 }
819
820 kill_temp_temp(pc);
821 return TRUE;
822 }
823
824 static boolean
825 nv50_program_tx_prep(struct nv50_pc *pc)
826 {
827 struct tgsi_parse_context p;
828 boolean ret = FALSE;
829 unsigned i, c;
830
831 tgsi_parse_init(&p, pc->p->pipe.tokens);
832 while (!tgsi_parse_end_of_tokens(&p)) {
833 const union tgsi_full_token *tok = &p.FullToken;
834
835 tgsi_parse_token(&p);
836 switch (tok->Token.Type) {
837 case TGSI_TOKEN_TYPE_IMMEDIATE:
838 {
839 const struct tgsi_full_immediate *imm =
840 &p.FullToken.FullImmediate;
841
842 pc->immd_nr++;
843 pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr *
844 sizeof(float));
845 pc->immd_buf[4 * (pc->immd_nr - 1) + 0] =
846 imm->u.ImmediateFloat32[0].Float;
847 pc->immd_buf[4 * (pc->immd_nr - 1) + 1] =
848 imm->u.ImmediateFloat32[1].Float;
849 pc->immd_buf[4 * (pc->immd_nr - 1) + 2] =
850 imm->u.ImmediateFloat32[2].Float;
851 pc->immd_buf[4 * (pc->immd_nr - 1) + 3] =
852 imm->u.ImmediateFloat32[3].Float;
853 }
854 break;
855 case TGSI_TOKEN_TYPE_DECLARATION:
856 {
857 const struct tgsi_full_declaration *d;
858 unsigned last;
859
860 d = &p.FullToken.FullDeclaration;
861 last = d->u.DeclarationRange.Last;
862
863 switch (d->Declaration.File) {
864 case TGSI_FILE_TEMPORARY:
865 if (pc->temp_nr < (last + 1))
866 pc->temp_nr = last + 1;
867 break;
868 case TGSI_FILE_OUTPUT:
869 if (pc->result_nr < (last + 1))
870 pc->result_nr = last + 1;
871 break;
872 case TGSI_FILE_INPUT:
873 if (pc->attr_nr < (last + 1))
874 pc->attr_nr = last + 1;
875 break;
876 case TGSI_FILE_CONSTANT:
877 if (pc->param_nr < (last + 1))
878 pc->param_nr = last + 1;
879 break;
880 default:
881 NOUVEAU_ERR("bad decl file %d\n",
882 d->Declaration.File);
883 goto out_err;
884 }
885 }
886 break;
887 case TGSI_TOKEN_TYPE_INSTRUCTION:
888 break;
889 default:
890 break;
891 }
892 }
893
894 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
895 if (pc->temp_nr) {
896 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
897 if (!pc->temp)
898 goto out_err;
899
900 for (i = 0; i < pc->temp_nr; i++) {
901 for (c = 0; c < 4; c++) {
902 pc->temp[i*4+c].type = P_TEMP;
903 pc->temp[i*4+c].hw = -1;
904 pc->temp[i*4+c].index = i;
905 }
906 }
907 }
908
909 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
910 if (pc->attr_nr) {
911 struct nv50_reg *iv = NULL, *tmp = NULL;
912 int aid = 0;
913
914 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
915 if (!pc->attr)
916 goto out_err;
917
918 if (pc->p->type == NV50_PROG_FRAGMENT) {
919 iv = alloc_temp(pc, NULL);
920 aid++;
921 }
922
923 for (i = 0; i < pc->attr_nr; i++) {
924 struct nv50_reg *a = &pc->attr[i*4];
925
926 for (c = 0; c < 4; c++) {
927 if (pc->p->type == NV50_PROG_FRAGMENT) {
928 struct nv50_reg *at =
929 alloc_temp(pc, NULL);
930 pc->attr[i*4+c].type = at->type;
931 pc->attr[i*4+c].hw = at->hw;
932 pc->attr[i*4+c].index = at->index;
933 } else {
934 pc->p->cfg.vp.attr[aid/32] |=
935 (1 << (aid % 32));
936 pc->attr[i*4+c].type = P_ATTR;
937 pc->attr[i*4+c].hw = aid++;
938 pc->attr[i*4+c].index = i;
939 }
940 }
941
942 if (pc->p->type != NV50_PROG_FRAGMENT)
943 continue;
944
945 emit_interp(pc, iv, iv, iv, FALSE);
946 tmp = alloc_temp(pc, NULL);
947 {
948 unsigned inst[2] = { 0, 0 };
949 inst[0] = 0x90000000;
950 inst[0] |= (tmp->hw << 2);
951 emit(pc, inst);
952 }
953 emit_interp(pc, &a[0], &a[0], tmp, TRUE);
954 emit_interp(pc, &a[1], &a[1], tmp, TRUE);
955 emit_interp(pc, &a[2], &a[2], tmp, TRUE);
956 emit_interp(pc, &a[3], &a[3], tmp, TRUE);
957 free_temp(pc, tmp);
958 }
959
960 if (iv)
961 free_temp(pc, iv);
962 }
963
964 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
965 if (pc->result_nr) {
966 int rid = 0;
967
968 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
969 if (!pc->result)
970 goto out_err;
971
972 for (i = 0; i < pc->result_nr; i++) {
973 for (c = 0; c < 4; c++) {
974 if (pc->p->type == NV50_PROG_FRAGMENT)
975 pc->result[i*4+c].type = P_TEMP;
976 else
977 pc->result[i*4+c].type = P_RESULT;
978 pc->result[i*4+c].hw = rid++;
979 pc->result[i*4+c].index = i;
980 }
981 }
982 }
983
984 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
985 if (pc->param_nr) {
986 int rid = 0;
987
988 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
989 if (!pc->param)
990 goto out_err;
991
992 for (i = 0; i < pc->param_nr; i++) {
993 for (c = 0; c < 4; c++) {
994 pc->param[i*4+c].type = P_CONST;
995 pc->param[i*4+c].hw = rid++;
996 pc->param[i*4+c].index = i;
997 }
998 }
999 }
1000
1001 if (pc->immd_nr) {
1002 int rid = 0;
1003
1004 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1005 if (!pc->immd)
1006 goto out_err;
1007
1008 for (i = 0; i < pc->immd_nr; i++) {
1009 for (c = 0; c < 4; c++) {
1010 pc->immd[i*4+c].type = P_IMMD;
1011 pc->immd[i*4+c].hw = rid++;
1012 pc->immd[i*4+c].index = i;
1013 }
1014 }
1015 }
1016
1017 ret = TRUE;
1018 out_err:
1019 tgsi_parse_free(&p);
1020 return ret;
1021 }
1022
1023 static boolean
1024 nv50_program_tx(struct nv50_program *p)
1025 {
1026 struct tgsi_parse_context parse;
1027 struct nv50_pc *pc;
1028 boolean ret;
1029
1030 pc = CALLOC_STRUCT(nv50_pc);
1031 if (!pc)
1032 return FALSE;
1033 pc->p = p;
1034 pc->p->cfg.high_temp = 4;
1035
1036 ret = nv50_program_tx_prep(pc);
1037 if (ret == FALSE)
1038 goto out_cleanup;
1039
1040 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1041 while (!tgsi_parse_end_of_tokens(&parse)) {
1042 const union tgsi_full_token *tok = &parse.FullToken;
1043
1044 tgsi_parse_token(&parse);
1045
1046 switch (tok->Token.Type) {
1047 case TGSI_TOKEN_TYPE_INSTRUCTION:
1048 ret = nv50_program_tx_insn(pc, tok);
1049 if (ret == FALSE)
1050 goto out_err;
1051 break;
1052 default:
1053 break;
1054 }
1055 }
1056
1057 p->immd_nr = pc->immd_nr * 4;
1058 p->immd = pc->immd_buf;
1059
1060 out_err:
1061 tgsi_parse_free(&parse);
1062
1063 out_cleanup:
1064 return ret;
1065 }
1066
1067 static void
1068 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1069 {
1070 int i;
1071
1072 if (nv50_program_tx(p) == FALSE)
1073 assert(0);
1074 /* *not* sufficient, it's fine if last inst is long and
1075 * NOT immd - otherwise it's fucked fucked fucked */
1076 p->insns[p->insns_nr - 1] |= 0x00000001;
1077
1078 if (p->type == NV50_PROG_VERTEX) {
1079 for (i = 0; i < p->insns_nr; i++)
1080 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1081 } else {
1082 for (i = 0; i < p->insns_nr; i++)
1083 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1084 }
1085
1086 p->translated = TRUE;
1087 }
1088
1089 static void
1090 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1091 {
1092 int i;
1093
1094 for (i = 0; i < p->immd_nr; i++) {
1095 BEGIN_RING(tesla, 0x0f00, 2);
1096 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8));
1097 OUT_RING (fui(p->immd[i]));
1098 }
1099 }
1100
1101 static void
1102 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1103 {
1104 struct pipe_winsys *ws = nv50->pipe.winsys;
1105 void *map;
1106
1107 if (!p->buffer)
1108 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1109 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1110 memcpy(map, p->insns, p->insns_nr * 4);
1111 ws->buffer_unmap(ws, p->buffer);
1112 }
1113
1114 void
1115 nv50_vertprog_validate(struct nv50_context *nv50)
1116 {
1117 struct nouveau_grobj *tesla = nv50->screen->tesla;
1118 struct nv50_program *p = nv50->vertprog;
1119 struct nouveau_stateobj *so;
1120
1121 if (!p->translated) {
1122 nv50_program_validate(nv50, p);
1123 if (!p->translated)
1124 assert(0);
1125 }
1126
1127 nv50_program_validate_data(nv50, p);
1128 nv50_program_validate_code(nv50, p);
1129
1130 so = so_new(11, 2);
1131 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1132 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1133 NOUVEAU_BO_HIGH, 0, 0);
1134 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1135 NOUVEAU_BO_LOW, 0, 0);
1136 so_method(so, tesla, 0x1650, 2);
1137 so_data (so, p->cfg.vp.attr[0]);
1138 so_data (so, p->cfg.vp.attr[1]);
1139 so_method(so, tesla, 0x16ac, 2);
1140 so_data (so, 8);
1141 so_data (so, p->cfg.high_temp);
1142 so_method(so, tesla, 0x140c, 1);
1143 so_data (so, 0); /* program start offset */
1144 so_emit(nv50->screen->nvws, so);
1145 so_ref(NULL, &so);
1146 }
1147
1148 void
1149 nv50_fragprog_validate(struct nv50_context *nv50)
1150 {
1151 struct nouveau_grobj *tesla = nv50->screen->tesla;
1152 struct nv50_program *p = nv50->fragprog;
1153 struct nouveau_stateobj *so;
1154
1155 if (!p->translated) {
1156 nv50_program_validate(nv50, p);
1157 if (!p->translated)
1158 assert(0);
1159 }
1160
1161 nv50_program_validate_data(nv50, p);
1162 nv50_program_validate_code(nv50, p);
1163
1164 so = so_new(7, 2);
1165 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1166 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1167 NOUVEAU_BO_HIGH, 0, 0);
1168 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1169 NOUVEAU_BO_LOW, 0, 0);
1170 so_method(so, tesla, 0x198c, 1);
1171 so_data (so, p->cfg.high_temp);
1172 so_method(so, tesla, 0x1414, 1);
1173 so_data (so, 0); /* program start offset */
1174 so_emit(nv50->screen->nvws, so);
1175 so_ref(NULL, &so);
1176 }
1177
1178 void
1179 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1180 {
1181 struct pipe_winsys *ws = nv50->pipe.winsys;
1182
1183 if (p->insns_nr) {
1184 if (p->insns)
1185 FREE(p->insns);
1186 p->insns_nr = 0;
1187 }
1188
1189 if (p->buffer)
1190 pipe_buffer_reference(ws, &p->buffer, NULL);
1191
1192 p->translated = 0;
1193 }
1194