Revert "nv50: move some magics"
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11 #include "nv50_state.h"
12
13 #define NV50_SU_MAX_TEMP 64
14
15 /* ARL - gallium craps itself on progs/vp/arl.txt
16 *
17 * MSB - Like MAD, but MUL+SUB
18 * - Drop it as a separate op; instead introduce a way to negate
19 * args for ops that support it.
20 *
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
24 *
25 * Verify half-insns work where expected - and force disable them where they
26 * don't work - MUL has it forcibly disabled atm as it fixes POW..
27 *
28 * WARNING: watch dst==src vectors, can overwrite components that are needed.
29 * ie. SUB R0, R0.yzxw, R0
30 *
31 * MOV dst, -src
32 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
33 * mov dst, tmp
34 *
35 * Things to check with renouveau:
36 * FP attr/result assignment - how?
37 * attrib
38 * - 0x16bc maps vp output onto fp hpos
39 * - 0x16c0 maps vp output onto fp col0
40 * result
41 * - colr always 0-3
42 * - depr always 4
43 * 0x16bc->0x16e8 --> some binding between vp/fp regs
44 * 0x16b8 --> VP output count
45 *
46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
47 * "MOV rcol.x, fcol.y" = 0x00000004
48 * 0x19a8 --> as above but 0x00000100 and 0x00000000
49 * - 0x00100000 used when KIL used
50 * 0x196c --> as above but 0x00000011 and 0x00000000
51 *
52 * 0x1988 --> 0xXXNNNNNN
53 * - XX == FP high something
54 */
/* One scalar register (a single channel) as tracked by the translator. */
struct nv50_reg {
	/* Which register file the value belongs to. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* Index of the owning TGSI register; -1 marks a register created by
	 * the translator itself (alloc_temp()/alloc_immd()), which is what
	 * free_temp() keys on. */
	int index;

	/* Hardware slot.  For P_TEMP, a negative value means no hardware
	 * temp has been assigned yet (see alloc_reg()). */
	int hw;
	/* NOTE(review): not read or written anywhere in the visible part of
	 * this file - presumably a "negate on read" flag; confirm. */
	int neg;
};
68
69 struct nv50_pc {
70 struct nv50_program *p;
71
72 /* hw resources */
73 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
74
75 /* tgsi resources */
76 struct nv50_reg *temp;
77 int temp_nr;
78 struct nv50_reg *attr;
79 int attr_nr;
80 struct nv50_reg *result;
81 int result_nr;
82 struct nv50_reg *param;
83 int param_nr;
84 struct nv50_reg *immd;
85 float *immd_buf;
86 int immd_nr;
87
88 struct nv50_reg *temp_temp[16];
89 unsigned temp_temp_nr;
90 };
91
92 static void
93 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
94 {
95 int i;
96
97 if (reg->type != P_TEMP)
98 return;
99
100 if (reg->hw >= 0) {
101 /*XXX: do this here too to catch FP temp-as-attr usage..
102 * not clean, but works */
103 if (pc->p->cfg.high_temp < (reg->hw + 1))
104 pc->p->cfg.high_temp = reg->hw + 1;
105 return;
106 }
107
108 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
109 if (!(pc->r_temp[i])) {
110 pc->r_temp[i] = reg;
111 reg->hw = i;
112 if (pc->p->cfg.high_temp < (i + 1))
113 pc->p->cfg.high_temp = i + 1;
114 return;
115 }
116 }
117
118 assert(0);
119 }
120
121 static struct nv50_reg *
122 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
123 {
124 struct nv50_reg *r;
125 int i;
126
127 if (dst && dst->type == P_TEMP && dst->hw == -1)
128 return dst;
129
130 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
131 if (!pc->r_temp[i]) {
132 r = CALLOC_STRUCT(nv50_reg);
133 r->type = P_TEMP;
134 r->index = -1;
135 r->hw = i;
136 pc->r_temp[i] = r;
137 return r;
138 }
139 }
140
141 assert(0);
142 return NULL;
143 }
144
145 static void
146 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
147 {
148 if (r->index == -1) {
149 FREE(pc->r_temp[r->hw]);
150 pc->r_temp[r->hw] = NULL;
151 }
152 }
153
154 static struct nv50_reg *
155 temp_temp(struct nv50_pc *pc)
156 {
157 if (pc->temp_temp_nr >= 16)
158 assert(0);
159
160 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
161 return pc->temp_temp[pc->temp_temp_nr++];
162 }
163
164 static void
165 kill_temp_temp(struct nv50_pc *pc)
166 {
167 int i;
168
169 for (i = 0; i < pc->temp_temp_nr; i++)
170 free_temp(pc, pc->temp_temp[i]);
171 pc->temp_temp_nr = 0;
172 }
173
174 static int
175 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
176 {
177 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
178 sizeof(float));
179 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
180 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
181 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
182 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
183
184 return pc->immd_nr++;
185 }
186
187 static struct nv50_reg *
188 alloc_immd(struct nv50_pc *pc, float f)
189 {
190 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
191 unsigned hw;
192
193 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
194 r->type = P_IMMD;
195 r->hw = hw;
196 r->index = -1;
197 return r;
198 }
199
200 static void
201 emit(struct nv50_pc *pc, unsigned *inst)
202 {
203 struct nv50_program *p = pc->p;
204
205 if (inst[0] & 1) {
206 p->insns_nr += 2;
207 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
208 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2);
209 } else {
210 p->insns_nr += 1;
211 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr);
212 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned));
213 }
214 }
215
216 static INLINE void set_long(struct nv50_pc *, unsigned *);
217
218 static boolean
219 is_long(unsigned *inst)
220 {
221 if (inst[0] & 1)
222 return TRUE;
223 return FALSE;
224 }
225
226 static boolean
227 is_immd(unsigned *inst)
228 {
229 if (is_long(inst) && (inst[1] & 3) == 3)
230 return TRUE;
231 return FALSE;
232 }
233
234 static INLINE void
235 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst)
236 {
237 set_long(pc, inst);
238 inst[1] &= ~((0x1f << 7) | (0x3 << 12));
239 inst[1] |= (pred << 7) | (idx << 12);
240 }
241
242 static INLINE void
243 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst)
244 {
245 set_long(pc, inst);
246 inst[1] &= ~((0x3 << 4) | (1 << 6));
247 inst[1] |= (idx << 4) | (on << 6);
248 }
249
250 static INLINE void
251 set_long(struct nv50_pc *pc, unsigned *inst)
252 {
253 if (is_long(inst))
254 return;
255
256 inst[0] |= 1;
257 set_pred(pc, 0xf, 0, inst);
258 set_pred_wr(pc, 0, 0, inst);
259 }
260
261 static INLINE void
262 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst)
263 {
264 if (dst->type == P_RESULT) {
265 set_long(pc, inst);
266 inst[1] |= 0x00000008;
267 }
268
269 alloc_reg(pc, dst);
270 inst[0] |= (dst->hw << 2);
271 }
272
273 static INLINE void
274 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
275 {
276 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
277
278 set_long(pc, inst);
279 /*XXX: can't be predicated - bits overlap.. catch cases where both
280 * are required and avoid them. */
281 set_pred(pc, 0, 0, inst);
282 set_pred_wr(pc, 0, 0, inst);
283
284 inst[1] |= 0x00000002 | 0x00000001;
285 inst[0] |= (val & 0x3f) << 16;
286 inst[1] |= (val >> 6) << 2;
287 }
288
289 static void
290 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
291 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
292 {
293 unsigned inst[2] = { 0, 0 };
294
295 inst[0] |= 0x80000000;
296 set_dst(pc, dst, inst);
297 alloc_reg(pc, iv);
298 inst[0] |= (iv->hw << 9);
299 alloc_reg(pc, src);
300 inst[0] |= (src->hw << 16);
301 if (noperspective)
302 inst[0] |= (1 << 25);
303
304 emit(pc, inst);
305 }
306
307 static void
308 set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
309 {
310 set_long(pc, inst);
311 if (src->type == P_IMMD) {
312 inst[1] |= (NV50_CB_PMISC << 22);
313 } else {
314 if (pc->p->type == PIPE_SHADER_VERTEX)
315 inst[1] |= (NV50_CB_PVP << 22);
316 else
317 inst[1] |= (NV50_CB_PFP << 22);
318 }
319 }
320
321 static void
322 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
323 {
324 unsigned inst[2] = { 0, 0 };
325
326 inst[0] |= 0x10000000;
327
328 set_dst(pc, dst, inst);
329
330 if (dst->type != P_RESULT && src->type == P_IMMD) {
331 set_immd(pc, src, inst);
332 /*XXX: 32-bit, but steals part of "half" reg space - need to
333 * catch and handle this case if/when we do half-regs
334 */
335 inst[0] |= 0x00008000;
336 } else
337 if (src->type == P_IMMD || src->type == P_CONST) {
338 set_long(pc, inst);
339 set_cseg(pc, src, inst);
340 inst[0] |= (src->hw << 9);
341 inst[1] |= 0x20000000; /* src0 const? */
342 } else {
343 if (src->type == P_ATTR) {
344 set_long(pc, inst);
345 inst[1] |= 0x00200000;
346 }
347
348 alloc_reg(pc, src);
349 inst[0] |= (src->hw << 9);
350 }
351
352 /* We really should support "half" instructions here at some point,
353 * but I don't feel confident enough about them yet.
354 */
355 set_long(pc, inst);
356 if (is_long(inst) && !is_immd(inst)) {
357 inst[1] |= 0x04000000; /* 32-bit */
358 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
359 }
360
361 emit(pc, inst);
362 }
363
364 static boolean
365 check_swap_src_0_1(struct nv50_pc *pc,
366 struct nv50_reg **s0, struct nv50_reg **s1)
367 {
368 struct nv50_reg *src0 = *s0, *src1 = *s1;
369
370 if (src0->type == P_CONST) {
371 if (src1->type != P_CONST) {
372 *s0 = src1;
373 *s1 = src0;
374 return TRUE;
375 }
376 } else
377 if (src1->type == P_ATTR) {
378 if (src0->type != P_ATTR) {
379 *s0 = src1;
380 *s1 = src0;
381 return TRUE;
382 }
383 }
384
385 return FALSE;
386 }
387
388 static void
389 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
390 {
391 if (src->type == P_ATTR) {
392 set_long(pc, inst);
393 inst[1] |= 0x00200000;
394 } else
395 if (src->type == P_CONST || src->type == P_IMMD) {
396 struct nv50_reg *temp = temp_temp(pc);
397
398 emit_mov(pc, temp, src);
399 src = temp;
400 }
401
402 alloc_reg(pc, src);
403 inst[0] |= (src->hw << 9);
404 }
405
406 static void
407 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
408 {
409 if (src->type == P_ATTR) {
410 struct nv50_reg *temp = temp_temp(pc);
411
412 emit_mov(pc, temp, src);
413 src = temp;
414 } else
415 if (src->type == P_CONST || src->type == P_IMMD) {
416 assert(!(inst[0] & 0x00800000));
417 if (inst[0] & 0x01000000) {
418 struct nv50_reg *temp = temp_temp(pc);
419
420 emit_mov(pc, temp, src);
421 src = temp;
422 } else {
423 set_cseg(pc, src, inst);
424 inst[0] |= 0x00800000;
425 }
426 }
427
428 alloc_reg(pc, src);
429 inst[0] |= (src->hw << 16);
430 }
431
432 static void
433 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst)
434 {
435 set_long(pc, inst);
436
437 if (src->type == P_ATTR) {
438 struct nv50_reg *temp = temp_temp(pc);
439
440 emit_mov(pc, temp, src);
441 src = temp;
442 } else
443 if (src->type == P_CONST || src->type == P_IMMD) {
444 assert(!(inst[0] & 0x01000000));
445 if (inst[0] & 0x00800000) {
446 struct nv50_reg *temp = temp_temp(pc);
447
448 emit_mov(pc, temp, src);
449 src = temp;
450 } else {
451 set_cseg(pc, src, inst);
452 inst[0] |= 0x01000000;
453 }
454 }
455
456 alloc_reg(pc, src);
457 inst[1] |= (src->hw << 14);
458 }
459
/* Emit dst = src0 * src1 (opcode 0xc, always in long form). */
static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xc0000000, 0 };

	set_long(pc, inst);

	/* Operand order is irrelevant for the result, so the swap result
	 * is ignored. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
476
/*
 * Emit dst = src0 + src1 (opcode 0xb).  The second operand goes into the
 * src1 slot for the short form and into the src2 slot once the instruction
 * has become long.
 */
static void
emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	check_swap_src_0_1(pc, &src0, &src1);
	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);

	if (!is_long(inst))
		set_src_1(pc, src1, inst);
	else
		set_src_2(pc, src1, inst);

	emit(pc, inst);
}
495
/*
 * Emit a min/max style operation: opcode 0xb in long form with `sub` placed
 * at bit 29 of word 1.  Callers in this file pass 4 for MAX and 5 for MIN.
 */
static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
	    struct nv50_reg *src0, struct nv50_reg *src1)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_long(pc, inst);
	inst[1] |= (sub << 29);

	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);

	emit(pc, inst);
}
513
514 static void
515 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
516 struct nv50_reg *src1)
517 {
518 unsigned inst[2] = { 0, 0 };
519
520 inst[0] |= 0xb0000000;
521
522 set_long(pc, inst);
523 if (check_swap_src_0_1(pc, &src0, &src1))
524 inst[1] |= 0x04000000;
525 else
526 inst[1] |= 0x08000000;
527
528 set_dst(pc, dst, inst);
529 set_src_0(pc, src0, inst);
530 set_src_2(pc, src1, inst);
531
532 emit(pc, inst);
533 }
534
/* Emit dst = src0 * src1 + src2 (opcode 0xe). */
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	/* The two factors commute; pick the encodable order. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
551
/*
 * Emit dst = src0 * src1 - src2: the MAD opcode (0xe) in long form with
 * flag 0x08000000 set in word 1.
 */
static void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
	 struct nv50_reg *src1, struct nv50_reg *src2)
{
	unsigned inst[2] = { 0xe0000000, 0 };

	set_long(pc, inst);
	inst[1] |= 0x08000000; /* src0 * src1 - src2 */

	/* The two factors commute; pick the encodable order. */
	check_swap_src_0_1(pc, &src0, &src1);

	set_dst(pc, dst, inst);
	set_src_0(pc, src0, inst);
	set_src_1(pc, src1, inst);
	set_src_2(pc, src2, inst);

	emit(pc, inst);
}
570
/*
 * Emit a single-operand opcode-0x9 instruction.  A non-zero `sub` forces
 * the long form and is stored at bit 29 of word 1.  Callers in this file
 * use sub 0 for RCP, 2 for RSQ, 3 for LG2, 4 for SIN, 5 for COS and 6 for
 * EX2.
 */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
	  struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0x90000000, 0 };

	if (sub != 0) {
		set_long(pc, inst);
		inst[1] |= (sub << 29);
	}

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
588
/*
 * Emit the preparation step that callers run before the EX2 flop
 * (emit_flop with sub 6): opcode 0xb, long form, sub-op 6 plus bit 14 of
 * word 1.
 */
static void
emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	set_long(pc, inst);
	inst[1] |= (6 << 29);
	inst[1] |= 0x00004000;

	emit(pc, inst);
}
603
/*
 * Emit the preparation step that callers run before the SIN/COS flops
 * (emit_flop with sub 4/5): opcode 0xb, long form, sub-op 6.
 */
static void
emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xb0000000, 0 };

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	set_long(pc, inst);
	inst[1] |= (6 << 29);

	emit(pc, inst);
}
618
619 static void
620 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
621 struct nv50_reg *src0, struct nv50_reg *src1)
622 {
623 unsigned inst[2] = { 0, 0 };
624 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
625 struct nv50_reg *rdst;
626
627 assert(c_op <= 7);
628 if (check_swap_src_0_1(pc, &src0, &src1))
629 c_op = inv_cop[c_op];
630
631 rdst = dst;
632 if (dst->type != P_TEMP)
633 dst = alloc_temp(pc, NULL);
634
635 /* set.u32 */
636 set_long(pc, inst);
637 inst[0] |= 0xb0000000;
638 inst[1] |= (3 << 29);
639 inst[1] |= (c_op << 14);
640 /*XXX: breaks things, .u32 by default?
641 * decuda will disasm as .u16 and use .lo/.hi regs, but this
642 * doesn't seem to match what the hw actually does.
643 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
644 */
645 set_dst(pc, dst, inst);
646 set_src_0(pc, src0, inst);
647 set_src_1(pc, src1, inst);
648 emit(pc, inst);
649
650 /* cvt.f32.u32 */
651 inst[0] = 0xa0000001;
652 inst[1] = 0x64014780;
653 set_dst(pc, rdst, inst);
654 set_src_0(pc, dst, inst);
655 emit(pc, inst);
656
657 if (dst != rdst)
658 free_temp(pc, dst);
659 }
660
/*
 * Emit a floor: a cvt instruction (opcode 0xa, sub-op 6) in integer mode,
 * 32-bit, with the .rn modifier and an f32 source.
 */
static void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);

	inst[1] |= (6 << 29);           /* cvt */
	inst[1] |= 0x08000000;          /* integer mode */
	inst[1] |= 0x04000000;          /* 32 bit */
	inst[1] |= ((0x1 << 3)) << 14;  /* .rn */
	inst[1] |= (1 << 14);           /* src .f32 */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
678
/*
 * Emit dst = v ^ e as ex2(e * lg2(v)), using a private temporary for the
 * intermediate values.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);      /* acc = lg2(v) */
	emit_mul(pc, acc, acc, e);     /* acc *= e */
	emit_preex2(pc, acc, acc);     /* prepare for ex2 */
	emit_flop(pc, 6, dst, acc);    /* dst = ex2(acc) */

	free_temp(pc, acc);
}
692
/*
 * Emit dst = |src|: a 32-bit cvt (opcode 0xa, sub-op 6) from an f32 source
 * with the .abs modifier.
 */
static void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* cvt */

	set_long(pc, inst);

	inst[1] |= (6 << 29);          /* cvt */
	inst[1] |= 0x04000000;         /* 32 bit */
	inst[1] |= (1 << 14);          /* src .f32 */
	inst[1] |= ((1 << 6) << 14);   /* .abs */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
709
710 static void
711 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
712 struct nv50_reg **src)
713 {
714 struct nv50_reg *one = alloc_immd(pc, 1.0);
715 struct nv50_reg *zero = alloc_immd(pc, 0.0);
716 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
717 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
718 struct nv50_reg *tmp[4];
719
720 if (mask & (1 << 0))
721 emit_mov(pc, dst[0], one);
722
723 if (mask & (1 << 3))
724 emit_mov(pc, dst[3], one);
725
726 if (mask & (3 << 1)) {
727 if (mask & (1 << 1))
728 tmp[0] = dst[1];
729 else
730 tmp[0] = temp_temp(pc);
731 emit_minmax(pc, 4, tmp[0], src[0], zero);
732 }
733
734 if (mask & (1 << 2)) {
735 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]);
736
737 tmp[1] = temp_temp(pc);
738 emit_minmax(pc, 4, tmp[1], src[1], zero);
739
740 tmp[3] = temp_temp(pc);
741 emit_minmax(pc, 4, tmp[3], src[3], neg128);
742 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
743
744 emit_pow(pc, dst[2], tmp[1], tmp[3]);
745 emit_mov(pc, dst[2], zero);
746 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]);
747 }
748 }
749
/*
 * Emit dst = -src using the long-form "delta" instruction (opcode 0xa,
 * sub-op 7) with an f32 source.
 */
static void
emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	unsigned inst[2] = { 0xa0000000, 0 }; /* delta */

	set_long(pc, inst);

	inst[1] |= (7 << 29);   /* delta */
	inst[1] |= 0x04000000;  /* negate arg0? probably not */
	inst[1] |= (1 << 14);   /* src .f32 */

	set_dst(pc, dst, inst);
	set_src_0(pc, src, inst);

	emit(pc, inst);
}
765
766 static struct nv50_reg *
767 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
768 {
769 switch (dst->DstRegister.File) {
770 case TGSI_FILE_TEMPORARY:
771 return &pc->temp[dst->DstRegister.Index * 4 + c];
772 case TGSI_FILE_OUTPUT:
773 return &pc->result[dst->DstRegister.Index * 4 + c];
774 case TGSI_FILE_NULL:
775 return NULL;
776 default:
777 break;
778 }
779
780 return NULL;
781 }
782
783 static struct nv50_reg *
784 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
785 {
786 struct nv50_reg *r = NULL;
787 struct nv50_reg *temp;
788 unsigned c;
789
790 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
791 switch (c) {
792 case TGSI_EXTSWIZZLE_X:
793 case TGSI_EXTSWIZZLE_Y:
794 case TGSI_EXTSWIZZLE_Z:
795 case TGSI_EXTSWIZZLE_W:
796 switch (src->SrcRegister.File) {
797 case TGSI_FILE_INPUT:
798 r = &pc->attr[src->SrcRegister.Index * 4 + c];
799 break;
800 case TGSI_FILE_TEMPORARY:
801 r = &pc->temp[src->SrcRegister.Index * 4 + c];
802 break;
803 case TGSI_FILE_CONSTANT:
804 r = &pc->param[src->SrcRegister.Index * 4 + c];
805 break;
806 case TGSI_FILE_IMMEDIATE:
807 r = &pc->immd[src->SrcRegister.Index * 4 + c];
808 break;
809 default:
810 assert(0);
811 break;
812 }
813 break;
814 case TGSI_EXTSWIZZLE_ZERO:
815 r = alloc_immd(pc, 0.0);
816 break;
817 case TGSI_EXTSWIZZLE_ONE:
818 r = alloc_immd(pc, 1.0);
819 break;
820 default:
821 assert(0);
822 break;
823 }
824
825 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
826 case TGSI_UTIL_SIGN_KEEP:
827 break;
828 case TGSI_UTIL_SIGN_CLEAR:
829 temp = temp_temp(pc);
830 emit_abs(pc, temp, r);
831 r = temp;
832 break;
833 case TGSI_UTIL_SIGN_TOGGLE:
834 temp = temp_temp(pc);
835 emit_neg(pc, temp, r);
836 r = temp;
837 break;
838 case TGSI_UTIL_SIGN_SET:
839 temp = temp_temp(pc);
840 emit_abs(pc, temp, r);
841 emit_neg(pc, temp, r);
842 r = temp;
843 break;
844 default:
845 assert(0);
846 break;
847 }
848
849 return r;
850 }
851
852 static boolean
853 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
854 {
855 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
856 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
857 unsigned mask, sat;
858 int i, c;
859
860 NOUVEAU_ERR("insn %p\n", tok);
861
862 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
863 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
864
865 for (c = 0; c < 4; c++) {
866 if (mask & (1 << c))
867 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
868 else
869 dst[c] = NULL;
870 }
871
872 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
873 for (c = 0; c < 4; c++)
874 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
875 }
876
877 if (sat) {
878 for (c = 0; c < 4; c++) {
879 rdst[c] = dst[c];
880 dst[c] = temp_temp(pc);
881 }
882 }
883
884 switch (inst->Instruction.Opcode) {
885 case TGSI_OPCODE_ABS:
886 for (c = 0; c < 4; c++) {
887 if (!(mask & (1 << c)))
888 continue;
889 emit_abs(pc, dst[c], src[0][c]);
890 }
891 break;
892 case TGSI_OPCODE_ADD:
893 for (c = 0; c < 4; c++) {
894 if (!(mask & (1 << c)))
895 continue;
896 emit_add(pc, dst[c], src[0][c], src[1][c]);
897 }
898 break;
899 case TGSI_OPCODE_COS:
900 temp = alloc_temp(pc, NULL);
901 emit_precossin(pc, temp, src[0][0]);
902 emit_flop(pc, 5, temp, temp);
903 for (c = 0; c < 4; c++) {
904 if (!(mask & (1 << c)))
905 continue;
906 emit_mov(pc, dst[c], temp);
907 }
908 break;
909 case TGSI_OPCODE_DP3:
910 temp = alloc_temp(pc, NULL);
911 emit_mul(pc, temp, src[0][0], src[1][0]);
912 emit_mad(pc, temp, src[0][1], src[1][1], temp);
913 emit_mad(pc, temp, src[0][2], src[1][2], temp);
914 for (c = 0; c < 4; c++) {
915 if (!(mask & (1 << c)))
916 continue;
917 emit_mov(pc, dst[c], temp);
918 }
919 free_temp(pc, temp);
920 break;
921 case TGSI_OPCODE_DP4:
922 temp = alloc_temp(pc, NULL);
923 emit_mul(pc, temp, src[0][0], src[1][0]);
924 emit_mad(pc, temp, src[0][1], src[1][1], temp);
925 emit_mad(pc, temp, src[0][2], src[1][2], temp);
926 emit_mad(pc, temp, src[0][3], src[1][3], temp);
927 for (c = 0; c < 4; c++) {
928 if (!(mask & (1 << c)))
929 continue;
930 emit_mov(pc, dst[c], temp);
931 }
932 free_temp(pc, temp);
933 break;
934 case TGSI_OPCODE_DPH:
935 temp = alloc_temp(pc, NULL);
936 emit_mul(pc, temp, src[0][0], src[1][0]);
937 emit_mad(pc, temp, src[0][1], src[1][1], temp);
938 emit_mad(pc, temp, src[0][2], src[1][2], temp);
939 emit_add(pc, temp, src[1][3], temp);
940 for (c = 0; c < 4; c++) {
941 if (!(mask & (1 << c)))
942 continue;
943 emit_mov(pc, dst[c], temp);
944 }
945 free_temp(pc, temp);
946 break;
947 case TGSI_OPCODE_DST:
948 {
949 struct nv50_reg *one = alloc_immd(pc, 1.0);
950 if (mask & (1 << 0))
951 emit_mov(pc, dst[0], one);
952 if (mask & (1 << 1))
953 emit_mul(pc, dst[1], src[0][1], src[1][1]);
954 if (mask & (1 << 2))
955 emit_mov(pc, dst[2], src[0][2]);
956 if (mask & (1 << 3))
957 emit_mov(pc, dst[3], src[1][3]);
958 FREE(one);
959 }
960 break;
961 case TGSI_OPCODE_EX2:
962 temp = alloc_temp(pc, NULL);
963 emit_preex2(pc, temp, src[0][0]);
964 emit_flop(pc, 6, temp, temp);
965 for (c = 0; c < 4; c++) {
966 if (!(mask & (1 << c)))
967 continue;
968 emit_mov(pc, dst[c], temp);
969 }
970 free_temp(pc, temp);
971 break;
972 case TGSI_OPCODE_FLR:
973 for (c = 0; c < 4; c++) {
974 if (!(mask & (1 << c)))
975 continue;
976 emit_flr(pc, dst[c], src[0][c]);
977 }
978 break;
979 case TGSI_OPCODE_FRC:
980 temp = alloc_temp(pc, NULL);
981 for (c = 0; c < 4; c++) {
982 if (!(mask & (1 << c)))
983 continue;
984 emit_flr(pc, temp, src[0][c]);
985 emit_sub(pc, dst[c], src[0][c], temp);
986 }
987 free_temp(pc, temp);
988 break;
989 case TGSI_OPCODE_LIT:
990 emit_lit(pc, &dst[0], mask, &src[0][0]);
991 break;
992 case TGSI_OPCODE_LG2:
993 temp = alloc_temp(pc, NULL);
994 emit_flop(pc, 3, temp, src[0][0]);
995 for (c = 0; c < 4; c++) {
996 if (!(mask & (1 << c)))
997 continue;
998 emit_mov(pc, dst[c], temp);
999 }
1000 break;
1001 case TGSI_OPCODE_LRP:
1002 for (c = 0; c < 4; c++) {
1003 if (!(mask & (1 << c)))
1004 continue;
1005 /*XXX: we can do better than this */
1006 temp = alloc_temp(pc, NULL);
1007 emit_neg(pc, temp, src[0][c]);
1008 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1009 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1010 free_temp(pc, temp);
1011 }
1012 break;
1013 case TGSI_OPCODE_MAD:
1014 for (c = 0; c < 4; c++) {
1015 if (!(mask & (1 << c)))
1016 continue;
1017 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1018 }
1019 break;
1020 case TGSI_OPCODE_MAX:
1021 for (c = 0; c < 4; c++) {
1022 if (!(mask & (1 << c)))
1023 continue;
1024 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1025 }
1026 break;
1027 case TGSI_OPCODE_MIN:
1028 for (c = 0; c < 4; c++) {
1029 if (!(mask & (1 << c)))
1030 continue;
1031 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1032 }
1033 break;
1034 case TGSI_OPCODE_MOV:
1035 for (c = 0; c < 4; c++) {
1036 if (!(mask & (1 << c)))
1037 continue;
1038 emit_mov(pc, dst[c], src[0][c]);
1039 }
1040 break;
1041 case TGSI_OPCODE_MUL:
1042 for (c = 0; c < 4; c++) {
1043 if (!(mask & (1 << c)))
1044 continue;
1045 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1046 }
1047 break;
1048 case TGSI_OPCODE_POW:
1049 temp = alloc_temp(pc, NULL);
1050 emit_pow(pc, temp, src[0][0], src[1][0]);
1051 for (c = 0; c < 4; c++) {
1052 if (!(mask & (1 << c)))
1053 continue;
1054 emit_mov(pc, dst[c], temp);
1055 }
1056 free_temp(pc, temp);
1057 break;
1058 case TGSI_OPCODE_RCP:
1059 for (c = 0; c < 4; c++) {
1060 if (!(mask & (1 << c)))
1061 continue;
1062 emit_flop(pc, 0, dst[c], src[0][0]);
1063 }
1064 break;
1065 case TGSI_OPCODE_RSQ:
1066 for (c = 0; c < 4; c++) {
1067 if (!(mask & (1 << c)))
1068 continue;
1069 emit_flop(pc, 2, dst[c], src[0][0]);
1070 }
1071 break;
1072 case TGSI_OPCODE_SCS:
1073 temp = alloc_temp(pc, NULL);
1074 emit_precossin(pc, temp, src[0][0]);
1075 if (mask & (1 << 0))
1076 emit_flop(pc, 5, dst[0], temp);
1077 if (mask & (1 << 1))
1078 emit_flop(pc, 4, dst[1], temp);
1079 break;
1080 case TGSI_OPCODE_SGE:
1081 for (c = 0; c < 4; c++) {
1082 if (!(mask & (1 << c)))
1083 continue;
1084 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1085 }
1086 break;
1087 case TGSI_OPCODE_SIN:
1088 temp = alloc_temp(pc, NULL);
1089 emit_precossin(pc, temp, src[0][0]);
1090 emit_flop(pc, 4, temp, temp);
1091 for (c = 0; c < 4; c++) {
1092 if (!(mask & (1 << c)))
1093 continue;
1094 emit_mov(pc, dst[c], temp);
1095 }
1096 break;
1097 case TGSI_OPCODE_SLT:
1098 for (c = 0; c < 4; c++) {
1099 if (!(mask & (1 << c)))
1100 continue;
1101 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1102 }
1103 break;
1104 case TGSI_OPCODE_SUB:
1105 for (c = 0; c < 4; c++) {
1106 if (!(mask & (1 << c)))
1107 continue;
1108 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1109 }
1110 break;
1111 case TGSI_OPCODE_XPD:
1112 temp = alloc_temp(pc, NULL);
1113 if (mask & (1 << 0)) {
1114 emit_mul(pc, temp, src[0][2], src[1][1]);
1115 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1116 }
1117 if (mask & (1 << 1)) {
1118 emit_mul(pc, temp, src[0][0], src[1][2]);
1119 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1120 }
1121 if (mask & (1 << 2)) {
1122 emit_mul(pc, temp, src[0][1], src[1][0]);
1123 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1124 }
1125 free_temp(pc, temp);
1126 break;
1127 case TGSI_OPCODE_END:
1128 break;
1129 default:
1130 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1131 return FALSE;
1132 }
1133
1134 if (sat) {
1135 for (c = 0; c < 4; c++) {
1136 unsigned inst[2] = { 0, 0 };
1137
1138 if (!(mask & (1 << c)))
1139 continue;
1140
1141 inst[0] = 0xa0000000; /* cvt */
1142 set_long(pc, inst);
1143 inst[1] |= (6 << 29); /* cvt */
1144 inst[1] |= 0x04000000; /* 32 bit */
1145 inst[1] |= (1 << 14); /* src .f32 */
1146 inst[1] |= ((1 << 5) << 14); /* .sat */
1147 set_dst(pc, rdst[c], inst);
1148 set_src_0(pc, dst[c], inst);
1149 emit(pc, inst);
1150 }
1151 }
1152
1153 kill_temp_temp(pc);
1154 return TRUE;
1155 }
1156
1157 static boolean
1158 nv50_program_tx_prep(struct nv50_pc *pc)
1159 {
1160 struct tgsi_parse_context p;
1161 boolean ret = FALSE;
1162 unsigned i, c;
1163
1164 tgsi_parse_init(&p, pc->p->pipe.tokens);
1165 while (!tgsi_parse_end_of_tokens(&p)) {
1166 const union tgsi_full_token *tok = &p.FullToken;
1167
1168 tgsi_parse_token(&p);
1169 switch (tok->Token.Type) {
1170 case TGSI_TOKEN_TYPE_IMMEDIATE:
1171 {
1172 const struct tgsi_full_immediate *imm =
1173 &p.FullToken.FullImmediate;
1174
1175 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1176 imm->u.ImmediateFloat32[1].Float,
1177 imm->u.ImmediateFloat32[2].Float,
1178 imm->u.ImmediateFloat32[3].Float);
1179 }
1180 break;
1181 case TGSI_TOKEN_TYPE_DECLARATION:
1182 {
1183 const struct tgsi_full_declaration *d;
1184 unsigned last;
1185
1186 d = &p.FullToken.FullDeclaration;
1187 last = d->u.DeclarationRange.Last;
1188
1189 switch (d->Declaration.File) {
1190 case TGSI_FILE_TEMPORARY:
1191 if (pc->temp_nr < (last + 1))
1192 pc->temp_nr = last + 1;
1193 break;
1194 case TGSI_FILE_OUTPUT:
1195 if (pc->result_nr < (last + 1))
1196 pc->result_nr = last + 1;
1197 break;
1198 case TGSI_FILE_INPUT:
1199 if (pc->attr_nr < (last + 1))
1200 pc->attr_nr = last + 1;
1201 break;
1202 case TGSI_FILE_CONSTANT:
1203 if (pc->param_nr < (last + 1))
1204 pc->param_nr = last + 1;
1205 break;
1206 default:
1207 NOUVEAU_ERR("bad decl file %d\n",
1208 d->Declaration.File);
1209 goto out_err;
1210 }
1211 }
1212 break;
1213 case TGSI_TOKEN_TYPE_INSTRUCTION:
1214 break;
1215 default:
1216 break;
1217 }
1218 }
1219
1220 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1221 if (pc->temp_nr) {
1222 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1223 if (!pc->temp)
1224 goto out_err;
1225
1226 for (i = 0; i < pc->temp_nr; i++) {
1227 for (c = 0; c < 4; c++) {
1228 pc->temp[i*4+c].type = P_TEMP;
1229 pc->temp[i*4+c].hw = -1;
1230 pc->temp[i*4+c].index = i;
1231 }
1232 }
1233 }
1234
1235 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1236 if (pc->attr_nr) {
1237 struct nv50_reg *iv = NULL, *tmp = NULL;
1238 int aid = 0;
1239
1240 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1241 if (!pc->attr)
1242 goto out_err;
1243
1244 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1245 iv = alloc_temp(pc, NULL);
1246 aid++;
1247 }
1248
1249 for (i = 0; i < pc->attr_nr; i++) {
1250 struct nv50_reg *a = &pc->attr[i*4];
1251
1252 for (c = 0; c < 4; c++) {
1253 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1254 struct nv50_reg *at =
1255 alloc_temp(pc, NULL);
1256 pc->attr[i*4+c].type = at->type;
1257 pc->attr[i*4+c].hw = at->hw;
1258 pc->attr[i*4+c].index = at->index;
1259 } else {
1260 pc->p->cfg.vp.attr[aid/32] |=
1261 (1 << (aid % 32));
1262 pc->attr[i*4+c].type = P_ATTR;
1263 pc->attr[i*4+c].hw = aid++;
1264 pc->attr[i*4+c].index = i;
1265 }
1266 }
1267
1268 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1269 continue;
1270
1271 emit_interp(pc, iv, iv, iv, FALSE);
1272 tmp = alloc_temp(pc, NULL);
1273 emit_flop(pc, 0, tmp, iv);
1274 emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1275 emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1276 emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1277 emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1278 free_temp(pc, tmp);
1279 }
1280
1281 if (iv)
1282 free_temp(pc, iv);
1283 }
1284
1285 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1286 if (pc->result_nr) {
1287 int rid = 0;
1288
1289 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1290 if (!pc->result)
1291 goto out_err;
1292
1293 for (i = 0; i < pc->result_nr; i++) {
1294 for (c = 0; c < 4; c++) {
1295 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1296 pc->result[i*4+c].type = P_TEMP;
1297 pc->result[i*4+c].hw = -1;
1298 } else {
1299 pc->result[i*4+c].type = P_RESULT;
1300 pc->result[i*4+c].hw = rid++;
1301 }
1302 pc->result[i*4+c].index = i;
1303 }
1304 }
1305 }
1306
1307 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1308 if (pc->param_nr) {
1309 int rid = 0;
1310
1311 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1312 if (!pc->param)
1313 goto out_err;
1314
1315 for (i = 0; i < pc->param_nr; i++) {
1316 for (c = 0; c < 4; c++) {
1317 pc->param[i*4+c].type = P_CONST;
1318 pc->param[i*4+c].hw = rid++;
1319 pc->param[i*4+c].index = i;
1320 }
1321 }
1322 }
1323
1324 if (pc->immd_nr) {
1325 int rid = 0;
1326
1327 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1328 if (!pc->immd)
1329 goto out_err;
1330
1331 for (i = 0; i < pc->immd_nr; i++) {
1332 for (c = 0; c < 4; c++) {
1333 pc->immd[i*4+c].type = P_IMMD;
1334 pc->immd[i*4+c].hw = rid++;
1335 pc->immd[i*4+c].index = i;
1336 }
1337 }
1338 }
1339
1340 ret = TRUE;
1341 out_err:
1342 tgsi_parse_free(&p);
1343 return ret;
1344 }
1345
1346 static boolean
1347 nv50_program_tx(struct nv50_program *p)
1348 {
1349 struct tgsi_parse_context parse;
1350 struct nv50_pc *pc;
1351 boolean ret;
1352
1353 pc = CALLOC_STRUCT(nv50_pc);
1354 if (!pc)
1355 return FALSE;
1356 pc->p = p;
1357 pc->p->cfg.high_temp = 4;
1358
1359 ret = nv50_program_tx_prep(pc);
1360 if (ret == FALSE)
1361 goto out_cleanup;
1362
1363 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1364 while (!tgsi_parse_end_of_tokens(&parse)) {
1365 const union tgsi_full_token *tok = &parse.FullToken;
1366
1367 tgsi_parse_token(&parse);
1368
1369 switch (tok->Token.Type) {
1370 case TGSI_TOKEN_TYPE_INSTRUCTION:
1371 ret = nv50_program_tx_insn(pc, tok);
1372 if (ret == FALSE)
1373 goto out_err;
1374 break;
1375 default:
1376 break;
1377 }
1378 }
1379
1380 if (p->type == PIPE_SHADER_FRAGMENT) {
1381 struct nv50_reg out;
1382
1383 out.type = P_TEMP;
1384 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1385 emit_mov(pc, &out, &pc->result[out.hw]);
1386 }
1387
1388 p->immd_nr = pc->immd_nr * 4;
1389 p->immd = pc->immd_buf;
1390
1391 out_err:
1392 tgsi_parse_free(&parse);
1393
1394 out_cleanup:
1395 return ret;
1396 }
1397
1398 static void
1399 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1400 {
1401 int i;
1402
1403 if (nv50_program_tx(p) == FALSE)
1404 assert(0);
1405 /* *not* sufficient, it's fine if last inst is long and
1406 * NOT immd - otherwise it's fucked fucked fucked */
1407 p->insns[p->insns_nr - 1] |= 0x00000001;
1408
1409 if (p->type == PIPE_SHADER_VERTEX) {
1410 for (i = 0; i < p->insns_nr; i++)
1411 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]);
1412 } else {
1413 for (i = 0; i < p->insns_nr; i++)
1414 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]);
1415 }
1416
1417 p->translated = TRUE;
1418 }
1419
1420 static void
1421 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1422 {
1423 int i;
1424
1425 for (i = 0; i < p->immd_nr; i++) {
1426 BEGIN_RING(tesla, 0x0f00, 2);
1427 OUT_RING ((NV50_CB_PMISC << 0) | (i << 8));
1428 OUT_RING (fui(p->immd[i]));
1429 }
1430 }
1431
1432 static void
1433 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1434 {
1435 struct pipe_winsys *ws = nv50->pipe.winsys;
1436 void *map;
1437
1438 if (!p->buffer)
1439 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4);
1440 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1441 memcpy(map, p->insns, p->insns_nr * 4);
1442 ws->buffer_unmap(ws, p->buffer);
1443 }
1444
1445 void
1446 nv50_vertprog_validate(struct nv50_context *nv50)
1447 {
1448 struct nouveau_grobj *tesla = nv50->screen->tesla;
1449 struct nv50_program *p = nv50->vertprog;
1450 struct nouveau_stateobj *so;
1451
1452 if (!p->translated) {
1453 nv50_program_validate(nv50, p);
1454 if (!p->translated)
1455 assert(0);
1456 }
1457
1458 nv50_program_validate_data(nv50, p);
1459 nv50_program_validate_code(nv50, p);
1460
1461 so = so_new(11, 2);
1462 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1463 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1464 NOUVEAU_BO_HIGH, 0, 0);
1465 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1466 NOUVEAU_BO_LOW, 0, 0);
1467 so_method(so, tesla, 0x1650, 2);
1468 so_data (so, p->cfg.vp.attr[0]);
1469 so_data (so, p->cfg.vp.attr[1]);
1470 so_method(so, tesla, 0x16ac, 2);
1471 so_data (so, 8);
1472 so_data (so, p->cfg.high_temp);
1473 so_method(so, tesla, 0x140c, 1);
1474 so_data (so, 0); /* program start offset */
1475 so_emit(nv50->screen->nvws, so);
1476 so_ref(NULL, &so);
1477 }
1478
1479 void
1480 nv50_fragprog_validate(struct nv50_context *nv50)
1481 {
1482 struct nouveau_grobj *tesla = nv50->screen->tesla;
1483 struct nv50_program *p = nv50->fragprog;
1484 struct nouveau_stateobj *so;
1485
1486 if (!p->translated) {
1487 nv50_program_validate(nv50, p);
1488 if (!p->translated)
1489 assert(0);
1490 }
1491
1492 nv50_program_validate_data(nv50, p);
1493 nv50_program_validate_code(nv50, p);
1494
1495 so = so_new(7, 2);
1496 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1497 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1498 NOUVEAU_BO_HIGH, 0, 0);
1499 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1500 NOUVEAU_BO_LOW, 0, 0);
1501 so_method(so, tesla, 0x198c, 1);
1502 so_data (so, p->cfg.high_temp);
1503 so_method(so, tesla, 0x1414, 1);
1504 so_data (so, 0); /* program start offset */
1505 so_emit(nv50->screen->nvws, so);
1506 so_ref(NULL, &so);
1507 }
1508
1509 void
1510 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1511 {
1512 struct pipe_winsys *ws = nv50->pipe.winsys;
1513
1514 if (p->insns_nr) {
1515 if (p->insns)
1516 FREE(p->insns);
1517 p->insns_nr = 0;
1518 }
1519
1520 if (p->buffer)
1521 pipe_buffer_reference(ws, &p->buffer, NULL);
1522
1523 p->translated = 0;
1524 }
1525