nv50: comment on a so-far unseen bug
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
9
10 #include "nv50_context.h"
11
12 #define NV50_SU_MAX_TEMP 64
13 #define NV50_PROGRAM_DUMP
14
15 /* ARL - gallium craps itself on progs/vp/arl.txt
16 *
17 * MSB - Like MAD, but MUL+SUB
18 * - Fuck it off, introduce a way to negate args for ops that
19 * support it.
20 *
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
24 *
25 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
26 * case, if the emit_src() causes the inst to suddenly become long.
27 *
28 * Verify half-insns work where expected - and force disable them where they
29 * don't work - MUL has it forcibly disabled atm as it fixes POW..
30 *
31 * FUCK! watch dst==src vectors, can overwrite components that are needed.
32 * ie. SUB R0, R0.yzxw, R0
33 *
34 * MOV dst, -src
35 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
36 * mov dst, tmp
37 *
38 * Things to check with renouveau:
39 * FP attr/result assignment - how?
40 * attrib
41 * - 0x16bc maps vp output onto fp hpos
42 * - 0x16c0 maps vp output onto fp col0
43 * result
44 * - colr always 0-3
45 * - depr always 4
46 * 0x16bc->0x16e8 --> some binding between vp/fp regs
47 * 0x16b8 --> VP output count
48 *
49 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
50 * "MOV rcol.x, fcol.y" = 0x00000004
51 * 0x19a8 --> as above but 0x00000100 and 0x00000000
52 * - 0x00100000 used when KIL used
53 * 0x196c --> as above but 0x00000011 and 0x00000000
54 *
55 * 0x1988 --> 0xXXNNNNNN
56 * - XX == FP high something
57 */
/*
 * A single scalar register as tracked by the code generator.
 */
struct nv50_reg {
	/* Register file this scalar lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* TGSI register index.  The allocators in this file store -1 here
	 * for registers they create themselves (scratch temps, ad-hoc
	 * immediates). */
	int index;

	/* Hardware slot.  For P_TEMP, -1 means no slot has been assigned
	 * yet (see alloc_reg()). */
	int hw;

	/* NOTE(review): not read or written by the code visible in this
	 * part of the file - presumably a negate flag; confirm. */
	int neg;
};
71
72 struct nv50_pc {
73 struct nv50_program *p;
74
75 /* hw resources */
76 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
77
78 /* tgsi resources */
79 struct nv50_reg *temp;
80 int temp_nr;
81 struct nv50_reg *attr;
82 int attr_nr;
83 struct nv50_reg *result;
84 int result_nr;
85 struct nv50_reg *param;
86 int param_nr;
87 struct nv50_reg *immd;
88 float *immd_buf;
89 int immd_nr;
90
91 struct nv50_reg *temp_temp[16];
92 unsigned temp_temp_nr;
93 };
94
95 static void
96 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
97 {
98 int i;
99
100 if (reg->type != P_TEMP)
101 return;
102
103 if (reg->hw >= 0) {
104 /*XXX: do this here too to catch FP temp-as-attr usage..
105 * not clean, but works */
106 if (pc->p->cfg.high_temp < (reg->hw + 1))
107 pc->p->cfg.high_temp = reg->hw + 1;
108 return;
109 }
110
111 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
112 if (!(pc->r_temp[i])) {
113 pc->r_temp[i] = reg;
114 reg->hw = i;
115 if (pc->p->cfg.high_temp < (i + 1))
116 pc->p->cfg.high_temp = i + 1;
117 return;
118 }
119 }
120
121 assert(0);
122 }
123
124 static struct nv50_reg *
125 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
126 {
127 struct nv50_reg *r;
128 int i;
129
130 if (dst && dst->type == P_TEMP && dst->hw == -1)
131 return dst;
132
133 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
134 if (!pc->r_temp[i]) {
135 r = CALLOC_STRUCT(nv50_reg);
136 r->type = P_TEMP;
137 r->index = -1;
138 r->hw = i;
139 pc->r_temp[i] = r;
140 return r;
141 }
142 }
143
144 assert(0);
145 return NULL;
146 }
147
148 static void
149 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
150 {
151 if (r->index == -1) {
152 unsigned hw = r->hw;
153
154 FREE(pc->r_temp[hw]);
155 pc->r_temp[hw] = NULL;
156 }
157 }
158
159 static struct nv50_reg *
160 temp_temp(struct nv50_pc *pc)
161 {
162 if (pc->temp_temp_nr >= 16)
163 assert(0);
164
165 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
166 return pc->temp_temp[pc->temp_temp_nr++];
167 }
168
169 static void
170 kill_temp_temp(struct nv50_pc *pc)
171 {
172 int i;
173
174 for (i = 0; i < pc->temp_temp_nr; i++)
175 free_temp(pc, pc->temp_temp[i]);
176 pc->temp_temp_nr = 0;
177 }
178
179 static int
180 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
181 {
182 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 *
183 sizeof(float));
184 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
185 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
186 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
187 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
188
189 return pc->immd_nr++;
190 }
191
192 static struct nv50_reg *
193 alloc_immd(struct nv50_pc *pc, float f)
194 {
195 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
196 unsigned hw;
197
198 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
199 r->type = P_IMMD;
200 r->hw = hw;
201 r->index = -1;
202 return r;
203 }
204
205 static struct nv50_program_exec *
206 exec(struct nv50_pc *pc)
207 {
208 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
209
210 e->param.index = -1;
211 return e;
212 }
213
214 static void
215 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
216 {
217 struct nv50_program *p = pc->p;
218
219 if (p->exec_tail)
220 p->exec_tail->next = e;
221 if (!p->exec_head)
222 p->exec_head = e;
223 p->exec_tail = e;
224 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
225 }
226
227 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
228
229 static boolean
230 is_long(struct nv50_program_exec *e)
231 {
232 if (e->inst[0] & 1)
233 return TRUE;
234 return FALSE;
235 }
236
237 static boolean
238 is_immd(struct nv50_program_exec *e)
239 {
240 if (is_long(e) && (e->inst[1] & 3) == 3)
241 return TRUE;
242 return FALSE;
243 }
244
245 static INLINE void
246 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
247 struct nv50_program_exec *e)
248 {
249 set_long(pc, e);
250 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
251 e->inst[1] |= (pred << 7) | (idx << 12);
252 }
253
254 static INLINE void
255 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
256 struct nv50_program_exec *e)
257 {
258 set_long(pc, e);
259 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
260 e->inst[1] |= (idx << 4) | (on << 6);
261 }
262
263 static INLINE void
264 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
265 {
266 if (is_long(e))
267 return;
268
269 e->inst[0] |= 1;
270 set_pred(pc, 0xf, 0, e);
271 set_pred_wr(pc, 0, 0, e);
272 }
273
274 static INLINE void
275 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
276 {
277 if (dst->type == P_RESULT) {
278 set_long(pc, e);
279 e->inst[1] |= 0x00000008;
280 }
281
282 alloc_reg(pc, dst);
283 e->inst[0] |= (dst->hw << 2);
284 }
285
286 static INLINE void
287 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
288 {
289 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
290
291 set_long(pc, e);
292 /*XXX: can't be predicated - bits overlap.. catch cases where both
293 * are required and avoid them. */
294 set_pred(pc, 0, 0, e);
295 set_pred_wr(pc, 0, 0, e);
296
297 e->inst[1] |= 0x00000002 | 0x00000001;
298 e->inst[0] |= (val & 0x3f) << 16;
299 e->inst[1] |= (val >> 6) << 2;
300 }
301
302 static void
303 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
304 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
305 {
306 struct nv50_program_exec *e = exec(pc);
307
308 e->inst[0] |= 0x80000000;
309 set_dst(pc, dst, e);
310 alloc_reg(pc, iv);
311 e->inst[0] |= (iv->hw << 9);
312 alloc_reg(pc, src);
313 e->inst[0] |= (src->hw << 16);
314 if (noperspective)
315 e->inst[0] |= (1 << 25);
316
317 emit(pc, e);
318 }
319
320 static void
321 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
322 struct nv50_program_exec *e)
323 {
324 set_long(pc, e);
325 #if 1
326 e->inst[1] |= (1 << 22);
327 #else
328 if (src->type == P_IMMD) {
329 e->inst[1] |= (NV50_CB_PMISC << 22);
330 } else {
331 if (pc->p->type == PIPE_SHADER_VERTEX)
332 e->inst[1] |= (NV50_CB_PVP << 22);
333 else
334 e->inst[1] |= (NV50_CB_PFP << 22);
335 }
336 #endif
337
338 e->param.index = src->hw;
339 e->param.shift = s;
340 e->param.mask = m << (s % 32);
341 }
342
343 static void
344 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
345 {
346 struct nv50_program_exec *e = exec(pc);
347
348 e->inst[0] |= 0x10000000;
349
350 set_dst(pc, dst, e);
351
352 if (dst->type != P_RESULT && src->type == P_IMMD) {
353 set_immd(pc, src, e);
354 /*XXX: 32-bit, but steals part of "half" reg space - need to
355 * catch and handle this case if/when we do half-regs
356 */
357 e->inst[0] |= 0x00008000;
358 } else
359 if (src->type == P_IMMD || src->type == P_CONST) {
360 set_long(pc, e);
361 set_data(pc, src, 0x7f, 9, e);
362 e->inst[1] |= 0x20000000; /* src0 const? */
363 } else {
364 if (src->type == P_ATTR) {
365 set_long(pc, e);
366 e->inst[1] |= 0x00200000;
367 }
368
369 alloc_reg(pc, src);
370 e->inst[0] |= (src->hw << 9);
371 }
372
373 /* We really should support "half" instructions here at some point,
374 * but I don't feel confident enough about them yet.
375 */
376 set_long(pc, e);
377 if (is_long(e) && !is_immd(e)) {
378 e->inst[1] |= 0x04000000; /* 32-bit */
379 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
380 }
381
382 emit(pc, e);
383 }
384
385 static boolean
386 check_swap_src_0_1(struct nv50_pc *pc,
387 struct nv50_reg **s0, struct nv50_reg **s1)
388 {
389 struct nv50_reg *src0 = *s0, *src1 = *s1;
390
391 if (src0->type == P_CONST) {
392 if (src1->type != P_CONST) {
393 *s0 = src1;
394 *s1 = src0;
395 return TRUE;
396 }
397 } else
398 if (src1->type == P_ATTR) {
399 if (src0->type != P_ATTR) {
400 *s0 = src1;
401 *s1 = src0;
402 return TRUE;
403 }
404 }
405
406 return FALSE;
407 }
408
409 static void
410 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
411 {
412 if (src->type == P_ATTR) {
413 set_long(pc, e);
414 e->inst[1] |= 0x00200000;
415 } else
416 if (src->type == P_CONST || src->type == P_IMMD) {
417 struct nv50_reg *temp = temp_temp(pc);
418
419 emit_mov(pc, temp, src);
420 src = temp;
421 }
422
423 alloc_reg(pc, src);
424 e->inst[0] |= (src->hw << 9);
425 }
426
427 static void
428 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
429 {
430 if (src->type == P_ATTR) {
431 struct nv50_reg *temp = temp_temp(pc);
432
433 emit_mov(pc, temp, src);
434 src = temp;
435 } else
436 if (src->type == P_CONST || src->type == P_IMMD) {
437 assert(!(e->inst[0] & 0x00800000));
438 if (e->inst[0] & 0x01000000) {
439 struct nv50_reg *temp = temp_temp(pc);
440
441 emit_mov(pc, temp, src);
442 src = temp;
443 } else {
444 set_data(pc, src, 0x7f, 16, e);
445 e->inst[0] |= 0x00800000;
446 }
447 }
448
449 alloc_reg(pc, src);
450 e->inst[0] |= (src->hw << 16);
451 }
452
453 static void
454 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
455 {
456 set_long(pc, e);
457
458 if (src->type == P_ATTR) {
459 struct nv50_reg *temp = temp_temp(pc);
460
461 emit_mov(pc, temp, src);
462 src = temp;
463 } else
464 if (src->type == P_CONST || src->type == P_IMMD) {
465 assert(!(e->inst[0] & 0x01000000));
466 if (e->inst[0] & 0x00800000) {
467 struct nv50_reg *temp = temp_temp(pc);
468
469 emit_mov(pc, temp, src);
470 src = temp;
471 } else {
472 set_data(pc, src, 0x7f, 32+14, e);
473 e->inst[0] |= 0x01000000;
474 }
475 }
476
477 alloc_reg(pc, src);
478 e->inst[1] |= (src->hw << 14);
479 }
480
481 static void
482 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
483 struct nv50_reg *src1)
484 {
485 struct nv50_program_exec *e = exec(pc);
486
487 e->inst[0] |= 0xc0000000;
488 set_long(pc, e);
489
490 check_swap_src_0_1(pc, &src0, &src1);
491 set_dst(pc, dst, e);
492 set_src_0(pc, src0, e);
493 set_src_1(pc, src1, e);
494
495 emit(pc, e);
496 }
497
498 static void
499 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
500 struct nv50_reg *src0, struct nv50_reg *src1)
501 {
502 struct nv50_program_exec *e = exec(pc);
503
504 e->inst[0] |= 0xb0000000;
505
506 check_swap_src_0_1(pc, &src0, &src1);
507 set_dst(pc, dst, e);
508 set_src_0(pc, src0, e);
509 if (is_long(e))
510 set_src_2(pc, src1, e);
511 else
512 set_src_1(pc, src1, e);
513
514 emit(pc, e);
515 }
516
517 static void
518 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
519 struct nv50_reg *src0, struct nv50_reg *src1)
520 {
521 struct nv50_program_exec *e = exec(pc);
522
523 set_long(pc, e);
524 e->inst[0] |= 0xb0000000;
525 e->inst[1] |= (sub << 29);
526
527 check_swap_src_0_1(pc, &src0, &src1);
528 set_dst(pc, dst, e);
529 set_src_0(pc, src0, e);
530 set_src_1(pc, src1, e);
531
532 emit(pc, e);
533 }
534
535 static void
536 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
537 struct nv50_reg *src1)
538 {
539 struct nv50_program_exec *e = exec(pc);
540
541 e->inst[0] |= 0xb0000000;
542
543 set_long(pc, e);
544 if (check_swap_src_0_1(pc, &src0, &src1))
545 e->inst[1] |= 0x04000000;
546 else
547 e->inst[1] |= 0x08000000;
548
549 set_dst(pc, dst, e);
550 set_src_0(pc, src0, e);
551 set_src_2(pc, src1, e);
552
553 emit(pc, e);
554 }
555
556 static void
557 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
558 struct nv50_reg *src1, struct nv50_reg *src2)
559 {
560 struct nv50_program_exec *e = exec(pc);
561
562 e->inst[0] |= 0xe0000000;
563
564 check_swap_src_0_1(pc, &src0, &src1);
565 set_dst(pc, dst, e);
566 set_src_0(pc, src0, e);
567 set_src_1(pc, src1, e);
568 set_src_2(pc, src2, e);
569
570 emit(pc, e);
571 }
572
573 static void
574 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
575 struct nv50_reg *src1, struct nv50_reg *src2)
576 {
577 struct nv50_program_exec *e = exec(pc);
578
579 e->inst[0] |= 0xe0000000;
580 set_long(pc, e);
581 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
582
583 check_swap_src_0_1(pc, &src0, &src1);
584 set_dst(pc, dst, e);
585 set_src_0(pc, src0, e);
586 set_src_1(pc, src1, e);
587 set_src_2(pc, src2, e);
588
589 emit(pc, e);
590 }
591
592 static void
593 emit_flop(struct nv50_pc *pc, unsigned sub,
594 struct nv50_reg *dst, struct nv50_reg *src)
595 {
596 struct nv50_program_exec *e = exec(pc);
597
598 e->inst[0] |= 0x90000000;
599 if (sub) {
600 set_long(pc, e);
601 e->inst[1] |= (sub << 29);
602 }
603
604 set_dst(pc, dst, e);
605 set_src_0(pc, src, e);
606
607 emit(pc, e);
608 }
609
610 static void
611 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
612 {
613 struct nv50_program_exec *e = exec(pc);
614
615 e->inst[0] |= 0xb0000000;
616
617 set_dst(pc, dst, e);
618 set_src_0(pc, src, e);
619 set_long(pc, e);
620 e->inst[1] |= (6 << 29) | 0x00004000;
621
622 emit(pc, e);
623 }
624
625 static void
626 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
627 {
628 struct nv50_program_exec *e = exec(pc);
629
630 e->inst[0] |= 0xb0000000;
631
632 set_dst(pc, dst, e);
633 set_src_0(pc, src, e);
634 set_long(pc, e);
635 e->inst[1] |= (6 << 29);
636
637 emit(pc, e);
638 }
639
640 static void
641 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
642 struct nv50_reg *src0, struct nv50_reg *src1)
643 {
644 struct nv50_program_exec *e = exec(pc);
645 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
646 struct nv50_reg *rdst;
647
648 assert(c_op <= 7);
649 if (check_swap_src_0_1(pc, &src0, &src1))
650 c_op = inv_cop[c_op];
651
652 rdst = dst;
653 if (dst->type != P_TEMP)
654 dst = alloc_temp(pc, NULL);
655
656 /* set.u32 */
657 set_long(pc, e);
658 e->inst[0] |= 0xb0000000;
659 e->inst[1] |= (3 << 29);
660 e->inst[1] |= (c_op << 14);
661 /*XXX: breaks things, .u32 by default?
662 * decuda will disasm as .u16 and use .lo/.hi regs, but this
663 * doesn't seem to match what the hw actually does.
664 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
665 */
666 set_dst(pc, dst, e);
667 set_src_0(pc, src0, e);
668 set_src_1(pc, src1, e);
669 emit(pc, e);
670
671 /* cvt.f32.u32 */
672 e = exec(pc);
673 e->inst[0] = 0xa0000001;
674 e->inst[1] = 0x64014780;
675 set_dst(pc, rdst, e);
676 set_src_0(pc, dst, e);
677 emit(pc, e);
678
679 if (dst != rdst)
680 free_temp(pc, dst);
681 }
682
683 static void
684 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
685 {
686 struct nv50_program_exec *e = exec(pc);
687
688 e->inst[0] = 0xa0000000; /* cvt */
689 set_long(pc, e);
690 e->inst[1] |= (6 << 29); /* cvt */
691 e->inst[1] |= 0x08000000; /* integer mode */
692 e->inst[1] |= 0x04000000; /* 32 bit */
693 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
694 e->inst[1] |= (1 << 14); /* src .f32 */
695 set_dst(pc, dst, e);
696 set_src_0(pc, src, e);
697
698 emit(pc, e);
699 }
700
/*
 * Emit "dst = v ^ e", computed as ex2(e * lg2(v)) through a scratch
 * register: flop 3 (the one the LG2 opcode uses), multiply by the
 * exponent, then the EX2 pre-step followed by flop 6.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);	/* acc = lg2(v) */
	emit_mul(pc, acc, acc, e);	/* acc *= e */
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);	/* dst = ex2(acc) */

	free_temp(pc, acc);
}
714
715 static void
716 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
717 {
718 struct nv50_program_exec *e = exec(pc);
719
720 e->inst[0] = 0xa0000000; /* cvt */
721 set_long(pc, e);
722 e->inst[1] |= (6 << 29); /* cvt */
723 e->inst[1] |= 0x04000000; /* 32 bit */
724 e->inst[1] |= (1 << 14); /* src .f32 */
725 e->inst[1] |= ((1 << 6) << 14); /* .abs */
726 set_dst(pc, dst, e);
727 set_src_0(pc, src, e);
728
729 emit(pc, e);
730 }
731
732 static void
733 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
734 struct nv50_reg **src)
735 {
736 struct nv50_reg *one = alloc_immd(pc, 1.0);
737 struct nv50_reg *zero = alloc_immd(pc, 0.0);
738 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
739 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
740 struct nv50_reg *tmp[4];
741
742 if (mask & (1 << 0))
743 emit_mov(pc, dst[0], one);
744
745 if (mask & (1 << 3))
746 emit_mov(pc, dst[3], one);
747
748 if (mask & (3 << 1)) {
749 if (mask & (1 << 1))
750 tmp[0] = dst[1];
751 else
752 tmp[0] = temp_temp(pc);
753 emit_minmax(pc, 4, tmp[0], src[0], zero);
754 }
755
756 if (mask & (1 << 2)) {
757 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
758
759 tmp[1] = temp_temp(pc);
760 emit_minmax(pc, 4, tmp[1], src[1], zero);
761
762 tmp[3] = temp_temp(pc);
763 emit_minmax(pc, 4, tmp[3], src[3], neg128);
764 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
765
766 emit_pow(pc, dst[2], tmp[1], tmp[3]);
767 emit_mov(pc, dst[2], zero);
768 set_pred(pc, 3, 0, pc->p->exec_tail);
769 }
770 }
771
772 static void
773 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
774 {
775 struct nv50_program_exec *e = exec(pc);
776
777 set_long(pc, e);
778 e->inst[0] |= 0xa0000000; /* delta */
779 e->inst[1] |= (7 << 29); /* delta */
780 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
781 e->inst[1] |= (1 << 14); /* src .f32 */
782 set_dst(pc, dst, e);
783 set_src_0(pc, src, e);
784
785 emit(pc, e);
786 }
787
788 static struct nv50_reg *
789 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
790 {
791 switch (dst->DstRegister.File) {
792 case TGSI_FILE_TEMPORARY:
793 return &pc->temp[dst->DstRegister.Index * 4 + c];
794 case TGSI_FILE_OUTPUT:
795 return &pc->result[dst->DstRegister.Index * 4 + c];
796 case TGSI_FILE_NULL:
797 return NULL;
798 default:
799 break;
800 }
801
802 return NULL;
803 }
804
805 static struct nv50_reg *
806 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
807 {
808 struct nv50_reg *r = NULL;
809 struct nv50_reg *temp;
810 unsigned c;
811
812 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
813 switch (c) {
814 case TGSI_EXTSWIZZLE_X:
815 case TGSI_EXTSWIZZLE_Y:
816 case TGSI_EXTSWIZZLE_Z:
817 case TGSI_EXTSWIZZLE_W:
818 switch (src->SrcRegister.File) {
819 case TGSI_FILE_INPUT:
820 r = &pc->attr[src->SrcRegister.Index * 4 + c];
821 break;
822 case TGSI_FILE_TEMPORARY:
823 r = &pc->temp[src->SrcRegister.Index * 4 + c];
824 break;
825 case TGSI_FILE_CONSTANT:
826 r = &pc->param[src->SrcRegister.Index * 4 + c];
827 break;
828 case TGSI_FILE_IMMEDIATE:
829 r = &pc->immd[src->SrcRegister.Index * 4 + c];
830 break;
831 default:
832 assert(0);
833 break;
834 }
835 break;
836 case TGSI_EXTSWIZZLE_ZERO:
837 r = alloc_immd(pc, 0.0);
838 break;
839 case TGSI_EXTSWIZZLE_ONE:
840 r = alloc_immd(pc, 1.0);
841 break;
842 default:
843 assert(0);
844 break;
845 }
846
847 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
848 case TGSI_UTIL_SIGN_KEEP:
849 break;
850 case TGSI_UTIL_SIGN_CLEAR:
851 temp = temp_temp(pc);
852 emit_abs(pc, temp, r);
853 r = temp;
854 break;
855 case TGSI_UTIL_SIGN_TOGGLE:
856 temp = temp_temp(pc);
857 emit_neg(pc, temp, r);
858 r = temp;
859 break;
860 case TGSI_UTIL_SIGN_SET:
861 temp = temp_temp(pc);
862 emit_abs(pc, temp, r);
863 emit_neg(pc, temp, r);
864 r = temp;
865 break;
866 default:
867 assert(0);
868 break;
869 }
870
871 return r;
872 }
873
874 static boolean
875 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
876 {
877 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
878 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
879 unsigned mask, sat;
880 int i, c;
881
882 NOUVEAU_ERR("insn %p\n", tok);
883
884 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
885 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
886
887 for (c = 0; c < 4; c++) {
888 if (mask & (1 << c))
889 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
890 else
891 dst[c] = NULL;
892 }
893
894 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
895 for (c = 0; c < 4; c++)
896 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
897 }
898
899 if (sat) {
900 for (c = 0; c < 4; c++) {
901 rdst[c] = dst[c];
902 dst[c] = temp_temp(pc);
903 }
904 }
905
906 switch (inst->Instruction.Opcode) {
907 case TGSI_OPCODE_ABS:
908 for (c = 0; c < 4; c++) {
909 if (!(mask & (1 << c)))
910 continue;
911 emit_abs(pc, dst[c], src[0][c]);
912 }
913 break;
914 case TGSI_OPCODE_ADD:
915 for (c = 0; c < 4; c++) {
916 if (!(mask & (1 << c)))
917 continue;
918 emit_add(pc, dst[c], src[0][c], src[1][c]);
919 }
920 break;
921 case TGSI_OPCODE_COS:
922 temp = alloc_temp(pc, NULL);
923 emit_precossin(pc, temp, src[0][0]);
924 emit_flop(pc, 5, temp, temp);
925 for (c = 0; c < 4; c++) {
926 if (!(mask & (1 << c)))
927 continue;
928 emit_mov(pc, dst[c], temp);
929 }
930 break;
931 case TGSI_OPCODE_DP3:
932 temp = alloc_temp(pc, NULL);
933 emit_mul(pc, temp, src[0][0], src[1][0]);
934 emit_mad(pc, temp, src[0][1], src[1][1], temp);
935 emit_mad(pc, temp, src[0][2], src[1][2], temp);
936 for (c = 0; c < 4; c++) {
937 if (!(mask & (1 << c)))
938 continue;
939 emit_mov(pc, dst[c], temp);
940 }
941 free_temp(pc, temp);
942 break;
943 case TGSI_OPCODE_DP4:
944 temp = alloc_temp(pc, NULL);
945 emit_mul(pc, temp, src[0][0], src[1][0]);
946 emit_mad(pc, temp, src[0][1], src[1][1], temp);
947 emit_mad(pc, temp, src[0][2], src[1][2], temp);
948 emit_mad(pc, temp, src[0][3], src[1][3], temp);
949 for (c = 0; c < 4; c++) {
950 if (!(mask & (1 << c)))
951 continue;
952 emit_mov(pc, dst[c], temp);
953 }
954 free_temp(pc, temp);
955 break;
956 case TGSI_OPCODE_DPH:
957 temp = alloc_temp(pc, NULL);
958 emit_mul(pc, temp, src[0][0], src[1][0]);
959 emit_mad(pc, temp, src[0][1], src[1][1], temp);
960 emit_mad(pc, temp, src[0][2], src[1][2], temp);
961 emit_add(pc, temp, src[1][3], temp);
962 for (c = 0; c < 4; c++) {
963 if (!(mask & (1 << c)))
964 continue;
965 emit_mov(pc, dst[c], temp);
966 }
967 free_temp(pc, temp);
968 break;
969 case TGSI_OPCODE_DST:
970 {
971 struct nv50_reg *one = alloc_immd(pc, 1.0);
972 if (mask & (1 << 0))
973 emit_mov(pc, dst[0], one);
974 if (mask & (1 << 1))
975 emit_mul(pc, dst[1], src[0][1], src[1][1]);
976 if (mask & (1 << 2))
977 emit_mov(pc, dst[2], src[0][2]);
978 if (mask & (1 << 3))
979 emit_mov(pc, dst[3], src[1][3]);
980 FREE(one);
981 }
982 break;
983 case TGSI_OPCODE_EX2:
984 temp = alloc_temp(pc, NULL);
985 emit_preex2(pc, temp, src[0][0]);
986 emit_flop(pc, 6, temp, temp);
987 for (c = 0; c < 4; c++) {
988 if (!(mask & (1 << c)))
989 continue;
990 emit_mov(pc, dst[c], temp);
991 }
992 free_temp(pc, temp);
993 break;
994 case TGSI_OPCODE_FLR:
995 for (c = 0; c < 4; c++) {
996 if (!(mask & (1 << c)))
997 continue;
998 emit_flr(pc, dst[c], src[0][c]);
999 }
1000 break;
1001 case TGSI_OPCODE_FRC:
1002 temp = alloc_temp(pc, NULL);
1003 for (c = 0; c < 4; c++) {
1004 if (!(mask & (1 << c)))
1005 continue;
1006 emit_flr(pc, temp, src[0][c]);
1007 emit_sub(pc, dst[c], src[0][c], temp);
1008 }
1009 free_temp(pc, temp);
1010 break;
1011 case TGSI_OPCODE_LIT:
1012 emit_lit(pc, &dst[0], mask, &src[0][0]);
1013 break;
1014 case TGSI_OPCODE_LG2:
1015 temp = alloc_temp(pc, NULL);
1016 emit_flop(pc, 3, temp, src[0][0]);
1017 for (c = 0; c < 4; c++) {
1018 if (!(mask & (1 << c)))
1019 continue;
1020 emit_mov(pc, dst[c], temp);
1021 }
1022 break;
1023 case TGSI_OPCODE_LRP:
1024 for (c = 0; c < 4; c++) {
1025 if (!(mask & (1 << c)))
1026 continue;
1027 /*XXX: we can do better than this */
1028 temp = alloc_temp(pc, NULL);
1029 emit_neg(pc, temp, src[0][c]);
1030 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1031 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1032 free_temp(pc, temp);
1033 }
1034 break;
1035 case TGSI_OPCODE_MAD:
1036 for (c = 0; c < 4; c++) {
1037 if (!(mask & (1 << c)))
1038 continue;
1039 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1040 }
1041 break;
1042 case TGSI_OPCODE_MAX:
1043 for (c = 0; c < 4; c++) {
1044 if (!(mask & (1 << c)))
1045 continue;
1046 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1047 }
1048 break;
1049 case TGSI_OPCODE_MIN:
1050 for (c = 0; c < 4; c++) {
1051 if (!(mask & (1 << c)))
1052 continue;
1053 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1054 }
1055 break;
1056 case TGSI_OPCODE_MOV:
1057 for (c = 0; c < 4; c++) {
1058 if (!(mask & (1 << c)))
1059 continue;
1060 emit_mov(pc, dst[c], src[0][c]);
1061 }
1062 break;
1063 case TGSI_OPCODE_MUL:
1064 for (c = 0; c < 4; c++) {
1065 if (!(mask & (1 << c)))
1066 continue;
1067 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1068 }
1069 break;
1070 case TGSI_OPCODE_POW:
1071 temp = alloc_temp(pc, NULL);
1072 emit_pow(pc, temp, src[0][0], src[1][0]);
1073 for (c = 0; c < 4; c++) {
1074 if (!(mask & (1 << c)))
1075 continue;
1076 emit_mov(pc, dst[c], temp);
1077 }
1078 free_temp(pc, temp);
1079 break;
1080 case TGSI_OPCODE_RCP:
1081 for (c = 0; c < 4; c++) {
1082 if (!(mask & (1 << c)))
1083 continue;
1084 emit_flop(pc, 0, dst[c], src[0][0]);
1085 }
1086 break;
1087 case TGSI_OPCODE_RSQ:
1088 for (c = 0; c < 4; c++) {
1089 if (!(mask & (1 << c)))
1090 continue;
1091 emit_flop(pc, 2, dst[c], src[0][0]);
1092 }
1093 break;
1094 case TGSI_OPCODE_SCS:
1095 temp = alloc_temp(pc, NULL);
1096 emit_precossin(pc, temp, src[0][0]);
1097 if (mask & (1 << 0))
1098 emit_flop(pc, 5, dst[0], temp);
1099 if (mask & (1 << 1))
1100 emit_flop(pc, 4, dst[1], temp);
1101 break;
1102 case TGSI_OPCODE_SGE:
1103 for (c = 0; c < 4; c++) {
1104 if (!(mask & (1 << c)))
1105 continue;
1106 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1107 }
1108 break;
1109 case TGSI_OPCODE_SIN:
1110 temp = alloc_temp(pc, NULL);
1111 emit_precossin(pc, temp, src[0][0]);
1112 emit_flop(pc, 4, temp, temp);
1113 for (c = 0; c < 4; c++) {
1114 if (!(mask & (1 << c)))
1115 continue;
1116 emit_mov(pc, dst[c], temp);
1117 }
1118 break;
1119 case TGSI_OPCODE_SLT:
1120 for (c = 0; c < 4; c++) {
1121 if (!(mask & (1 << c)))
1122 continue;
1123 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1124 }
1125 break;
1126 case TGSI_OPCODE_SUB:
1127 for (c = 0; c < 4; c++) {
1128 if (!(mask & (1 << c)))
1129 continue;
1130 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1131 }
1132 break;
1133 case TGSI_OPCODE_XPD:
1134 temp = alloc_temp(pc, NULL);
1135 if (mask & (1 << 0)) {
1136 emit_mul(pc, temp, src[0][2], src[1][1]);
1137 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1138 }
1139 if (mask & (1 << 1)) {
1140 emit_mul(pc, temp, src[0][0], src[1][2]);
1141 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1142 }
1143 if (mask & (1 << 2)) {
1144 emit_mul(pc, temp, src[0][1], src[1][0]);
1145 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1146 }
1147 free_temp(pc, temp);
1148 break;
1149 case TGSI_OPCODE_END:
1150 break;
1151 default:
1152 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1153 return FALSE;
1154 }
1155
1156 if (sat) {
1157 for (c = 0; c < 4; c++) {
1158 struct nv50_program_exec *e;
1159
1160 if (!(mask & (1 << c)))
1161 continue;
1162 e = exec(pc);
1163
1164 e->inst[0] = 0xa0000000; /* cvt */
1165 set_long(pc, e);
1166 e->inst[1] |= (6 << 29); /* cvt */
1167 e->inst[1] |= 0x04000000; /* 32 bit */
1168 e->inst[1] |= (1 << 14); /* src .f32 */
1169 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1170 set_dst(pc, rdst[c], e);
1171 set_src_0(pc, dst[c], e);
1172 emit(pc, e);
1173 }
1174 }
1175
1176 kill_temp_temp(pc);
1177 return TRUE;
1178 }
1179
1180 static boolean
1181 nv50_program_tx_prep(struct nv50_pc *pc)
1182 {
1183 struct tgsi_parse_context p;
1184 boolean ret = FALSE;
1185 unsigned i, c;
1186
1187 tgsi_parse_init(&p, pc->p->pipe.tokens);
1188 while (!tgsi_parse_end_of_tokens(&p)) {
1189 const union tgsi_full_token *tok = &p.FullToken;
1190
1191 tgsi_parse_token(&p);
1192 switch (tok->Token.Type) {
1193 case TGSI_TOKEN_TYPE_IMMEDIATE:
1194 {
1195 const struct tgsi_full_immediate *imm =
1196 &p.FullToken.FullImmediate;
1197
1198 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1199 imm->u.ImmediateFloat32[1].Float,
1200 imm->u.ImmediateFloat32[2].Float,
1201 imm->u.ImmediateFloat32[3].Float);
1202 }
1203 break;
1204 case TGSI_TOKEN_TYPE_DECLARATION:
1205 {
1206 const struct tgsi_full_declaration *d;
1207 unsigned last;
1208
1209 d = &p.FullToken.FullDeclaration;
1210 last = d->u.DeclarationRange.Last;
1211
1212 switch (d->Declaration.File) {
1213 case TGSI_FILE_TEMPORARY:
1214 if (pc->temp_nr < (last + 1))
1215 pc->temp_nr = last + 1;
1216 break;
1217 case TGSI_FILE_OUTPUT:
1218 if (pc->result_nr < (last + 1))
1219 pc->result_nr = last + 1;
1220 break;
1221 case TGSI_FILE_INPUT:
1222 if (pc->attr_nr < (last + 1))
1223 pc->attr_nr = last + 1;
1224 break;
1225 case TGSI_FILE_CONSTANT:
1226 if (pc->param_nr < (last + 1))
1227 pc->param_nr = last + 1;
1228 break;
1229 default:
1230 NOUVEAU_ERR("bad decl file %d\n",
1231 d->Declaration.File);
1232 goto out_err;
1233 }
1234 }
1235 break;
1236 case TGSI_TOKEN_TYPE_INSTRUCTION:
1237 break;
1238 default:
1239 break;
1240 }
1241 }
1242
1243 NOUVEAU_ERR("%d temps\n", pc->temp_nr);
1244 if (pc->temp_nr) {
1245 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg));
1246 if (!pc->temp)
1247 goto out_err;
1248
1249 for (i = 0; i < pc->temp_nr; i++) {
1250 for (c = 0; c < 4; c++) {
1251 pc->temp[i*4+c].type = P_TEMP;
1252 pc->temp[i*4+c].hw = -1;
1253 pc->temp[i*4+c].index = i;
1254 }
1255 }
1256 }
1257
1258 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
1259 if (pc->attr_nr) {
1260 struct nv50_reg *iv = NULL, *tmp = NULL;
1261 int aid = 0;
1262
1263 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
1264 if (!pc->attr)
1265 goto out_err;
1266
1267 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1268 iv = alloc_temp(pc, NULL);
1269 aid++;
1270 }
1271
1272 for (i = 0; i < pc->attr_nr; i++) {
1273 struct nv50_reg *a = &pc->attr[i*4];
1274
1275 for (c = 0; c < 4; c++) {
1276 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1277 struct nv50_reg *at =
1278 alloc_temp(pc, NULL);
1279 pc->attr[i*4+c].type = at->type;
1280 pc->attr[i*4+c].hw = at->hw;
1281 pc->attr[i*4+c].index = at->index;
1282 } else {
1283 pc->p->cfg.vp.attr[aid/32] |=
1284 (1 << (aid % 32));
1285 pc->attr[i*4+c].type = P_ATTR;
1286 pc->attr[i*4+c].hw = aid++;
1287 pc->attr[i*4+c].index = i;
1288 }
1289 }
1290
1291 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1292 continue;
1293
1294 emit_interp(pc, iv, iv, iv, FALSE);
1295 tmp = alloc_temp(pc, NULL);
1296 emit_flop(pc, 0, tmp, iv);
1297 emit_interp(pc, &a[0], &a[0], tmp, TRUE);
1298 emit_interp(pc, &a[1], &a[1], tmp, TRUE);
1299 emit_interp(pc, &a[2], &a[2], tmp, TRUE);
1300 emit_interp(pc, &a[3], &a[3], tmp, TRUE);
1301 free_temp(pc, tmp);
1302 }
1303
1304 if (iv)
1305 free_temp(pc, iv);
1306 }
1307
1308 NOUVEAU_ERR("%d result regs\n", pc->result_nr);
1309 if (pc->result_nr) {
1310 int rid = 0;
1311
1312 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg));
1313 if (!pc->result)
1314 goto out_err;
1315
1316 for (i = 0; i < pc->result_nr; i++) {
1317 for (c = 0; c < 4; c++) {
1318 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1319 pc->result[i*4+c].type = P_TEMP;
1320 pc->result[i*4+c].hw = -1;
1321 } else {
1322 pc->result[i*4+c].type = P_RESULT;
1323 pc->result[i*4+c].hw = rid++;
1324 }
1325 pc->result[i*4+c].index = i;
1326 }
1327 }
1328 }
1329
1330 NOUVEAU_ERR("%d param regs\n", pc->param_nr);
1331 if (pc->param_nr) {
1332 int rid = 0;
1333
1334 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg));
1335 if (!pc->param)
1336 goto out_err;
1337
1338 for (i = 0; i < pc->param_nr; i++) {
1339 for (c = 0; c < 4; c++) {
1340 pc->param[i*4+c].type = P_CONST;
1341 pc->param[i*4+c].hw = rid++;
1342 pc->param[i*4+c].index = i;
1343 }
1344 }
1345 }
1346
1347 if (pc->immd_nr) {
1348 int rid = pc->param_nr * 4;
1349
1350 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg));
1351 if (!pc->immd)
1352 goto out_err;
1353
1354 for (i = 0; i < pc->immd_nr; i++) {
1355 for (c = 0; c < 4; c++) {
1356 pc->immd[i*4+c].type = P_IMMD;
1357 pc->immd[i*4+c].hw = rid++;
1358 pc->immd[i*4+c].index = i;
1359 }
1360 }
1361 }
1362
1363 ret = TRUE;
1364 out_err:
1365 tgsi_parse_free(&p);
1366 return ret;
1367 }
1368
1369 static boolean
1370 nv50_program_tx(struct nv50_program *p)
1371 {
1372 struct tgsi_parse_context parse;
1373 struct nv50_pc *pc;
1374 boolean ret;
1375
1376 pc = CALLOC_STRUCT(nv50_pc);
1377 if (!pc)
1378 return FALSE;
1379 pc->p = p;
1380 pc->p->cfg.high_temp = 4;
1381
1382 ret = nv50_program_tx_prep(pc);
1383 if (ret == FALSE)
1384 goto out_cleanup;
1385
1386 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1387 while (!tgsi_parse_end_of_tokens(&parse)) {
1388 const union tgsi_full_token *tok = &parse.FullToken;
1389
1390 tgsi_parse_token(&parse);
1391
1392 switch (tok->Token.Type) {
1393 case TGSI_TOKEN_TYPE_INSTRUCTION:
1394 ret = nv50_program_tx_insn(pc, tok);
1395 if (ret == FALSE)
1396 goto out_err;
1397 break;
1398 default:
1399 break;
1400 }
1401 }
1402
1403 if (p->type == PIPE_SHADER_FRAGMENT) {
1404 struct nv50_reg out;
1405
1406 out.type = P_TEMP;
1407 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1408 emit_mov(pc, &out, &pc->result[out.hw]);
1409 }
1410
1411 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1412 pc->p->exec_tail->inst[1] |= 0x00000001;
1413
1414 p->param_nr = pc->param_nr * 4;
1415 p->immd_nr = pc->immd_nr * 4;
1416 p->immd = pc->immd_buf;
1417
1418 out_err:
1419 tgsi_parse_free(&parse);
1420
1421 out_cleanup:
1422 return ret;
1423 }
1424
1425 static void
1426 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1427 {
1428 if (nv50_program_tx(p) == FALSE)
1429 assert(0);
1430 p->translated = TRUE;
1431 }
1432
1433 static void
1434 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1435 unsigned start, unsigned count)
1436 {
1437 while (count) {
1438 unsigned nr = count > 2047 ? 2047 : count;
1439
1440 BEGIN_RING(tesla, 0x00000f00, 1);
1441 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
1442 BEGIN_RING(tesla, 0x40000f04, nr);
1443 OUT_RINGp (map, nr);
1444
1445 map += nr;
1446 start += nr;
1447 count -= nr;
1448 }
1449 }
1450
1451 static void
1452 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1453 {
1454 struct nouveau_winsys *nvws = nv50->screen->nvws;
1455 struct pipe_winsys *ws = nv50->pipe.winsys;
1456 unsigned nr = p->param_nr + p->immd_nr;
1457
1458 if (!p->data && nr) {
1459 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1460
1461 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1462 while (heap->next && heap->size < nr) {
1463 struct nv50_program *evict = heap->next->priv;
1464 nvws->res_free(&evict->data);
1465 }
1466
1467 if (nvws->res_alloc(heap, nr, p, &p->data))
1468 assert(0);
1469 }
1470 }
1471
1472 if (p->param_nr) {
1473 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1474 PIPE_BUFFER_USAGE_CPU_READ);
1475 nv50_program_upload_data(nv50, map, p->data->start,
1476 p->param_nr);
1477 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1478 }
1479
1480 if (p->immd_nr) {
1481 nv50_program_upload_data(nv50, p->immd,
1482 p->data->start + p->param_nr,
1483 p->immd_nr);
1484 }
1485 }
1486
1487 static void
1488 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1489 {
1490 struct pipe_winsys *ws = nv50->pipe.winsys;
1491 struct nv50_program_exec *e;
1492 boolean upload = FALSE;
1493 unsigned *map;
1494
1495 if (!p->buffer) {
1496 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1497 upload = TRUE;
1498 }
1499
1500 if (p->data && p->data->start != p->data_start) {
1501 for (e = p->exec_head; e; e = e->next) {
1502 unsigned ei, ci;
1503
1504 if (e->param.index < 0)
1505 continue;
1506 ei = e->param.shift >> 5;
1507 ci = e->param.index + p->data->start;
1508
1509 e->inst[ei] &= ~e->param.mask;
1510 e->inst[ei] |= (ci << e->param.shift);
1511 }
1512
1513 p->data_start = p->data->start;
1514 upload = TRUE;
1515 }
1516
1517 if (!upload)
1518 return FALSE;
1519
1520 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
1521 for (e = p->exec_head; e; e = e->next) {
1522 #ifdef NV50_PROGRAM_DUMP
1523 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1524 #endif
1525 *(map++) = e->inst[0];
1526 if (is_long(e)) {
1527 #ifdef NV50_PROGRAM_DUMP
1528 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1529 #endif
1530 *(map++) = e->inst[1];
1531 }
1532 }
1533 ws->buffer_unmap(ws, p->buffer);
1534 }
1535
1536 void
1537 nv50_vertprog_validate(struct nv50_context *nv50)
1538 {
1539 struct nouveau_grobj *tesla = nv50->screen->tesla;
1540 struct nv50_program *p = nv50->vertprog;
1541 struct nouveau_stateobj *so;
1542
1543 if (!p->translated) {
1544 nv50_program_validate(nv50, p);
1545 if (!p->translated)
1546 assert(0);
1547 }
1548
1549 nv50_program_validate_data(nv50, p);
1550 nv50_program_validate_code(nv50, p);
1551
1552 so = so_new(11, 2);
1553 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1554 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1555 NOUVEAU_BO_HIGH, 0, 0);
1556 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1557 NOUVEAU_BO_LOW, 0, 0);
1558 so_method(so, tesla, 0x1650, 2);
1559 so_data (so, p->cfg.vp.attr[0]);
1560 so_data (so, p->cfg.vp.attr[1]);
1561 so_method(so, tesla, 0x16ac, 2);
1562 so_data (so, 8);
1563 so_data (so, p->cfg.high_temp);
1564 so_method(so, tesla, 0x140c, 1);
1565 so_data (so, 0); /* program start offset */
1566 so_emit(nv50->screen->nvws, so);
1567 so_ref(NULL, &so);
1568 }
1569
1570 void
1571 nv50_fragprog_validate(struct nv50_context *nv50)
1572 {
1573 struct nouveau_grobj *tesla = nv50->screen->tesla;
1574 struct nv50_program *p = nv50->fragprog;
1575 struct nouveau_stateobj *so;
1576
1577 if (!p->translated) {
1578 nv50_program_validate(nv50, p);
1579 if (!p->translated)
1580 assert(0);
1581 }
1582
1583 nv50_program_validate_data(nv50, p);
1584 nv50_program_validate_code(nv50, p);
1585
1586 so = so_new(64, 2);
1587 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1588 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1589 NOUVEAU_BO_HIGH, 0, 0);
1590 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1591 NOUVEAU_BO_LOW, 0, 0);
1592 so_method(so, tesla, 0x1904, 4);
1593 so_data (so, 0x01040404); /* p: 0x01000404 */
1594 so_data (so, 0x00000004);
1595 so_data (so, 0x00000000);
1596 so_data (so, 0x00000000);
1597 so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */
1598 so_data (so, 0x03020100);
1599 so_data (so, 0x07060504);
1600 so_method(so, tesla, 0x1988, 2);
1601 so_data (so, 0x08040404); /* p: 0x0f000401 */
1602 so_data (so, p->cfg.high_temp);
1603 so_method(so, tesla, 0x1414, 1);
1604 so_data (so, 0); /* program start offset */
1605 so_emit(nv50->screen->nvws, so);
1606 so_ref(NULL, &so);
1607 }
1608
1609 void
1610 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1611 {
1612 struct pipe_winsys *ws = nv50->pipe.winsys;
1613
1614 while (p->exec_head) {
1615 struct nv50_program_exec *e = p->exec_head;
1616
1617 p->exec_head = e->next;
1618 FREE(e);
1619 }
1620 p->exec_tail = NULL;
1621 p->exec_size = 0;
1622
1623 if (p->buffer)
1624 pipe_buffer_reference(ws, &p->buffer, NULL);
1625
1626 p->translated = 0;
1627 }
1628