nv50: inspect decl semantic and interpolation mode
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "pipe/p_inlines.h"
27
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
31
32 #include "nv50_context.h"
33
34 #define NV50_SU_MAX_TEMP 64
35 //#define NV50_PROGRAM_DUMP
36
/* Known issues and TODO items:
 *
 * ARL - gallium fails on progs/vp/arl.txt
 *
 * MSB - Like MAD, but MUL+SUB
 *     - Should be removed in favour of a general way to negate arguments
 *       for the ops that support it.
 *
 * Look into inlining IMMD for ops other than MOV (make it general?)
 *     - Maybe even relax restrictions a bit: P_RESULT + P_IMMD cannot be
 *       done directly, but we can emit to P_TEMP first and then MOV later.
 *       NVIDIA does this.
 *
 * In ops such as ADD it is possible to construct a bad opcode in the
 * !is_long() case, if emit_src() causes the instruction to suddenly become
 * long.
 *
 * Verify that half-instructions work where expected, and force-disable them
 * where they do not - MUL has them forcibly disabled at the moment because
 * that fixes POW.
 *
 * WARNING: watch out for dst == src vectors, components that are still
 * needed can be overwritten.
 *     ie. SUB R0, R0.yzxw, R0
 *
 * Things to check with renouveau:
 *     FP attr/result assignment - how?
 *         attrib
 *             - 0x16bc maps vp output onto fp hpos
 *             - 0x16c0 maps vp output onto fp col0
 *         result
 *             - colr always 0-3
 *             - depr always 4
 *     0x16bc->0x16e8 --> some binding between vp/fp regs
 *     0x16b8 --> VP output count
 *
 *     0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
 *                "MOV rcol.x, fcol.y" = 0x00000004
 *     0x19a8 --> as above but 0x00000100 and 0x00000000
 *         - 0x00100000 used when KIL used
 *     0x196c --> as above but 0x00000011 and 0x00000000
 *
 *     0x1988 --> 0xXXNNNNNN
 *         - XX == FP high something
 */
/* A single scalar register operand tracked by the translator. */
struct nv50_reg {
	/* Register file the operand lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* The allocators in this file set this to -1 for registers they
	 * create themselves; free_temp() only releases such registers.
	 */
	int index;

	/* Hardware slot. alloc_reg() treats a negative value on a P_TEMP as
	 * "no slot assigned yet".
	 */
	int hw;

	/* NOTE(review): not referenced by the code visible here. */
	int neg;

	int acc; /* instruction where this reg is last read (first insn == 1) */
};
91
92 struct nv50_pc {
93 struct nv50_program *p;
94
95 /* hw resources */
96 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
97
98 /* tgsi resources */
99 struct nv50_reg *temp;
100 int temp_nr;
101 struct nv50_reg *attr;
102 int attr_nr;
103 struct nv50_reg *result;
104 int result_nr;
105 struct nv50_reg *param;
106 int param_nr;
107 struct nv50_reg *immd;
108 float *immd_buf;
109 int immd_nr;
110
111 struct nv50_reg *temp_temp[16];
112 unsigned temp_temp_nr;
113
114 unsigned interp_mode[32];
115
116 /* current instruction and total number of insns */
117 unsigned insn_cur;
118 unsigned insn_nr;
119 };
120
121 static void
122 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
123 {
124 int i;
125
126 if (reg->type == P_RESULT) {
127 if (pc->p->cfg.high_result < (reg->hw + 1))
128 pc->p->cfg.high_result = reg->hw + 1;
129 }
130
131 if (reg->type != P_TEMP)
132 return;
133
134 if (reg->hw >= 0) {
135 /*XXX: do this here too to catch FP temp-as-attr usage..
136 * not clean, but works */
137 if (pc->p->cfg.high_temp < (reg->hw + 1))
138 pc->p->cfg.high_temp = reg->hw + 1;
139 return;
140 }
141
142 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
143 if (!(pc->r_temp[i])) {
144 pc->r_temp[i] = reg;
145 reg->hw = i;
146 if (pc->p->cfg.high_temp < (i + 1))
147 pc->p->cfg.high_temp = i + 1;
148 return;
149 }
150 }
151
152 assert(0);
153 }
154
155 static struct nv50_reg *
156 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
157 {
158 struct nv50_reg *r;
159 int i;
160
161 if (dst && dst->type == P_TEMP && dst->hw == -1)
162 return dst;
163
164 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
165 if (!pc->r_temp[i]) {
166 r = CALLOC_STRUCT(nv50_reg);
167 r->type = P_TEMP;
168 r->index = -1;
169 r->hw = i;
170 pc->r_temp[i] = r;
171 return r;
172 }
173 }
174
175 assert(0);
176 return NULL;
177 }
178
179 static void
180 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
181 {
182 if (r->index == -1) {
183 unsigned hw = r->hw;
184
185 FREE(pc->r_temp[hw]);
186 pc->r_temp[hw] = NULL;
187 }
188 }
189
190 static int
191 alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
192 {
193 int i;
194
195 if ((idx + 4) >= NV50_SU_MAX_TEMP)
196 return 1;
197
198 if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
199 pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
200 return alloc_temp4(pc, dst, idx + 1);
201
202 for (i = 0; i < 4; i++) {
203 dst[i] = CALLOC_STRUCT(nv50_reg);
204 dst[i]->type = P_TEMP;
205 dst[i]->index = -1;
206 dst[i]->hw = idx + i;
207 pc->r_temp[idx + i] = dst[i];
208 }
209
210 return 0;
211 }
212
/* Release the four registers of a group, one free_temp() call each. */
static void
free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
{
	int c = 0;

	while (c < 4) {
		free_temp(pc, reg[c]);
		c++;
	}
}
221
222 static struct nv50_reg *
223 temp_temp(struct nv50_pc *pc)
224 {
225 if (pc->temp_temp_nr >= 16)
226 assert(0);
227
228 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
229 return pc->temp_temp[pc->temp_temp_nr++];
230 }
231
232 static void
233 kill_temp_temp(struct nv50_pc *pc)
234 {
235 int i;
236
237 for (i = 0; i < pc->temp_temp_nr; i++)
238 free_temp(pc, pc->temp_temp[i]);
239 pc->temp_temp_nr = 0;
240 }
241
242 static int
243 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
244 {
245 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
246 (pc->immd_nr + 1) * 4 * sizeof(float));
247 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
248 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
249 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
250 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
251
252 return pc->immd_nr++;
253 }
254
255 static struct nv50_reg *
256 alloc_immd(struct nv50_pc *pc, float f)
257 {
258 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
259 unsigned hw;
260
261 for (hw = 0; hw < pc->immd_nr * 4; hw++)
262 if (pc->immd_buf[hw] == f)
263 break;
264
265 if (hw == pc->immd_nr * 4)
266 hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
267
268 r->type = P_IMMD;
269 r->hw = hw;
270 r->index = -1;
271 return r;
272 }
273
274 static struct nv50_program_exec *
275 exec(struct nv50_pc *pc)
276 {
277 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
278
279 e->param.index = -1;
280 return e;
281 }
282
283 static void
284 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
285 {
286 struct nv50_program *p = pc->p;
287
288 if (p->exec_tail)
289 p->exec_tail->next = e;
290 if (!p->exec_head)
291 p->exec_head = e;
292 p->exec_tail = e;
293 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
294 }
295
296 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
297
298 static boolean
299 is_long(struct nv50_program_exec *e)
300 {
301 if (e->inst[0] & 1)
302 return TRUE;
303 return FALSE;
304 }
305
306 static boolean
307 is_immd(struct nv50_program_exec *e)
308 {
309 if (is_long(e) && (e->inst[1] & 3) == 3)
310 return TRUE;
311 return FALSE;
312 }
313
314 static INLINE void
315 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
316 struct nv50_program_exec *e)
317 {
318 set_long(pc, e);
319 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
320 e->inst[1] |= (pred << 7) | (idx << 12);
321 }
322
323 static INLINE void
324 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
325 struct nv50_program_exec *e)
326 {
327 set_long(pc, e);
328 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
329 e->inst[1] |= (idx << 4) | (on << 6);
330 }
331
332 static INLINE void
333 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
334 {
335 if (is_long(e))
336 return;
337
338 e->inst[0] |= 1;
339 set_pred(pc, 0xf, 0, e);
340 set_pred_wr(pc, 0, 0, e);
341 }
342
343 static INLINE void
344 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
345 {
346 if (dst->type == P_RESULT) {
347 set_long(pc, e);
348 e->inst[1] |= 0x00000008;
349 }
350
351 alloc_reg(pc, dst);
352 e->inst[0] |= (dst->hw << 2);
353 }
354
355 static INLINE void
356 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
357 {
358 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
359
360 set_long(pc, e);
361 /*XXX: can't be predicated - bits overlap.. catch cases where both
362 * are required and avoid them. */
363 set_pred(pc, 0, 0, e);
364 set_pred_wr(pc, 0, 0, e);
365
366 e->inst[1] |= 0x00000002 | 0x00000001;
367 e->inst[0] |= (val & 0x3f) << 16;
368 e->inst[1] |= (val >> 6) << 2;
369 }
370
371
372 #define INTERP_LINEAR 0
373 #define INTERP_FLAT 1
374 #define INTERP_PERSPECTIVE 2
375 #define INTERP_CENTROID 4
376
377 static void
378 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
379 struct nv50_reg *src, struct nv50_reg *iv)
380 {
381 struct nv50_program_exec *e = exec(pc);
382
383 e->inst[0] |= 0x80000000;
384 set_dst(pc, dst, e);
385 alloc_reg(pc, src);
386 e->inst[0] |= (src->hw << 16);
387 if (iv) {
388 e->inst[0] |= (1 << 25);
389 alloc_reg(pc, iv);
390 e->inst[0] |= (iv->hw << 9);
391 }
392
393 emit(pc, e);
394 }
395
396 static void
397 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
398 struct nv50_program_exec *e)
399 {
400 set_long(pc, e);
401 #if 1
402 e->inst[1] |= (1 << 22);
403 #else
404 if (src->type == P_IMMD) {
405 e->inst[1] |= (NV50_CB_PMISC << 22);
406 } else {
407 if (pc->p->type == PIPE_SHADER_VERTEX)
408 e->inst[1] |= (NV50_CB_PVP << 22);
409 else
410 e->inst[1] |= (NV50_CB_PFP << 22);
411 }
412 #endif
413
414 e->param.index = src->hw;
415 e->param.shift = s;
416 e->param.mask = m << (s % 32);
417 }
418
419 static void
420 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
421 {
422 struct nv50_program_exec *e = exec(pc);
423
424 e->inst[0] |= 0x10000000;
425
426 set_dst(pc, dst, e);
427
428 if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
429 set_immd(pc, src, e);
430 /*XXX: 32-bit, but steals part of "half" reg space - need to
431 * catch and handle this case if/when we do half-regs
432 */
433 e->inst[0] |= 0x00008000;
434 } else
435 if (src->type == P_IMMD || src->type == P_CONST) {
436 set_long(pc, e);
437 set_data(pc, src, 0x7f, 9, e);
438 e->inst[1] |= 0x20000000; /* src0 const? */
439 } else {
440 if (src->type == P_ATTR) {
441 set_long(pc, e);
442 e->inst[1] |= 0x00200000;
443 }
444
445 alloc_reg(pc, src);
446 e->inst[0] |= (src->hw << 9);
447 }
448
449 /* We really should support "half" instructions here at some point,
450 * but I don't feel confident enough about them yet.
451 */
452 set_long(pc, e);
453 if (is_long(e) && !is_immd(e)) {
454 e->inst[1] |= 0x04000000; /* 32-bit */
455 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
456 }
457
458 emit(pc, e);
459 }
460
461 static INLINE void
462 emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
463 {
464 struct nv50_reg *imm = alloc_immd(pc, f);
465 emit_mov(pc, dst, imm);
466 FREE(imm);
467 }
468
469 static boolean
470 check_swap_src_0_1(struct nv50_pc *pc,
471 struct nv50_reg **s0, struct nv50_reg **s1)
472 {
473 struct nv50_reg *src0 = *s0, *src1 = *s1;
474
475 if (src0->type == P_CONST) {
476 if (src1->type != P_CONST) {
477 *s0 = src1;
478 *s1 = src0;
479 return TRUE;
480 }
481 } else
482 if (src1->type == P_ATTR) {
483 if (src0->type != P_ATTR) {
484 *s0 = src1;
485 *s1 = src0;
486 return TRUE;
487 }
488 }
489
490 return FALSE;
491 }
492
493 static void
494 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
495 {
496 if (src->type == P_ATTR) {
497 set_long(pc, e);
498 e->inst[1] |= 0x00200000;
499 } else
500 if (src->type == P_CONST || src->type == P_IMMD) {
501 struct nv50_reg *temp = temp_temp(pc);
502
503 emit_mov(pc, temp, src);
504 src = temp;
505 }
506
507 alloc_reg(pc, src);
508 e->inst[0] |= (src->hw << 9);
509 }
510
511 static void
512 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
513 {
514 if (src->type == P_ATTR) {
515 struct nv50_reg *temp = temp_temp(pc);
516
517 emit_mov(pc, temp, src);
518 src = temp;
519 } else
520 if (src->type == P_CONST || src->type == P_IMMD) {
521 assert(!(e->inst[0] & 0x00800000));
522 if (e->inst[0] & 0x01000000) {
523 struct nv50_reg *temp = temp_temp(pc);
524
525 emit_mov(pc, temp, src);
526 src = temp;
527 } else {
528 set_data(pc, src, 0x7f, 16, e);
529 e->inst[0] |= 0x00800000;
530 }
531 }
532
533 alloc_reg(pc, src);
534 e->inst[0] |= (src->hw << 16);
535 }
536
537 static void
538 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
539 {
540 set_long(pc, e);
541
542 if (src->type == P_ATTR) {
543 struct nv50_reg *temp = temp_temp(pc);
544
545 emit_mov(pc, temp, src);
546 src = temp;
547 } else
548 if (src->type == P_CONST || src->type == P_IMMD) {
549 assert(!(e->inst[0] & 0x01000000));
550 if (e->inst[0] & 0x00800000) {
551 struct nv50_reg *temp = temp_temp(pc);
552
553 emit_mov(pc, temp, src);
554 src = temp;
555 } else {
556 set_data(pc, src, 0x7f, 32+14, e);
557 e->inst[0] |= 0x01000000;
558 }
559 }
560
561 alloc_reg(pc, src);
562 e->inst[1] |= (src->hw << 14);
563 }
564
565 static void
566 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
567 struct nv50_reg *src1)
568 {
569 struct nv50_program_exec *e = exec(pc);
570
571 e->inst[0] |= 0xc0000000;
572 set_long(pc, e);
573
574 check_swap_src_0_1(pc, &src0, &src1);
575 set_dst(pc, dst, e);
576 set_src_0(pc, src0, e);
577 set_src_1(pc, src1, e);
578
579 emit(pc, e);
580 }
581
582 static void
583 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
584 struct nv50_reg *src0, struct nv50_reg *src1)
585 {
586 struct nv50_program_exec *e = exec(pc);
587
588 e->inst[0] |= 0xb0000000;
589
590 check_swap_src_0_1(pc, &src0, &src1);
591 set_dst(pc, dst, e);
592 set_src_0(pc, src0, e);
593 if (is_long(e))
594 set_src_2(pc, src1, e);
595 else
596 set_src_1(pc, src1, e);
597
598 emit(pc, e);
599 }
600
601 static void
602 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
603 struct nv50_reg *src0, struct nv50_reg *src1)
604 {
605 struct nv50_program_exec *e = exec(pc);
606
607 set_long(pc, e);
608 e->inst[0] |= 0xb0000000;
609 e->inst[1] |= (sub << 29);
610
611 check_swap_src_0_1(pc, &src0, &src1);
612 set_dst(pc, dst, e);
613 set_src_0(pc, src0, e);
614 set_src_1(pc, src1, e);
615
616 emit(pc, e);
617 }
618
619 static void
620 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
621 struct nv50_reg *src1)
622 {
623 struct nv50_program_exec *e = exec(pc);
624
625 e->inst[0] |= 0xb0000000;
626
627 set_long(pc, e);
628 if (check_swap_src_0_1(pc, &src0, &src1))
629 e->inst[1] |= 0x04000000;
630 else
631 e->inst[1] |= 0x08000000;
632
633 set_dst(pc, dst, e);
634 set_src_0(pc, src0, e);
635 set_src_2(pc, src1, e);
636
637 emit(pc, e);
638 }
639
640 static void
641 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
642 struct nv50_reg *src1, struct nv50_reg *src2)
643 {
644 struct nv50_program_exec *e = exec(pc);
645
646 e->inst[0] |= 0xe0000000;
647
648 check_swap_src_0_1(pc, &src0, &src1);
649 set_dst(pc, dst, e);
650 set_src_0(pc, src0, e);
651 set_src_1(pc, src1, e);
652 set_src_2(pc, src2, e);
653
654 emit(pc, e);
655 }
656
657 static void
658 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
659 struct nv50_reg *src1, struct nv50_reg *src2)
660 {
661 struct nv50_program_exec *e = exec(pc);
662
663 e->inst[0] |= 0xe0000000;
664 set_long(pc, e);
665 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
666
667 check_swap_src_0_1(pc, &src0, &src1);
668 set_dst(pc, dst, e);
669 set_src_0(pc, src0, e);
670 set_src_1(pc, src1, e);
671 set_src_2(pc, src2, e);
672
673 emit(pc, e);
674 }
675
676 static void
677 emit_flop(struct nv50_pc *pc, unsigned sub,
678 struct nv50_reg *dst, struct nv50_reg *src)
679 {
680 struct nv50_program_exec *e = exec(pc);
681
682 e->inst[0] |= 0x90000000;
683 if (sub) {
684 set_long(pc, e);
685 e->inst[1] |= (sub << 29);
686 }
687
688 set_dst(pc, dst, e);
689 set_src_0(pc, src, e);
690
691 emit(pc, e);
692 }
693
694 static void
695 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
696 {
697 struct nv50_program_exec *e = exec(pc);
698
699 e->inst[0] |= 0xb0000000;
700
701 set_dst(pc, dst, e);
702 set_src_0(pc, src, e);
703 set_long(pc, e);
704 e->inst[1] |= (6 << 29) | 0x00004000;
705
706 emit(pc, e);
707 }
708
709 static void
710 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
711 {
712 struct nv50_program_exec *e = exec(pc);
713
714 e->inst[0] |= 0xb0000000;
715
716 set_dst(pc, dst, e);
717 set_src_0(pc, src, e);
718 set_long(pc, e);
719 e->inst[1] |= (6 << 29);
720
721 emit(pc, e);
722 }
723
724 static void
725 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
726 struct nv50_reg *src0, struct nv50_reg *src1)
727 {
728 struct nv50_program_exec *e = exec(pc);
729 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
730 struct nv50_reg *rdst;
731
732 assert(c_op <= 7);
733 if (check_swap_src_0_1(pc, &src0, &src1))
734 c_op = inv_cop[c_op];
735
736 rdst = dst;
737 if (dst->type != P_TEMP)
738 dst = alloc_temp(pc, NULL);
739
740 /* set.u32 */
741 set_long(pc, e);
742 e->inst[0] |= 0xb0000000;
743 e->inst[1] |= (3 << 29);
744 e->inst[1] |= (c_op << 14);
745 /*XXX: breaks things, .u32 by default?
746 * decuda will disasm as .u16 and use .lo/.hi regs, but this
747 * doesn't seem to match what the hw actually does.
748 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
749 */
750 set_dst(pc, dst, e);
751 set_src_0(pc, src0, e);
752 set_src_1(pc, src1, e);
753 emit(pc, e);
754
755 /* cvt.f32.u32 */
756 e = exec(pc);
757 e->inst[0] = 0xa0000001;
758 e->inst[1] = 0x64014780;
759 set_dst(pc, rdst, e);
760 set_src_0(pc, dst, e);
761 emit(pc, e);
762
763 if (dst != rdst)
764 free_temp(pc, dst);
765 }
766
767 static void
768 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
769 {
770 struct nv50_program_exec *e = exec(pc);
771
772 e->inst[0] = 0xa0000000; /* cvt */
773 set_long(pc, e);
774 e->inst[1] |= (6 << 29); /* cvt */
775 e->inst[1] |= 0x08000000; /* integer mode */
776 e->inst[1] |= 0x04000000; /* 32 bit */
777 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
778 e->inst[1] |= (1 << 14); /* src .f32 */
779 set_dst(pc, dst, e);
780 set_src_0(pc, src, e);
781
782 emit(pc, e);
783 }
784
/* Emit "dst = pow(v, e)" as the sequence
 *     t = flop3(v);  t = t * e;  t = preex2(t);  dst = flop6(t)
 * where flop 3 / flop 6 are the sub-ops this file also uses for LG2 / EX2.
 * The intermediate temp is released before returning.
 */
static void
emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
	 struct nv50_reg *v, struct nv50_reg *e)
{
	struct nv50_reg *acc = alloc_temp(pc, NULL);

	emit_flop(pc, 3, acc, v);
	emit_mul(pc, acc, acc, e);
	emit_preex2(pc, acc, acc);
	emit_flop(pc, 6, dst, acc);

	free_temp(pc, acc);
}
798
799 static void
800 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
801 {
802 struct nv50_program_exec *e = exec(pc);
803
804 e->inst[0] = 0xa0000000; /* cvt */
805 set_long(pc, e);
806 e->inst[1] |= (6 << 29); /* cvt */
807 e->inst[1] |= 0x04000000; /* 32 bit */
808 e->inst[1] |= (1 << 14); /* src .f32 */
809 e->inst[1] |= ((1 << 6) << 14); /* .abs */
810 set_dst(pc, dst, e);
811 set_src_0(pc, src, e);
812
813 emit(pc, e);
814 }
815
816 static void
817 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
818 struct nv50_reg **src)
819 {
820 struct nv50_reg *one = alloc_immd(pc, 1.0);
821 struct nv50_reg *zero = alloc_immd(pc, 0.0);
822 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
823 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
824 struct nv50_reg *tmp[4];
825
826 if (mask & (1 << 0))
827 emit_mov(pc, dst[0], one);
828
829 if (mask & (1 << 3))
830 emit_mov(pc, dst[3], one);
831
832 if (mask & (3 << 1)) {
833 if (mask & (1 << 1))
834 tmp[0] = dst[1];
835 else
836 tmp[0] = temp_temp(pc);
837 emit_minmax(pc, 4, tmp[0], src[0], zero);
838 }
839
840 if (mask & (1 << 2)) {
841 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
842
843 tmp[1] = temp_temp(pc);
844 emit_minmax(pc, 4, tmp[1], src[1], zero);
845
846 tmp[3] = temp_temp(pc);
847 emit_minmax(pc, 4, tmp[3], src[3], neg128);
848 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
849
850 emit_pow(pc, dst[2], tmp[1], tmp[3]);
851 emit_mov(pc, dst[2], zero);
852 set_pred(pc, 3, 0, pc->p->exec_tail);
853 }
854
855 FREE(pos128);
856 FREE(neg128);
857 FREE(zero);
858 FREE(one);
859 }
860
861 static void
862 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
863 {
864 struct nv50_program_exec *e = exec(pc);
865
866 set_long(pc, e);
867 e->inst[0] |= 0xa0000000; /* delta */
868 e->inst[1] |= (7 << 29); /* delta */
869 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
870 e->inst[1] |= (1 << 14); /* src .f32 */
871 set_dst(pc, dst, e);
872 set_src_0(pc, src, e);
873
874 emit(pc, e);
875 }
876
877 static void
878 emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
879 {
880 struct nv50_program_exec *e;
881 const int r_pred = 1;
882
883 /* Sets predicate reg ? */
884 e = exec(pc);
885 e->inst[0] = 0xa00001fd;
886 e->inst[1] = 0xc4014788;
887 set_src_0(pc, src, e);
888 set_pred_wr(pc, 1, r_pred, e);
889 emit(pc, e);
890
891 /* This is probably KILP */
892 e = exec(pc);
893 e->inst[0] = 0x000001fe;
894 set_long(pc, e);
895 set_pred(pc, 1 /* LT? */, r_pred, e);
896 emit(pc, e);
897 }
898
899 static struct nv50_reg *
900 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
901 {
902 switch (dst->DstRegister.File) {
903 case TGSI_FILE_TEMPORARY:
904 return &pc->temp[dst->DstRegister.Index * 4 + c];
905 case TGSI_FILE_OUTPUT:
906 return &pc->result[dst->DstRegister.Index * 4 + c];
907 case TGSI_FILE_NULL:
908 return NULL;
909 default:
910 break;
911 }
912
913 return NULL;
914 }
915
916 static struct nv50_reg *
917 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
918 {
919 struct nv50_reg *r = NULL;
920 struct nv50_reg *temp;
921 unsigned sgn, c;
922
923 sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
924
925 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
926 switch (c) {
927 case TGSI_EXTSWIZZLE_X:
928 case TGSI_EXTSWIZZLE_Y:
929 case TGSI_EXTSWIZZLE_Z:
930 case TGSI_EXTSWIZZLE_W:
931 switch (src->SrcRegister.File) {
932 case TGSI_FILE_INPUT:
933 r = &pc->attr[src->SrcRegister.Index * 4 + c];
934 break;
935 case TGSI_FILE_TEMPORARY:
936 r = &pc->temp[src->SrcRegister.Index * 4 + c];
937 break;
938 case TGSI_FILE_CONSTANT:
939 r = &pc->param[src->SrcRegister.Index * 4 + c];
940 break;
941 case TGSI_FILE_IMMEDIATE:
942 r = &pc->immd[src->SrcRegister.Index * 4 + c];
943 break;
944 case TGSI_FILE_SAMPLER:
945 break;
946 default:
947 assert(0);
948 break;
949 }
950 break;
951 case TGSI_EXTSWIZZLE_ZERO:
952 r = alloc_immd(pc, 0.0);
953 return r;
954 case TGSI_EXTSWIZZLE_ONE:
955 if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET)
956 return alloc_immd(pc, -1.0);
957 return alloc_immd(pc, 1.0);
958 default:
959 assert(0);
960 break;
961 }
962
963 switch (sgn) {
964 case TGSI_UTIL_SIGN_KEEP:
965 break;
966 case TGSI_UTIL_SIGN_CLEAR:
967 temp = temp_temp(pc);
968 emit_abs(pc, temp, r);
969 r = temp;
970 break;
971 case TGSI_UTIL_SIGN_TOGGLE:
972 temp = temp_temp(pc);
973 emit_neg(pc, temp, r);
974 r = temp;
975 break;
976 case TGSI_UTIL_SIGN_SET:
977 temp = temp_temp(pc);
978 emit_abs(pc, temp, r);
979 emit_neg(pc, temp, temp);
980 r = temp;
981 break;
982 default:
983 assert(0);
984 break;
985 }
986
987 return r;
988 }
989
990 static boolean
991 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
992 {
993 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
994 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
995 unsigned mask, sat, unit;
996 int i, c;
997
998 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
999 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
1000
1001 for (c = 0; c < 4; c++) {
1002 if (mask & (1 << c))
1003 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
1004 else
1005 dst[c] = NULL;
1006 rdst[c] = NULL;
1007 src[0][c] = NULL;
1008 src[1][c] = NULL;
1009 src[2][c] = NULL;
1010 }
1011
1012 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1013 const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
1014
1015 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
1016 unit = fs->SrcRegister.Index;
1017
1018 for (c = 0; c < 4; c++)
1019 src[i][c] = tgsi_src(pc, c, fs);
1020 }
1021
1022 if (sat) {
1023 for (c = 0; c < 4; c++) {
1024 rdst[c] = dst[c];
1025 dst[c] = temp_temp(pc);
1026 }
1027 }
1028
1029 switch (inst->Instruction.Opcode) {
1030 case TGSI_OPCODE_ABS:
1031 for (c = 0; c < 4; c++) {
1032 if (!(mask & (1 << c)))
1033 continue;
1034 emit_abs(pc, dst[c], src[0][c]);
1035 }
1036 break;
1037 case TGSI_OPCODE_ADD:
1038 for (c = 0; c < 4; c++) {
1039 if (!(mask & (1 << c)))
1040 continue;
1041 emit_add(pc, dst[c], src[0][c], src[1][c]);
1042 }
1043 break;
1044 case TGSI_OPCODE_COS:
1045 temp = temp_temp(pc);
1046 emit_precossin(pc, temp, src[0][0]);
1047 emit_flop(pc, 5, temp, temp);
1048 for (c = 0; c < 4; c++) {
1049 if (!(mask & (1 << c)))
1050 continue;
1051 emit_mov(pc, dst[c], temp);
1052 }
1053 break;
1054 case TGSI_OPCODE_DP3:
1055 temp = temp_temp(pc);
1056 emit_mul(pc, temp, src[0][0], src[1][0]);
1057 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1058 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1059 for (c = 0; c < 4; c++) {
1060 if (!(mask & (1 << c)))
1061 continue;
1062 emit_mov(pc, dst[c], temp);
1063 }
1064 break;
1065 case TGSI_OPCODE_DP4:
1066 temp = temp_temp(pc);
1067 emit_mul(pc, temp, src[0][0], src[1][0]);
1068 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1069 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1070 emit_mad(pc, temp, src[0][3], src[1][3], temp);
1071 for (c = 0; c < 4; c++) {
1072 if (!(mask & (1 << c)))
1073 continue;
1074 emit_mov(pc, dst[c], temp);
1075 }
1076 break;
1077 case TGSI_OPCODE_DPH:
1078 temp = temp_temp(pc);
1079 emit_mul(pc, temp, src[0][0], src[1][0]);
1080 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1081 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1082 emit_add(pc, temp, src[1][3], temp);
1083 for (c = 0; c < 4; c++) {
1084 if (!(mask & (1 << c)))
1085 continue;
1086 emit_mov(pc, dst[c], temp);
1087 }
1088 break;
1089 case TGSI_OPCODE_DST:
1090 {
1091 struct nv50_reg *one = alloc_immd(pc, 1.0);
1092 if (mask & (1 << 0))
1093 emit_mov(pc, dst[0], one);
1094 if (mask & (1 << 1))
1095 emit_mul(pc, dst[1], src[0][1], src[1][1]);
1096 if (mask & (1 << 2))
1097 emit_mov(pc, dst[2], src[0][2]);
1098 if (mask & (1 << 3))
1099 emit_mov(pc, dst[3], src[1][3]);
1100 FREE(one);
1101 }
1102 break;
1103 case TGSI_OPCODE_EX2:
1104 temp = temp_temp(pc);
1105 emit_preex2(pc, temp, src[0][0]);
1106 emit_flop(pc, 6, temp, temp);
1107 for (c = 0; c < 4; c++) {
1108 if (!(mask & (1 << c)))
1109 continue;
1110 emit_mov(pc, dst[c], temp);
1111 }
1112 break;
1113 case TGSI_OPCODE_FLR:
1114 for (c = 0; c < 4; c++) {
1115 if (!(mask & (1 << c)))
1116 continue;
1117 emit_flr(pc, dst[c], src[0][c]);
1118 }
1119 break;
1120 case TGSI_OPCODE_FRC:
1121 temp = temp_temp(pc);
1122 for (c = 0; c < 4; c++) {
1123 if (!(mask & (1 << c)))
1124 continue;
1125 emit_flr(pc, temp, src[0][c]);
1126 emit_sub(pc, dst[c], src[0][c], temp);
1127 }
1128 break;
1129 case TGSI_OPCODE_KIL:
1130 emit_kil(pc, src[0][0]);
1131 emit_kil(pc, src[0][1]);
1132 emit_kil(pc, src[0][2]);
1133 emit_kil(pc, src[0][3]);
1134 break;
1135 case TGSI_OPCODE_LIT:
1136 emit_lit(pc, &dst[0], mask, &src[0][0]);
1137 break;
1138 case TGSI_OPCODE_LG2:
1139 temp = temp_temp(pc);
1140 emit_flop(pc, 3, temp, src[0][0]);
1141 for (c = 0; c < 4; c++) {
1142 if (!(mask & (1 << c)))
1143 continue;
1144 emit_mov(pc, dst[c], temp);
1145 }
1146 break;
1147 case TGSI_OPCODE_LRP:
1148 temp = temp_temp(pc);
1149 for (c = 0; c < 4; c++) {
1150 if (!(mask & (1 << c)))
1151 continue;
1152 emit_sub(pc, temp, src[1][c], src[2][c]);
1153 emit_mad(pc, dst[c], temp, src[0][c], src[2][c]);
1154 }
1155 break;
1156 case TGSI_OPCODE_MAD:
1157 for (c = 0; c < 4; c++) {
1158 if (!(mask & (1 << c)))
1159 continue;
1160 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1161 }
1162 break;
1163 case TGSI_OPCODE_MAX:
1164 for (c = 0; c < 4; c++) {
1165 if (!(mask & (1 << c)))
1166 continue;
1167 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1168 }
1169 break;
1170 case TGSI_OPCODE_MIN:
1171 for (c = 0; c < 4; c++) {
1172 if (!(mask & (1 << c)))
1173 continue;
1174 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1175 }
1176 break;
1177 case TGSI_OPCODE_MOV:
1178 for (c = 0; c < 4; c++) {
1179 if (!(mask & (1 << c)))
1180 continue;
1181 emit_mov(pc, dst[c], src[0][c]);
1182 }
1183 break;
1184 case TGSI_OPCODE_MUL:
1185 for (c = 0; c < 4; c++) {
1186 if (!(mask & (1 << c)))
1187 continue;
1188 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1189 }
1190 break;
1191 case TGSI_OPCODE_POW:
1192 temp = temp_temp(pc);
1193 emit_pow(pc, temp, src[0][0], src[1][0]);
1194 for (c = 0; c < 4; c++) {
1195 if (!(mask & (1 << c)))
1196 continue;
1197 emit_mov(pc, dst[c], temp);
1198 }
1199 break;
1200 case TGSI_OPCODE_RCP:
1201 for (c = 0; c < 4; c++) {
1202 if (!(mask & (1 << c)))
1203 continue;
1204 emit_flop(pc, 0, dst[c], src[0][0]);
1205 }
1206 break;
1207 case TGSI_OPCODE_RSQ:
1208 for (c = 0; c < 4; c++) {
1209 if (!(mask & (1 << c)))
1210 continue;
1211 emit_flop(pc, 2, dst[c], src[0][0]);
1212 }
1213 break;
1214 case TGSI_OPCODE_SCS:
1215 temp = temp_temp(pc);
1216 emit_precossin(pc, temp, src[0][0]);
1217 if (mask & (1 << 0))
1218 emit_flop(pc, 5, dst[0], temp);
1219 if (mask & (1 << 1))
1220 emit_flop(pc, 4, dst[1], temp);
1221 if (mask & (1 << 2))
1222 emit_mov_immdval(pc, dst[2], 0.0);
1223 if (mask & (1 << 3))
1224 emit_mov_immdval(pc, dst[3], 1.0);
1225 break;
1226 case TGSI_OPCODE_SGE:
1227 for (c = 0; c < 4; c++) {
1228 if (!(mask & (1 << c)))
1229 continue;
1230 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1231 }
1232 break;
1233 case TGSI_OPCODE_SIN:
1234 temp = temp_temp(pc);
1235 emit_precossin(pc, temp, src[0][0]);
1236 emit_flop(pc, 4, temp, temp);
1237 for (c = 0; c < 4; c++) {
1238 if (!(mask & (1 << c)))
1239 continue;
1240 emit_mov(pc, dst[c], temp);
1241 }
1242 break;
1243 case TGSI_OPCODE_SLT:
1244 for (c = 0; c < 4; c++) {
1245 if (!(mask & (1 << c)))
1246 continue;
1247 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1248 }
1249 break;
1250 case TGSI_OPCODE_SUB:
1251 for (c = 0; c < 4; c++) {
1252 if (!(mask & (1 << c)))
1253 continue;
1254 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1255 }
1256 break;
1257 case TGSI_OPCODE_TEX:
1258 case TGSI_OPCODE_TXP:
1259 {
1260 struct nv50_reg *t[4];
1261 struct nv50_program_exec *e;
1262
1263 alloc_temp4(pc, t, 0);
1264 emit_mov(pc, t[0], src[0][0]);
1265 emit_mov(pc, t[1], src[0][1]);
1266
1267 e = exec(pc);
1268 e->inst[0] = 0xf6400000;
1269 e->inst[0] |= (unit << 9);
1270 set_long(pc, e);
1271 e->inst[1] |= 0x0000c004;
1272 set_dst(pc, t[0], e);
1273 emit(pc, e);
1274
1275 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
1276 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
1277 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
1278 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
1279
1280 free_temp4(pc, t);
1281 }
1282 break;
1283 case TGSI_OPCODE_XPD:
1284 temp = temp_temp(pc);
1285 if (mask & (1 << 0)) {
1286 emit_mul(pc, temp, src[0][2], src[1][1]);
1287 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1288 }
1289 if (mask & (1 << 1)) {
1290 emit_mul(pc, temp, src[0][0], src[1][2]);
1291 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1292 }
1293 if (mask & (1 << 2)) {
1294 emit_mul(pc, temp, src[0][1], src[1][0]);
1295 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1296 }
1297 if (mask & (1 << 3))
1298 emit_mov_immdval(pc, dst[3], 1.0);
1299 break;
1300 case TGSI_OPCODE_END:
1301 break;
1302 default:
1303 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1304 return FALSE;
1305 }
1306
1307 if (sat) {
1308 for (c = 0; c < 4; c++) {
1309 struct nv50_program_exec *e;
1310
1311 if (!(mask & (1 << c)))
1312 continue;
1313 e = exec(pc);
1314
1315 e->inst[0] = 0xa0000000; /* cvt */
1316 set_long(pc, e);
1317 e->inst[1] |= (6 << 29); /* cvt */
1318 e->inst[1] |= 0x04000000; /* 32 bit */
1319 e->inst[1] |= (1 << 14); /* src .f32 */
1320 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1321 set_dst(pc, rdst[c], e);
1322 set_src_0(pc, dst[c], e);
1323 emit(pc, e);
1324 }
1325 }
1326
1327 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1328 for (c = 0; c < 4; c++) {
1329 if (!src[i][c])
1330 continue;
1331 if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
1332 FREE(src[i][c]);
1333 }
1334 }
1335
1336 kill_temp_temp(pc);
1337 return TRUE;
1338 }
1339
1340 /* Adjust a bitmask that indicates what components of a source are used,
1341 * we use this in tx_prep so we only load interpolants that are needed.
1342 */
1343 static void
1344 insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask)
1345 {
1346 const struct tgsi_instruction_ext_texture *tex;
1347
1348 switch (insn->Instruction.Opcode) {
1349 case TGSI_OPCODE_DP3:
1350 *mask = 0x7;
1351 break;
1352 case TGSI_OPCODE_DP4:
1353 case TGSI_OPCODE_DPH:
1354 *mask = 0xF;
1355 break;
1356 case TGSI_OPCODE_LIT:
1357 *mask = 0xB;
1358 break;
1359 case TGSI_OPCODE_RCP:
1360 case TGSI_OPCODE_RSQ:
1361 *mask = 0x1;
1362 break;
1363 case TGSI_OPCODE_TEX:
1364 case TGSI_OPCODE_TXP:
1365 assert(insn->Instruction.Extended);
1366 tex = &insn->InstructionExtTexture;
1367
1368 *mask = 0x7;
1369 if (tex->Texture == TGSI_TEXTURE_1D)
1370 *mask = 0x1;
1371 else
1372 if (tex->Texture == TGSI_TEXTURE_2D)
1373 *mask = 0x3;
1374
1375 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
1376 *mask |= 0x8;
1377 break;
1378 default:
1379 break;
1380 }
1381 }
1382
1383 static void
1384 prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
1385 unsigned *r_usage[2])
1386 {
1387 const struct tgsi_full_instruction *insn;
1388 const struct tgsi_full_src_register *src;
1389 const struct tgsi_dst_register *dst;
1390
1391 unsigned i, c, k, n, mask, *acc_p;
1392
1393 insn = &tok->FullInstruction;
1394 dst = &insn->FullDstRegisters[0].DstRegister;
1395 mask = dst->WriteMask;
1396
1397 if (!r_usage[0])
1398 r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
1399 if (!r_usage[1])
1400 r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
1401
1402 if (dst->File == TGSI_FILE_TEMPORARY) {
1403 for (c = 0; c < 4; c++) {
1404 if (!(mask & (1 << c)))
1405 continue;
1406 r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
1407 }
1408 }
1409
1410 for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
1411 src = &insn->FullSrcRegisters[i];
1412
1413 switch (src->SrcRegister.File) {
1414 case TGSI_FILE_TEMPORARY:
1415 acc_p = r_usage[0];
1416 break;
1417 case TGSI_FILE_INPUT:
1418 acc_p = r_usage[1];
1419 break;
1420 default:
1421 continue;
1422 }
1423
1424 insn_adjust_mask(insn, &mask);
1425
1426 for (c = 0; c < 4; c++) {
1427 if (!(mask & (1 << c)))
1428 continue;
1429
1430 k = tgsi_util_get_full_src_register_extswizzle(src, c);
1431 switch (k) {
1432 case TGSI_EXTSWIZZLE_X:
1433 case TGSI_EXTSWIZZLE_Y:
1434 case TGSI_EXTSWIZZLE_Z:
1435 case TGSI_EXTSWIZZLE_W:
1436 n = src->SrcRegister.Index * 4 + k;
1437 acc_p[n] = pc->insn_nr;
1438 break;
1439 default:
1440 break;
1441 }
1442 }
1443 }
1444 }
1445
1446 static boolean
1447 nv50_program_tx_prep(struct nv50_pc *pc)
1448 {
1449 struct tgsi_parse_context p;
1450 boolean ret = FALSE;
1451 unsigned i, c;
1452 unsigned fcol, bcol, fcrd, depr;
1453
1454 /* count (centroid) perspective interpolations */
1455 unsigned centroid_loads = 0;
1456 unsigned perspect_loads = 0;
1457
1458 /* track register access for temps and attrs */
1459 unsigned *r_usage[2];
1460 r_usage[0] = NULL;
1461 r_usage[1] = NULL;
1462
1463 depr = fcol = bcol = fcrd = 0xffff;
1464
1465 tgsi_parse_init(&p, pc->p->pipe.tokens);
1466 while (!tgsi_parse_end_of_tokens(&p)) {
1467 const union tgsi_full_token *tok = &p.FullToken;
1468
1469 tgsi_parse_token(&p);
1470 switch (tok->Token.Type) {
1471 case TGSI_TOKEN_TYPE_IMMEDIATE:
1472 {
1473 const struct tgsi_full_immediate *imm =
1474 &p.FullToken.FullImmediate;
1475
1476 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1477 imm->u.ImmediateFloat32[1].Float,
1478 imm->u.ImmediateFloat32[2].Float,
1479 imm->u.ImmediateFloat32[3].Float);
1480 }
1481 break;
1482 case TGSI_TOKEN_TYPE_DECLARATION:
1483 {
1484 const struct tgsi_full_declaration *d;
1485 unsigned last, first, mode;
1486
1487 d = &p.FullToken.FullDeclaration;
1488 first = d->DeclarationRange.First;
1489 last = d->DeclarationRange.Last;
1490
1491 switch (d->Declaration.File) {
1492 case TGSI_FILE_TEMPORARY:
1493 if (pc->temp_nr < (last + 1))
1494 pc->temp_nr = last + 1;
1495 break;
1496 case TGSI_FILE_OUTPUT:
1497 if (pc->result_nr < (last + 1))
1498 pc->result_nr = last + 1;
1499
1500 if (!d->Declaration.Semantic)
1501 break;
1502
1503 switch (d->Semantic.SemanticName) {
1504 case TGSI_SEMANTIC_POSITION:
1505 depr = first;
1506 break;
1507 default:
1508 break;
1509 }
1510
1511 break;
1512 case TGSI_FILE_INPUT:
1513 {
1514 if (pc->attr_nr < (last + 1))
1515 pc->attr_nr = last + 1;
1516
1517 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1518 break;
1519
1520 switch (d->Declaration.Interpolate) {
1521 case TGSI_INTERPOLATE_CONSTANT:
1522 mode = INTERP_FLAT;
1523 break;
1524 case TGSI_INTERPOLATE_PERSPECTIVE:
1525 mode = INTERP_PERSPECTIVE;
1526 break;
1527 default:
1528 mode = INTERP_LINEAR;
1529 break;
1530 }
1531
1532 if (d->Declaration.Semantic) {
1533 switch (d->Semantic.SemanticName) {
1534 case TGSI_SEMANTIC_POSITION:
1535 fcrd = first;
1536 break;
1537 case TGSI_SEMANTIC_COLOR:
1538 fcol = first;
1539 mode = INTERP_PERSPECTIVE;
1540 break;
1541 case TGSI_SEMANTIC_BCOLOR:
1542 bcol = first;
1543 mode = INTERP_PERSPECTIVE;
1544 break;
1545 }
1546 }
1547
1548 if (d->Declaration.Centroid) {
1549 mode |= INTERP_CENTROID;
1550 if (mode & INTERP_PERSPECTIVE)
1551 centroid_loads++;
1552 } else
1553 if (mode & INTERP_PERSPECTIVE)
1554 perspect_loads++;
1555
1556 assert(last < 32);
1557 for (i = first; i <= last; i++)
1558 pc->interp_mode[i] = mode;
1559 }
1560 break;
1561 case TGSI_FILE_CONSTANT:
1562 if (pc->param_nr < (last + 1))
1563 pc->param_nr = last + 1;
1564 break;
1565 case TGSI_FILE_SAMPLER:
1566 break;
1567 default:
1568 NOUVEAU_ERR("bad decl file %d\n",
1569 d->Declaration.File);
1570 goto out_err;
1571 }
1572 }
1573 break;
1574 case TGSI_TOKEN_TYPE_INSTRUCTION:
1575 pc->insn_nr++;
1576 prep_inspect_insn(pc, tok, r_usage);
1577 break;
1578 default:
1579 break;
1580 }
1581 }
1582
1583 if (pc->temp_nr) {
1584 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
1585 if (!pc->temp)
1586 goto out_err;
1587
1588 for (i = 0; i < pc->temp_nr; i++) {
1589 for (c = 0; c < 4; c++) {
1590 pc->temp[i*4+c].type = P_TEMP;
1591 pc->temp[i*4+c].hw = -1;
1592 pc->temp[i*4+c].index = i;
1593 pc->temp[i*4+c].acc = r_usage[0][i*4+c];
1594 }
1595 }
1596 }
1597
1598 if (pc->attr_nr) {
1599 struct nv50_reg *iv = NULL;
1600 int aid = 0;
1601
1602 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
1603 if (!pc->attr)
1604 goto out_err;
1605
1606 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1607 iv = alloc_temp(pc, NULL);
1608 emit_interp(pc, iv, iv, NULL);
1609 emit_flop(pc, 0, iv, iv);
1610 aid++;
1611 }
1612
1613 for (i = 0; i < pc->attr_nr; i++) {
1614 struct nv50_reg *a = &pc->attr[i*4];
1615
1616 for (c = 0; c < 4; c++) {
1617 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1618 struct nv50_reg *at =
1619 alloc_temp(pc, NULL);
1620 pc->attr[i*4+c].type = at->type;
1621 pc->attr[i*4+c].hw = at->hw;
1622 pc->attr[i*4+c].index = at->index;
1623 pc->attr[i*4+c].acc = r_usage[1][i*4+c];
1624 } else {
1625 pc->p->cfg.vp.attr[aid/32] |=
1626 (1 << (aid % 32));
1627 pc->attr[i*4+c].type = P_ATTR;
1628 pc->attr[i*4+c].hw = aid++;
1629 pc->attr[i*4+c].index = i;
1630 }
1631 }
1632
1633 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1634 continue;
1635
1636 emit_interp(pc, &a[0], &a[0], iv);
1637 emit_interp(pc, &a[1], &a[1], iv);
1638 emit_interp(pc, &a[2], &a[2], iv);
1639 emit_interp(pc, &a[3], &a[3], iv);
1640 }
1641
1642 if (iv)
1643 free_temp(pc, iv);
1644 }
1645
1646 if (pc->result_nr) {
1647 int rid = 0;
1648
1649 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
1650 if (!pc->result)
1651 goto out_err;
1652
1653 for (i = 0; i < pc->result_nr; i++) {
1654 for (c = 0; c < 4; c++) {
1655 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1656 pc->result[i*4+c].type = P_TEMP;
1657 pc->result[i*4+c].hw = -1;
1658 } else {
1659 pc->result[i*4+c].type = P_RESULT;
1660 pc->result[i*4+c].hw = rid++;
1661 }
1662 pc->result[i*4+c].index = i;
1663 }
1664 }
1665 }
1666
1667 if (pc->param_nr) {
1668 int rid = 0;
1669
1670 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
1671 if (!pc->param)
1672 goto out_err;
1673
1674 for (i = 0; i < pc->param_nr; i++) {
1675 for (c = 0; c < 4; c++) {
1676 pc->param[i*4+c].type = P_CONST;
1677 pc->param[i*4+c].hw = rid++;
1678 pc->param[i*4+c].index = i;
1679 }
1680 }
1681 }
1682
1683 if (pc->immd_nr) {
1684 int rid = pc->param_nr * 4;
1685
1686 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
1687 if (!pc->immd)
1688 goto out_err;
1689
1690 for (i = 0; i < pc->immd_nr; i++) {
1691 for (c = 0; c < 4; c++) {
1692 pc->immd[i*4+c].type = P_IMMD;
1693 pc->immd[i*4+c].hw = rid++;
1694 pc->immd[i*4+c].index = i;
1695 }
1696 }
1697 }
1698
1699 ret = TRUE;
1700 out_err:
1701 if (r_usage[0])
1702 FREE(r_usage[0]);
1703 if (r_usage[1])
1704 FREE(r_usage[1]);
1705
1706 tgsi_parse_free(&p);
1707 return ret;
1708 }
1709
1710 static void
1711 free_nv50_pc(struct nv50_pc *pc)
1712 {
1713 unsigned i;
1714
1715 if (pc->immd)
1716 FREE(pc->immd);
1717 if (pc->param)
1718 FREE(pc->param);
1719 if (pc->result)
1720 FREE(pc->result);
1721 if (pc->attr)
1722 FREE(pc->attr);
1723 if (pc->temp)
1724 FREE(pc->temp);
1725
1726 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
1727 /* deallocate fragment program attributes */
1728 if (pc->r_temp[i] && pc->r_temp[i]->index == -1)
1729 FREE(pc->r_temp[i]);
1730 }
1731
1732 FREE(pc);
1733 }
1734
1735 static boolean
1736 nv50_program_tx(struct nv50_program *p)
1737 {
1738 struct tgsi_parse_context parse;
1739 struct nv50_pc *pc;
1740 boolean ret;
1741
1742 pc = CALLOC_STRUCT(nv50_pc);
1743 if (!pc)
1744 return FALSE;
1745 pc->p = p;
1746 pc->p->cfg.high_temp = 4;
1747
1748 ret = nv50_program_tx_prep(pc);
1749 if (ret == FALSE)
1750 goto out_cleanup;
1751
1752 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1753 while (!tgsi_parse_end_of_tokens(&parse)) {
1754 const union tgsi_full_token *tok = &parse.FullToken;
1755
1756 tgsi_parse_token(&parse);
1757
1758 switch (tok->Token.Type) {
1759 case TGSI_TOKEN_TYPE_INSTRUCTION:
1760 ++pc->insn_cur;
1761 ret = nv50_program_tx_insn(pc, tok);
1762 if (ret == FALSE)
1763 goto out_err;
1764 break;
1765 default:
1766 break;
1767 }
1768 }
1769
1770 if (p->type == PIPE_SHADER_FRAGMENT) {
1771 struct nv50_reg out;
1772
1773 out.type = P_TEMP;
1774 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1775 emit_mov(pc, &out, &pc->result[out.hw]);
1776 }
1777
1778 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1779 pc->p->exec_tail->inst[1] |= 0x00000001;
1780
1781 p->param_nr = pc->param_nr * 4;
1782 p->immd_nr = pc->immd_nr * 4;
1783 p->immd = pc->immd_buf;
1784
1785 out_err:
1786 tgsi_parse_free(&parse);
1787
1788 out_cleanup:
1789 free_nv50_pc(pc);
1790 return ret;
1791 }
1792
1793 static void
1794 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1795 {
1796 if (nv50_program_tx(p) == FALSE)
1797 assert(0);
1798 p->translated = TRUE;
1799 }
1800
1801 static void
1802 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1803 unsigned start, unsigned count)
1804 {
1805 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1806 struct nouveau_grobj *tesla = nv50->screen->tesla;
1807
1808 while (count) {
1809 unsigned nr = count > 2047 ? 2047 : count;
1810
1811 BEGIN_RING(chan, tesla, 0x00000f00, 1);
1812 OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8));
1813 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1814 OUT_RINGp (chan, map, nr);
1815
1816 map += nr;
1817 start += nr;
1818 count -= nr;
1819 }
1820 }
1821
1822 static void
1823 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1824 {
1825 struct nouveau_winsys *nvws = nv50->screen->nvws;
1826 struct pipe_winsys *ws = nv50->pipe.winsys;
1827 unsigned nr = p->param_nr + p->immd_nr;
1828
1829 if (!p->data && nr) {
1830 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1831
1832 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1833 while (heap->next && heap->size < nr) {
1834 struct nv50_program *evict = heap->next->priv;
1835 nvws->res_free(&evict->data);
1836 }
1837
1838 if (nvws->res_alloc(heap, nr, p, &p->data))
1839 assert(0);
1840 }
1841 }
1842
1843 if (p->param_nr) {
1844 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1845 PIPE_BUFFER_USAGE_CPU_READ);
1846 nv50_program_upload_data(nv50, map, p->data->start,
1847 p->param_nr);
1848 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1849 }
1850
1851 if (p->immd_nr) {
1852 nv50_program_upload_data(nv50, p->immd,
1853 p->data->start + p->param_nr,
1854 p->immd_nr);
1855 }
1856 }
1857
1858 static void
1859 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1860 {
1861 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1862 struct nouveau_grobj *tesla = nv50->screen->tesla;
1863 struct pipe_screen *screen = nv50->pipe.screen;
1864 struct nv50_program_exec *e;
1865 struct nouveau_stateobj *so;
1866 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1867 unsigned start, count, *up, *ptr;
1868 boolean upload = FALSE;
1869
1870 if (!p->buffer) {
1871 p->buffer = screen->buffer_create(screen, 0x100, 0, p->exec_size * 4);
1872 upload = TRUE;
1873 }
1874
1875 if (p->data && p->data->start != p->data_start) {
1876 for (e = p->exec_head; e; e = e->next) {
1877 unsigned ei, ci;
1878
1879 if (e->param.index < 0)
1880 continue;
1881 ei = e->param.shift >> 5;
1882 ci = e->param.index + p->data->start;
1883
1884 e->inst[ei] &= ~e->param.mask;
1885 e->inst[ei] |= (ci << e->param.shift);
1886 }
1887
1888 p->data_start = p->data->start;
1889 upload = TRUE;
1890 }
1891
1892 if (!upload)
1893 return;
1894
1895 #ifdef NV50_PROGRAM_DUMP
1896 NOUVEAU_ERR("-------\n");
1897 for (e = p->exec_head; e; e = e->next) {
1898 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1899 if (is_long(e))
1900 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1901 }
1902 #endif
1903
1904 up = ptr = MALLOC(p->exec_size * 4);
1905 for (e = p->exec_head; e; e = e->next) {
1906 *(ptr++) = e->inst[0];
1907 if (is_long(e))
1908 *(ptr++) = e->inst[1];
1909 }
1910
1911 so = so_new(4,2);
1912 so_method(so, nv50->screen->tesla, 0x1280, 3);
1913 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1914 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1915 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1916
1917 start = 0; count = p->exec_size;
1918 while (count) {
1919 struct nouveau_winsys *nvws = nv50->screen->nvws;
1920 unsigned nr;
1921
1922 so_emit(nvws, so);
1923
1924 nr = MIN2(count, 2047);
1925 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1926 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1927 FIRE_RING(chan);
1928 continue;
1929 }
1930
1931 BEGIN_RING(chan, tesla, 0x0f00, 1);
1932 OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
1933 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1934 OUT_RINGp (chan, up + start, nr);
1935
1936 start += nr;
1937 count -= nr;
1938 }
1939
1940 FREE(up);
1941 so_ref(NULL, &so);
1942 }
1943
1944 void
1945 nv50_vertprog_validate(struct nv50_context *nv50)
1946 {
1947 struct nouveau_grobj *tesla = nv50->screen->tesla;
1948 struct nv50_program *p = nv50->vertprog;
1949 struct nouveau_stateobj *so;
1950
1951 if (!p->translated) {
1952 nv50_program_validate(nv50, p);
1953 if (!p->translated)
1954 assert(0);
1955 }
1956
1957 nv50_program_validate_data(nv50, p);
1958 nv50_program_validate_code(nv50, p);
1959
1960 so = so_new(13, 2);
1961 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1962 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1963 NOUVEAU_BO_HIGH, 0, 0);
1964 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1965 NOUVEAU_BO_LOW, 0, 0);
1966 so_method(so, tesla, 0x1650, 2);
1967 so_data (so, p->cfg.vp.attr[0]);
1968 so_data (so, p->cfg.vp.attr[1]);
1969 so_method(so, tesla, 0x16b8, 1);
1970 so_data (so, p->cfg.high_result);
1971 so_method(so, tesla, 0x16ac, 2);
1972 so_data (so, p->cfg.high_result); //8);
1973 so_data (so, p->cfg.high_temp);
1974 so_method(so, tesla, 0x140c, 1);
1975 so_data (so, 0); /* program start offset */
1976 so_ref(so, &nv50->state.vertprog);
1977 so_ref(NULL, &so);
1978 }
1979
1980 void
1981 nv50_fragprog_validate(struct nv50_context *nv50)
1982 {
1983 struct nouveau_grobj *tesla = nv50->screen->tesla;
1984 struct nv50_program *p = nv50->fragprog;
1985 struct nouveau_stateobj *so;
1986
1987 if (!p->translated) {
1988 nv50_program_validate(nv50, p);
1989 if (!p->translated)
1990 assert(0);
1991 }
1992
1993 nv50_program_validate_data(nv50, p);
1994 nv50_program_validate_code(nv50, p);
1995
1996 so = so_new(64, 2);
1997 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1998 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1999 NOUVEAU_BO_HIGH, 0, 0);
2000 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
2001 NOUVEAU_BO_LOW, 0, 0);
2002 so_method(so, tesla, 0x1904, 4);
2003 so_data (so, 0x00040404); /* p: 0x01000404 */
2004 so_data (so, 0x00000004);
2005 so_data (so, 0x00000000);
2006 so_data (so, 0x00000000);
2007 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
2008 so_data (so, 0x03020100);
2009 so_data (so, 0x07060504);
2010 so_data (so, 0x0b0a0908);
2011 so_method(so, tesla, 0x1988, 2);
2012 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
2013 so_data (so, p->cfg.high_temp);
2014 so_method(so, tesla, 0x1414, 1);
2015 so_data (so, 0); /* program start offset */
2016 so_ref(so, &nv50->state.fragprog);
2017 so_ref(NULL, &so);
2018 }
2019
2020 void
2021 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
2022 {
2023 struct pipe_screen *pscreen = nv50->pipe.screen;
2024
2025 while (p->exec_head) {
2026 struct nv50_program_exec *e = p->exec_head;
2027
2028 p->exec_head = e->next;
2029 FREE(e);
2030 }
2031 p->exec_tail = NULL;
2032 p->exec_size = 0;
2033
2034 if (p->buffer)
2035 pipe_buffer_reference(&p->buffer, NULL);
2036
2037 nv50->screen->nvws->res_free(&p->data);
2038
2039 p->translated = 0;
2040 }
2041