Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "pipe/p_inlines.h"
27
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
31
32 #include "nv50_context.h"
33
34 #define NV50_SU_MAX_TEMP 64
35 //#define NV50_PROGRAM_DUMP
36
37 /* ARL - gallium craps itself on progs/vp/arl.txt
38 *
39 * MSB - Like MAD, but MUL+SUB
40 * - Fuck it off, introduce a way to negate args for ops that
41 * support it.
42 *
43 * Look into inlining IMMD for ops other than MOV (make it general?)
44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
46 *
47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
48 * case, if the emit_src() causes the inst to suddenly become long.
49 *
50 * Verify half-insns work where expected - and force disable them where they
51 * don't work - MUL has it forcibly disabled atm as it fixes POW..
52 *
53 * FUCK! watch dst==src vectors, can overwrite components that are needed.
54 * ie. SUB R0, R0.yzxw, R0
55 *
56 * Things to check with renouveau:
57 * FP attr/result assignment - how?
58 * attrib
59 * - 0x16bc maps vp output onto fp hpos
60 * - 0x16c0 maps vp output onto fp col0
61 * result
62 * - colr always 0-3
63 * - depr always 4
64 * 0x16bc->0x16e8 --> some binding between vp/fp regs
65 * 0x16b8 --> VP output count
66 *
67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
68 * "MOV rcol.x, fcol.y" = 0x00000004
69 * 0x19a8 --> as above but 0x00000100 and 0x00000000
70 * - 0x00100000 used when KIL used
71 * 0x196c --> as above but 0x00000011 and 0x00000000
72 *
73 * 0x1988 --> 0xXXNNNNNN
74 * - XX == FP high something
75 */
/*
 * Descriptor for a single scalar register used by the translator.
 *
 *  type  - register file the value lives in.
 *  index - -1 for registers the translator allocated itself (see
 *          alloc_temp()/alloc_immd()); free_temp() only releases those.
 *  hw    - hardware slot; for P_TEMP a negative value means "not yet
 *          assigned" (see alloc_reg()), for P_IMMD it is an offset into
 *          the immediate buffer.
 *  neg   - NOTE(review): not referenced in the visible code; presumably a
 *          negate flag - confirm before relying on it.
 */
struct nv50_reg {
	enum {
		P_TEMP,		/* temporary */
		P_ATTR,		/* shader input */
		P_RESULT,	/* shader output */
		P_CONST,	/* constant buffer entry */
		P_IMMD		/* immediate */
	} type;
	int index;

	int hw;
	int neg;
};
89
90 struct nv50_pc {
91 struct nv50_program *p;
92
93 /* hw resources */
94 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
95
96 /* tgsi resources */
97 struct nv50_reg *temp;
98 int temp_nr;
99 struct nv50_reg *attr;
100 int attr_nr;
101 struct nv50_reg *result;
102 int result_nr;
103 struct nv50_reg *param;
104 int param_nr;
105 struct nv50_reg *immd;
106 float *immd_buf;
107 int immd_nr;
108
109 struct nv50_reg *temp_temp[16];
110 unsigned temp_temp_nr;
111 };
112
113 static void
114 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
115 {
116 int i;
117
118 if (reg->type == P_RESULT) {
119 if (pc->p->cfg.high_result < (reg->hw + 1))
120 pc->p->cfg.high_result = reg->hw + 1;
121 }
122
123 if (reg->type != P_TEMP)
124 return;
125
126 if (reg->hw >= 0) {
127 /*XXX: do this here too to catch FP temp-as-attr usage..
128 * not clean, but works */
129 if (pc->p->cfg.high_temp < (reg->hw + 1))
130 pc->p->cfg.high_temp = reg->hw + 1;
131 return;
132 }
133
134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
135 if (!(pc->r_temp[i])) {
136 pc->r_temp[i] = reg;
137 reg->hw = i;
138 if (pc->p->cfg.high_temp < (i + 1))
139 pc->p->cfg.high_temp = i + 1;
140 return;
141 }
142 }
143
144 assert(0);
145 }
146
147 static struct nv50_reg *
148 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
149 {
150 struct nv50_reg *r;
151 int i;
152
153 if (dst && dst->type == P_TEMP && dst->hw == -1)
154 return dst;
155
156 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
157 if (!pc->r_temp[i]) {
158 r = CALLOC_STRUCT(nv50_reg);
159 r->type = P_TEMP;
160 r->index = -1;
161 r->hw = i;
162 pc->r_temp[i] = r;
163 return r;
164 }
165 }
166
167 assert(0);
168 return NULL;
169 }
170
171 static void
172 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
173 {
174 if (r->index == -1) {
175 unsigned hw = r->hw;
176
177 FREE(pc->r_temp[hw]);
178 pc->r_temp[hw] = NULL;
179 }
180 }
181
182 static int
183 alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
184 {
185 int i;
186
187 if ((idx + 4) >= NV50_SU_MAX_TEMP)
188 return 1;
189
190 if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
191 pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
192 return alloc_temp4(pc, dst, idx + 1);
193
194 for (i = 0; i < 4; i++) {
195 dst[i] = CALLOC_STRUCT(nv50_reg);
196 dst[i]->type = P_TEMP;
197 dst[i]->index = -1;
198 dst[i]->hw = idx + i;
199 pc->r_temp[idx + i] = dst[i];
200 }
201
202 return 0;
203 }
204
/* Release the four temporaries of a quad, in order reg[0] .. reg[3]. */
static void
free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
{
	struct nv50_reg **cur = reg;
	struct nv50_reg **end = reg + 4;

	while (cur != end)
		free_temp(pc, *cur++);
}
213
214 static struct nv50_reg *
215 temp_temp(struct nv50_pc *pc)
216 {
217 if (pc->temp_temp_nr >= 16)
218 assert(0);
219
220 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
221 return pc->temp_temp[pc->temp_temp_nr++];
222 }
223
224 static void
225 kill_temp_temp(struct nv50_pc *pc)
226 {
227 int i;
228
229 for (i = 0; i < pc->temp_temp_nr; i++)
230 free_temp(pc, pc->temp_temp[i]);
231 pc->temp_temp_nr = 0;
232 }
233
234 static int
235 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
236 {
237 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
238 (pc->immd_nr + 1) * 4 * sizeof(float));
239 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
240 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
241 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
242 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
243
244 return pc->immd_nr++;
245 }
246
247 static struct nv50_reg *
248 alloc_immd(struct nv50_pc *pc, float f)
249 {
250 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
251 unsigned hw;
252
253 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
254 r->type = P_IMMD;
255 r->hw = hw;
256 r->index = -1;
257 return r;
258 }
259
260 static struct nv50_program_exec *
261 exec(struct nv50_pc *pc)
262 {
263 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
264
265 e->param.index = -1;
266 return e;
267 }
268
269 static void
270 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
271 {
272 struct nv50_program *p = pc->p;
273
274 if (p->exec_tail)
275 p->exec_tail->next = e;
276 if (!p->exec_head)
277 p->exec_head = e;
278 p->exec_tail = e;
279 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
280 }
281
282 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
283
284 static boolean
285 is_long(struct nv50_program_exec *e)
286 {
287 if (e->inst[0] & 1)
288 return TRUE;
289 return FALSE;
290 }
291
292 static boolean
293 is_immd(struct nv50_program_exec *e)
294 {
295 if (is_long(e) && (e->inst[1] & 3) == 3)
296 return TRUE;
297 return FALSE;
298 }
299
300 static INLINE void
301 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
302 struct nv50_program_exec *e)
303 {
304 set_long(pc, e);
305 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
306 e->inst[1] |= (pred << 7) | (idx << 12);
307 }
308
309 static INLINE void
310 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
311 struct nv50_program_exec *e)
312 {
313 set_long(pc, e);
314 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
315 e->inst[1] |= (idx << 4) | (on << 6);
316 }
317
318 static INLINE void
319 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
320 {
321 if (is_long(e))
322 return;
323
324 e->inst[0] |= 1;
325 set_pred(pc, 0xf, 0, e);
326 set_pred_wr(pc, 0, 0, e);
327 }
328
329 static INLINE void
330 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
331 {
332 if (dst->type == P_RESULT) {
333 set_long(pc, e);
334 e->inst[1] |= 0x00000008;
335 }
336
337 alloc_reg(pc, dst);
338 e->inst[0] |= (dst->hw << 2);
339 }
340
341 static INLINE void
342 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
343 {
344 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
345
346 set_long(pc, e);
347 /*XXX: can't be predicated - bits overlap.. catch cases where both
348 * are required and avoid them. */
349 set_pred(pc, 0, 0, e);
350 set_pred_wr(pc, 0, 0, e);
351
352 e->inst[1] |= 0x00000002 | 0x00000001;
353 e->inst[0] |= (val & 0x3f) << 16;
354 e->inst[1] |= (val >> 6) << 2;
355 }
356
357 static void
358 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
359 struct nv50_reg *src, struct nv50_reg *iv)
360 {
361 struct nv50_program_exec *e = exec(pc);
362
363 e->inst[0] |= 0x80000000;
364 set_dst(pc, dst, e);
365 alloc_reg(pc, src);
366 e->inst[0] |= (src->hw << 16);
367 if (iv) {
368 e->inst[0] |= (1 << 25);
369 alloc_reg(pc, iv);
370 e->inst[0] |= (iv->hw << 9);
371 }
372
373 emit(pc, e);
374 }
375
376 static void
377 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
378 struct nv50_program_exec *e)
379 {
380 set_long(pc, e);
381 #if 1
382 e->inst[1] |= (1 << 22);
383 #else
384 if (src->type == P_IMMD) {
385 e->inst[1] |= (NV50_CB_PMISC << 22);
386 } else {
387 if (pc->p->type == PIPE_SHADER_VERTEX)
388 e->inst[1] |= (NV50_CB_PVP << 22);
389 else
390 e->inst[1] |= (NV50_CB_PFP << 22);
391 }
392 #endif
393
394 e->param.index = src->hw;
395 e->param.shift = s;
396 e->param.mask = m << (s % 32);
397 }
398
399 static void
400 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
401 {
402 struct nv50_program_exec *e = exec(pc);
403
404 e->inst[0] |= 0x10000000;
405
406 set_dst(pc, dst, e);
407
408 if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
409 set_immd(pc, src, e);
410 /*XXX: 32-bit, but steals part of "half" reg space - need to
411 * catch and handle this case if/when we do half-regs
412 */
413 e->inst[0] |= 0x00008000;
414 } else
415 if (src->type == P_IMMD || src->type == P_CONST) {
416 set_long(pc, e);
417 set_data(pc, src, 0x7f, 9, e);
418 e->inst[1] |= 0x20000000; /* src0 const? */
419 } else {
420 if (src->type == P_ATTR) {
421 set_long(pc, e);
422 e->inst[1] |= 0x00200000;
423 }
424
425 alloc_reg(pc, src);
426 e->inst[0] |= (src->hw << 9);
427 }
428
429 /* We really should support "half" instructions here at some point,
430 * but I don't feel confident enough about them yet.
431 */
432 set_long(pc, e);
433 if (is_long(e) && !is_immd(e)) {
434 e->inst[1] |= 0x04000000; /* 32-bit */
435 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
436 }
437
438 emit(pc, e);
439 }
440
441 static boolean
442 check_swap_src_0_1(struct nv50_pc *pc,
443 struct nv50_reg **s0, struct nv50_reg **s1)
444 {
445 struct nv50_reg *src0 = *s0, *src1 = *s1;
446
447 if (src0->type == P_CONST) {
448 if (src1->type != P_CONST) {
449 *s0 = src1;
450 *s1 = src0;
451 return TRUE;
452 }
453 } else
454 if (src1->type == P_ATTR) {
455 if (src0->type != P_ATTR) {
456 *s0 = src1;
457 *s1 = src0;
458 return TRUE;
459 }
460 }
461
462 return FALSE;
463 }
464
465 static void
466 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
467 {
468 if (src->type == P_ATTR) {
469 set_long(pc, e);
470 e->inst[1] |= 0x00200000;
471 } else
472 if (src->type == P_CONST || src->type == P_IMMD) {
473 struct nv50_reg *temp = temp_temp(pc);
474
475 emit_mov(pc, temp, src);
476 src = temp;
477 }
478
479 alloc_reg(pc, src);
480 e->inst[0] |= (src->hw << 9);
481 }
482
483 static void
484 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
485 {
486 if (src->type == P_ATTR) {
487 struct nv50_reg *temp = temp_temp(pc);
488
489 emit_mov(pc, temp, src);
490 src = temp;
491 } else
492 if (src->type == P_CONST || src->type == P_IMMD) {
493 assert(!(e->inst[0] & 0x00800000));
494 if (e->inst[0] & 0x01000000) {
495 struct nv50_reg *temp = temp_temp(pc);
496
497 emit_mov(pc, temp, src);
498 src = temp;
499 } else {
500 set_data(pc, src, 0x7f, 16, e);
501 e->inst[0] |= 0x00800000;
502 }
503 }
504
505 alloc_reg(pc, src);
506 e->inst[0] |= (src->hw << 16);
507 }
508
509 static void
510 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
511 {
512 set_long(pc, e);
513
514 if (src->type == P_ATTR) {
515 struct nv50_reg *temp = temp_temp(pc);
516
517 emit_mov(pc, temp, src);
518 src = temp;
519 } else
520 if (src->type == P_CONST || src->type == P_IMMD) {
521 assert(!(e->inst[0] & 0x01000000));
522 if (e->inst[0] & 0x00800000) {
523 struct nv50_reg *temp = temp_temp(pc);
524
525 emit_mov(pc, temp, src);
526 src = temp;
527 } else {
528 set_data(pc, src, 0x7f, 32+14, e);
529 e->inst[0] |= 0x01000000;
530 }
531 }
532
533 alloc_reg(pc, src);
534 e->inst[1] |= (src->hw << 14);
535 }
536
537 static void
538 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
539 struct nv50_reg *src1)
540 {
541 struct nv50_program_exec *e = exec(pc);
542
543 e->inst[0] |= 0xc0000000;
544 set_long(pc, e);
545
546 check_swap_src_0_1(pc, &src0, &src1);
547 set_dst(pc, dst, e);
548 set_src_0(pc, src0, e);
549 set_src_1(pc, src1, e);
550
551 emit(pc, e);
552 }
553
554 static void
555 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
556 struct nv50_reg *src0, struct nv50_reg *src1)
557 {
558 struct nv50_program_exec *e = exec(pc);
559
560 e->inst[0] |= 0xb0000000;
561
562 check_swap_src_0_1(pc, &src0, &src1);
563 set_dst(pc, dst, e);
564 set_src_0(pc, src0, e);
565 if (is_long(e))
566 set_src_2(pc, src1, e);
567 else
568 set_src_1(pc, src1, e);
569
570 emit(pc, e);
571 }
572
573 static void
574 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
575 struct nv50_reg *src0, struct nv50_reg *src1)
576 {
577 struct nv50_program_exec *e = exec(pc);
578
579 set_long(pc, e);
580 e->inst[0] |= 0xb0000000;
581 e->inst[1] |= (sub << 29);
582
583 check_swap_src_0_1(pc, &src0, &src1);
584 set_dst(pc, dst, e);
585 set_src_0(pc, src0, e);
586 set_src_1(pc, src1, e);
587
588 emit(pc, e);
589 }
590
591 static void
592 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
593 struct nv50_reg *src1)
594 {
595 struct nv50_program_exec *e = exec(pc);
596
597 e->inst[0] |= 0xb0000000;
598
599 set_long(pc, e);
600 if (check_swap_src_0_1(pc, &src0, &src1))
601 e->inst[1] |= 0x04000000;
602 else
603 e->inst[1] |= 0x08000000;
604
605 set_dst(pc, dst, e);
606 set_src_0(pc, src0, e);
607 set_src_2(pc, src1, e);
608
609 emit(pc, e);
610 }
611
612 static void
613 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
614 struct nv50_reg *src1, struct nv50_reg *src2)
615 {
616 struct nv50_program_exec *e = exec(pc);
617
618 e->inst[0] |= 0xe0000000;
619
620 check_swap_src_0_1(pc, &src0, &src1);
621 set_dst(pc, dst, e);
622 set_src_0(pc, src0, e);
623 set_src_1(pc, src1, e);
624 set_src_2(pc, src2, e);
625
626 emit(pc, e);
627 }
628
629 static void
630 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
631 struct nv50_reg *src1, struct nv50_reg *src2)
632 {
633 struct nv50_program_exec *e = exec(pc);
634
635 e->inst[0] |= 0xe0000000;
636 set_long(pc, e);
637 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
638
639 check_swap_src_0_1(pc, &src0, &src1);
640 set_dst(pc, dst, e);
641 set_src_0(pc, src0, e);
642 set_src_1(pc, src1, e);
643 set_src_2(pc, src2, e);
644
645 emit(pc, e);
646 }
647
648 static void
649 emit_flop(struct nv50_pc *pc, unsigned sub,
650 struct nv50_reg *dst, struct nv50_reg *src)
651 {
652 struct nv50_program_exec *e = exec(pc);
653
654 e->inst[0] |= 0x90000000;
655 if (sub) {
656 set_long(pc, e);
657 e->inst[1] |= (sub << 29);
658 }
659
660 set_dst(pc, dst, e);
661 set_src_0(pc, src, e);
662
663 emit(pc, e);
664 }
665
666 static void
667 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
668 {
669 struct nv50_program_exec *e = exec(pc);
670
671 e->inst[0] |= 0xb0000000;
672
673 set_dst(pc, dst, e);
674 set_src_0(pc, src, e);
675 set_long(pc, e);
676 e->inst[1] |= (6 << 29) | 0x00004000;
677
678 emit(pc, e);
679 }
680
681 static void
682 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
683 {
684 struct nv50_program_exec *e = exec(pc);
685
686 e->inst[0] |= 0xb0000000;
687
688 set_dst(pc, dst, e);
689 set_src_0(pc, src, e);
690 set_long(pc, e);
691 e->inst[1] |= (6 << 29);
692
693 emit(pc, e);
694 }
695
696 static void
697 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
698 struct nv50_reg *src0, struct nv50_reg *src1)
699 {
700 struct nv50_program_exec *e = exec(pc);
701 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
702 struct nv50_reg *rdst;
703
704 assert(c_op <= 7);
705 if (check_swap_src_0_1(pc, &src0, &src1))
706 c_op = inv_cop[c_op];
707
708 rdst = dst;
709 if (dst->type != P_TEMP)
710 dst = alloc_temp(pc, NULL);
711
712 /* set.u32 */
713 set_long(pc, e);
714 e->inst[0] |= 0xb0000000;
715 e->inst[1] |= (3 << 29);
716 e->inst[1] |= (c_op << 14);
717 /*XXX: breaks things, .u32 by default?
718 * decuda will disasm as .u16 and use .lo/.hi regs, but this
719 * doesn't seem to match what the hw actually does.
720 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
721 */
722 set_dst(pc, dst, e);
723 set_src_0(pc, src0, e);
724 set_src_1(pc, src1, e);
725 emit(pc, e);
726
727 /* cvt.f32.u32 */
728 e = exec(pc);
729 e->inst[0] = 0xa0000001;
730 e->inst[1] = 0x64014780;
731 set_dst(pc, rdst, e);
732 set_src_0(pc, dst, e);
733 emit(pc, e);
734
735 if (dst != rdst)
736 free_temp(pc, dst);
737 }
738
739 static void
740 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
741 {
742 struct nv50_program_exec *e = exec(pc);
743
744 e->inst[0] = 0xa0000000; /* cvt */
745 set_long(pc, e);
746 e->inst[1] |= (6 << 29); /* cvt */
747 e->inst[1] |= 0x08000000; /* integer mode */
748 e->inst[1] |= 0x04000000; /* 32 bit */
749 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
750 e->inst[1] |= (1 << 14); /* src .f32 */
751 set_dst(pc, dst, e);
752 set_src_0(pc, src, e);
753
754 emit(pc, e);
755 }
756
757 static void
758 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
759 struct nv50_reg *v, struct nv50_reg *e)
760 {
761 struct nv50_reg *temp = alloc_temp(pc, NULL);
762
763 emit_flop(pc, 3, temp, v);
764 emit_mul(pc, temp, temp, e);
765 emit_preex2(pc, temp, temp);
766 emit_flop(pc, 6, dst, temp);
767
768 free_temp(pc, temp);
769 }
770
771 static void
772 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
773 {
774 struct nv50_program_exec *e = exec(pc);
775
776 e->inst[0] = 0xa0000000; /* cvt */
777 set_long(pc, e);
778 e->inst[1] |= (6 << 29); /* cvt */
779 e->inst[1] |= 0x04000000; /* 32 bit */
780 e->inst[1] |= (1 << 14); /* src .f32 */
781 e->inst[1] |= ((1 << 6) << 14); /* .abs */
782 set_dst(pc, dst, e);
783 set_src_0(pc, src, e);
784
785 emit(pc, e);
786 }
787
788 static void
789 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
790 struct nv50_reg **src)
791 {
792 struct nv50_reg *one = alloc_immd(pc, 1.0);
793 struct nv50_reg *zero = alloc_immd(pc, 0.0);
794 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
795 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
796 struct nv50_reg *tmp[4];
797
798 if (mask & (1 << 0))
799 emit_mov(pc, dst[0], one);
800
801 if (mask & (1 << 3))
802 emit_mov(pc, dst[3], one);
803
804 if (mask & (3 << 1)) {
805 if (mask & (1 << 1))
806 tmp[0] = dst[1];
807 else
808 tmp[0] = temp_temp(pc);
809 emit_minmax(pc, 4, tmp[0], src[0], zero);
810 }
811
812 if (mask & (1 << 2)) {
813 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
814
815 tmp[1] = temp_temp(pc);
816 emit_minmax(pc, 4, tmp[1], src[1], zero);
817
818 tmp[3] = temp_temp(pc);
819 emit_minmax(pc, 4, tmp[3], src[3], neg128);
820 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
821
822 emit_pow(pc, dst[2], tmp[1], tmp[3]);
823 emit_mov(pc, dst[2], zero);
824 set_pred(pc, 3, 0, pc->p->exec_tail);
825 }
826 }
827
828 static void
829 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
830 {
831 struct nv50_program_exec *e = exec(pc);
832
833 set_long(pc, e);
834 e->inst[0] |= 0xa0000000; /* delta */
835 e->inst[1] |= (7 << 29); /* delta */
836 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
837 e->inst[1] |= (1 << 14); /* src .f32 */
838 set_dst(pc, dst, e);
839 set_src_0(pc, src, e);
840
841 emit(pc, e);
842 }
843
844 static void
845 emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
846 {
847 struct nv50_program_exec *e;
848 const int r_pred = 1;
849
850 /* Sets predicate reg ? */
851 e = exec(pc);
852 e->inst[0] = 0xa00001fd;
853 e->inst[1] = 0xc4014788;
854 set_src_0(pc, src, e);
855 set_pred_wr(pc, 1, r_pred, e);
856 emit(pc, e);
857
858 /* This is probably KILP */
859 e = exec(pc);
860 e->inst[0] = 0x000001fe;
861 set_long(pc, e);
862 set_pred(pc, 1 /* LT? */, r_pred, e);
863 emit(pc, e);
864 }
865
866 static struct nv50_reg *
867 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
868 {
869 switch (dst->DstRegister.File) {
870 case TGSI_FILE_TEMPORARY:
871 return &pc->temp[dst->DstRegister.Index * 4 + c];
872 case TGSI_FILE_OUTPUT:
873 return &pc->result[dst->DstRegister.Index * 4 + c];
874 case TGSI_FILE_NULL:
875 return NULL;
876 default:
877 break;
878 }
879
880 return NULL;
881 }
882
883 static struct nv50_reg *
884 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
885 {
886 struct nv50_reg *r = NULL;
887 struct nv50_reg *temp;
888 unsigned c;
889
890 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
891 switch (c) {
892 case TGSI_EXTSWIZZLE_X:
893 case TGSI_EXTSWIZZLE_Y:
894 case TGSI_EXTSWIZZLE_Z:
895 case TGSI_EXTSWIZZLE_W:
896 switch (src->SrcRegister.File) {
897 case TGSI_FILE_INPUT:
898 r = &pc->attr[src->SrcRegister.Index * 4 + c];
899 break;
900 case TGSI_FILE_TEMPORARY:
901 r = &pc->temp[src->SrcRegister.Index * 4 + c];
902 break;
903 case TGSI_FILE_CONSTANT:
904 r = &pc->param[src->SrcRegister.Index * 4 + c];
905 break;
906 case TGSI_FILE_IMMEDIATE:
907 r = &pc->immd[src->SrcRegister.Index * 4 + c];
908 break;
909 case TGSI_FILE_SAMPLER:
910 break;
911 default:
912 assert(0);
913 break;
914 }
915 break;
916 case TGSI_EXTSWIZZLE_ZERO:
917 r = alloc_immd(pc, 0.0);
918 break;
919 case TGSI_EXTSWIZZLE_ONE:
920 r = alloc_immd(pc, 1.0);
921 break;
922 default:
923 assert(0);
924 break;
925 }
926
927 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
928 case TGSI_UTIL_SIGN_KEEP:
929 break;
930 case TGSI_UTIL_SIGN_CLEAR:
931 temp = temp_temp(pc);
932 emit_abs(pc, temp, r);
933 r = temp;
934 break;
935 case TGSI_UTIL_SIGN_TOGGLE:
936 temp = temp_temp(pc);
937 emit_neg(pc, temp, r);
938 r = temp;
939 break;
940 case TGSI_UTIL_SIGN_SET:
941 temp = temp_temp(pc);
942 emit_abs(pc, temp, r);
943 emit_neg(pc, temp, r);
944 r = temp;
945 break;
946 default:
947 assert(0);
948 break;
949 }
950
951 return r;
952 }
953
954 static boolean
955 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
956 {
957 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
958 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
959 unsigned mask, sat, unit;
960 int i, c;
961
962 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
963 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
964
965 for (c = 0; c < 4; c++) {
966 if (mask & (1 << c))
967 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
968 else
969 dst[c] = NULL;
970 }
971
972 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
973 const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
974
975 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
976 unit = fs->SrcRegister.Index;
977
978 for (c = 0; c < 4; c++)
979 src[i][c] = tgsi_src(pc, c, fs);
980 }
981
982 if (sat) {
983 for (c = 0; c < 4; c++) {
984 rdst[c] = dst[c];
985 dst[c] = temp_temp(pc);
986 }
987 }
988
989 switch (inst->Instruction.Opcode) {
990 case TGSI_OPCODE_ABS:
991 for (c = 0; c < 4; c++) {
992 if (!(mask & (1 << c)))
993 continue;
994 emit_abs(pc, dst[c], src[0][c]);
995 }
996 break;
997 case TGSI_OPCODE_ADD:
998 for (c = 0; c < 4; c++) {
999 if (!(mask & (1 << c)))
1000 continue;
1001 emit_add(pc, dst[c], src[0][c], src[1][c]);
1002 }
1003 break;
1004 case TGSI_OPCODE_COS:
1005 temp = alloc_temp(pc, NULL);
1006 emit_precossin(pc, temp, src[0][0]);
1007 emit_flop(pc, 5, temp, temp);
1008 for (c = 0; c < 4; c++) {
1009 if (!(mask & (1 << c)))
1010 continue;
1011 emit_mov(pc, dst[c], temp);
1012 }
1013 break;
1014 case TGSI_OPCODE_DP3:
1015 temp = alloc_temp(pc, NULL);
1016 emit_mul(pc, temp, src[0][0], src[1][0]);
1017 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1018 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1019 for (c = 0; c < 4; c++) {
1020 if (!(mask & (1 << c)))
1021 continue;
1022 emit_mov(pc, dst[c], temp);
1023 }
1024 free_temp(pc, temp);
1025 break;
1026 case TGSI_OPCODE_DP4:
1027 temp = alloc_temp(pc, NULL);
1028 emit_mul(pc, temp, src[0][0], src[1][0]);
1029 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1030 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1031 emit_mad(pc, temp, src[0][3], src[1][3], temp);
1032 for (c = 0; c < 4; c++) {
1033 if (!(mask & (1 << c)))
1034 continue;
1035 emit_mov(pc, dst[c], temp);
1036 }
1037 free_temp(pc, temp);
1038 break;
1039 case TGSI_OPCODE_DPH:
1040 temp = alloc_temp(pc, NULL);
1041 emit_mul(pc, temp, src[0][0], src[1][0]);
1042 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1043 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1044 emit_add(pc, temp, src[1][3], temp);
1045 for (c = 0; c < 4; c++) {
1046 if (!(mask & (1 << c)))
1047 continue;
1048 emit_mov(pc, dst[c], temp);
1049 }
1050 free_temp(pc, temp);
1051 break;
1052 case TGSI_OPCODE_DST:
1053 {
1054 struct nv50_reg *one = alloc_immd(pc, 1.0);
1055 if (mask & (1 << 0))
1056 emit_mov(pc, dst[0], one);
1057 if (mask & (1 << 1))
1058 emit_mul(pc, dst[1], src[0][1], src[1][1]);
1059 if (mask & (1 << 2))
1060 emit_mov(pc, dst[2], src[0][2]);
1061 if (mask & (1 << 3))
1062 emit_mov(pc, dst[3], src[1][3]);
1063 FREE(one);
1064 }
1065 break;
1066 case TGSI_OPCODE_EX2:
1067 temp = alloc_temp(pc, NULL);
1068 emit_preex2(pc, temp, src[0][0]);
1069 emit_flop(pc, 6, temp, temp);
1070 for (c = 0; c < 4; c++) {
1071 if (!(mask & (1 << c)))
1072 continue;
1073 emit_mov(pc, dst[c], temp);
1074 }
1075 free_temp(pc, temp);
1076 break;
1077 case TGSI_OPCODE_FLR:
1078 for (c = 0; c < 4; c++) {
1079 if (!(mask & (1 << c)))
1080 continue;
1081 emit_flr(pc, dst[c], src[0][c]);
1082 }
1083 break;
1084 case TGSI_OPCODE_FRC:
1085 temp = alloc_temp(pc, NULL);
1086 for (c = 0; c < 4; c++) {
1087 if (!(mask & (1 << c)))
1088 continue;
1089 emit_flr(pc, temp, src[0][c]);
1090 emit_sub(pc, dst[c], src[0][c], temp);
1091 }
1092 free_temp(pc, temp);
1093 break;
1094 case TGSI_OPCODE_KIL:
1095 emit_kil(pc, src[0][0]);
1096 emit_kil(pc, src[0][1]);
1097 emit_kil(pc, src[0][2]);
1098 emit_kil(pc, src[0][3]);
1099 break;
1100 case TGSI_OPCODE_LIT:
1101 emit_lit(pc, &dst[0], mask, &src[0][0]);
1102 break;
1103 case TGSI_OPCODE_LG2:
1104 temp = alloc_temp(pc, NULL);
1105 emit_flop(pc, 3, temp, src[0][0]);
1106 for (c = 0; c < 4; c++) {
1107 if (!(mask & (1 << c)))
1108 continue;
1109 emit_mov(pc, dst[c], temp);
1110 }
1111 break;
1112 case TGSI_OPCODE_LRP:
1113 for (c = 0; c < 4; c++) {
1114 if (!(mask & (1 << c)))
1115 continue;
1116 /*XXX: we can do better than this */
1117 temp = alloc_temp(pc, NULL);
1118 emit_neg(pc, temp, src[0][c]);
1119 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1120 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1121 free_temp(pc, temp);
1122 }
1123 break;
1124 case TGSI_OPCODE_MAD:
1125 for (c = 0; c < 4; c++) {
1126 if (!(mask & (1 << c)))
1127 continue;
1128 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1129 }
1130 break;
1131 case TGSI_OPCODE_MAX:
1132 for (c = 0; c < 4; c++) {
1133 if (!(mask & (1 << c)))
1134 continue;
1135 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1136 }
1137 break;
1138 case TGSI_OPCODE_MIN:
1139 for (c = 0; c < 4; c++) {
1140 if (!(mask & (1 << c)))
1141 continue;
1142 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1143 }
1144 break;
1145 case TGSI_OPCODE_MOV:
1146 for (c = 0; c < 4; c++) {
1147 if (!(mask & (1 << c)))
1148 continue;
1149 emit_mov(pc, dst[c], src[0][c]);
1150 }
1151 break;
1152 case TGSI_OPCODE_MUL:
1153 for (c = 0; c < 4; c++) {
1154 if (!(mask & (1 << c)))
1155 continue;
1156 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1157 }
1158 break;
1159 case TGSI_OPCODE_POW:
1160 temp = alloc_temp(pc, NULL);
1161 emit_pow(pc, temp, src[0][0], src[1][0]);
1162 for (c = 0; c < 4; c++) {
1163 if (!(mask & (1 << c)))
1164 continue;
1165 emit_mov(pc, dst[c], temp);
1166 }
1167 free_temp(pc, temp);
1168 break;
1169 case TGSI_OPCODE_RCP:
1170 for (c = 0; c < 4; c++) {
1171 if (!(mask & (1 << c)))
1172 continue;
1173 emit_flop(pc, 0, dst[c], src[0][0]);
1174 }
1175 break;
1176 case TGSI_OPCODE_RSQ:
1177 for (c = 0; c < 4; c++) {
1178 if (!(mask & (1 << c)))
1179 continue;
1180 emit_flop(pc, 2, dst[c], src[0][0]);
1181 }
1182 break;
1183 case TGSI_OPCODE_SCS:
1184 temp = alloc_temp(pc, NULL);
1185 emit_precossin(pc, temp, src[0][0]);
1186 if (mask & (1 << 0))
1187 emit_flop(pc, 5, dst[0], temp);
1188 if (mask & (1 << 1))
1189 emit_flop(pc, 4, dst[1], temp);
1190 break;
1191 case TGSI_OPCODE_SGE:
1192 for (c = 0; c < 4; c++) {
1193 if (!(mask & (1 << c)))
1194 continue;
1195 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1196 }
1197 break;
1198 case TGSI_OPCODE_SIN:
1199 temp = alloc_temp(pc, NULL);
1200 emit_precossin(pc, temp, src[0][0]);
1201 emit_flop(pc, 4, temp, temp);
1202 for (c = 0; c < 4; c++) {
1203 if (!(mask & (1 << c)))
1204 continue;
1205 emit_mov(pc, dst[c], temp);
1206 }
1207 break;
1208 case TGSI_OPCODE_SLT:
1209 for (c = 0; c < 4; c++) {
1210 if (!(mask & (1 << c)))
1211 continue;
1212 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1213 }
1214 break;
1215 case TGSI_OPCODE_SUB:
1216 for (c = 0; c < 4; c++) {
1217 if (!(mask & (1 << c)))
1218 continue;
1219 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1220 }
1221 break;
1222 case TGSI_OPCODE_TEX:
1223 case TGSI_OPCODE_TXP:
1224 {
1225 struct nv50_reg *t[4];
1226 struct nv50_program_exec *e;
1227
1228 alloc_temp4(pc, t, 0);
1229 emit_mov(pc, t[0], src[0][0]);
1230 emit_mov(pc, t[1], src[0][1]);
1231
1232 e = exec(pc);
1233 e->inst[0] = 0xf6400000;
1234 e->inst[0] |= (unit << 9);
1235 set_long(pc, e);
1236 e->inst[1] |= 0x0000c004;
1237 set_dst(pc, t[0], e);
1238 emit(pc, e);
1239
1240 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
1241 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
1242 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
1243 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
1244
1245 free_temp4(pc, t);
1246 }
1247 break;
1248 case TGSI_OPCODE_XPD:
1249 temp = alloc_temp(pc, NULL);
1250 if (mask & (1 << 0)) {
1251 emit_mul(pc, temp, src[0][2], src[1][1]);
1252 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1253 }
1254 if (mask & (1 << 1)) {
1255 emit_mul(pc, temp, src[0][0], src[1][2]);
1256 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1257 }
1258 if (mask & (1 << 2)) {
1259 emit_mul(pc, temp, src[0][1], src[1][0]);
1260 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1261 }
1262 free_temp(pc, temp);
1263 break;
1264 case TGSI_OPCODE_END:
1265 break;
1266 default:
1267 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1268 return FALSE;
1269 }
1270
1271 if (sat) {
1272 for (c = 0; c < 4; c++) {
1273 struct nv50_program_exec *e;
1274
1275 if (!(mask & (1 << c)))
1276 continue;
1277 e = exec(pc);
1278
1279 e->inst[0] = 0xa0000000; /* cvt */
1280 set_long(pc, e);
1281 e->inst[1] |= (6 << 29); /* cvt */
1282 e->inst[1] |= 0x04000000; /* 32 bit */
1283 e->inst[1] |= (1 << 14); /* src .f32 */
1284 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1285 set_dst(pc, rdst[c], e);
1286 set_src_0(pc, dst[c], e);
1287 emit(pc, e);
1288 }
1289 }
1290
1291 kill_temp_temp(pc);
1292 return TRUE;
1293 }
1294
1295 static boolean
1296 nv50_program_tx_prep(struct nv50_pc *pc)
1297 {
1298 struct tgsi_parse_context p;
1299 boolean ret = FALSE;
1300 unsigned i, c;
1301
1302 tgsi_parse_init(&p, pc->p->pipe.tokens);
1303 while (!tgsi_parse_end_of_tokens(&p)) {
1304 const union tgsi_full_token *tok = &p.FullToken;
1305
1306 tgsi_parse_token(&p);
1307 switch (tok->Token.Type) {
1308 case TGSI_TOKEN_TYPE_IMMEDIATE:
1309 {
1310 const struct tgsi_full_immediate *imm =
1311 &p.FullToken.FullImmediate;
1312
1313 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1314 imm->u.ImmediateFloat32[1].Float,
1315 imm->u.ImmediateFloat32[2].Float,
1316 imm->u.ImmediateFloat32[3].Float);
1317 }
1318 break;
1319 case TGSI_TOKEN_TYPE_DECLARATION:
1320 {
1321 const struct tgsi_full_declaration *d;
1322 unsigned last;
1323
1324 d = &p.FullToken.FullDeclaration;
1325 last = d->DeclarationRange.Last;
1326
1327 switch (d->Declaration.File) {
1328 case TGSI_FILE_TEMPORARY:
1329 if (pc->temp_nr < (last + 1))
1330 pc->temp_nr = last + 1;
1331 break;
1332 case TGSI_FILE_OUTPUT:
1333 if (pc->result_nr < (last + 1))
1334 pc->result_nr = last + 1;
1335 break;
1336 case TGSI_FILE_INPUT:
1337 if (pc->attr_nr < (last + 1))
1338 pc->attr_nr = last + 1;
1339 break;
1340 case TGSI_FILE_CONSTANT:
1341 if (pc->param_nr < (last + 1))
1342 pc->param_nr = last + 1;
1343 break;
1344 case TGSI_FILE_SAMPLER:
1345 break;
1346 default:
1347 NOUVEAU_ERR("bad decl file %d\n",
1348 d->Declaration.File);
1349 goto out_err;
1350 }
1351 }
1352 break;
1353 case TGSI_TOKEN_TYPE_INSTRUCTION:
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359
1360 if (pc->temp_nr) {
1361 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
1362 if (!pc->temp)
1363 goto out_err;
1364
1365 for (i = 0; i < pc->temp_nr; i++) {
1366 for (c = 0; c < 4; c++) {
1367 pc->temp[i*4+c].type = P_TEMP;
1368 pc->temp[i*4+c].hw = -1;
1369 pc->temp[i*4+c].index = i;
1370 }
1371 }
1372 }
1373
1374 if (pc->attr_nr) {
1375 struct nv50_reg *iv = NULL;
1376 int aid = 0;
1377
1378 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
1379 if (!pc->attr)
1380 goto out_err;
1381
1382 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1383 iv = alloc_temp(pc, NULL);
1384 emit_interp(pc, iv, iv, NULL);
1385 emit_flop(pc, 0, iv, iv);
1386 aid++;
1387 }
1388
1389 for (i = 0; i < pc->attr_nr; i++) {
1390 struct nv50_reg *a = &pc->attr[i*4];
1391
1392 for (c = 0; c < 4; c++) {
1393 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1394 struct nv50_reg *at =
1395 alloc_temp(pc, NULL);
1396 pc->attr[i*4+c].type = at->type;
1397 pc->attr[i*4+c].hw = at->hw;
1398 pc->attr[i*4+c].index = at->index;
1399 } else {
1400 pc->p->cfg.vp.attr[aid/32] |=
1401 (1 << (aid % 32));
1402 pc->attr[i*4+c].type = P_ATTR;
1403 pc->attr[i*4+c].hw = aid++;
1404 pc->attr[i*4+c].index = i;
1405 }
1406 }
1407
1408 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1409 continue;
1410
1411 emit_interp(pc, &a[0], &a[0], iv);
1412 emit_interp(pc, &a[1], &a[1], iv);
1413 emit_interp(pc, &a[2], &a[2], iv);
1414 emit_interp(pc, &a[3], &a[3], iv);
1415 }
1416
1417 if (iv)
1418 free_temp(pc, iv);
1419 }
1420
1421 if (pc->result_nr) {
1422 int rid = 0;
1423
1424 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
1425 if (!pc->result)
1426 goto out_err;
1427
1428 for (i = 0; i < pc->result_nr; i++) {
1429 for (c = 0; c < 4; c++) {
1430 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1431 pc->result[i*4+c].type = P_TEMP;
1432 pc->result[i*4+c].hw = -1;
1433 } else {
1434 pc->result[i*4+c].type = P_RESULT;
1435 pc->result[i*4+c].hw = rid++;
1436 }
1437 pc->result[i*4+c].index = i;
1438 }
1439 }
1440 }
1441
1442 if (pc->param_nr) {
1443 int rid = 0;
1444
1445 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
1446 if (!pc->param)
1447 goto out_err;
1448
1449 for (i = 0; i < pc->param_nr; i++) {
1450 for (c = 0; c < 4; c++) {
1451 pc->param[i*4+c].type = P_CONST;
1452 pc->param[i*4+c].hw = rid++;
1453 pc->param[i*4+c].index = i;
1454 }
1455 }
1456 }
1457
1458 if (pc->immd_nr) {
1459 int rid = pc->param_nr * 4;
1460
1461 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
1462 if (!pc->immd)
1463 goto out_err;
1464
1465 for (i = 0; i < pc->immd_nr; i++) {
1466 for (c = 0; c < 4; c++) {
1467 pc->immd[i*4+c].type = P_IMMD;
1468 pc->immd[i*4+c].hw = rid++;
1469 pc->immd[i*4+c].index = i;
1470 }
1471 }
1472 }
1473
1474 ret = TRUE;
1475 out_err:
1476 tgsi_parse_free(&p);
1477 return ret;
1478 }
1479
1480 static boolean
1481 nv50_program_tx(struct nv50_program *p)
1482 {
1483 struct tgsi_parse_context parse;
1484 struct nv50_pc *pc;
1485 boolean ret;
1486
1487 pc = CALLOC_STRUCT(nv50_pc);
1488 if (!pc)
1489 return FALSE;
1490 pc->p = p;
1491 pc->p->cfg.high_temp = 4;
1492
1493 ret = nv50_program_tx_prep(pc);
1494 if (ret == FALSE)
1495 goto out_cleanup;
1496
1497 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1498 while (!tgsi_parse_end_of_tokens(&parse)) {
1499 const union tgsi_full_token *tok = &parse.FullToken;
1500
1501 tgsi_parse_token(&parse);
1502
1503 switch (tok->Token.Type) {
1504 case TGSI_TOKEN_TYPE_INSTRUCTION:
1505 ret = nv50_program_tx_insn(pc, tok);
1506 if (ret == FALSE)
1507 goto out_err;
1508 break;
1509 default:
1510 break;
1511 }
1512 }
1513
1514 if (p->type == PIPE_SHADER_FRAGMENT) {
1515 struct nv50_reg out;
1516
1517 out.type = P_TEMP;
1518 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1519 emit_mov(pc, &out, &pc->result[out.hw]);
1520 }
1521
1522 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1523 pc->p->exec_tail->inst[1] |= 0x00000001;
1524
1525 p->param_nr = pc->param_nr * 4;
1526 p->immd_nr = pc->immd_nr * 4;
1527 p->immd = pc->immd_buf;
1528
1529 out_err:
1530 tgsi_parse_free(&parse);
1531
1532 out_cleanup:
1533 return ret;
1534 }
1535
1536 static void
1537 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1538 {
1539 if (nv50_program_tx(p) == FALSE)
1540 assert(0);
1541 p->translated = TRUE;
1542 }
1543
1544 static void
1545 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1546 unsigned start, unsigned count)
1547 {
1548 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1549 struct nouveau_grobj *tesla = nv50->screen->tesla;
1550
1551 while (count) {
1552 unsigned nr = count > 2047 ? 2047 : count;
1553
1554 BEGIN_RING(chan, tesla, 0x00000f00, 1);
1555 OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8));
1556 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1557 OUT_RINGp (chan, map, nr);
1558
1559 map += nr;
1560 start += nr;
1561 count -= nr;
1562 }
1563 }
1564
1565 static void
1566 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1567 {
1568 struct nouveau_winsys *nvws = nv50->screen->nvws;
1569 struct pipe_winsys *ws = nv50->pipe.winsys;
1570 unsigned nr = p->param_nr + p->immd_nr;
1571
1572 if (!p->data && nr) {
1573 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1574
1575 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1576 while (heap->next && heap->size < nr) {
1577 struct nv50_program *evict = heap->next->priv;
1578 nvws->res_free(&evict->data);
1579 }
1580
1581 if (nvws->res_alloc(heap, nr, p, &p->data))
1582 assert(0);
1583 }
1584 }
1585
1586 if (p->param_nr) {
1587 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1588 PIPE_BUFFER_USAGE_CPU_READ);
1589 nv50_program_upload_data(nv50, map, p->data->start,
1590 p->param_nr);
1591 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1592 }
1593
1594 if (p->immd_nr) {
1595 nv50_program_upload_data(nv50, p->immd,
1596 p->data->start + p->param_nr,
1597 p->immd_nr);
1598 }
1599 }
1600
1601 static void
1602 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1603 {
1604 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1605 struct nouveau_grobj *tesla = nv50->screen->tesla;
1606 struct pipe_winsys *ws = nv50->pipe.winsys;
1607 struct nv50_program_exec *e;
1608 struct nouveau_stateobj *so;
1609 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1610 unsigned start, count, *up, *ptr;
1611 boolean upload = FALSE;
1612
1613 if (!p->buffer) {
1614 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1615 upload = TRUE;
1616 }
1617
1618 if (p->data && p->data->start != p->data_start) {
1619 for (e = p->exec_head; e; e = e->next) {
1620 unsigned ei, ci;
1621
1622 if (e->param.index < 0)
1623 continue;
1624 ei = e->param.shift >> 5;
1625 ci = e->param.index + p->data->start;
1626
1627 e->inst[ei] &= ~e->param.mask;
1628 e->inst[ei] |= (ci << e->param.shift);
1629 }
1630
1631 p->data_start = p->data->start;
1632 upload = TRUE;
1633 }
1634
1635 if (!upload)
1636 return;
1637
1638 #ifdef NV50_PROGRAM_DUMP
1639 NOUVEAU_ERR("-------\n");
1640 up = ptr = MALLOC(p->exec_size * 4);
1641 for (e = p->exec_head; e; e = e->next) {
1642 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1643 if (is_long(e))
1644 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1645 }
1646
1647 #endif
1648
1649 up = ptr = MALLOC(p->exec_size * 4);
1650 for (e = p->exec_head; e; e = e->next) {
1651 *(ptr++) = e->inst[0];
1652 if (is_long(e))
1653 *(ptr++) = e->inst[1];
1654 }
1655
1656 so = so_new(4,2);
1657 so_method(so, nv50->screen->tesla, 0x1280, 3);
1658 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1659 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1660 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1661
1662 start = 0; count = p->exec_size;
1663 while (count) {
1664 struct nouveau_winsys *nvws = nv50->screen->nvws;
1665 unsigned nr;
1666
1667 so_emit(nvws, so);
1668
1669 nr = MIN2(count, 2047);
1670 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1671 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1672 FIRE_RING(chan);
1673 continue;
1674 }
1675
1676 BEGIN_RING(chan, tesla, 0x0f00, 1);
1677 OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
1678 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1679 OUT_RINGp (chan, up + start, nr);
1680
1681 start += nr;
1682 count -= nr;
1683 }
1684
1685 FREE(up);
1686 so_ref(NULL, &so);
1687 }
1688
1689 void
1690 nv50_vertprog_validate(struct nv50_context *nv50)
1691 {
1692 struct nouveau_grobj *tesla = nv50->screen->tesla;
1693 struct nv50_program *p = nv50->vertprog;
1694 struct nouveau_stateobj *so;
1695
1696 if (!p->translated) {
1697 nv50_program_validate(nv50, p);
1698 if (!p->translated)
1699 assert(0);
1700 }
1701
1702 nv50_program_validate_data(nv50, p);
1703 nv50_program_validate_code(nv50, p);
1704
1705 so = so_new(13, 2);
1706 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1707 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1708 NOUVEAU_BO_HIGH, 0, 0);
1709 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1710 NOUVEAU_BO_LOW, 0, 0);
1711 so_method(so, tesla, 0x1650, 2);
1712 so_data (so, p->cfg.vp.attr[0]);
1713 so_data (so, p->cfg.vp.attr[1]);
1714 so_method(so, tesla, 0x16b8, 1);
1715 so_data (so, p->cfg.high_result);
1716 so_method(so, tesla, 0x16ac, 2);
1717 so_data (so, p->cfg.high_result); //8);
1718 so_data (so, p->cfg.high_temp);
1719 so_method(so, tesla, 0x140c, 1);
1720 so_data (so, 0); /* program start offset */
1721 so_ref(so, &nv50->state.vertprog);
1722 }
1723
1724 void
1725 nv50_fragprog_validate(struct nv50_context *nv50)
1726 {
1727 struct nouveau_grobj *tesla = nv50->screen->tesla;
1728 struct nv50_program *p = nv50->fragprog;
1729 struct nouveau_stateobj *so;
1730
1731 if (!p->translated) {
1732 nv50_program_validate(nv50, p);
1733 if (!p->translated)
1734 assert(0);
1735 }
1736
1737 nv50_program_validate_data(nv50, p);
1738 nv50_program_validate_code(nv50, p);
1739
1740 so = so_new(64, 2);
1741 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1742 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1743 NOUVEAU_BO_HIGH, 0, 0);
1744 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1745 NOUVEAU_BO_LOW, 0, 0);
1746 so_method(so, tesla, 0x1904, 4);
1747 so_data (so, 0x00040404); /* p: 0x01000404 */
1748 so_data (so, 0x00000004);
1749 so_data (so, 0x00000000);
1750 so_data (so, 0x00000000);
1751 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
1752 so_data (so, 0x03020100);
1753 so_data (so, 0x07060504);
1754 so_data (so, 0x0b0a0908);
1755 so_method(so, tesla, 0x1988, 2);
1756 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
1757 so_data (so, p->cfg.high_temp);
1758 so_method(so, tesla, 0x1414, 1);
1759 so_data (so, 0); /* program start offset */
1760 so_ref(so, &nv50->state.fragprog);
1761 }
1762
1763 void
1764 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1765 {
1766 struct pipe_screen *pscreen = nv50->pipe.screen;
1767
1768 while (p->exec_head) {
1769 struct nv50_program_exec *e = p->exec_head;
1770
1771 p->exec_head = e->next;
1772 FREE(e);
1773 }
1774 p->exec_tail = NULL;
1775 p->exec_size = 0;
1776
1777 if (p->buffer)
1778 pipe_buffer_reference(pscreen, &p->buffer, NULL);
1779
1780 nv50->screen->nvws->res_free(&p->data);
1781
1782 p->translated = 0;
1783 }
1784