b205cdbaca341aebd04a43869ef44cb67623c8c9
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "pipe/p_inlines.h"
27
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
31
32 #include "nv50_context.h"
33
34 #define NV50_SU_MAX_TEMP 64
35 #define NV50_PROGRAM_DUMP
36
37 /* ARL - gallium craps itself on progs/vp/arl.txt
38 *
39 * MSB - Like MAD, but MUL+SUB
40 * - Fuck it off, introduce a way to negate args for ops that
41 * support it.
42 *
43 * Look into inlining IMMD for ops other than MOV (make it general?)
44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
46 *
47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
48 * case, if the emit_src() causes the inst to suddenly become long.
49 *
50 * Verify half-insns work where expected - and force disable them where they
51 * don't work - MUL has it forcibly disabled atm as it fixes POW..
52 *
53 * FUCK! watch dst==src vectors, can overwrite components that are needed.
54 * ie. SUB R0, R0.yzxw, R0
55 *
56 * Things to check with renouveau:
57 * FP attr/result assignment - how?
58 * attrib
59 * - 0x16bc maps vp output onto fp hpos
60 * - 0x16c0 maps vp output onto fp col0
61 * result
62 * - colr always 0-3
63 * - depr always 4
64 * 0x16bc->0x16e8 --> some binding between vp/fp regs
65 * 0x16b8 --> VP output count
66 *
67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
68 * "MOV rcol.x, fcol.y" = 0x00000004
69 * 0x19a8 --> as above but 0x00000100 and 0x00000000
70 * - 0x00100000 used when KIL used
71 * 0x196c --> as above but 0x00000011 and 0x00000000
72 *
73 * 0x1988 --> 0xXXNNNNNN
74 * - XX == FP high something
75 */
/*
 * One scalar register component as tracked by the code generator.
 */
struct nv50_reg {
	/* Register file the component lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* Set to -1 for registers created internally by alloc_temp(),
	 * alloc_temp4() and alloc_immd(); free_temp() keys off this value.
	 */
	int index;

	/* Hardware slot.  For P_TEMP a negative value means "not assigned
	 * yet" and alloc_reg() will pick a slot on first use.
	 */
	int hw;

	/* Not referenced by the code visible in this part of the file. */
	int neg;
};
89
90 struct nv50_pc {
91 struct nv50_program *p;
92
93 /* hw resources */
94 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
95
96 /* tgsi resources */
97 struct nv50_reg *temp;
98 int temp_nr;
99 struct nv50_reg *attr;
100 int attr_nr;
101 struct nv50_reg *result;
102 int result_nr;
103 struct nv50_reg *param;
104 int param_nr;
105 struct nv50_reg *immd;
106 float *immd_buf;
107 int immd_nr;
108
109 struct nv50_reg *temp_temp[16];
110 unsigned temp_temp_nr;
111 };
112
113 static void
114 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
115 {
116 int i;
117
118 if (reg->type == P_RESULT) {
119 if (pc->p->cfg.high_result < (reg->hw + 1))
120 pc->p->cfg.high_result = reg->hw + 1;
121 }
122
123 if (reg->type != P_TEMP)
124 return;
125
126 if (reg->hw >= 0) {
127 /*XXX: do this here too to catch FP temp-as-attr usage..
128 * not clean, but works */
129 if (pc->p->cfg.high_temp < (reg->hw + 1))
130 pc->p->cfg.high_temp = reg->hw + 1;
131 return;
132 }
133
134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
135 if (!(pc->r_temp[i])) {
136 pc->r_temp[i] = reg;
137 reg->hw = i;
138 if (pc->p->cfg.high_temp < (i + 1))
139 pc->p->cfg.high_temp = i + 1;
140 return;
141 }
142 }
143
144 assert(0);
145 }
146
147 static struct nv50_reg *
148 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
149 {
150 struct nv50_reg *r;
151 int i;
152
153 if (dst && dst->type == P_TEMP && dst->hw == -1)
154 return dst;
155
156 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
157 if (!pc->r_temp[i]) {
158 r = CALLOC_STRUCT(nv50_reg);
159 r->type = P_TEMP;
160 r->index = -1;
161 r->hw = i;
162 pc->r_temp[i] = r;
163 return r;
164 }
165 }
166
167 assert(0);
168 return NULL;
169 }
170
171 static void
172 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
173 {
174 if (r->index == -1) {
175 unsigned hw = r->hw;
176
177 FREE(pc->r_temp[hw]);
178 pc->r_temp[hw] = NULL;
179 }
180 }
181
182 static int
183 alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
184 {
185 int i;
186
187 if ((idx + 4) >= NV50_SU_MAX_TEMP)
188 return 1;
189
190 if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
191 pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
192 return alloc_temp4(pc, dst, idx + 1);
193
194 for (i = 0; i < 4; i++) {
195 dst[i] = CALLOC_STRUCT(nv50_reg);
196 dst[i]->type = P_TEMP;
197 dst[i]->index = -1;
198 dst[i]->hw = idx + i;
199 pc->r_temp[idx + i] = dst[i];
200 }
201
202 return 0;
203 }
204
/* Release the four registers obtained from alloc_temp4(). */
static void
free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
{
	int c = 0;

	while (c < 4) {
		free_temp(pc, reg[c]);
		c++;
	}
}
213
214 static struct nv50_reg *
215 temp_temp(struct nv50_pc *pc)
216 {
217 if (pc->temp_temp_nr >= 16)
218 assert(0);
219
220 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
221 return pc->temp_temp[pc->temp_temp_nr++];
222 }
223
224 static void
225 kill_temp_temp(struct nv50_pc *pc)
226 {
227 int i;
228
229 for (i = 0; i < pc->temp_temp_nr; i++)
230 free_temp(pc, pc->temp_temp[i]);
231 pc->temp_temp_nr = 0;
232 }
233
234 static int
235 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
236 {
237 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
238 (pc->immd_nr + 1) * 4 * sizeof(float));
239 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
240 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
241 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
242 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
243
244 return pc->immd_nr++;
245 }
246
247 static struct nv50_reg *
248 alloc_immd(struct nv50_pc *pc, float f)
249 {
250 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
251 unsigned hw;
252
253 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
254 r->type = P_IMMD;
255 r->hw = hw;
256 r->index = -1;
257 return r;
258 }
259
260 static struct nv50_program_exec *
261 exec(struct nv50_pc *pc)
262 {
263 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
264
265 e->param.index = -1;
266 return e;
267 }
268
269 static void
270 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
271 {
272 struct nv50_program *p = pc->p;
273
274 if (p->exec_tail)
275 p->exec_tail->next = e;
276 if (!p->exec_head)
277 p->exec_head = e;
278 p->exec_tail = e;
279 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
280 }
281
282 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
283
284 static boolean
285 is_long(struct nv50_program_exec *e)
286 {
287 if (e->inst[0] & 1)
288 return TRUE;
289 return FALSE;
290 }
291
292 static boolean
293 is_immd(struct nv50_program_exec *e)
294 {
295 if (is_long(e) && (e->inst[1] & 3) == 3)
296 return TRUE;
297 return FALSE;
298 }
299
300 static INLINE void
301 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
302 struct nv50_program_exec *e)
303 {
304 set_long(pc, e);
305 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
306 e->inst[1] |= (pred << 7) | (idx << 12);
307 }
308
309 static INLINE void
310 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
311 struct nv50_program_exec *e)
312 {
313 set_long(pc, e);
314 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
315 e->inst[1] |= (idx << 4) | (on << 6);
316 }
317
318 static INLINE void
319 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
320 {
321 if (is_long(e))
322 return;
323
324 e->inst[0] |= 1;
325 set_pred(pc, 0xf, 0, e);
326 set_pred_wr(pc, 0, 0, e);
327 }
328
329 static INLINE void
330 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
331 {
332 if (dst->type == P_RESULT) {
333 set_long(pc, e);
334 e->inst[1] |= 0x00000008;
335 }
336
337 alloc_reg(pc, dst);
338 e->inst[0] |= (dst->hw << 2);
339 }
340
341 static INLINE void
342 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
343 {
344 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
345
346 set_long(pc, e);
347 /*XXX: can't be predicated - bits overlap.. catch cases where both
348 * are required and avoid them. */
349 set_pred(pc, 0, 0, e);
350 set_pred_wr(pc, 0, 0, e);
351
352 e->inst[1] |= 0x00000002 | 0x00000001;
353 e->inst[0] |= (val & 0x3f) << 16;
354 e->inst[1] |= (val >> 6) << 2;
355 }
356
357 static void
358 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
359 struct nv50_reg *src, struct nv50_reg *iv)
360 {
361 struct nv50_program_exec *e = exec(pc);
362
363 e->inst[0] |= 0x80000000;
364 set_dst(pc, dst, e);
365 alloc_reg(pc, src);
366 e->inst[0] |= (src->hw << 16);
367 if (iv) {
368 e->inst[0] |= (1 << 25);
369 alloc_reg(pc, iv);
370 e->inst[0] |= (iv->hw << 9);
371 }
372
373 emit(pc, e);
374 }
375
376 static void
377 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
378 struct nv50_program_exec *e)
379 {
380 set_long(pc, e);
381 #if 1
382 e->inst[1] |= (1 << 22);
383 #else
384 if (src->type == P_IMMD) {
385 e->inst[1] |= (NV50_CB_PMISC << 22);
386 } else {
387 if (pc->p->type == PIPE_SHADER_VERTEX)
388 e->inst[1] |= (NV50_CB_PVP << 22);
389 else
390 e->inst[1] |= (NV50_CB_PFP << 22);
391 }
392 #endif
393
394 e->param.index = src->hw;
395 e->param.shift = s;
396 e->param.mask = m << (s % 32);
397 }
398
399 static void
400 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
401 {
402 struct nv50_program_exec *e = exec(pc);
403
404 e->inst[0] |= 0x10000000;
405
406 set_dst(pc, dst, e);
407
408 if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
409 set_immd(pc, src, e);
410 /*XXX: 32-bit, but steals part of "half" reg space - need to
411 * catch and handle this case if/when we do half-regs
412 */
413 e->inst[0] |= 0x00008000;
414 } else
415 if (src->type == P_IMMD || src->type == P_CONST) {
416 set_long(pc, e);
417 set_data(pc, src, 0x7f, 9, e);
418 e->inst[1] |= 0x20000000; /* src0 const? */
419 } else {
420 if (src->type == P_ATTR) {
421 set_long(pc, e);
422 e->inst[1] |= 0x00200000;
423 }
424
425 alloc_reg(pc, src);
426 e->inst[0] |= (src->hw << 9);
427 }
428
429 /* We really should support "half" instructions here at some point,
430 * but I don't feel confident enough about them yet.
431 */
432 set_long(pc, e);
433 if (is_long(e) && !is_immd(e)) {
434 e->inst[1] |= 0x04000000; /* 32-bit */
435 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
436 }
437
438 emit(pc, e);
439 }
440
441 static boolean
442 check_swap_src_0_1(struct nv50_pc *pc,
443 struct nv50_reg **s0, struct nv50_reg **s1)
444 {
445 struct nv50_reg *src0 = *s0, *src1 = *s1;
446
447 if (src0->type == P_CONST) {
448 if (src1->type != P_CONST) {
449 *s0 = src1;
450 *s1 = src0;
451 return TRUE;
452 }
453 } else
454 if (src1->type == P_ATTR) {
455 if (src0->type != P_ATTR) {
456 *s0 = src1;
457 *s1 = src0;
458 return TRUE;
459 }
460 }
461
462 return FALSE;
463 }
464
465 static void
466 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
467 {
468 if (src->type == P_ATTR) {
469 set_long(pc, e);
470 e->inst[1] |= 0x00200000;
471 } else
472 if (src->type == P_CONST || src->type == P_IMMD) {
473 struct nv50_reg *temp = temp_temp(pc);
474
475 emit_mov(pc, temp, src);
476 src = temp;
477 }
478
479 alloc_reg(pc, src);
480 e->inst[0] |= (src->hw << 9);
481 }
482
483 static void
484 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
485 {
486 if (src->type == P_ATTR) {
487 struct nv50_reg *temp = temp_temp(pc);
488
489 emit_mov(pc, temp, src);
490 src = temp;
491 } else
492 if (src->type == P_CONST || src->type == P_IMMD) {
493 assert(!(e->inst[0] & 0x00800000));
494 if (e->inst[0] & 0x01000000) {
495 struct nv50_reg *temp = temp_temp(pc);
496
497 emit_mov(pc, temp, src);
498 src = temp;
499 } else {
500 set_data(pc, src, 0x7f, 16, e);
501 e->inst[0] |= 0x00800000;
502 }
503 }
504
505 alloc_reg(pc, src);
506 e->inst[0] |= (src->hw << 16);
507 }
508
509 static void
510 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
511 {
512 set_long(pc, e);
513
514 if (src->type == P_ATTR) {
515 struct nv50_reg *temp = temp_temp(pc);
516
517 emit_mov(pc, temp, src);
518 src = temp;
519 } else
520 if (src->type == P_CONST || src->type == P_IMMD) {
521 assert(!(e->inst[0] & 0x01000000));
522 if (e->inst[0] & 0x00800000) {
523 struct nv50_reg *temp = temp_temp(pc);
524
525 emit_mov(pc, temp, src);
526 src = temp;
527 } else {
528 set_data(pc, src, 0x7f, 32+14, e);
529 e->inst[0] |= 0x01000000;
530 }
531 }
532
533 alloc_reg(pc, src);
534 e->inst[1] |= (src->hw << 14);
535 }
536
537 static void
538 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
539 struct nv50_reg *src1)
540 {
541 struct nv50_program_exec *e = exec(pc);
542
543 e->inst[0] |= 0xc0000000;
544 set_long(pc, e);
545
546 check_swap_src_0_1(pc, &src0, &src1);
547 set_dst(pc, dst, e);
548 set_src_0(pc, src0, e);
549 set_src_1(pc, src1, e);
550
551 emit(pc, e);
552 }
553
554 static void
555 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
556 struct nv50_reg *src0, struct nv50_reg *src1)
557 {
558 struct nv50_program_exec *e = exec(pc);
559
560 e->inst[0] |= 0xb0000000;
561
562 check_swap_src_0_1(pc, &src0, &src1);
563 set_dst(pc, dst, e);
564 set_src_0(pc, src0, e);
565 if (is_long(e))
566 set_src_2(pc, src1, e);
567 else
568 set_src_1(pc, src1, e);
569
570 emit(pc, e);
571 }
572
573 static void
574 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
575 struct nv50_reg *src0, struct nv50_reg *src1)
576 {
577 struct nv50_program_exec *e = exec(pc);
578
579 set_long(pc, e);
580 e->inst[0] |= 0xb0000000;
581 e->inst[1] |= (sub << 29);
582
583 check_swap_src_0_1(pc, &src0, &src1);
584 set_dst(pc, dst, e);
585 set_src_0(pc, src0, e);
586 set_src_1(pc, src1, e);
587
588 emit(pc, e);
589 }
590
591 static void
592 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
593 struct nv50_reg *src1)
594 {
595 struct nv50_program_exec *e = exec(pc);
596
597 e->inst[0] |= 0xb0000000;
598
599 set_long(pc, e);
600 if (check_swap_src_0_1(pc, &src0, &src1))
601 e->inst[1] |= 0x04000000;
602 else
603 e->inst[1] |= 0x08000000;
604
605 set_dst(pc, dst, e);
606 set_src_0(pc, src0, e);
607 set_src_2(pc, src1, e);
608
609 emit(pc, e);
610 }
611
612 static void
613 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
614 struct nv50_reg *src1, struct nv50_reg *src2)
615 {
616 struct nv50_program_exec *e = exec(pc);
617
618 e->inst[0] |= 0xe0000000;
619
620 check_swap_src_0_1(pc, &src0, &src1);
621 set_dst(pc, dst, e);
622 set_src_0(pc, src0, e);
623 set_src_1(pc, src1, e);
624 set_src_2(pc, src2, e);
625
626 emit(pc, e);
627 }
628
629 static void
630 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
631 struct nv50_reg *src1, struct nv50_reg *src2)
632 {
633 struct nv50_program_exec *e = exec(pc);
634
635 e->inst[0] |= 0xe0000000;
636 set_long(pc, e);
637 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
638
639 check_swap_src_0_1(pc, &src0, &src1);
640 set_dst(pc, dst, e);
641 set_src_0(pc, src0, e);
642 set_src_1(pc, src1, e);
643 set_src_2(pc, src2, e);
644
645 emit(pc, e);
646 }
647
648 static void
649 emit_flop(struct nv50_pc *pc, unsigned sub,
650 struct nv50_reg *dst, struct nv50_reg *src)
651 {
652 struct nv50_program_exec *e = exec(pc);
653
654 e->inst[0] |= 0x90000000;
655 if (sub) {
656 set_long(pc, e);
657 e->inst[1] |= (sub << 29);
658 }
659
660 set_dst(pc, dst, e);
661 set_src_0(pc, src, e);
662
663 emit(pc, e);
664 }
665
666 static void
667 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
668 {
669 struct nv50_program_exec *e = exec(pc);
670
671 e->inst[0] |= 0xb0000000;
672
673 set_dst(pc, dst, e);
674 set_src_0(pc, src, e);
675 set_long(pc, e);
676 e->inst[1] |= (6 << 29) | 0x00004000;
677
678 emit(pc, e);
679 }
680
681 static void
682 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
683 {
684 struct nv50_program_exec *e = exec(pc);
685
686 e->inst[0] |= 0xb0000000;
687
688 set_dst(pc, dst, e);
689 set_src_0(pc, src, e);
690 set_long(pc, e);
691 e->inst[1] |= (6 << 29);
692
693 emit(pc, e);
694 }
695
696 static void
697 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
698 struct nv50_reg *src0, struct nv50_reg *src1)
699 {
700 struct nv50_program_exec *e = exec(pc);
701 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
702 struct nv50_reg *rdst;
703
704 assert(c_op <= 7);
705 if (check_swap_src_0_1(pc, &src0, &src1))
706 c_op = inv_cop[c_op];
707
708 rdst = dst;
709 if (dst->type != P_TEMP)
710 dst = alloc_temp(pc, NULL);
711
712 /* set.u32 */
713 set_long(pc, e);
714 e->inst[0] |= 0xb0000000;
715 e->inst[1] |= (3 << 29);
716 e->inst[1] |= (c_op << 14);
717 /*XXX: breaks things, .u32 by default?
718 * decuda will disasm as .u16 and use .lo/.hi regs, but this
719 * doesn't seem to match what the hw actually does.
720 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
721 */
722 set_dst(pc, dst, e);
723 set_src_0(pc, src0, e);
724 set_src_1(pc, src1, e);
725 emit(pc, e);
726
727 /* cvt.f32.u32 */
728 e = exec(pc);
729 e->inst[0] = 0xa0000001;
730 e->inst[1] = 0x64014780;
731 set_dst(pc, rdst, e);
732 set_src_0(pc, dst, e);
733 emit(pc, e);
734
735 if (dst != rdst)
736 free_temp(pc, dst);
737 }
738
739 static void
740 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
741 {
742 struct nv50_program_exec *e = exec(pc);
743
744 e->inst[0] = 0xa0000000; /* cvt */
745 set_long(pc, e);
746 e->inst[1] |= (6 << 29); /* cvt */
747 e->inst[1] |= 0x08000000; /* integer mode */
748 e->inst[1] |= 0x04000000; /* 32 bit */
749 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
750 e->inst[1] |= (1 << 14); /* src .f32 */
751 set_dst(pc, dst, e);
752 set_src_0(pc, src, e);
753
754 emit(pc, e);
755 }
756
757 static void
758 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
759 struct nv50_reg *v, struct nv50_reg *e)
760 {
761 struct nv50_reg *temp = alloc_temp(pc, NULL);
762
763 emit_flop(pc, 3, temp, v);
764 emit_mul(pc, temp, temp, e);
765 emit_preex2(pc, temp, temp);
766 emit_flop(pc, 6, dst, temp);
767
768 free_temp(pc, temp);
769 }
770
771 static void
772 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
773 {
774 struct nv50_program_exec *e = exec(pc);
775
776 e->inst[0] = 0xa0000000; /* cvt */
777 set_long(pc, e);
778 e->inst[1] |= (6 << 29); /* cvt */
779 e->inst[1] |= 0x04000000; /* 32 bit */
780 e->inst[1] |= (1 << 14); /* src .f32 */
781 e->inst[1] |= ((1 << 6) << 14); /* .abs */
782 set_dst(pc, dst, e);
783 set_src_0(pc, src, e);
784
785 emit(pc, e);
786 }
787
788 static void
789 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
790 struct nv50_reg **src)
791 {
792 struct nv50_reg *one = alloc_immd(pc, 1.0);
793 struct nv50_reg *zero = alloc_immd(pc, 0.0);
794 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
795 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
796 struct nv50_reg *tmp[4];
797
798 if (mask & (1 << 0))
799 emit_mov(pc, dst[0], one);
800
801 if (mask & (1 << 3))
802 emit_mov(pc, dst[3], one);
803
804 if (mask & (3 << 1)) {
805 if (mask & (1 << 1))
806 tmp[0] = dst[1];
807 else
808 tmp[0] = temp_temp(pc);
809 emit_minmax(pc, 4, tmp[0], src[0], zero);
810 }
811
812 if (mask & (1 << 2)) {
813 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
814
815 tmp[1] = temp_temp(pc);
816 emit_minmax(pc, 4, tmp[1], src[1], zero);
817
818 tmp[3] = temp_temp(pc);
819 emit_minmax(pc, 4, tmp[3], src[3], neg128);
820 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
821
822 emit_pow(pc, dst[2], tmp[1], tmp[3]);
823 emit_mov(pc, dst[2], zero);
824 set_pred(pc, 3, 0, pc->p->exec_tail);
825 }
826 }
827
828 static void
829 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
830 {
831 struct nv50_program_exec *e = exec(pc);
832
833 set_long(pc, e);
834 e->inst[0] |= 0xa0000000; /* delta */
835 e->inst[1] |= (7 << 29); /* delta */
836 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
837 e->inst[1] |= (1 << 14); /* src .f32 */
838 set_dst(pc, dst, e);
839 set_src_0(pc, src, e);
840
841 emit(pc, e);
842 }
843
844 static struct nv50_reg *
845 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
846 {
847 switch (dst->DstRegister.File) {
848 case TGSI_FILE_TEMPORARY:
849 return &pc->temp[dst->DstRegister.Index * 4 + c];
850 case TGSI_FILE_OUTPUT:
851 return &pc->result[dst->DstRegister.Index * 4 + c];
852 case TGSI_FILE_NULL:
853 return NULL;
854 default:
855 break;
856 }
857
858 return NULL;
859 }
860
861 static struct nv50_reg *
862 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
863 {
864 struct nv50_reg *r = NULL;
865 struct nv50_reg *temp;
866 unsigned c;
867
868 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
869 switch (c) {
870 case TGSI_EXTSWIZZLE_X:
871 case TGSI_EXTSWIZZLE_Y:
872 case TGSI_EXTSWIZZLE_Z:
873 case TGSI_EXTSWIZZLE_W:
874 switch (src->SrcRegister.File) {
875 case TGSI_FILE_INPUT:
876 r = &pc->attr[src->SrcRegister.Index * 4 + c];
877 break;
878 case TGSI_FILE_TEMPORARY:
879 r = &pc->temp[src->SrcRegister.Index * 4 + c];
880 break;
881 case TGSI_FILE_CONSTANT:
882 r = &pc->param[src->SrcRegister.Index * 4 + c];
883 break;
884 case TGSI_FILE_IMMEDIATE:
885 r = &pc->immd[src->SrcRegister.Index * 4 + c];
886 break;
887 case TGSI_FILE_SAMPLER:
888 break;
889 default:
890 assert(0);
891 break;
892 }
893 break;
894 case TGSI_EXTSWIZZLE_ZERO:
895 r = alloc_immd(pc, 0.0);
896 break;
897 case TGSI_EXTSWIZZLE_ONE:
898 r = alloc_immd(pc, 1.0);
899 break;
900 default:
901 assert(0);
902 break;
903 }
904
905 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
906 case TGSI_UTIL_SIGN_KEEP:
907 break;
908 case TGSI_UTIL_SIGN_CLEAR:
909 temp = temp_temp(pc);
910 emit_abs(pc, temp, r);
911 r = temp;
912 break;
913 case TGSI_UTIL_SIGN_TOGGLE:
914 temp = temp_temp(pc);
915 emit_neg(pc, temp, r);
916 r = temp;
917 break;
918 case TGSI_UTIL_SIGN_SET:
919 temp = temp_temp(pc);
920 emit_abs(pc, temp, r);
921 emit_neg(pc, temp, r);
922 r = temp;
923 break;
924 default:
925 assert(0);
926 break;
927 }
928
929 return r;
930 }
931
932 static boolean
933 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
934 {
935 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
936 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
937 unsigned mask, sat;
938 int i, c;
939
940 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
941 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
942
943 for (c = 0; c < 4; c++) {
944 if (mask & (1 << c))
945 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
946 else
947 dst[c] = NULL;
948 }
949
950 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
951 for (c = 0; c < 4; c++)
952 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
953 }
954
955 if (sat) {
956 for (c = 0; c < 4; c++) {
957 rdst[c] = dst[c];
958 dst[c] = temp_temp(pc);
959 }
960 }
961
962 switch (inst->Instruction.Opcode) {
963 case TGSI_OPCODE_ABS:
964 for (c = 0; c < 4; c++) {
965 if (!(mask & (1 << c)))
966 continue;
967 emit_abs(pc, dst[c], src[0][c]);
968 }
969 break;
970 case TGSI_OPCODE_ADD:
971 for (c = 0; c < 4; c++) {
972 if (!(mask & (1 << c)))
973 continue;
974 emit_add(pc, dst[c], src[0][c], src[1][c]);
975 }
976 break;
977 case TGSI_OPCODE_COS:
978 temp = alloc_temp(pc, NULL);
979 emit_precossin(pc, temp, src[0][0]);
980 emit_flop(pc, 5, temp, temp);
981 for (c = 0; c < 4; c++) {
982 if (!(mask & (1 << c)))
983 continue;
984 emit_mov(pc, dst[c], temp);
985 }
986 break;
987 case TGSI_OPCODE_DP3:
988 temp = alloc_temp(pc, NULL);
989 emit_mul(pc, temp, src[0][0], src[1][0]);
990 emit_mad(pc, temp, src[0][1], src[1][1], temp);
991 emit_mad(pc, temp, src[0][2], src[1][2], temp);
992 for (c = 0; c < 4; c++) {
993 if (!(mask & (1 << c)))
994 continue;
995 emit_mov(pc, dst[c], temp);
996 }
997 free_temp(pc, temp);
998 break;
999 case TGSI_OPCODE_DP4:
1000 temp = alloc_temp(pc, NULL);
1001 emit_mul(pc, temp, src[0][0], src[1][0]);
1002 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1003 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1004 emit_mad(pc, temp, src[0][3], src[1][3], temp);
1005 for (c = 0; c < 4; c++) {
1006 if (!(mask & (1 << c)))
1007 continue;
1008 emit_mov(pc, dst[c], temp);
1009 }
1010 free_temp(pc, temp);
1011 break;
1012 case TGSI_OPCODE_DPH:
1013 temp = alloc_temp(pc, NULL);
1014 emit_mul(pc, temp, src[0][0], src[1][0]);
1015 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1016 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1017 emit_add(pc, temp, src[1][3], temp);
1018 for (c = 0; c < 4; c++) {
1019 if (!(mask & (1 << c)))
1020 continue;
1021 emit_mov(pc, dst[c], temp);
1022 }
1023 free_temp(pc, temp);
1024 break;
1025 case TGSI_OPCODE_DST:
1026 {
1027 struct nv50_reg *one = alloc_immd(pc, 1.0);
1028 if (mask & (1 << 0))
1029 emit_mov(pc, dst[0], one);
1030 if (mask & (1 << 1))
1031 emit_mul(pc, dst[1], src[0][1], src[1][1]);
1032 if (mask & (1 << 2))
1033 emit_mov(pc, dst[2], src[0][2]);
1034 if (mask & (1 << 3))
1035 emit_mov(pc, dst[3], src[1][3]);
1036 FREE(one);
1037 }
1038 break;
1039 case TGSI_OPCODE_EX2:
1040 temp = alloc_temp(pc, NULL);
1041 emit_preex2(pc, temp, src[0][0]);
1042 emit_flop(pc, 6, temp, temp);
1043 for (c = 0; c < 4; c++) {
1044 if (!(mask & (1 << c)))
1045 continue;
1046 emit_mov(pc, dst[c], temp);
1047 }
1048 free_temp(pc, temp);
1049 break;
1050 case TGSI_OPCODE_FLR:
1051 for (c = 0; c < 4; c++) {
1052 if (!(mask & (1 << c)))
1053 continue;
1054 emit_flr(pc, dst[c], src[0][c]);
1055 }
1056 break;
1057 case TGSI_OPCODE_FRC:
1058 temp = alloc_temp(pc, NULL);
1059 for (c = 0; c < 4; c++) {
1060 if (!(mask & (1 << c)))
1061 continue;
1062 emit_flr(pc, temp, src[0][c]);
1063 emit_sub(pc, dst[c], src[0][c], temp);
1064 }
1065 free_temp(pc, temp);
1066 break;
1067 case TGSI_OPCODE_LIT:
1068 emit_lit(pc, &dst[0], mask, &src[0][0]);
1069 break;
1070 case TGSI_OPCODE_LG2:
1071 temp = alloc_temp(pc, NULL);
1072 emit_flop(pc, 3, temp, src[0][0]);
1073 for (c = 0; c < 4; c++) {
1074 if (!(mask & (1 << c)))
1075 continue;
1076 emit_mov(pc, dst[c], temp);
1077 }
1078 break;
1079 case TGSI_OPCODE_LRP:
1080 for (c = 0; c < 4; c++) {
1081 if (!(mask & (1 << c)))
1082 continue;
1083 /*XXX: we can do better than this */
1084 temp = alloc_temp(pc, NULL);
1085 emit_neg(pc, temp, src[0][c]);
1086 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1087 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1088 free_temp(pc, temp);
1089 }
1090 break;
1091 case TGSI_OPCODE_MAD:
1092 for (c = 0; c < 4; c++) {
1093 if (!(mask & (1 << c)))
1094 continue;
1095 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1096 }
1097 break;
1098 case TGSI_OPCODE_MAX:
1099 for (c = 0; c < 4; c++) {
1100 if (!(mask & (1 << c)))
1101 continue;
1102 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1103 }
1104 break;
1105 case TGSI_OPCODE_MIN:
1106 for (c = 0; c < 4; c++) {
1107 if (!(mask & (1 << c)))
1108 continue;
1109 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1110 }
1111 break;
1112 case TGSI_OPCODE_MOV:
1113 for (c = 0; c < 4; c++) {
1114 if (!(mask & (1 << c)))
1115 continue;
1116 emit_mov(pc, dst[c], src[0][c]);
1117 }
1118 break;
1119 case TGSI_OPCODE_MUL:
1120 for (c = 0; c < 4; c++) {
1121 if (!(mask & (1 << c)))
1122 continue;
1123 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1124 }
1125 break;
1126 case TGSI_OPCODE_POW:
1127 temp = alloc_temp(pc, NULL);
1128 emit_pow(pc, temp, src[0][0], src[1][0]);
1129 for (c = 0; c < 4; c++) {
1130 if (!(mask & (1 << c)))
1131 continue;
1132 emit_mov(pc, dst[c], temp);
1133 }
1134 free_temp(pc, temp);
1135 break;
1136 case TGSI_OPCODE_RCP:
1137 for (c = 0; c < 4; c++) {
1138 if (!(mask & (1 << c)))
1139 continue;
1140 emit_flop(pc, 0, dst[c], src[0][0]);
1141 }
1142 break;
1143 case TGSI_OPCODE_RSQ:
1144 for (c = 0; c < 4; c++) {
1145 if (!(mask & (1 << c)))
1146 continue;
1147 emit_flop(pc, 2, dst[c], src[0][0]);
1148 }
1149 break;
1150 case TGSI_OPCODE_SCS:
1151 temp = alloc_temp(pc, NULL);
1152 emit_precossin(pc, temp, src[0][0]);
1153 if (mask & (1 << 0))
1154 emit_flop(pc, 5, dst[0], temp);
1155 if (mask & (1 << 1))
1156 emit_flop(pc, 4, dst[1], temp);
1157 break;
1158 case TGSI_OPCODE_SGE:
1159 for (c = 0; c < 4; c++) {
1160 if (!(mask & (1 << c)))
1161 continue;
1162 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1163 }
1164 break;
1165 case TGSI_OPCODE_SIN:
1166 temp = alloc_temp(pc, NULL);
1167 emit_precossin(pc, temp, src[0][0]);
1168 emit_flop(pc, 4, temp, temp);
1169 for (c = 0; c < 4; c++) {
1170 if (!(mask & (1 << c)))
1171 continue;
1172 emit_mov(pc, dst[c], temp);
1173 }
1174 break;
1175 case TGSI_OPCODE_SLT:
1176 for (c = 0; c < 4; c++) {
1177 if (!(mask & (1 << c)))
1178 continue;
1179 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1180 }
1181 break;
1182 case TGSI_OPCODE_SUB:
1183 for (c = 0; c < 4; c++) {
1184 if (!(mask & (1 << c)))
1185 continue;
1186 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1187 }
1188 break;
1189 case TGSI_OPCODE_TEX:
1190 {
1191 struct nv50_reg *t[4];
1192 struct nv50_program_exec *e;
1193
1194 alloc_temp4(pc, t, 0);
1195 emit_mov(pc, t[0], src[0][0]);
1196 emit_mov(pc, t[1], src[0][1]);
1197
1198 e = exec(pc);
1199 e->inst[0] = 0xf6400000;
1200 set_long(pc, e);
1201 e->inst[1] |= 0x0000c004;
1202 set_dst(pc, t[0], e);
1203 emit(pc, e);
1204
1205 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
1206 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
1207 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
1208 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
1209
1210 free_temp4(pc, t);
1211 }
1212 break;
1213 case TGSI_OPCODE_XPD:
1214 temp = alloc_temp(pc, NULL);
1215 if (mask & (1 << 0)) {
1216 emit_mul(pc, temp, src[0][2], src[1][1]);
1217 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1218 }
1219 if (mask & (1 << 1)) {
1220 emit_mul(pc, temp, src[0][0], src[1][2]);
1221 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1222 }
1223 if (mask & (1 << 2)) {
1224 emit_mul(pc, temp, src[0][1], src[1][0]);
1225 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1226 }
1227 free_temp(pc, temp);
1228 break;
1229 case TGSI_OPCODE_END:
1230 break;
1231 default:
1232 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1233 return FALSE;
1234 }
1235
1236 if (sat) {
1237 for (c = 0; c < 4; c++) {
1238 struct nv50_program_exec *e;
1239
1240 if (!(mask & (1 << c)))
1241 continue;
1242 e = exec(pc);
1243
1244 e->inst[0] = 0xa0000000; /* cvt */
1245 set_long(pc, e);
1246 e->inst[1] |= (6 << 29); /* cvt */
1247 e->inst[1] |= 0x04000000; /* 32 bit */
1248 e->inst[1] |= (1 << 14); /* src .f32 */
1249 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1250 set_dst(pc, rdst[c], e);
1251 set_src_0(pc, dst[c], e);
1252 emit(pc, e);
1253 }
1254 }
1255
1256 kill_temp_temp(pc);
1257 return TRUE;
1258 }
1259
1260 static boolean
1261 nv50_program_tx_prep(struct nv50_pc *pc)
1262 {
1263 struct tgsi_parse_context p;
1264 boolean ret = FALSE;
1265 unsigned i, c;
1266
1267 tgsi_parse_init(&p, pc->p->pipe.tokens);
1268 while (!tgsi_parse_end_of_tokens(&p)) {
1269 const union tgsi_full_token *tok = &p.FullToken;
1270
1271 tgsi_parse_token(&p);
1272 switch (tok->Token.Type) {
1273 case TGSI_TOKEN_TYPE_IMMEDIATE:
1274 {
1275 const struct tgsi_full_immediate *imm =
1276 &p.FullToken.FullImmediate;
1277
1278 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1279 imm->u.ImmediateFloat32[1].Float,
1280 imm->u.ImmediateFloat32[2].Float,
1281 imm->u.ImmediateFloat32[3].Float);
1282 }
1283 break;
1284 case TGSI_TOKEN_TYPE_DECLARATION:
1285 {
1286 const struct tgsi_full_declaration *d;
1287 unsigned last;
1288
1289 d = &p.FullToken.FullDeclaration;
1290 last = d->DeclarationRange.Last;
1291
1292 switch (d->Declaration.File) {
1293 case TGSI_FILE_TEMPORARY:
1294 if (pc->temp_nr < (last + 1))
1295 pc->temp_nr = last + 1;
1296 break;
1297 case TGSI_FILE_OUTPUT:
1298 if (pc->result_nr < (last + 1))
1299 pc->result_nr = last + 1;
1300 break;
1301 case TGSI_FILE_INPUT:
1302 if (pc->attr_nr < (last + 1))
1303 pc->attr_nr = last + 1;
1304 break;
1305 case TGSI_FILE_CONSTANT:
1306 if (pc->param_nr < (last + 1))
1307 pc->param_nr = last + 1;
1308 break;
1309 case TGSI_FILE_SAMPLER:
1310 break;
1311 default:
1312 NOUVEAU_ERR("bad decl file %d\n",
1313 d->Declaration.File);
1314 goto out_err;
1315 }
1316 }
1317 break;
1318 case TGSI_TOKEN_TYPE_INSTRUCTION:
1319 break;
1320 default:
1321 break;
1322 }
1323 }
1324
1325 if (pc->temp_nr) {
1326 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
1327 if (!pc->temp)
1328 goto out_err;
1329
1330 for (i = 0; i < pc->temp_nr; i++) {
1331 for (c = 0; c < 4; c++) {
1332 pc->temp[i*4+c].type = P_TEMP;
1333 pc->temp[i*4+c].hw = -1;
1334 pc->temp[i*4+c].index = i;
1335 }
1336 }
1337 }
1338
1339 if (pc->attr_nr) {
1340 struct nv50_reg *iv = NULL;
1341 int aid = 0;
1342
1343 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
1344 if (!pc->attr)
1345 goto out_err;
1346
1347 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1348 iv = alloc_temp(pc, NULL);
1349 emit_interp(pc, iv, iv, NULL);
1350 emit_flop(pc, 0, iv, iv);
1351 aid++;
1352 }
1353
1354 for (i = 0; i < pc->attr_nr; i++) {
1355 struct nv50_reg *a = &pc->attr[i*4];
1356
1357 for (c = 0; c < 4; c++) {
1358 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1359 struct nv50_reg *at =
1360 alloc_temp(pc, NULL);
1361 pc->attr[i*4+c].type = at->type;
1362 pc->attr[i*4+c].hw = at->hw;
1363 pc->attr[i*4+c].index = at->index;
1364 } else {
1365 pc->p->cfg.vp.attr[aid/32] |=
1366 (1 << (aid % 32));
1367 pc->attr[i*4+c].type = P_ATTR;
1368 pc->attr[i*4+c].hw = aid++;
1369 pc->attr[i*4+c].index = i;
1370 }
1371 }
1372
1373 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1374 continue;
1375
1376 emit_interp(pc, &a[0], &a[0], iv);
1377 emit_interp(pc, &a[1], &a[1], iv);
1378 emit_interp(pc, &a[2], &a[2], iv);
1379 emit_interp(pc, &a[3], &a[3], iv);
1380 }
1381
1382 if (iv)
1383 free_temp(pc, iv);
1384 }
1385
1386 if (pc->result_nr) {
1387 int rid = 0;
1388
1389 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
1390 if (!pc->result)
1391 goto out_err;
1392
1393 for (i = 0; i < pc->result_nr; i++) {
1394 for (c = 0; c < 4; c++) {
1395 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1396 pc->result[i*4+c].type = P_TEMP;
1397 pc->result[i*4+c].hw = -1;
1398 } else {
1399 pc->result[i*4+c].type = P_RESULT;
1400 pc->result[i*4+c].hw = rid++;
1401 }
1402 pc->result[i*4+c].index = i;
1403 }
1404 }
1405 }
1406
1407 if (pc->param_nr) {
1408 int rid = 0;
1409
1410 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
1411 if (!pc->param)
1412 goto out_err;
1413
1414 for (i = 0; i < pc->param_nr; i++) {
1415 for (c = 0; c < 4; c++) {
1416 pc->param[i*4+c].type = P_CONST;
1417 pc->param[i*4+c].hw = rid++;
1418 pc->param[i*4+c].index = i;
1419 }
1420 }
1421 }
1422
1423 if (pc->immd_nr) {
1424 int rid = pc->param_nr * 4;
1425
1426 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
1427 if (!pc->immd)
1428 goto out_err;
1429
1430 for (i = 0; i < pc->immd_nr; i++) {
1431 for (c = 0; c < 4; c++) {
1432 pc->immd[i*4+c].type = P_IMMD;
1433 pc->immd[i*4+c].hw = rid++;
1434 pc->immd[i*4+c].index = i;
1435 }
1436 }
1437 }
1438
1439 ret = TRUE;
1440 out_err:
1441 tgsi_parse_free(&p);
1442 return ret;
1443 }
1444
1445 static boolean
1446 nv50_program_tx(struct nv50_program *p)
1447 {
1448 struct tgsi_parse_context parse;
1449 struct nv50_pc *pc;
1450 boolean ret;
1451
1452 pc = CALLOC_STRUCT(nv50_pc);
1453 if (!pc)
1454 return FALSE;
1455 pc->p = p;
1456 pc->p->cfg.high_temp = 4;
1457
1458 ret = nv50_program_tx_prep(pc);
1459 if (ret == FALSE)
1460 goto out_cleanup;
1461
1462 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1463 while (!tgsi_parse_end_of_tokens(&parse)) {
1464 const union tgsi_full_token *tok = &parse.FullToken;
1465
1466 tgsi_parse_token(&parse);
1467
1468 switch (tok->Token.Type) {
1469 case TGSI_TOKEN_TYPE_INSTRUCTION:
1470 ret = nv50_program_tx_insn(pc, tok);
1471 if (ret == FALSE)
1472 goto out_err;
1473 break;
1474 default:
1475 break;
1476 }
1477 }
1478
1479 if (p->type == PIPE_SHADER_FRAGMENT) {
1480 struct nv50_reg out;
1481
1482 out.type = P_TEMP;
1483 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1484 emit_mov(pc, &out, &pc->result[out.hw]);
1485 }
1486
1487 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1488 pc->p->exec_tail->inst[1] |= 0x00000001;
1489
1490 p->param_nr = pc->param_nr * 4;
1491 p->immd_nr = pc->immd_nr * 4;
1492 p->immd = pc->immd_buf;
1493
1494 out_err:
1495 tgsi_parse_free(&parse);
1496
1497 out_cleanup:
1498 return ret;
1499 }
1500
1501 static void
1502 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1503 {
1504 if (nv50_program_tx(p) == FALSE)
1505 assert(0);
1506 p->translated = TRUE;
1507 }
1508
1509 static void
1510 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1511 unsigned start, unsigned count)
1512 {
1513 while (count) {
1514 unsigned nr = count > 2047 ? 2047 : count;
1515
1516 BEGIN_RING(tesla, 0x00000f00, 1);
1517 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
1518 BEGIN_RING(tesla, 0x40000f04, nr);
1519 OUT_RINGp (map, nr);
1520
1521 map += nr;
1522 start += nr;
1523 count -= nr;
1524 }
1525 }
1526
1527 static void
1528 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1529 {
1530 struct nouveau_winsys *nvws = nv50->screen->nvws;
1531 struct pipe_winsys *ws = nv50->pipe.winsys;
1532 unsigned nr = p->param_nr + p->immd_nr;
1533
1534 if (!p->data && nr) {
1535 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1536
1537 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1538 while (heap->next && heap->size < nr) {
1539 struct nv50_program *evict = heap->next->priv;
1540 nvws->res_free(&evict->data);
1541 }
1542
1543 if (nvws->res_alloc(heap, nr, p, &p->data))
1544 assert(0);
1545 }
1546 }
1547
1548 if (p->param_nr) {
1549 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1550 PIPE_BUFFER_USAGE_CPU_READ);
1551 nv50_program_upload_data(nv50, map, p->data->start,
1552 p->param_nr);
1553 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1554 }
1555
1556 if (p->immd_nr) {
1557 nv50_program_upload_data(nv50, p->immd,
1558 p->data->start + p->param_nr,
1559 p->immd_nr);
1560 }
1561 }
1562
1563 static void
1564 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1565 {
1566 struct pipe_winsys *ws = nv50->pipe.winsys;
1567 struct nv50_program_exec *e;
1568 struct nouveau_stateobj *so;
1569 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1570 unsigned start, count, *up, *ptr;
1571 boolean upload = FALSE;
1572
1573 if (!p->buffer) {
1574 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1575 upload = TRUE;
1576 }
1577
1578 if (p->data && p->data->start != p->data_start) {
1579 for (e = p->exec_head; e; e = e->next) {
1580 unsigned ei, ci;
1581
1582 if (e->param.index < 0)
1583 continue;
1584 ei = e->param.shift >> 5;
1585 ci = e->param.index + p->data->start;
1586
1587 e->inst[ei] &= ~e->param.mask;
1588 e->inst[ei] |= (ci << e->param.shift);
1589 }
1590
1591 p->data_start = p->data->start;
1592 upload = TRUE;
1593 }
1594
1595 if (!upload)
1596 return;
1597
1598 NOUVEAU_ERR("-------\n");
1599 up = ptr = MALLOC(p->exec_size * 4);
1600 for (e = p->exec_head; e; e = e->next) {
1601 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1602 if (is_long(e))
1603 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1604
1605 *(ptr++) = e->inst[0];
1606 if (is_long(e))
1607 *(ptr++) = e->inst[1];
1608 }
1609
1610 so = so_new(4,2);
1611 so_method(so, nv50->screen->tesla, 0x1280, 3);
1612 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1613 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1614 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1615
1616 start = 0; count = p->exec_size;
1617 while (count) {
1618 struct nouveau_winsys *nvws = nv50->screen->nvws;
1619 unsigned nr;
1620
1621 so_emit(nvws, so);
1622
1623 nr = MIN2(count, 2047);
1624 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1625 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1626 FIRE_RING(NULL);
1627 continue;
1628 }
1629
1630 BEGIN_RING(tesla, 0x0f00, 1);
1631 OUT_RING ((start << 8) | NV50_CB_PUPLOAD);
1632 BEGIN_RING(tesla, 0x40000f04, nr);
1633 OUT_RINGp (up + start, nr);
1634
1635 start += nr;
1636 count -= nr;
1637 }
1638
1639 FREE(up);
1640 so_ref(NULL, &so);
1641 }
1642
1643 void
1644 nv50_vertprog_validate(struct nv50_context *nv50)
1645 {
1646 struct nouveau_grobj *tesla = nv50->screen->tesla;
1647 struct nv50_program *p = nv50->vertprog;
1648 struct nouveau_stateobj *so;
1649
1650 if (!p->translated) {
1651 nv50_program_validate(nv50, p);
1652 if (!p->translated)
1653 assert(0);
1654 }
1655
1656 nv50_program_validate_data(nv50, p);
1657 nv50_program_validate_code(nv50, p);
1658
1659 so = so_new(13, 2);
1660 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1661 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1662 NOUVEAU_BO_HIGH, 0, 0);
1663 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1664 NOUVEAU_BO_LOW, 0, 0);
1665 so_method(so, tesla, 0x1650, 2);
1666 so_data (so, p->cfg.vp.attr[0]);
1667 so_data (so, p->cfg.vp.attr[1]);
1668 so_method(so, tesla, 0x16b8, 1);
1669 so_data (so, p->cfg.high_result);
1670 so_method(so, tesla, 0x16ac, 2);
1671 so_data (so, p->cfg.high_result); //8);
1672 so_data (so, p->cfg.high_temp);
1673 so_method(so, tesla, 0x140c, 1);
1674 so_data (so, 0); /* program start offset */
1675 so_ref(so, &nv50->state.vertprog);
1676 }
1677
1678 void
1679 nv50_fragprog_validate(struct nv50_context *nv50)
1680 {
1681 struct nouveau_grobj *tesla = nv50->screen->tesla;
1682 struct nv50_program *p = nv50->fragprog;
1683 struct nouveau_stateobj *so;
1684
1685 if (!p->translated) {
1686 nv50_program_validate(nv50, p);
1687 if (!p->translated)
1688 assert(0);
1689 }
1690
1691 nv50_program_validate_data(nv50, p);
1692 nv50_program_validate_code(nv50, p);
1693
1694 so = so_new(64, 2);
1695 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1696 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1697 NOUVEAU_BO_HIGH, 0, 0);
1698 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1699 NOUVEAU_BO_LOW, 0, 0);
1700 so_method(so, tesla, 0x1904, 4);
1701 so_data (so, 0x01040404); /* p: 0x01000404 */
1702 so_data (so, 0x00000004);
1703 so_data (so, 0x00000000);
1704 so_data (so, 0x00000000);
1705 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
1706 so_data (so, 0x03020100);
1707 so_data (so, 0x07060504);
1708 so_data (so, 0x0b0a0908);
1709 so_method(so, tesla, 0x1988, 2);
1710 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
1711 so_data (so, p->cfg.high_temp);
1712 so_method(so, tesla, 0x1414, 1);
1713 so_data (so, 0); /* program start offset */
1714 so_ref(so, &nv50->state.fragprog);
1715 }
1716
1717 void
1718 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1719 {
1720 struct pipe_winsys *ws = nv50->pipe.winsys;
1721
1722 while (p->exec_head) {
1723 struct nv50_program_exec *e = p->exec_head;
1724
1725 p->exec_head = e->next;
1726 FREE(e);
1727 }
1728 p->exec_tail = NULL;
1729 p->exec_size = 0;
1730
1731 if (p->buffer)
1732 pipe_buffer_reference(ws, &p->buffer, NULL);
1733
1734 nv50->screen->nvws->res_free(&p->data);
1735
1736 p->translated = 0;
1737 }
1738