nv50: save some space in immediate buffer
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "pipe/p_inlines.h"
27
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
31
32 #include "nv50_context.h"
33
34 #define NV50_SU_MAX_TEMP 64
35 //#define NV50_PROGRAM_DUMP
36
37 /* ARL - gallium craps itself on progs/vp/arl.txt
38 *
39 * MSB - Like MAD, but MUL+SUB
40 * - Fuck it off, introduce a way to negate args for ops that
41 * support it.
42 *
43 * Look into inlining IMMD for ops other than MOV (make it general?)
44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
46 *
47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
48 * case, if the emit_src() causes the inst to suddenly become long.
49 *
50 * Verify half-insns work where expected - and force disable them where they
51 * don't work - MUL has it forcibly disabled atm as it fixes POW..
52 *
53 * FUCK! watch dst==src vectors, can overwrite components that are needed.
54 * ie. SUB R0, R0.yzxw, R0
55 *
56 * Things to check with renouveau:
57 * FP attr/result assignment - how?
58 * attrib
59 * - 0x16bc maps vp output onto fp hpos
60 * - 0x16c0 maps vp output onto fp col0
61 * result
62 * - colr always 0-3
63 * - depr always 4
64 * 0x16bc->0x16e8 --> some binding between vp/fp regs
65 * 0x16b8 --> VP output count
66 *
67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
68 * "MOV rcol.x, fcol.y" = 0x00000004
69 * 0x19a8 --> as above but 0x00000100 and 0x00000000
70 * - 0x00100000 used when KIL used
71 * 0x196c --> as above but 0x00000011 and 0x00000000
72 *
73 * 0x1988 --> 0xXXNNNNNN
74 * - XX == FP high something
75 */
/* A single scalar register operand tracked by the code generator. */
struct nv50_reg {
	/* Register file the operand belongs to. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;
	/* -1 for registers created on the fly by alloc_temp(), alloc_temp4()
	 * and alloc_immd(); free_temp() only releases registers whose index
	 * is -1. Other values are assigned outside the visible code. */
	int index;

	/* Hardware slot. For P_TEMP a negative value means no slot has been
	 * assigned yet (see alloc_reg()); for P_IMMD it is used as an index
	 * into nv50_pc.immd_buf (see alloc_immd()/set_immd()). */
	int hw;
	/* NOTE(review): not referenced by the code visible here; presumably a
	 * negate flag - confirm against the rest of the file. */
	int neg;
};
89
/* Per-program translation state shared by all of the emit helpers. */
struct nv50_pc {
	/* Program being built; emit() appends instructions to its exec list. */
	struct nv50_program *p;

	/* hw resources */
	/* r_temp[i] is non-NULL while hardware temp i is in use. */
	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];

	/* tgsi resources */
	/* tgsi_dst()/tgsi_src() index these arrays as
	 * (tgsi register index * 4 + component). */
	struct nv50_reg *temp;
	int temp_nr;
	struct nv50_reg *attr;
	int attr_nr;
	struct nv50_reg *result;
	int result_nr;
	struct nv50_reg *param;
	int param_nr;
	struct nv50_reg *immd;
	/* Immediate values, four floats per entry (see ctor_immd()). */
	float *immd_buf;
	/* Number of four-float entries stored in immd_buf. */
	int immd_nr;

	/* Scratch temps handed out by temp_temp() and released together by
	 * kill_temp_temp(). */
	struct nv50_reg *temp_temp[16];
	unsigned temp_temp_nr;
};
112
113 static void
114 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
115 {
116 int i;
117
118 if (reg->type == P_RESULT) {
119 if (pc->p->cfg.high_result < (reg->hw + 1))
120 pc->p->cfg.high_result = reg->hw + 1;
121 }
122
123 if (reg->type != P_TEMP)
124 return;
125
126 if (reg->hw >= 0) {
127 /*XXX: do this here too to catch FP temp-as-attr usage..
128 * not clean, but works */
129 if (pc->p->cfg.high_temp < (reg->hw + 1))
130 pc->p->cfg.high_temp = reg->hw + 1;
131 return;
132 }
133
134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
135 if (!(pc->r_temp[i])) {
136 pc->r_temp[i] = reg;
137 reg->hw = i;
138 if (pc->p->cfg.high_temp < (i + 1))
139 pc->p->cfg.high_temp = i + 1;
140 return;
141 }
142 }
143
144 assert(0);
145 }
146
147 static struct nv50_reg *
148 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
149 {
150 struct nv50_reg *r;
151 int i;
152
153 if (dst && dst->type == P_TEMP && dst->hw == -1)
154 return dst;
155
156 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
157 if (!pc->r_temp[i]) {
158 r = CALLOC_STRUCT(nv50_reg);
159 r->type = P_TEMP;
160 r->index = -1;
161 r->hw = i;
162 pc->r_temp[i] = r;
163 return r;
164 }
165 }
166
167 assert(0);
168 return NULL;
169 }
170
171 static void
172 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
173 {
174 if (r->index == -1) {
175 unsigned hw = r->hw;
176
177 FREE(pc->r_temp[hw]);
178 pc->r_temp[hw] = NULL;
179 }
180 }
181
182 static int
183 alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
184 {
185 int i;
186
187 if ((idx + 4) >= NV50_SU_MAX_TEMP)
188 return 1;
189
190 if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
191 pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
192 return alloc_temp4(pc, dst, idx + 1);
193
194 for (i = 0; i < 4; i++) {
195 dst[i] = CALLOC_STRUCT(nv50_reg);
196 dst[i]->type = P_TEMP;
197 dst[i]->index = -1;
198 dst[i]->hw = idx + i;
199 pc->r_temp[idx + i] = dst[i];
200 }
201
202 return 0;
203 }
204
/* Release the four temporaries in 'reg', in order, via free_temp(). */
static void
free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
{
	int n = 0;

	while (n < 4) {
		free_temp(pc, reg[n]);
		n++;
	}
}
213
214 static struct nv50_reg *
215 temp_temp(struct nv50_pc *pc)
216 {
217 if (pc->temp_temp_nr >= 16)
218 assert(0);
219
220 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
221 return pc->temp_temp[pc->temp_temp_nr++];
222 }
223
224 static void
225 kill_temp_temp(struct nv50_pc *pc)
226 {
227 int i;
228
229 for (i = 0; i < pc->temp_temp_nr; i++)
230 free_temp(pc, pc->temp_temp[i]);
231 pc->temp_temp_nr = 0;
232 }
233
234 static int
235 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
236 {
237 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
238 (pc->immd_nr + 1) * 4 * sizeof(float));
239 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
240 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
241 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
242 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
243
244 return pc->immd_nr++;
245 }
246
247 static struct nv50_reg *
248 alloc_immd(struct nv50_pc *pc, float f)
249 {
250 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
251 unsigned hw;
252
253 for (hw = 0; hw < pc->immd_nr * 4; hw++)
254 if (pc->immd_buf[hw] == f)
255 break;
256
257 if (hw == pc->immd_nr * 4)
258 hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
259
260 r->type = P_IMMD;
261 r->hw = hw;
262 r->index = -1;
263 return r;
264 }
265
266 static struct nv50_program_exec *
267 exec(struct nv50_pc *pc)
268 {
269 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
270
271 e->param.index = -1;
272 return e;
273 }
274
275 static void
276 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
277 {
278 struct nv50_program *p = pc->p;
279
280 if (p->exec_tail)
281 p->exec_tail->next = e;
282 if (!p->exec_head)
283 p->exec_head = e;
284 p->exec_tail = e;
285 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
286 }
287
/* Forward declaration: set_pred() and set_pred_wr() call set_long() before
 * its definition further down. */
static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
289
290 static boolean
291 is_long(struct nv50_program_exec *e)
292 {
293 if (e->inst[0] & 1)
294 return TRUE;
295 return FALSE;
296 }
297
298 static boolean
299 is_immd(struct nv50_program_exec *e)
300 {
301 if (is_long(e) && (e->inst[1] & 3) == 3)
302 return TRUE;
303 return FALSE;
304 }
305
306 static INLINE void
307 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
308 struct nv50_program_exec *e)
309 {
310 set_long(pc, e);
311 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
312 e->inst[1] |= (pred << 7) | (idx << 12);
313 }
314
315 static INLINE void
316 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
317 struct nv50_program_exec *e)
318 {
319 set_long(pc, e);
320 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
321 e->inst[1] |= (idx << 4) | (on << 6);
322 }
323
324 static INLINE void
325 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
326 {
327 if (is_long(e))
328 return;
329
330 e->inst[0] |= 1;
331 set_pred(pc, 0xf, 0, e);
332 set_pred_wr(pc, 0, 0, e);
333 }
334
335 static INLINE void
336 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
337 {
338 if (dst->type == P_RESULT) {
339 set_long(pc, e);
340 e->inst[1] |= 0x00000008;
341 }
342
343 alloc_reg(pc, dst);
344 e->inst[0] |= (dst->hw << 2);
345 }
346
347 static INLINE void
348 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
349 {
350 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
351
352 set_long(pc, e);
353 /*XXX: can't be predicated - bits overlap.. catch cases where both
354 * are required and avoid them. */
355 set_pred(pc, 0, 0, e);
356 set_pred_wr(pc, 0, 0, e);
357
358 e->inst[1] |= 0x00000002 | 0x00000001;
359 e->inst[0] |= (val & 0x3f) << 16;
360 e->inst[1] |= (val >> 6) << 2;
361 }
362
363 static void
364 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
365 struct nv50_reg *src, struct nv50_reg *iv)
366 {
367 struct nv50_program_exec *e = exec(pc);
368
369 e->inst[0] |= 0x80000000;
370 set_dst(pc, dst, e);
371 alloc_reg(pc, src);
372 e->inst[0] |= (src->hw << 16);
373 if (iv) {
374 e->inst[0] |= (1 << 25);
375 alloc_reg(pc, iv);
376 e->inst[0] |= (iv->hw << 9);
377 }
378
379 emit(pc, e);
380 }
381
382 static void
383 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
384 struct nv50_program_exec *e)
385 {
386 set_long(pc, e);
387 #if 1
388 e->inst[1] |= (1 << 22);
389 #else
390 if (src->type == P_IMMD) {
391 e->inst[1] |= (NV50_CB_PMISC << 22);
392 } else {
393 if (pc->p->type == PIPE_SHADER_VERTEX)
394 e->inst[1] |= (NV50_CB_PVP << 22);
395 else
396 e->inst[1] |= (NV50_CB_PFP << 22);
397 }
398 #endif
399
400 e->param.index = src->hw;
401 e->param.shift = s;
402 e->param.mask = m << (s % 32);
403 }
404
/* Emit "dst = src".
 *
 * P_IMMD and P_CONST sources are fetched through set_data(); P_ATTR
 * sources set an extra bit in inst[1]; every other source is encoded as a
 * plain register at bit 9 of inst[0]. The instruction is always emitted in
 * long form.
 */
static void
emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	struct nv50_program_exec *e = exec(pc);

	e->inst[0] |= 0x10000000;

	set_dst(pc, dst, e);

	/* Inlining the immediate into the instruction is deliberately
	 * disabled by the leading "0 &&"; the branch is kept for reference. */
	if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
		set_immd(pc, src, e);
		/*XXX: 32-bit, but steals part of "half" reg space - need to
		 * catch and handle this case if/when we do half-regs
		 */
		e->inst[0] |= 0x00008000;
	} else
	if (src->type == P_IMMD || src->type == P_CONST) {
		set_long(pc, e);
		/* operand index is patched in later: 7-bit field at bit 9 */
		set_data(pc, src, 0x7f, 9, e);
		e->inst[1] |= 0x20000000; /* src0 const? */
	} else {
		if (src->type == P_ATTR) {
			set_long(pc, e);
			e->inst[1] |= 0x00200000;
		}

		alloc_reg(pc, src);
		e->inst[0] |= (src->hw << 9);
	}

	/* We really should support "half" instructions here at some point,
	 * but I don't feel confident enough about them yet.
	 */
	set_long(pc, e);
	/* is_long() is always true after the set_long() above; the test only
	 * skips the extra bits for the inline-immediate form. */
	if (is_long(e) && !is_immd(e)) {
		e->inst[1] |= 0x04000000; /* 32-bit */
		e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
	}

	emit(pc, e);
}
446
447 static INLINE void
448 emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
449 {
450 struct nv50_reg *imm = alloc_immd(pc, f);
451 emit_mov(pc, dst, imm);
452 FREE(imm);
453 }
454
455 static boolean
456 check_swap_src_0_1(struct nv50_pc *pc,
457 struct nv50_reg **s0, struct nv50_reg **s1)
458 {
459 struct nv50_reg *src0 = *s0, *src1 = *s1;
460
461 if (src0->type == P_CONST) {
462 if (src1->type != P_CONST) {
463 *s0 = src1;
464 *s1 = src0;
465 return TRUE;
466 }
467 } else
468 if (src1->type == P_ATTR) {
469 if (src0->type != P_ATTR) {
470 *s0 = src1;
471 *s1 = src0;
472 return TRUE;
473 }
474 }
475
476 return FALSE;
477 }
478
479 static void
480 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
481 {
482 if (src->type == P_ATTR) {
483 set_long(pc, e);
484 e->inst[1] |= 0x00200000;
485 } else
486 if (src->type == P_CONST || src->type == P_IMMD) {
487 struct nv50_reg *temp = temp_temp(pc);
488
489 emit_mov(pc, temp, src);
490 src = temp;
491 }
492
493 alloc_reg(pc, src);
494 e->inst[0] |= (src->hw << 9);
495 }
496
497 static void
498 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
499 {
500 if (src->type == P_ATTR) {
501 struct nv50_reg *temp = temp_temp(pc);
502
503 emit_mov(pc, temp, src);
504 src = temp;
505 } else
506 if (src->type == P_CONST || src->type == P_IMMD) {
507 assert(!(e->inst[0] & 0x00800000));
508 if (e->inst[0] & 0x01000000) {
509 struct nv50_reg *temp = temp_temp(pc);
510
511 emit_mov(pc, temp, src);
512 src = temp;
513 } else {
514 set_data(pc, src, 0x7f, 16, e);
515 e->inst[0] |= 0x00800000;
516 }
517 }
518
519 alloc_reg(pc, src);
520 e->inst[0] |= (src->hw << 16);
521 }
522
523 static void
524 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
525 {
526 set_long(pc, e);
527
528 if (src->type == P_ATTR) {
529 struct nv50_reg *temp = temp_temp(pc);
530
531 emit_mov(pc, temp, src);
532 src = temp;
533 } else
534 if (src->type == P_CONST || src->type == P_IMMD) {
535 assert(!(e->inst[0] & 0x01000000));
536 if (e->inst[0] & 0x00800000) {
537 struct nv50_reg *temp = temp_temp(pc);
538
539 emit_mov(pc, temp, src);
540 src = temp;
541 } else {
542 set_data(pc, src, 0x7f, 32+14, e);
543 e->inst[0] |= 0x01000000;
544 }
545 }
546
547 alloc_reg(pc, src);
548 e->inst[1] |= (src->hw << 14);
549 }
550
551 static void
552 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
553 struct nv50_reg *src1)
554 {
555 struct nv50_program_exec *e = exec(pc);
556
557 e->inst[0] |= 0xc0000000;
558 set_long(pc, e);
559
560 check_swap_src_0_1(pc, &src0, &src1);
561 set_dst(pc, dst, e);
562 set_src_0(pc, src0, e);
563 set_src_1(pc, src1, e);
564
565 emit(pc, e);
566 }
567
568 static void
569 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
570 struct nv50_reg *src0, struct nv50_reg *src1)
571 {
572 struct nv50_program_exec *e = exec(pc);
573
574 e->inst[0] |= 0xb0000000;
575
576 check_swap_src_0_1(pc, &src0, &src1);
577 set_dst(pc, dst, e);
578 set_src_0(pc, src0, e);
579 if (is_long(e))
580 set_src_2(pc, src1, e);
581 else
582 set_src_1(pc, src1, e);
583
584 emit(pc, e);
585 }
586
587 static void
588 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
589 struct nv50_reg *src0, struct nv50_reg *src1)
590 {
591 struct nv50_program_exec *e = exec(pc);
592
593 set_long(pc, e);
594 e->inst[0] |= 0xb0000000;
595 e->inst[1] |= (sub << 29);
596
597 check_swap_src_0_1(pc, &src0, &src1);
598 set_dst(pc, dst, e);
599 set_src_0(pc, src0, e);
600 set_src_1(pc, src1, e);
601
602 emit(pc, e);
603 }
604
605 static void
606 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
607 struct nv50_reg *src1)
608 {
609 struct nv50_program_exec *e = exec(pc);
610
611 e->inst[0] |= 0xb0000000;
612
613 set_long(pc, e);
614 if (check_swap_src_0_1(pc, &src0, &src1))
615 e->inst[1] |= 0x04000000;
616 else
617 e->inst[1] |= 0x08000000;
618
619 set_dst(pc, dst, e);
620 set_src_0(pc, src0, e);
621 set_src_2(pc, src1, e);
622
623 emit(pc, e);
624 }
625
626 static void
627 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
628 struct nv50_reg *src1, struct nv50_reg *src2)
629 {
630 struct nv50_program_exec *e = exec(pc);
631
632 e->inst[0] |= 0xe0000000;
633
634 check_swap_src_0_1(pc, &src0, &src1);
635 set_dst(pc, dst, e);
636 set_src_0(pc, src0, e);
637 set_src_1(pc, src1, e);
638 set_src_2(pc, src2, e);
639
640 emit(pc, e);
641 }
642
643 static void
644 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
645 struct nv50_reg *src1, struct nv50_reg *src2)
646 {
647 struct nv50_program_exec *e = exec(pc);
648
649 e->inst[0] |= 0xe0000000;
650 set_long(pc, e);
651 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
652
653 check_swap_src_0_1(pc, &src0, &src1);
654 set_dst(pc, dst, e);
655 set_src_0(pc, src0, e);
656 set_src_1(pc, src1, e);
657 set_src_2(pc, src2, e);
658
659 emit(pc, e);
660 }
661
662 static void
663 emit_flop(struct nv50_pc *pc, unsigned sub,
664 struct nv50_reg *dst, struct nv50_reg *src)
665 {
666 struct nv50_program_exec *e = exec(pc);
667
668 e->inst[0] |= 0x90000000;
669 if (sub) {
670 set_long(pc, e);
671 e->inst[1] |= (sub << 29);
672 }
673
674 set_dst(pc, dst, e);
675 set_src_0(pc, src, e);
676
677 emit(pc, e);
678 }
679
680 static void
681 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
682 {
683 struct nv50_program_exec *e = exec(pc);
684
685 e->inst[0] |= 0xb0000000;
686
687 set_dst(pc, dst, e);
688 set_src_0(pc, src, e);
689 set_long(pc, e);
690 e->inst[1] |= (6 << 29) | 0x00004000;
691
692 emit(pc, e);
693 }
694
695 static void
696 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
697 {
698 struct nv50_program_exec *e = exec(pc);
699
700 e->inst[0] |= 0xb0000000;
701
702 set_dst(pc, dst, e);
703 set_src_0(pc, src, e);
704 set_long(pc, e);
705 e->inst[1] |= (6 << 29);
706
707 emit(pc, e);
708 }
709
710 static void
711 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
712 struct nv50_reg *src0, struct nv50_reg *src1)
713 {
714 struct nv50_program_exec *e = exec(pc);
715 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
716 struct nv50_reg *rdst;
717
718 assert(c_op <= 7);
719 if (check_swap_src_0_1(pc, &src0, &src1))
720 c_op = inv_cop[c_op];
721
722 rdst = dst;
723 if (dst->type != P_TEMP)
724 dst = alloc_temp(pc, NULL);
725
726 /* set.u32 */
727 set_long(pc, e);
728 e->inst[0] |= 0xb0000000;
729 e->inst[1] |= (3 << 29);
730 e->inst[1] |= (c_op << 14);
731 /*XXX: breaks things, .u32 by default?
732 * decuda will disasm as .u16 and use .lo/.hi regs, but this
733 * doesn't seem to match what the hw actually does.
734 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
735 */
736 set_dst(pc, dst, e);
737 set_src_0(pc, src0, e);
738 set_src_1(pc, src1, e);
739 emit(pc, e);
740
741 /* cvt.f32.u32 */
742 e = exec(pc);
743 e->inst[0] = 0xa0000001;
744 e->inst[1] = 0x64014780;
745 set_dst(pc, rdst, e);
746 set_src_0(pc, dst, e);
747 emit(pc, e);
748
749 if (dst != rdst)
750 free_temp(pc, dst);
751 }
752
753 static void
754 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
755 {
756 struct nv50_program_exec *e = exec(pc);
757
758 e->inst[0] = 0xa0000000; /* cvt */
759 set_long(pc, e);
760 e->inst[1] |= (6 << 29); /* cvt */
761 e->inst[1] |= 0x08000000; /* integer mode */
762 e->inst[1] |= 0x04000000; /* 32 bit */
763 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
764 e->inst[1] |= (1 << 14); /* src .f32 */
765 set_dst(pc, dst, e);
766 set_src_0(pc, src, e);
767
768 emit(pc, e);
769 }
770
771 static void
772 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
773 struct nv50_reg *v, struct nv50_reg *e)
774 {
775 struct nv50_reg *temp = alloc_temp(pc, NULL);
776
777 emit_flop(pc, 3, temp, v);
778 emit_mul(pc, temp, temp, e);
779 emit_preex2(pc, temp, temp);
780 emit_flop(pc, 6, dst, temp);
781
782 free_temp(pc, temp);
783 }
784
785 static void
786 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
787 {
788 struct nv50_program_exec *e = exec(pc);
789
790 e->inst[0] = 0xa0000000; /* cvt */
791 set_long(pc, e);
792 e->inst[1] |= (6 << 29); /* cvt */
793 e->inst[1] |= 0x04000000; /* 32 bit */
794 e->inst[1] |= (1 << 14); /* src .f32 */
795 e->inst[1] |= ((1 << 6) << 14); /* .abs */
796 set_dst(pc, dst, e);
797 set_src_0(pc, src, e);
798
799 emit(pc, e);
800 }
801
/* Emit the TGSI LIT instruction.
 *
 * 'dst' and 'src' each point at four per-component registers (x, y, z, w);
 * 'mask' selects which destination components are written:
 *   x and w receive 1.0,
 *   y receives max(src.x, 0),
 *   z receives pow(max(src.y, 0), clamp(src.w, -127.999999, 127.999999)),
 *     followed by a predicated move of 0.0 into z.
 *
 * The z path patches instructions after they were emitted (through
 * pc->p->exec_tail), so the order of the emit calls below matters.
 */
static void
emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
	 struct nv50_reg **src)
{
	struct nv50_reg *one = alloc_immd(pc, 1.0);
	struct nv50_reg *zero = alloc_immd(pc, 0.0);
	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
	struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
	struct nv50_reg *tmp[4];

	if (mask & (1 << 0))
		emit_mov(pc, dst[0], one);

	if (mask & (1 << 3))
		emit_mov(pc, dst[3], one);

	/* max(src.x, 0) is needed for y, and also when only z is written
	 * because its instruction is what sets the predicate below */
	if (mask & (3 << 1)) {
		if (mask & (1 << 1))
			tmp[0] = dst[1];
		else
			tmp[0] = temp_temp(pc);
		emit_minmax(pc, 4, tmp[0], src[0], zero);
	}

	if (mask & (1 << 2)) {
		/* make the max(src.x, 0) just emitted write predicate 0 */
		set_pred_wr(pc, 1, 0, pc->p->exec_tail);

		tmp[1] = temp_temp(pc);
		emit_minmax(pc, 4, tmp[1], src[1], zero);

		/* clamp the exponent to [-127.999999, 127.999999] */
		tmp[3] = temp_temp(pc);
		emit_minmax(pc, 4, tmp[3], src[3], neg128);
		emit_minmax(pc, 5, tmp[3], tmp[3], pos128);

		emit_pow(pc, dst[2], tmp[1], tmp[3]);
		/* overwrite z with 0.0, predicated (condition 3) on
		 * predicate 0 */
		emit_mov(pc, dst[2], zero);
		set_pred(pc, 3, 0, pc->p->exec_tail);
	}

	FREE(pos128);
	FREE(neg128);
	FREE(zero);
	FREE(one);
}
846
847 static void
848 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
849 {
850 struct nv50_program_exec *e = exec(pc);
851
852 set_long(pc, e);
853 e->inst[0] |= 0xa0000000; /* delta */
854 e->inst[1] |= (7 << 29); /* delta */
855 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
856 e->inst[1] |= (1 << 14); /* src .f32 */
857 set_dst(pc, dst, e);
858 set_src_0(pc, src, e);
859
860 emit(pc, e);
861 }
862
863 static void
864 emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
865 {
866 struct nv50_program_exec *e;
867 const int r_pred = 1;
868
869 /* Sets predicate reg ? */
870 e = exec(pc);
871 e->inst[0] = 0xa00001fd;
872 e->inst[1] = 0xc4014788;
873 set_src_0(pc, src, e);
874 set_pred_wr(pc, 1, r_pred, e);
875 emit(pc, e);
876
877 /* This is probably KILP */
878 e = exec(pc);
879 e->inst[0] = 0x000001fe;
880 set_long(pc, e);
881 set_pred(pc, 1 /* LT? */, r_pred, e);
882 emit(pc, e);
883 }
884
885 static struct nv50_reg *
886 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
887 {
888 switch (dst->DstRegister.File) {
889 case TGSI_FILE_TEMPORARY:
890 return &pc->temp[dst->DstRegister.Index * 4 + c];
891 case TGSI_FILE_OUTPUT:
892 return &pc->result[dst->DstRegister.Index * 4 + c];
893 case TGSI_FILE_NULL:
894 return NULL;
895 default:
896 break;
897 }
898
899 return NULL;
900 }
901
902 static struct nv50_reg *
903 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
904 {
905 struct nv50_reg *r = NULL;
906 struct nv50_reg *temp;
907 unsigned sgn, c;
908
909 sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
910
911 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
912 switch (c) {
913 case TGSI_EXTSWIZZLE_X:
914 case TGSI_EXTSWIZZLE_Y:
915 case TGSI_EXTSWIZZLE_Z:
916 case TGSI_EXTSWIZZLE_W:
917 switch (src->SrcRegister.File) {
918 case TGSI_FILE_INPUT:
919 r = &pc->attr[src->SrcRegister.Index * 4 + c];
920 break;
921 case TGSI_FILE_TEMPORARY:
922 r = &pc->temp[src->SrcRegister.Index * 4 + c];
923 break;
924 case TGSI_FILE_CONSTANT:
925 r = &pc->param[src->SrcRegister.Index * 4 + c];
926 break;
927 case TGSI_FILE_IMMEDIATE:
928 r = &pc->immd[src->SrcRegister.Index * 4 + c];
929 break;
930 case TGSI_FILE_SAMPLER:
931 break;
932 default:
933 assert(0);
934 break;
935 }
936 break;
937 case TGSI_EXTSWIZZLE_ZERO:
938 r = alloc_immd(pc, 0.0);
939 return r;
940 case TGSI_EXTSWIZZLE_ONE:
941 if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET)
942 return alloc_immd(pc, -1.0);
943 return alloc_immd(pc, 1.0);
944 default:
945 assert(0);
946 break;
947 }
948
949 switch (sgn) {
950 case TGSI_UTIL_SIGN_KEEP:
951 break;
952 case TGSI_UTIL_SIGN_CLEAR:
953 temp = temp_temp(pc);
954 emit_abs(pc, temp, r);
955 r = temp;
956 break;
957 case TGSI_UTIL_SIGN_TOGGLE:
958 temp = temp_temp(pc);
959 emit_neg(pc, temp, r);
960 r = temp;
961 break;
962 case TGSI_UTIL_SIGN_SET:
963 temp = temp_temp(pc);
964 emit_abs(pc, temp, r);
965 emit_neg(pc, temp, temp);
966 r = temp;
967 break;
968 default:
969 assert(0);
970 break;
971 }
972
973 return r;
974 }
975
976 static boolean
977 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
978 {
979 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
980 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
981 unsigned mask, sat, unit;
982 int i, c;
983
984 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
985 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
986
987 for (c = 0; c < 4; c++) {
988 if (mask & (1 << c))
989 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
990 else
991 dst[c] = NULL;
992 rdst[c] = NULL;
993 src[0][c] = NULL;
994 src[1][c] = NULL;
995 src[2][c] = NULL;
996 }
997
998 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
999 const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
1000
1001 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
1002 unit = fs->SrcRegister.Index;
1003
1004 for (c = 0; c < 4; c++)
1005 src[i][c] = tgsi_src(pc, c, fs);
1006 }
1007
1008 if (sat) {
1009 for (c = 0; c < 4; c++) {
1010 rdst[c] = dst[c];
1011 dst[c] = temp_temp(pc);
1012 }
1013 }
1014
1015 switch (inst->Instruction.Opcode) {
1016 case TGSI_OPCODE_ABS:
1017 for (c = 0; c < 4; c++) {
1018 if (!(mask & (1 << c)))
1019 continue;
1020 emit_abs(pc, dst[c], src[0][c]);
1021 }
1022 break;
1023 case TGSI_OPCODE_ADD:
1024 for (c = 0; c < 4; c++) {
1025 if (!(mask & (1 << c)))
1026 continue;
1027 emit_add(pc, dst[c], src[0][c], src[1][c]);
1028 }
1029 break;
1030 case TGSI_OPCODE_COS:
1031 temp = temp_temp(pc);
1032 emit_precossin(pc, temp, src[0][0]);
1033 emit_flop(pc, 5, temp, temp);
1034 for (c = 0; c < 4; c++) {
1035 if (!(mask & (1 << c)))
1036 continue;
1037 emit_mov(pc, dst[c], temp);
1038 }
1039 break;
1040 case TGSI_OPCODE_DP3:
1041 temp = temp_temp(pc);
1042 emit_mul(pc, temp, src[0][0], src[1][0]);
1043 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1044 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1045 for (c = 0; c < 4; c++) {
1046 if (!(mask & (1 << c)))
1047 continue;
1048 emit_mov(pc, dst[c], temp);
1049 }
1050 break;
1051 case TGSI_OPCODE_DP4:
1052 temp = temp_temp(pc);
1053 emit_mul(pc, temp, src[0][0], src[1][0]);
1054 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1055 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1056 emit_mad(pc, temp, src[0][3], src[1][3], temp);
1057 for (c = 0; c < 4; c++) {
1058 if (!(mask & (1 << c)))
1059 continue;
1060 emit_mov(pc, dst[c], temp);
1061 }
1062 break;
1063 case TGSI_OPCODE_DPH:
1064 temp = temp_temp(pc);
1065 emit_mul(pc, temp, src[0][0], src[1][0]);
1066 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1067 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1068 emit_add(pc, temp, src[1][3], temp);
1069 for (c = 0; c < 4; c++) {
1070 if (!(mask & (1 << c)))
1071 continue;
1072 emit_mov(pc, dst[c], temp);
1073 }
1074 break;
1075 case TGSI_OPCODE_DST:
1076 {
1077 struct nv50_reg *one = alloc_immd(pc, 1.0);
1078 if (mask & (1 << 0))
1079 emit_mov(pc, dst[0], one);
1080 if (mask & (1 << 1))
1081 emit_mul(pc, dst[1], src[0][1], src[1][1]);
1082 if (mask & (1 << 2))
1083 emit_mov(pc, dst[2], src[0][2]);
1084 if (mask & (1 << 3))
1085 emit_mov(pc, dst[3], src[1][3]);
1086 FREE(one);
1087 }
1088 break;
1089 case TGSI_OPCODE_EX2:
1090 temp = temp_temp(pc);
1091 emit_preex2(pc, temp, src[0][0]);
1092 emit_flop(pc, 6, temp, temp);
1093 for (c = 0; c < 4; c++) {
1094 if (!(mask & (1 << c)))
1095 continue;
1096 emit_mov(pc, dst[c], temp);
1097 }
1098 break;
1099 case TGSI_OPCODE_FLR:
1100 for (c = 0; c < 4; c++) {
1101 if (!(mask & (1 << c)))
1102 continue;
1103 emit_flr(pc, dst[c], src[0][c]);
1104 }
1105 break;
1106 case TGSI_OPCODE_FRC:
1107 temp = temp_temp(pc);
1108 for (c = 0; c < 4; c++) {
1109 if (!(mask & (1 << c)))
1110 continue;
1111 emit_flr(pc, temp, src[0][c]);
1112 emit_sub(pc, dst[c], src[0][c], temp);
1113 }
1114 break;
1115 case TGSI_OPCODE_KIL:
1116 emit_kil(pc, src[0][0]);
1117 emit_kil(pc, src[0][1]);
1118 emit_kil(pc, src[0][2]);
1119 emit_kil(pc, src[0][3]);
1120 break;
1121 case TGSI_OPCODE_LIT:
1122 emit_lit(pc, &dst[0], mask, &src[0][0]);
1123 break;
1124 case TGSI_OPCODE_LG2:
1125 temp = temp_temp(pc);
1126 emit_flop(pc, 3, temp, src[0][0]);
1127 for (c = 0; c < 4; c++) {
1128 if (!(mask & (1 << c)))
1129 continue;
1130 emit_mov(pc, dst[c], temp);
1131 }
1132 break;
1133 case TGSI_OPCODE_LRP:
1134 temp = temp_temp(pc);
1135 for (c = 0; c < 4; c++) {
1136 if (!(mask & (1 << c)))
1137 continue;
1138 emit_sub(pc, temp, src[1][c], src[2][c]);
1139 emit_mad(pc, dst[c], temp, src[0][c], src[2][c]);
1140 }
1141 break;
1142 case TGSI_OPCODE_MAD:
1143 for (c = 0; c < 4; c++) {
1144 if (!(mask & (1 << c)))
1145 continue;
1146 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1147 }
1148 break;
1149 case TGSI_OPCODE_MAX:
1150 for (c = 0; c < 4; c++) {
1151 if (!(mask & (1 << c)))
1152 continue;
1153 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1154 }
1155 break;
1156 case TGSI_OPCODE_MIN:
1157 for (c = 0; c < 4; c++) {
1158 if (!(mask & (1 << c)))
1159 continue;
1160 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1161 }
1162 break;
1163 case TGSI_OPCODE_MOV:
1164 for (c = 0; c < 4; c++) {
1165 if (!(mask & (1 << c)))
1166 continue;
1167 emit_mov(pc, dst[c], src[0][c]);
1168 }
1169 break;
1170 case TGSI_OPCODE_MUL:
1171 for (c = 0; c < 4; c++) {
1172 if (!(mask & (1 << c)))
1173 continue;
1174 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1175 }
1176 break;
1177 case TGSI_OPCODE_POW:
1178 temp = temp_temp(pc);
1179 emit_pow(pc, temp, src[0][0], src[1][0]);
1180 for (c = 0; c < 4; c++) {
1181 if (!(mask & (1 << c)))
1182 continue;
1183 emit_mov(pc, dst[c], temp);
1184 }
1185 break;
1186 case TGSI_OPCODE_RCP:
1187 for (c = 0; c < 4; c++) {
1188 if (!(mask & (1 << c)))
1189 continue;
1190 emit_flop(pc, 0, dst[c], src[0][0]);
1191 }
1192 break;
1193 case TGSI_OPCODE_RSQ:
1194 for (c = 0; c < 4; c++) {
1195 if (!(mask & (1 << c)))
1196 continue;
1197 emit_flop(pc, 2, dst[c], src[0][0]);
1198 }
1199 break;
1200 case TGSI_OPCODE_SCS:
1201 temp = temp_temp(pc);
1202 emit_precossin(pc, temp, src[0][0]);
1203 if (mask & (1 << 0))
1204 emit_flop(pc, 5, dst[0], temp);
1205 if (mask & (1 << 1))
1206 emit_flop(pc, 4, dst[1], temp);
1207 if (mask & (1 << 2))
1208 emit_mov_immdval(pc, dst[2], 0.0);
1209 if (mask & (1 << 3))
1210 emit_mov_immdval(pc, dst[3], 1.0);
1211 break;
1212 case TGSI_OPCODE_SGE:
1213 for (c = 0; c < 4; c++) {
1214 if (!(mask & (1 << c)))
1215 continue;
1216 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1217 }
1218 break;
1219 case TGSI_OPCODE_SIN:
1220 temp = temp_temp(pc);
1221 emit_precossin(pc, temp, src[0][0]);
1222 emit_flop(pc, 4, temp, temp);
1223 for (c = 0; c < 4; c++) {
1224 if (!(mask & (1 << c)))
1225 continue;
1226 emit_mov(pc, dst[c], temp);
1227 }
1228 break;
1229 case TGSI_OPCODE_SLT:
1230 for (c = 0; c < 4; c++) {
1231 if (!(mask & (1 << c)))
1232 continue;
1233 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1234 }
1235 break;
1236 case TGSI_OPCODE_SUB:
1237 for (c = 0; c < 4; c++) {
1238 if (!(mask & (1 << c)))
1239 continue;
1240 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1241 }
1242 break;
1243 case TGSI_OPCODE_TEX:
1244 case TGSI_OPCODE_TXP:
1245 {
1246 struct nv50_reg *t[4];
1247 struct nv50_program_exec *e;
1248
1249 alloc_temp4(pc, t, 0);
1250 emit_mov(pc, t[0], src[0][0]);
1251 emit_mov(pc, t[1], src[0][1]);
1252
1253 e = exec(pc);
1254 e->inst[0] = 0xf6400000;
1255 e->inst[0] |= (unit << 9);
1256 set_long(pc, e);
1257 e->inst[1] |= 0x0000c004;
1258 set_dst(pc, t[0], e);
1259 emit(pc, e);
1260
1261 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
1262 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
1263 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
1264 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
1265
1266 free_temp4(pc, t);
1267 }
1268 break;
1269 case TGSI_OPCODE_XPD:
1270 temp = temp_temp(pc);
1271 if (mask & (1 << 0)) {
1272 emit_mul(pc, temp, src[0][2], src[1][1]);
1273 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1274 }
1275 if (mask & (1 << 1)) {
1276 emit_mul(pc, temp, src[0][0], src[1][2]);
1277 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1278 }
1279 if (mask & (1 << 2)) {
1280 emit_mul(pc, temp, src[0][1], src[1][0]);
1281 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1282 }
1283 if (mask & (1 << 3))
1284 emit_mov_immdval(pc, dst[3], 1.0);
1285 break;
1286 case TGSI_OPCODE_END:
1287 break;
1288 default:
1289 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1290 return FALSE;
1291 }
1292
1293 if (sat) {
1294 for (c = 0; c < 4; c++) {
1295 struct nv50_program_exec *e;
1296
1297 if (!(mask & (1 << c)))
1298 continue;
1299 e = exec(pc);
1300
1301 e->inst[0] = 0xa0000000; /* cvt */
1302 set_long(pc, e);
1303 e->inst[1] |= (6 << 29); /* cvt */
1304 e->inst[1] |= 0x04000000; /* 32 bit */
1305 e->inst[1] |= (1 << 14); /* src .f32 */
1306 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1307 set_dst(pc, rdst[c], e);
1308 set_src_0(pc, dst[c], e);
1309 emit(pc, e);
1310 }
1311 }
1312
1313 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1314 for (c = 0; c < 4; c++) {
1315 if (!src[i][c])
1316 continue;
1317 if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
1318 FREE(src[i][c]);
1319 }
1320 }
1321
1322 kill_temp_temp(pc);
1323 return TRUE;
1324 }
1325
1326 static boolean
1327 nv50_program_tx_prep(struct nv50_pc *pc)
1328 {
1329 struct tgsi_parse_context p;
1330 boolean ret = FALSE;
1331 unsigned i, c;
1332
1333 tgsi_parse_init(&p, pc->p->pipe.tokens);
1334 while (!tgsi_parse_end_of_tokens(&p)) {
1335 const union tgsi_full_token *tok = &p.FullToken;
1336
1337 tgsi_parse_token(&p);
1338 switch (tok->Token.Type) {
1339 case TGSI_TOKEN_TYPE_IMMEDIATE:
1340 {
1341 const struct tgsi_full_immediate *imm =
1342 &p.FullToken.FullImmediate;
1343
1344 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1345 imm->u.ImmediateFloat32[1].Float,
1346 imm->u.ImmediateFloat32[2].Float,
1347 imm->u.ImmediateFloat32[3].Float);
1348 }
1349 break;
1350 case TGSI_TOKEN_TYPE_DECLARATION:
1351 {
1352 const struct tgsi_full_declaration *d;
1353 unsigned last;
1354
1355 d = &p.FullToken.FullDeclaration;
1356 last = d->DeclarationRange.Last;
1357
1358 switch (d->Declaration.File) {
1359 case TGSI_FILE_TEMPORARY:
1360 if (pc->temp_nr < (last + 1))
1361 pc->temp_nr = last + 1;
1362 break;
1363 case TGSI_FILE_OUTPUT:
1364 if (pc->result_nr < (last + 1))
1365 pc->result_nr = last + 1;
1366 break;
1367 case TGSI_FILE_INPUT:
1368 if (pc->attr_nr < (last + 1))
1369 pc->attr_nr = last + 1;
1370 break;
1371 case TGSI_FILE_CONSTANT:
1372 if (pc->param_nr < (last + 1))
1373 pc->param_nr = last + 1;
1374 break;
1375 case TGSI_FILE_SAMPLER:
1376 break;
1377 default:
1378 NOUVEAU_ERR("bad decl file %d\n",
1379 d->Declaration.File);
1380 goto out_err;
1381 }
1382 }
1383 break;
1384 case TGSI_TOKEN_TYPE_INSTRUCTION:
1385 break;
1386 default:
1387 break;
1388 }
1389 }
1390
1391 if (pc->temp_nr) {
1392 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
1393 if (!pc->temp)
1394 goto out_err;
1395
1396 for (i = 0; i < pc->temp_nr; i++) {
1397 for (c = 0; c < 4; c++) {
1398 pc->temp[i*4+c].type = P_TEMP;
1399 pc->temp[i*4+c].hw = -1;
1400 pc->temp[i*4+c].index = i;
1401 }
1402 }
1403 }
1404
1405 if (pc->attr_nr) {
1406 struct nv50_reg *iv = NULL;
1407 int aid = 0;
1408
1409 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
1410 if (!pc->attr)
1411 goto out_err;
1412
1413 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1414 iv = alloc_temp(pc, NULL);
1415 emit_interp(pc, iv, iv, NULL);
1416 emit_flop(pc, 0, iv, iv);
1417 aid++;
1418 }
1419
1420 for (i = 0; i < pc->attr_nr; i++) {
1421 struct nv50_reg *a = &pc->attr[i*4];
1422
1423 for (c = 0; c < 4; c++) {
1424 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1425 struct nv50_reg *at =
1426 alloc_temp(pc, NULL);
1427 pc->attr[i*4+c].type = at->type;
1428 pc->attr[i*4+c].hw = at->hw;
1429 pc->attr[i*4+c].index = at->index;
1430 } else {
1431 pc->p->cfg.vp.attr[aid/32] |=
1432 (1 << (aid % 32));
1433 pc->attr[i*4+c].type = P_ATTR;
1434 pc->attr[i*4+c].hw = aid++;
1435 pc->attr[i*4+c].index = i;
1436 }
1437 }
1438
1439 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1440 continue;
1441
1442 emit_interp(pc, &a[0], &a[0], iv);
1443 emit_interp(pc, &a[1], &a[1], iv);
1444 emit_interp(pc, &a[2], &a[2], iv);
1445 emit_interp(pc, &a[3], &a[3], iv);
1446 }
1447
1448 if (iv)
1449 free_temp(pc, iv);
1450 }
1451
1452 if (pc->result_nr) {
1453 int rid = 0;
1454
1455 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
1456 if (!pc->result)
1457 goto out_err;
1458
1459 for (i = 0; i < pc->result_nr; i++) {
1460 for (c = 0; c < 4; c++) {
1461 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1462 pc->result[i*4+c].type = P_TEMP;
1463 pc->result[i*4+c].hw = -1;
1464 } else {
1465 pc->result[i*4+c].type = P_RESULT;
1466 pc->result[i*4+c].hw = rid++;
1467 }
1468 pc->result[i*4+c].index = i;
1469 }
1470 }
1471 }
1472
1473 if (pc->param_nr) {
1474 int rid = 0;
1475
1476 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
1477 if (!pc->param)
1478 goto out_err;
1479
1480 for (i = 0; i < pc->param_nr; i++) {
1481 for (c = 0; c < 4; c++) {
1482 pc->param[i*4+c].type = P_CONST;
1483 pc->param[i*4+c].hw = rid++;
1484 pc->param[i*4+c].index = i;
1485 }
1486 }
1487 }
1488
1489 if (pc->immd_nr) {
1490 int rid = pc->param_nr * 4;
1491
1492 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
1493 if (!pc->immd)
1494 goto out_err;
1495
1496 for (i = 0; i < pc->immd_nr; i++) {
1497 for (c = 0; c < 4; c++) {
1498 pc->immd[i*4+c].type = P_IMMD;
1499 pc->immd[i*4+c].hw = rid++;
1500 pc->immd[i*4+c].index = i;
1501 }
1502 }
1503 }
1504
1505 ret = TRUE;
1506 out_err:
1507 tgsi_parse_free(&p);
1508 return ret;
1509 }
1510
1511 static void
1512 free_nv50_pc(struct nv50_pc *pc)
1513 {
1514 unsigned i;
1515
1516 if (pc->immd)
1517 FREE(pc->immd);
1518 if (pc->param)
1519 FREE(pc->param);
1520 if (pc->result)
1521 FREE(pc->result);
1522 if (pc->attr)
1523 FREE(pc->attr);
1524 if (pc->temp)
1525 FREE(pc->temp);
1526
1527 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
1528 /* deallocate fragment program attributes */
1529 if (pc->r_temp[i] && pc->r_temp[i]->index == -1)
1530 FREE(pc->r_temp[i]);
1531 }
1532
1533 FREE(pc);
1534 }
1535
1536 static boolean
1537 nv50_program_tx(struct nv50_program *p)
1538 {
1539 struct tgsi_parse_context parse;
1540 struct nv50_pc *pc;
1541 boolean ret;
1542
1543 pc = CALLOC_STRUCT(nv50_pc);
1544 if (!pc)
1545 return FALSE;
1546 pc->p = p;
1547 pc->p->cfg.high_temp = 4;
1548
1549 ret = nv50_program_tx_prep(pc);
1550 if (ret == FALSE)
1551 goto out_cleanup;
1552
1553 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1554 while (!tgsi_parse_end_of_tokens(&parse)) {
1555 const union tgsi_full_token *tok = &parse.FullToken;
1556
1557 tgsi_parse_token(&parse);
1558
1559 switch (tok->Token.Type) {
1560 case TGSI_TOKEN_TYPE_INSTRUCTION:
1561 ret = nv50_program_tx_insn(pc, tok);
1562 if (ret == FALSE)
1563 goto out_err;
1564 break;
1565 default:
1566 break;
1567 }
1568 }
1569
1570 if (p->type == PIPE_SHADER_FRAGMENT) {
1571 struct nv50_reg out;
1572
1573 out.type = P_TEMP;
1574 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1575 emit_mov(pc, &out, &pc->result[out.hw]);
1576 }
1577
1578 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1579 pc->p->exec_tail->inst[1] |= 0x00000001;
1580
1581 p->param_nr = pc->param_nr * 4;
1582 p->immd_nr = pc->immd_nr * 4;
1583 p->immd = pc->immd_buf;
1584
1585 out_err:
1586 tgsi_parse_free(&parse);
1587
1588 out_cleanup:
1589 free_nv50_pc(pc);
1590 return ret;
1591 }
1592
1593 static void
1594 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1595 {
1596 if (nv50_program_tx(p) == FALSE)
1597 assert(0);
1598 p->translated = TRUE;
1599 }
1600
1601 static void
1602 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1603 unsigned start, unsigned count)
1604 {
1605 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1606 struct nouveau_grobj *tesla = nv50->screen->tesla;
1607
1608 while (count) {
1609 unsigned nr = count > 2047 ? 2047 : count;
1610
1611 BEGIN_RING(chan, tesla, 0x00000f00, 1);
1612 OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8));
1613 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1614 OUT_RINGp (chan, map, nr);
1615
1616 map += nr;
1617 start += nr;
1618 count -= nr;
1619 }
1620 }
1621
1622 static void
1623 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1624 {
1625 struct nouveau_winsys *nvws = nv50->screen->nvws;
1626 struct pipe_winsys *ws = nv50->pipe.winsys;
1627 unsigned nr = p->param_nr + p->immd_nr;
1628
1629 if (!p->data && nr) {
1630 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1631
1632 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1633 while (heap->next && heap->size < nr) {
1634 struct nv50_program *evict = heap->next->priv;
1635 nvws->res_free(&evict->data);
1636 }
1637
1638 if (nvws->res_alloc(heap, nr, p, &p->data))
1639 assert(0);
1640 }
1641 }
1642
1643 if (p->param_nr) {
1644 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1645 PIPE_BUFFER_USAGE_CPU_READ);
1646 nv50_program_upload_data(nv50, map, p->data->start,
1647 p->param_nr);
1648 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1649 }
1650
1651 if (p->immd_nr) {
1652 nv50_program_upload_data(nv50, p->immd,
1653 p->data->start + p->param_nr,
1654 p->immd_nr);
1655 }
1656 }
1657
1658 static void
1659 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1660 {
1661 struct nouveau_channel *chan = nv50->screen->nvws->channel;
1662 struct nouveau_grobj *tesla = nv50->screen->tesla;
1663 struct pipe_screen *screen = nv50->pipe.screen;
1664 struct nv50_program_exec *e;
1665 struct nouveau_stateobj *so;
1666 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1667 unsigned start, count, *up, *ptr;
1668 boolean upload = FALSE;
1669
1670 if (!p->buffer) {
1671 p->buffer = screen->buffer_create(screen, 0x100, 0, p->exec_size * 4);
1672 upload = TRUE;
1673 }
1674
1675 if (p->data && p->data->start != p->data_start) {
1676 for (e = p->exec_head; e; e = e->next) {
1677 unsigned ei, ci;
1678
1679 if (e->param.index < 0)
1680 continue;
1681 ei = e->param.shift >> 5;
1682 ci = e->param.index + p->data->start;
1683
1684 e->inst[ei] &= ~e->param.mask;
1685 e->inst[ei] |= (ci << e->param.shift);
1686 }
1687
1688 p->data_start = p->data->start;
1689 upload = TRUE;
1690 }
1691
1692 if (!upload)
1693 return;
1694
1695 #ifdef NV50_PROGRAM_DUMP
1696 NOUVEAU_ERR("-------\n");
1697 for (e = p->exec_head; e; e = e->next) {
1698 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1699 if (is_long(e))
1700 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1701 }
1702 #endif
1703
1704 up = ptr = MALLOC(p->exec_size * 4);
1705 for (e = p->exec_head; e; e = e->next) {
1706 *(ptr++) = e->inst[0];
1707 if (is_long(e))
1708 *(ptr++) = e->inst[1];
1709 }
1710
1711 so = so_new(4,2);
1712 so_method(so, nv50->screen->tesla, 0x1280, 3);
1713 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1714 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1715 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1716
1717 start = 0; count = p->exec_size;
1718 while (count) {
1719 struct nouveau_winsys *nvws = nv50->screen->nvws;
1720 unsigned nr;
1721
1722 so_emit(nvws, so);
1723
1724 nr = MIN2(count, 2047);
1725 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1726 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1727 FIRE_RING(chan);
1728 continue;
1729 }
1730
1731 BEGIN_RING(chan, tesla, 0x0f00, 1);
1732 OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
1733 BEGIN_RING(chan, tesla, 0x40000f04, nr);
1734 OUT_RINGp (chan, up + start, nr);
1735
1736 start += nr;
1737 count -= nr;
1738 }
1739
1740 FREE(up);
1741 so_ref(NULL, &so);
1742 }
1743
1744 void
1745 nv50_vertprog_validate(struct nv50_context *nv50)
1746 {
1747 struct nouveau_grobj *tesla = nv50->screen->tesla;
1748 struct nv50_program *p = nv50->vertprog;
1749 struct nouveau_stateobj *so;
1750
1751 if (!p->translated) {
1752 nv50_program_validate(nv50, p);
1753 if (!p->translated)
1754 assert(0);
1755 }
1756
1757 nv50_program_validate_data(nv50, p);
1758 nv50_program_validate_code(nv50, p);
1759
1760 so = so_new(13, 2);
1761 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1762 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1763 NOUVEAU_BO_HIGH, 0, 0);
1764 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1765 NOUVEAU_BO_LOW, 0, 0);
1766 so_method(so, tesla, 0x1650, 2);
1767 so_data (so, p->cfg.vp.attr[0]);
1768 so_data (so, p->cfg.vp.attr[1]);
1769 so_method(so, tesla, 0x16b8, 1);
1770 so_data (so, p->cfg.high_result);
1771 so_method(so, tesla, 0x16ac, 2);
1772 so_data (so, p->cfg.high_result); //8);
1773 so_data (so, p->cfg.high_temp);
1774 so_method(so, tesla, 0x140c, 1);
1775 so_data (so, 0); /* program start offset */
1776 so_ref(so, &nv50->state.vertprog);
1777 so_ref(NULL, &so);
1778 }
1779
1780 void
1781 nv50_fragprog_validate(struct nv50_context *nv50)
1782 {
1783 struct nouveau_grobj *tesla = nv50->screen->tesla;
1784 struct nv50_program *p = nv50->fragprog;
1785 struct nouveau_stateobj *so;
1786
1787 if (!p->translated) {
1788 nv50_program_validate(nv50, p);
1789 if (!p->translated)
1790 assert(0);
1791 }
1792
1793 nv50_program_validate_data(nv50, p);
1794 nv50_program_validate_code(nv50, p);
1795
1796 so = so_new(64, 2);
1797 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1798 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1799 NOUVEAU_BO_HIGH, 0, 0);
1800 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1801 NOUVEAU_BO_LOW, 0, 0);
1802 so_method(so, tesla, 0x1904, 4);
1803 so_data (so, 0x00040404); /* p: 0x01000404 */
1804 so_data (so, 0x00000004);
1805 so_data (so, 0x00000000);
1806 so_data (so, 0x00000000);
1807 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
1808 so_data (so, 0x03020100);
1809 so_data (so, 0x07060504);
1810 so_data (so, 0x0b0a0908);
1811 so_method(so, tesla, 0x1988, 2);
1812 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
1813 so_data (so, p->cfg.high_temp);
1814 so_method(so, tesla, 0x1414, 1);
1815 so_data (so, 0); /* program start offset */
1816 so_ref(so, &nv50->state.fragprog);
1817 so_ref(NULL, &so);
1818 }
1819
1820 void
1821 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1822 {
1823 struct pipe_screen *pscreen = nv50->pipe.screen;
1824
1825 while (p->exec_head) {
1826 struct nv50_program_exec *e = p->exec_head;
1827
1828 p->exec_head = e->next;
1829 FREE(e);
1830 }
1831 p->exec_tail = NULL;
1832 p->exec_size = 0;
1833
1834 if (p->buffer)
1835 pipe_buffer_reference(&p->buffer, NULL);
1836
1837 nv50->screen->nvws->res_free(&p->data);
1838
1839 p->translated = 0;
1840 }
1841