nv50: enable npot textures
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "pipe/p_inlines.h"
27
28 #include "pipe/p_shader_tokens.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "tgsi/tgsi_util.h"
31
32 #include "nv50_context.h"
33
34 #define NV50_SU_MAX_TEMP 64
35 //#define NV50_PROGRAM_DUMP
36
37 /* ARL - gallium craps itself on progs/vp/arl.txt
38 *
39 * MSB - Like MAD, but MUL+SUB
 *     - Drop the dedicated op; instead introduce a way to negate args
 *       for ops that support it.
42 *
43 * Look into inlining IMMD for ops other than MOV (make it general?)
44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
46 *
47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
48 * case, if the emit_src() causes the inst to suddenly become long.
49 *
50 * Verify half-insns work where expected - and force disable them where they
51 * don't work - MUL has it forcibly disabled atm as it fixes POW..
52 *
 * FIXME: watch out for dst == src vectors; a component can be overwritten
 *        before it has been read, e.g. SUB R0, R0.yzxw, R0
55 *
56 * Things to check with renouveau:
57 * FP attr/result assignment - how?
58 * attrib
59 * - 0x16bc maps vp output onto fp hpos
60 * - 0x16c0 maps vp output onto fp col0
61 * result
62 * - colr always 0-3
63 * - depr always 4
64 * 0x16bc->0x16e8 --> some binding between vp/fp regs
65 * 0x16b8 --> VP output count
66 *
67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
68 * "MOV rcol.x, fcol.y" = 0x00000004
69 * 0x19a8 --> as above but 0x00000100 and 0x00000000
70 * - 0x00100000 used when KIL used
71 * 0x196c --> as above but 0x00000011 and 0x00000000
72 *
73 * 0x1988 --> 0xXXNNNNNN
74 * - XX == FP high something
75 */
/*
 * A single scalar register operand as tracked by the translator.
 */
struct nv50_reg {
	/* Which register file the operand lives in. */
	enum {
		P_TEMP,
		P_ATTR,
		P_RESULT,
		P_CONST,
		P_IMMD
	} type;

	/* Source-level register index; the allocators in this file set it
	 * to -1 for registers they create themselves, and free_temp() only
	 * releases registers marked that way. */
	int index;

	/* Hardware slot; for P_TEMP, a negative value means "not assigned
	 * yet" (see alloc_reg()).  For P_IMMD it is an offset into the
	 * immediate buffer. */
	int hw;

	/* Not referenced by the code visible in this part of the file. */
	int neg;
};
89
90 struct nv50_pc {
91 struct nv50_program *p;
92
93 /* hw resources */
94 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
95
96 /* tgsi resources */
97 struct nv50_reg *temp;
98 int temp_nr;
99 struct nv50_reg *attr;
100 int attr_nr;
101 struct nv50_reg *result;
102 int result_nr;
103 struct nv50_reg *param;
104 int param_nr;
105 struct nv50_reg *immd;
106 float *immd_buf;
107 int immd_nr;
108
109 struct nv50_reg *temp_temp[16];
110 unsigned temp_temp_nr;
111 };
112
113 static void
114 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
115 {
116 int i;
117
118 if (reg->type == P_RESULT) {
119 if (pc->p->cfg.high_result < (reg->hw + 1))
120 pc->p->cfg.high_result = reg->hw + 1;
121 }
122
123 if (reg->type != P_TEMP)
124 return;
125
126 if (reg->hw >= 0) {
127 /*XXX: do this here too to catch FP temp-as-attr usage..
128 * not clean, but works */
129 if (pc->p->cfg.high_temp < (reg->hw + 1))
130 pc->p->cfg.high_temp = reg->hw + 1;
131 return;
132 }
133
134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
135 if (!(pc->r_temp[i])) {
136 pc->r_temp[i] = reg;
137 reg->hw = i;
138 if (pc->p->cfg.high_temp < (i + 1))
139 pc->p->cfg.high_temp = i + 1;
140 return;
141 }
142 }
143
144 assert(0);
145 }
146
147 static struct nv50_reg *
148 alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
149 {
150 struct nv50_reg *r;
151 int i;
152
153 if (dst && dst->type == P_TEMP && dst->hw == -1)
154 return dst;
155
156 for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
157 if (!pc->r_temp[i]) {
158 r = CALLOC_STRUCT(nv50_reg);
159 r->type = P_TEMP;
160 r->index = -1;
161 r->hw = i;
162 pc->r_temp[i] = r;
163 return r;
164 }
165 }
166
167 assert(0);
168 return NULL;
169 }
170
171 static void
172 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
173 {
174 if (r->index == -1) {
175 unsigned hw = r->hw;
176
177 FREE(pc->r_temp[hw]);
178 pc->r_temp[hw] = NULL;
179 }
180 }
181
182 static int
183 alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
184 {
185 int i;
186
187 if ((idx + 4) >= NV50_SU_MAX_TEMP)
188 return 1;
189
190 if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
191 pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
192 return alloc_temp4(pc, dst, idx + 1);
193
194 for (i = 0; i < 4; i++) {
195 dst[i] = CALLOC_STRUCT(nv50_reg);
196 dst[i]->type = P_TEMP;
197 dst[i]->index = -1;
198 dst[i]->hw = idx + i;
199 pc->r_temp[idx + i] = dst[i];
200 }
201
202 return 0;
203 }
204
/* Release the four registers handed out by alloc_temp4(). */
static void
free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
{
	int n = 0;

	while (n < 4) {
		free_temp(pc, reg[n]);
		n++;
	}
}
213
214 static struct nv50_reg *
215 temp_temp(struct nv50_pc *pc)
216 {
217 if (pc->temp_temp_nr >= 16)
218 assert(0);
219
220 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
221 return pc->temp_temp[pc->temp_temp_nr++];
222 }
223
224 static void
225 kill_temp_temp(struct nv50_pc *pc)
226 {
227 int i;
228
229 for (i = 0; i < pc->temp_temp_nr; i++)
230 free_temp(pc, pc->temp_temp[i]);
231 pc->temp_temp_nr = 0;
232 }
233
234 static int
235 ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
236 {
237 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)),
238 (pc->immd_nr + 1) * 4 * sizeof(float));
239 pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
240 pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
241 pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
242 pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
243
244 return pc->immd_nr++;
245 }
246
247 static struct nv50_reg *
248 alloc_immd(struct nv50_pc *pc, float f)
249 {
250 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
251 unsigned hw;
252
253 hw = ctor_immd(pc, f, 0, 0, 0) * 4;
254 r->type = P_IMMD;
255 r->hw = hw;
256 r->index = -1;
257 return r;
258 }
259
260 static struct nv50_program_exec *
261 exec(struct nv50_pc *pc)
262 {
263 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
264
265 e->param.index = -1;
266 return e;
267 }
268
269 static void
270 emit(struct nv50_pc *pc, struct nv50_program_exec *e)
271 {
272 struct nv50_program *p = pc->p;
273
274 if (p->exec_tail)
275 p->exec_tail->next = e;
276 if (!p->exec_head)
277 p->exec_head = e;
278 p->exec_tail = e;
279 p->exec_size += (e->inst[0] & 1) ? 2 : 1;
280 }
281
282 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
283
284 static boolean
285 is_long(struct nv50_program_exec *e)
286 {
287 if (e->inst[0] & 1)
288 return TRUE;
289 return FALSE;
290 }
291
292 static boolean
293 is_immd(struct nv50_program_exec *e)
294 {
295 if (is_long(e) && (e->inst[1] & 3) == 3)
296 return TRUE;
297 return FALSE;
298 }
299
300 static INLINE void
301 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
302 struct nv50_program_exec *e)
303 {
304 set_long(pc, e);
305 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
306 e->inst[1] |= (pred << 7) | (idx << 12);
307 }
308
309 static INLINE void
310 set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
311 struct nv50_program_exec *e)
312 {
313 set_long(pc, e);
314 e->inst[1] &= ~((0x3 << 4) | (1 << 6));
315 e->inst[1] |= (idx << 4) | (on << 6);
316 }
317
318 static INLINE void
319 set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
320 {
321 if (is_long(e))
322 return;
323
324 e->inst[0] |= 1;
325 set_pred(pc, 0xf, 0, e);
326 set_pred_wr(pc, 0, 0, e);
327 }
328
329 static INLINE void
330 set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
331 {
332 if (dst->type == P_RESULT) {
333 set_long(pc, e);
334 e->inst[1] |= 0x00000008;
335 }
336
337 alloc_reg(pc, dst);
338 e->inst[0] |= (dst->hw << 2);
339 }
340
341 static INLINE void
342 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
343 {
344 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
345
346 set_long(pc, e);
347 /*XXX: can't be predicated - bits overlap.. catch cases where both
348 * are required and avoid them. */
349 set_pred(pc, 0, 0, e);
350 set_pred_wr(pc, 0, 0, e);
351
352 e->inst[1] |= 0x00000002 | 0x00000001;
353 e->inst[0] |= (val & 0x3f) << 16;
354 e->inst[1] |= (val >> 6) << 2;
355 }
356
357 static void
358 emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
359 struct nv50_reg *src, struct nv50_reg *iv)
360 {
361 struct nv50_program_exec *e = exec(pc);
362
363 e->inst[0] |= 0x80000000;
364 set_dst(pc, dst, e);
365 alloc_reg(pc, src);
366 e->inst[0] |= (src->hw << 16);
367 if (iv) {
368 e->inst[0] |= (1 << 25);
369 alloc_reg(pc, iv);
370 e->inst[0] |= (iv->hw << 9);
371 }
372
373 emit(pc, e);
374 }
375
376 static void
377 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
378 struct nv50_program_exec *e)
379 {
380 set_long(pc, e);
381 #if 1
382 e->inst[1] |= (1 << 22);
383 #else
384 if (src->type == P_IMMD) {
385 e->inst[1] |= (NV50_CB_PMISC << 22);
386 } else {
387 if (pc->p->type == PIPE_SHADER_VERTEX)
388 e->inst[1] |= (NV50_CB_PVP << 22);
389 else
390 e->inst[1] |= (NV50_CB_PFP << 22);
391 }
392 #endif
393
394 e->param.index = src->hw;
395 e->param.shift = s;
396 e->param.mask = m << (s % 32);
397 }
398
399 static void
400 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
401 {
402 struct nv50_program_exec *e = exec(pc);
403
404 e->inst[0] |= 0x10000000;
405
406 set_dst(pc, dst, e);
407
408 if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
409 set_immd(pc, src, e);
410 /*XXX: 32-bit, but steals part of "half" reg space - need to
411 * catch and handle this case if/when we do half-regs
412 */
413 e->inst[0] |= 0x00008000;
414 } else
415 if (src->type == P_IMMD || src->type == P_CONST) {
416 set_long(pc, e);
417 set_data(pc, src, 0x7f, 9, e);
418 e->inst[1] |= 0x20000000; /* src0 const? */
419 } else {
420 if (src->type == P_ATTR) {
421 set_long(pc, e);
422 e->inst[1] |= 0x00200000;
423 }
424
425 alloc_reg(pc, src);
426 e->inst[0] |= (src->hw << 9);
427 }
428
429 /* We really should support "half" instructions here at some point,
430 * but I don't feel confident enough about them yet.
431 */
432 set_long(pc, e);
433 if (is_long(e) && !is_immd(e)) {
434 e->inst[1] |= 0x04000000; /* 32-bit */
435 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
436 }
437
438 emit(pc, e);
439 }
440
441 static boolean
442 check_swap_src_0_1(struct nv50_pc *pc,
443 struct nv50_reg **s0, struct nv50_reg **s1)
444 {
445 struct nv50_reg *src0 = *s0, *src1 = *s1;
446
447 if (src0->type == P_CONST) {
448 if (src1->type != P_CONST) {
449 *s0 = src1;
450 *s1 = src0;
451 return TRUE;
452 }
453 } else
454 if (src1->type == P_ATTR) {
455 if (src0->type != P_ATTR) {
456 *s0 = src1;
457 *s1 = src0;
458 return TRUE;
459 }
460 }
461
462 return FALSE;
463 }
464
465 static void
466 set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
467 {
468 if (src->type == P_ATTR) {
469 set_long(pc, e);
470 e->inst[1] |= 0x00200000;
471 } else
472 if (src->type == P_CONST || src->type == P_IMMD) {
473 struct nv50_reg *temp = temp_temp(pc);
474
475 emit_mov(pc, temp, src);
476 src = temp;
477 }
478
479 alloc_reg(pc, src);
480 e->inst[0] |= (src->hw << 9);
481 }
482
483 static void
484 set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
485 {
486 if (src->type == P_ATTR) {
487 struct nv50_reg *temp = temp_temp(pc);
488
489 emit_mov(pc, temp, src);
490 src = temp;
491 } else
492 if (src->type == P_CONST || src->type == P_IMMD) {
493 assert(!(e->inst[0] & 0x00800000));
494 if (e->inst[0] & 0x01000000) {
495 struct nv50_reg *temp = temp_temp(pc);
496
497 emit_mov(pc, temp, src);
498 src = temp;
499 } else {
500 set_data(pc, src, 0x7f, 16, e);
501 e->inst[0] |= 0x00800000;
502 }
503 }
504
505 alloc_reg(pc, src);
506 e->inst[0] |= (src->hw << 16);
507 }
508
509 static void
510 set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
511 {
512 set_long(pc, e);
513
514 if (src->type == P_ATTR) {
515 struct nv50_reg *temp = temp_temp(pc);
516
517 emit_mov(pc, temp, src);
518 src = temp;
519 } else
520 if (src->type == P_CONST || src->type == P_IMMD) {
521 assert(!(e->inst[0] & 0x01000000));
522 if (e->inst[0] & 0x00800000) {
523 struct nv50_reg *temp = temp_temp(pc);
524
525 emit_mov(pc, temp, src);
526 src = temp;
527 } else {
528 set_data(pc, src, 0x7f, 32+14, e);
529 e->inst[0] |= 0x01000000;
530 }
531 }
532
533 alloc_reg(pc, src);
534 e->inst[1] |= (src->hw << 14);
535 }
536
537 static void
538 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
539 struct nv50_reg *src1)
540 {
541 struct nv50_program_exec *e = exec(pc);
542
543 e->inst[0] |= 0xc0000000;
544 set_long(pc, e);
545
546 check_swap_src_0_1(pc, &src0, &src1);
547 set_dst(pc, dst, e);
548 set_src_0(pc, src0, e);
549 set_src_1(pc, src1, e);
550
551 emit(pc, e);
552 }
553
554 static void
555 emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
556 struct nv50_reg *src0, struct nv50_reg *src1)
557 {
558 struct nv50_program_exec *e = exec(pc);
559
560 e->inst[0] |= 0xb0000000;
561
562 check_swap_src_0_1(pc, &src0, &src1);
563 set_dst(pc, dst, e);
564 set_src_0(pc, src0, e);
565 if (is_long(e))
566 set_src_2(pc, src1, e);
567 else
568 set_src_1(pc, src1, e);
569
570 emit(pc, e);
571 }
572
573 static void
574 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
575 struct nv50_reg *src0, struct nv50_reg *src1)
576 {
577 struct nv50_program_exec *e = exec(pc);
578
579 set_long(pc, e);
580 e->inst[0] |= 0xb0000000;
581 e->inst[1] |= (sub << 29);
582
583 check_swap_src_0_1(pc, &src0, &src1);
584 set_dst(pc, dst, e);
585 set_src_0(pc, src0, e);
586 set_src_1(pc, src1, e);
587
588 emit(pc, e);
589 }
590
591 static void
592 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
593 struct nv50_reg *src1)
594 {
595 struct nv50_program_exec *e = exec(pc);
596
597 e->inst[0] |= 0xb0000000;
598
599 set_long(pc, e);
600 if (check_swap_src_0_1(pc, &src0, &src1))
601 e->inst[1] |= 0x04000000;
602 else
603 e->inst[1] |= 0x08000000;
604
605 set_dst(pc, dst, e);
606 set_src_0(pc, src0, e);
607 set_src_2(pc, src1, e);
608
609 emit(pc, e);
610 }
611
612 static void
613 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
614 struct nv50_reg *src1, struct nv50_reg *src2)
615 {
616 struct nv50_program_exec *e = exec(pc);
617
618 e->inst[0] |= 0xe0000000;
619
620 check_swap_src_0_1(pc, &src0, &src1);
621 set_dst(pc, dst, e);
622 set_src_0(pc, src0, e);
623 set_src_1(pc, src1, e);
624 set_src_2(pc, src2, e);
625
626 emit(pc, e);
627 }
628
629 static void
630 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
631 struct nv50_reg *src1, struct nv50_reg *src2)
632 {
633 struct nv50_program_exec *e = exec(pc);
634
635 e->inst[0] |= 0xe0000000;
636 set_long(pc, e);
637 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
638
639 check_swap_src_0_1(pc, &src0, &src1);
640 set_dst(pc, dst, e);
641 set_src_0(pc, src0, e);
642 set_src_1(pc, src1, e);
643 set_src_2(pc, src2, e);
644
645 emit(pc, e);
646 }
647
648 static void
649 emit_flop(struct nv50_pc *pc, unsigned sub,
650 struct nv50_reg *dst, struct nv50_reg *src)
651 {
652 struct nv50_program_exec *e = exec(pc);
653
654 e->inst[0] |= 0x90000000;
655 if (sub) {
656 set_long(pc, e);
657 e->inst[1] |= (sub << 29);
658 }
659
660 set_dst(pc, dst, e);
661 set_src_0(pc, src, e);
662
663 emit(pc, e);
664 }
665
666 static void
667 emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
668 {
669 struct nv50_program_exec *e = exec(pc);
670
671 e->inst[0] |= 0xb0000000;
672
673 set_dst(pc, dst, e);
674 set_src_0(pc, src, e);
675 set_long(pc, e);
676 e->inst[1] |= (6 << 29) | 0x00004000;
677
678 emit(pc, e);
679 }
680
681 static void
682 emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
683 {
684 struct nv50_program_exec *e = exec(pc);
685
686 e->inst[0] |= 0xb0000000;
687
688 set_dst(pc, dst, e);
689 set_src_0(pc, src, e);
690 set_long(pc, e);
691 e->inst[1] |= (6 << 29);
692
693 emit(pc, e);
694 }
695
696 static void
697 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
698 struct nv50_reg *src0, struct nv50_reg *src1)
699 {
700 struct nv50_program_exec *e = exec(pc);
701 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
702 struct nv50_reg *rdst;
703
704 assert(c_op <= 7);
705 if (check_swap_src_0_1(pc, &src0, &src1))
706 c_op = inv_cop[c_op];
707
708 rdst = dst;
709 if (dst->type != P_TEMP)
710 dst = alloc_temp(pc, NULL);
711
712 /* set.u32 */
713 set_long(pc, e);
714 e->inst[0] |= 0xb0000000;
715 e->inst[1] |= (3 << 29);
716 e->inst[1] |= (c_op << 14);
717 /*XXX: breaks things, .u32 by default?
718 * decuda will disasm as .u16 and use .lo/.hi regs, but this
719 * doesn't seem to match what the hw actually does.
720 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
721 */
722 set_dst(pc, dst, e);
723 set_src_0(pc, src0, e);
724 set_src_1(pc, src1, e);
725 emit(pc, e);
726
727 /* cvt.f32.u32 */
728 e = exec(pc);
729 e->inst[0] = 0xa0000001;
730 e->inst[1] = 0x64014780;
731 set_dst(pc, rdst, e);
732 set_src_0(pc, dst, e);
733 emit(pc, e);
734
735 if (dst != rdst)
736 free_temp(pc, dst);
737 }
738
739 static void
740 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
741 {
742 struct nv50_program_exec *e = exec(pc);
743
744 e->inst[0] = 0xa0000000; /* cvt */
745 set_long(pc, e);
746 e->inst[1] |= (6 << 29); /* cvt */
747 e->inst[1] |= 0x08000000; /* integer mode */
748 e->inst[1] |= 0x04000000; /* 32 bit */
749 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
750 e->inst[1] |= (1 << 14); /* src .f32 */
751 set_dst(pc, dst, e);
752 set_src_0(pc, src, e);
753
754 emit(pc, e);
755 }
756
757 static void
758 emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
759 struct nv50_reg *v, struct nv50_reg *e)
760 {
761 struct nv50_reg *temp = alloc_temp(pc, NULL);
762
763 emit_flop(pc, 3, temp, v);
764 emit_mul(pc, temp, temp, e);
765 emit_preex2(pc, temp, temp);
766 emit_flop(pc, 6, dst, temp);
767
768 free_temp(pc, temp);
769 }
770
771 static void
772 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
773 {
774 struct nv50_program_exec *e = exec(pc);
775
776 e->inst[0] = 0xa0000000; /* cvt */
777 set_long(pc, e);
778 e->inst[1] |= (6 << 29); /* cvt */
779 e->inst[1] |= 0x04000000; /* 32 bit */
780 e->inst[1] |= (1 << 14); /* src .f32 */
781 e->inst[1] |= ((1 << 6) << 14); /* .abs */
782 set_dst(pc, dst, e);
783 set_src_0(pc, src, e);
784
785 emit(pc, e);
786 }
787
788 static void
789 emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
790 struct nv50_reg **src)
791 {
792 struct nv50_reg *one = alloc_immd(pc, 1.0);
793 struct nv50_reg *zero = alloc_immd(pc, 0.0);
794 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
795 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
796 struct nv50_reg *tmp[4];
797
798 if (mask & (1 << 0))
799 emit_mov(pc, dst[0], one);
800
801 if (mask & (1 << 3))
802 emit_mov(pc, dst[3], one);
803
804 if (mask & (3 << 1)) {
805 if (mask & (1 << 1))
806 tmp[0] = dst[1];
807 else
808 tmp[0] = temp_temp(pc);
809 emit_minmax(pc, 4, tmp[0], src[0], zero);
810 }
811
812 if (mask & (1 << 2)) {
813 set_pred_wr(pc, 1, 0, pc->p->exec_tail);
814
815 tmp[1] = temp_temp(pc);
816 emit_minmax(pc, 4, tmp[1], src[1], zero);
817
818 tmp[3] = temp_temp(pc);
819 emit_minmax(pc, 4, tmp[3], src[3], neg128);
820 emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
821
822 emit_pow(pc, dst[2], tmp[1], tmp[3]);
823 emit_mov(pc, dst[2], zero);
824 set_pred(pc, 3, 0, pc->p->exec_tail);
825 }
826 }
827
828 static void
829 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
830 {
831 struct nv50_program_exec *e = exec(pc);
832
833 set_long(pc, e);
834 e->inst[0] |= 0xa0000000; /* delta */
835 e->inst[1] |= (7 << 29); /* delta */
836 e->inst[1] |= 0x04000000; /* negate arg0? probably not */
837 e->inst[1] |= (1 << 14); /* src .f32 */
838 set_dst(pc, dst, e);
839 set_src_0(pc, src, e);
840
841 emit(pc, e);
842 }
843
844 static struct nv50_reg *
845 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
846 {
847 switch (dst->DstRegister.File) {
848 case TGSI_FILE_TEMPORARY:
849 return &pc->temp[dst->DstRegister.Index * 4 + c];
850 case TGSI_FILE_OUTPUT:
851 return &pc->result[dst->DstRegister.Index * 4 + c];
852 case TGSI_FILE_NULL:
853 return NULL;
854 default:
855 break;
856 }
857
858 return NULL;
859 }
860
861 static struct nv50_reg *
862 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
863 {
864 struct nv50_reg *r = NULL;
865 struct nv50_reg *temp;
866 unsigned c;
867
868 c = tgsi_util_get_full_src_register_extswizzle(src, chan);
869 switch (c) {
870 case TGSI_EXTSWIZZLE_X:
871 case TGSI_EXTSWIZZLE_Y:
872 case TGSI_EXTSWIZZLE_Z:
873 case TGSI_EXTSWIZZLE_W:
874 switch (src->SrcRegister.File) {
875 case TGSI_FILE_INPUT:
876 r = &pc->attr[src->SrcRegister.Index * 4 + c];
877 break;
878 case TGSI_FILE_TEMPORARY:
879 r = &pc->temp[src->SrcRegister.Index * 4 + c];
880 break;
881 case TGSI_FILE_CONSTANT:
882 r = &pc->param[src->SrcRegister.Index * 4 + c];
883 break;
884 case TGSI_FILE_IMMEDIATE:
885 r = &pc->immd[src->SrcRegister.Index * 4 + c];
886 break;
887 case TGSI_FILE_SAMPLER:
888 break;
889 default:
890 assert(0);
891 break;
892 }
893 break;
894 case TGSI_EXTSWIZZLE_ZERO:
895 r = alloc_immd(pc, 0.0);
896 break;
897 case TGSI_EXTSWIZZLE_ONE:
898 r = alloc_immd(pc, 1.0);
899 break;
900 default:
901 assert(0);
902 break;
903 }
904
905 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
906 case TGSI_UTIL_SIGN_KEEP:
907 break;
908 case TGSI_UTIL_SIGN_CLEAR:
909 temp = temp_temp(pc);
910 emit_abs(pc, temp, r);
911 r = temp;
912 break;
913 case TGSI_UTIL_SIGN_TOGGLE:
914 temp = temp_temp(pc);
915 emit_neg(pc, temp, r);
916 r = temp;
917 break;
918 case TGSI_UTIL_SIGN_SET:
919 temp = temp_temp(pc);
920 emit_abs(pc, temp, r);
921 emit_neg(pc, temp, r);
922 r = temp;
923 break;
924 default:
925 assert(0);
926 break;
927 }
928
929 return r;
930 }
931
932 static boolean
933 nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
934 {
935 const struct tgsi_full_instruction *inst = &tok->FullInstruction;
936 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
937 unsigned mask, sat, unit;
938 int i, c;
939
940 mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
941 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
942
943 for (c = 0; c < 4; c++) {
944 if (mask & (1 << c))
945 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
946 else
947 dst[c] = NULL;
948 }
949
950 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
951 struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
952
953 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
954 unit = fs->SrcRegister.Index;
955
956 for (c = 0; c < 4; c++)
957 src[i][c] = tgsi_src(pc, c, fs);
958 }
959
960 if (sat) {
961 for (c = 0; c < 4; c++) {
962 rdst[c] = dst[c];
963 dst[c] = temp_temp(pc);
964 }
965 }
966
967 switch (inst->Instruction.Opcode) {
968 case TGSI_OPCODE_ABS:
969 for (c = 0; c < 4; c++) {
970 if (!(mask & (1 << c)))
971 continue;
972 emit_abs(pc, dst[c], src[0][c]);
973 }
974 break;
975 case TGSI_OPCODE_ADD:
976 for (c = 0; c < 4; c++) {
977 if (!(mask & (1 << c)))
978 continue;
979 emit_add(pc, dst[c], src[0][c], src[1][c]);
980 }
981 break;
982 case TGSI_OPCODE_COS:
983 temp = alloc_temp(pc, NULL);
984 emit_precossin(pc, temp, src[0][0]);
985 emit_flop(pc, 5, temp, temp);
986 for (c = 0; c < 4; c++) {
987 if (!(mask & (1 << c)))
988 continue;
989 emit_mov(pc, dst[c], temp);
990 }
991 break;
992 case TGSI_OPCODE_DP3:
993 temp = alloc_temp(pc, NULL);
994 emit_mul(pc, temp, src[0][0], src[1][0]);
995 emit_mad(pc, temp, src[0][1], src[1][1], temp);
996 emit_mad(pc, temp, src[0][2], src[1][2], temp);
997 for (c = 0; c < 4; c++) {
998 if (!(mask & (1 << c)))
999 continue;
1000 emit_mov(pc, dst[c], temp);
1001 }
1002 free_temp(pc, temp);
1003 break;
1004 case TGSI_OPCODE_DP4:
1005 temp = alloc_temp(pc, NULL);
1006 emit_mul(pc, temp, src[0][0], src[1][0]);
1007 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1008 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1009 emit_mad(pc, temp, src[0][3], src[1][3], temp);
1010 for (c = 0; c < 4; c++) {
1011 if (!(mask & (1 << c)))
1012 continue;
1013 emit_mov(pc, dst[c], temp);
1014 }
1015 free_temp(pc, temp);
1016 break;
1017 case TGSI_OPCODE_DPH:
1018 temp = alloc_temp(pc, NULL);
1019 emit_mul(pc, temp, src[0][0], src[1][0]);
1020 emit_mad(pc, temp, src[0][1], src[1][1], temp);
1021 emit_mad(pc, temp, src[0][2], src[1][2], temp);
1022 emit_add(pc, temp, src[1][3], temp);
1023 for (c = 0; c < 4; c++) {
1024 if (!(mask & (1 << c)))
1025 continue;
1026 emit_mov(pc, dst[c], temp);
1027 }
1028 free_temp(pc, temp);
1029 break;
1030 case TGSI_OPCODE_DST:
1031 {
1032 struct nv50_reg *one = alloc_immd(pc, 1.0);
1033 if (mask & (1 << 0))
1034 emit_mov(pc, dst[0], one);
1035 if (mask & (1 << 1))
1036 emit_mul(pc, dst[1], src[0][1], src[1][1]);
1037 if (mask & (1 << 2))
1038 emit_mov(pc, dst[2], src[0][2]);
1039 if (mask & (1 << 3))
1040 emit_mov(pc, dst[3], src[1][3]);
1041 FREE(one);
1042 }
1043 break;
1044 case TGSI_OPCODE_EX2:
1045 temp = alloc_temp(pc, NULL);
1046 emit_preex2(pc, temp, src[0][0]);
1047 emit_flop(pc, 6, temp, temp);
1048 for (c = 0; c < 4; c++) {
1049 if (!(mask & (1 << c)))
1050 continue;
1051 emit_mov(pc, dst[c], temp);
1052 }
1053 free_temp(pc, temp);
1054 break;
1055 case TGSI_OPCODE_FLR:
1056 for (c = 0; c < 4; c++) {
1057 if (!(mask & (1 << c)))
1058 continue;
1059 emit_flr(pc, dst[c], src[0][c]);
1060 }
1061 break;
1062 case TGSI_OPCODE_FRC:
1063 temp = alloc_temp(pc, NULL);
1064 for (c = 0; c < 4; c++) {
1065 if (!(mask & (1 << c)))
1066 continue;
1067 emit_flr(pc, temp, src[0][c]);
1068 emit_sub(pc, dst[c], src[0][c], temp);
1069 }
1070 free_temp(pc, temp);
1071 break;
1072 case TGSI_OPCODE_LIT:
1073 emit_lit(pc, &dst[0], mask, &src[0][0]);
1074 break;
1075 case TGSI_OPCODE_LG2:
1076 temp = alloc_temp(pc, NULL);
1077 emit_flop(pc, 3, temp, src[0][0]);
1078 for (c = 0; c < 4; c++) {
1079 if (!(mask & (1 << c)))
1080 continue;
1081 emit_mov(pc, dst[c], temp);
1082 }
1083 break;
1084 case TGSI_OPCODE_LRP:
1085 for (c = 0; c < 4; c++) {
1086 if (!(mask & (1 << c)))
1087 continue;
1088 /*XXX: we can do better than this */
1089 temp = alloc_temp(pc, NULL);
1090 emit_neg(pc, temp, src[0][c]);
1091 emit_mad(pc, temp, temp, src[2][c], src[2][c]);
1092 emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
1093 free_temp(pc, temp);
1094 }
1095 break;
1096 case TGSI_OPCODE_MAD:
1097 for (c = 0; c < 4; c++) {
1098 if (!(mask & (1 << c)))
1099 continue;
1100 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
1101 }
1102 break;
1103 case TGSI_OPCODE_MAX:
1104 for (c = 0; c < 4; c++) {
1105 if (!(mask & (1 << c)))
1106 continue;
1107 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
1108 }
1109 break;
1110 case TGSI_OPCODE_MIN:
1111 for (c = 0; c < 4; c++) {
1112 if (!(mask & (1 << c)))
1113 continue;
1114 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
1115 }
1116 break;
1117 case TGSI_OPCODE_MOV:
1118 for (c = 0; c < 4; c++) {
1119 if (!(mask & (1 << c)))
1120 continue;
1121 emit_mov(pc, dst[c], src[0][c]);
1122 }
1123 break;
1124 case TGSI_OPCODE_MUL:
1125 for (c = 0; c < 4; c++) {
1126 if (!(mask & (1 << c)))
1127 continue;
1128 emit_mul(pc, dst[c], src[0][c], src[1][c]);
1129 }
1130 break;
1131 case TGSI_OPCODE_POW:
1132 temp = alloc_temp(pc, NULL);
1133 emit_pow(pc, temp, src[0][0], src[1][0]);
1134 for (c = 0; c < 4; c++) {
1135 if (!(mask & (1 << c)))
1136 continue;
1137 emit_mov(pc, dst[c], temp);
1138 }
1139 free_temp(pc, temp);
1140 break;
1141 case TGSI_OPCODE_RCP:
1142 for (c = 0; c < 4; c++) {
1143 if (!(mask & (1 << c)))
1144 continue;
1145 emit_flop(pc, 0, dst[c], src[0][0]);
1146 }
1147 break;
1148 case TGSI_OPCODE_RSQ:
1149 for (c = 0; c < 4; c++) {
1150 if (!(mask & (1 << c)))
1151 continue;
1152 emit_flop(pc, 2, dst[c], src[0][0]);
1153 }
1154 break;
1155 case TGSI_OPCODE_SCS:
1156 temp = alloc_temp(pc, NULL);
1157 emit_precossin(pc, temp, src[0][0]);
1158 if (mask & (1 << 0))
1159 emit_flop(pc, 5, dst[0], temp);
1160 if (mask & (1 << 1))
1161 emit_flop(pc, 4, dst[1], temp);
1162 break;
1163 case TGSI_OPCODE_SGE:
1164 for (c = 0; c < 4; c++) {
1165 if (!(mask & (1 << c)))
1166 continue;
1167 emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
1168 }
1169 break;
1170 case TGSI_OPCODE_SIN:
1171 temp = alloc_temp(pc, NULL);
1172 emit_precossin(pc, temp, src[0][0]);
1173 emit_flop(pc, 4, temp, temp);
1174 for (c = 0; c < 4; c++) {
1175 if (!(mask & (1 << c)))
1176 continue;
1177 emit_mov(pc, dst[c], temp);
1178 }
1179 break;
1180 case TGSI_OPCODE_SLT:
1181 for (c = 0; c < 4; c++) {
1182 if (!(mask & (1 << c)))
1183 continue;
1184 emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
1185 }
1186 break;
1187 case TGSI_OPCODE_SUB:
1188 for (c = 0; c < 4; c++) {
1189 if (!(mask & (1 << c)))
1190 continue;
1191 emit_sub(pc, dst[c], src[0][c], src[1][c]);
1192 }
1193 break;
1194 case TGSI_OPCODE_TEX:
1195 case TGSI_OPCODE_TXP:
1196 {
1197 struct nv50_reg *t[4];
1198 struct nv50_program_exec *e;
1199
1200 alloc_temp4(pc, t, 0);
1201 emit_mov(pc, t[0], src[0][0]);
1202 emit_mov(pc, t[1], src[0][1]);
1203
1204 e = exec(pc);
1205 e->inst[0] = 0xf6400000;
1206 e->inst[0] |= (unit << 9);
1207 set_long(pc, e);
1208 e->inst[1] |= 0x0000c004;
1209 set_dst(pc, t[0], e);
1210 emit(pc, e);
1211
1212 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
1213 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
1214 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
1215 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
1216
1217 free_temp4(pc, t);
1218 }
1219 break;
1220 case TGSI_OPCODE_XPD:
1221 temp = alloc_temp(pc, NULL);
1222 if (mask & (1 << 0)) {
1223 emit_mul(pc, temp, src[0][2], src[1][1]);
1224 emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
1225 }
1226 if (mask & (1 << 1)) {
1227 emit_mul(pc, temp, src[0][0], src[1][2]);
1228 emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
1229 }
1230 if (mask & (1 << 2)) {
1231 emit_mul(pc, temp, src[0][1], src[1][0]);
1232 emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
1233 }
1234 free_temp(pc, temp);
1235 break;
1236 case TGSI_OPCODE_END:
1237 break;
1238 default:
1239 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
1240 return FALSE;
1241 }
1242
1243 if (sat) {
1244 for (c = 0; c < 4; c++) {
1245 struct nv50_program_exec *e;
1246
1247 if (!(mask & (1 << c)))
1248 continue;
1249 e = exec(pc);
1250
1251 e->inst[0] = 0xa0000000; /* cvt */
1252 set_long(pc, e);
1253 e->inst[1] |= (6 << 29); /* cvt */
1254 e->inst[1] |= 0x04000000; /* 32 bit */
1255 e->inst[1] |= (1 << 14); /* src .f32 */
1256 e->inst[1] |= ((1 << 5) << 14); /* .sat */
1257 set_dst(pc, rdst[c], e);
1258 set_src_0(pc, dst[c], e);
1259 emit(pc, e);
1260 }
1261 }
1262
1263 kill_temp_temp(pc);
1264 return TRUE;
1265 }
1266
1267 static boolean
1268 nv50_program_tx_prep(struct nv50_pc *pc)
1269 {
1270 struct tgsi_parse_context p;
1271 boolean ret = FALSE;
1272 unsigned i, c;
1273
1274 tgsi_parse_init(&p, pc->p->pipe.tokens);
1275 while (!tgsi_parse_end_of_tokens(&p)) {
1276 const union tgsi_full_token *tok = &p.FullToken;
1277
1278 tgsi_parse_token(&p);
1279 switch (tok->Token.Type) {
1280 case TGSI_TOKEN_TYPE_IMMEDIATE:
1281 {
1282 const struct tgsi_full_immediate *imm =
1283 &p.FullToken.FullImmediate;
1284
1285 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
1286 imm->u.ImmediateFloat32[1].Float,
1287 imm->u.ImmediateFloat32[2].Float,
1288 imm->u.ImmediateFloat32[3].Float);
1289 }
1290 break;
1291 case TGSI_TOKEN_TYPE_DECLARATION:
1292 {
1293 const struct tgsi_full_declaration *d;
1294 unsigned last;
1295
1296 d = &p.FullToken.FullDeclaration;
1297 last = d->DeclarationRange.Last;
1298
1299 switch (d->Declaration.File) {
1300 case TGSI_FILE_TEMPORARY:
1301 if (pc->temp_nr < (last + 1))
1302 pc->temp_nr = last + 1;
1303 break;
1304 case TGSI_FILE_OUTPUT:
1305 if (pc->result_nr < (last + 1))
1306 pc->result_nr = last + 1;
1307 break;
1308 case TGSI_FILE_INPUT:
1309 if (pc->attr_nr < (last + 1))
1310 pc->attr_nr = last + 1;
1311 break;
1312 case TGSI_FILE_CONSTANT:
1313 if (pc->param_nr < (last + 1))
1314 pc->param_nr = last + 1;
1315 break;
1316 case TGSI_FILE_SAMPLER:
1317 break;
1318 default:
1319 NOUVEAU_ERR("bad decl file %d\n",
1320 d->Declaration.File);
1321 goto out_err;
1322 }
1323 }
1324 break;
1325 case TGSI_TOKEN_TYPE_INSTRUCTION:
1326 break;
1327 default:
1328 break;
1329 }
1330 }
1331
1332 if (pc->temp_nr) {
1333 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg));
1334 if (!pc->temp)
1335 goto out_err;
1336
1337 for (i = 0; i < pc->temp_nr; i++) {
1338 for (c = 0; c < 4; c++) {
1339 pc->temp[i*4+c].type = P_TEMP;
1340 pc->temp[i*4+c].hw = -1;
1341 pc->temp[i*4+c].index = i;
1342 }
1343 }
1344 }
1345
1346 if (pc->attr_nr) {
1347 struct nv50_reg *iv = NULL;
1348 int aid = 0;
1349
1350 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
1351 if (!pc->attr)
1352 goto out_err;
1353
1354 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1355 iv = alloc_temp(pc, NULL);
1356 emit_interp(pc, iv, iv, NULL);
1357 emit_flop(pc, 0, iv, iv);
1358 aid++;
1359 }
1360
1361 for (i = 0; i < pc->attr_nr; i++) {
1362 struct nv50_reg *a = &pc->attr[i*4];
1363
1364 for (c = 0; c < 4; c++) {
1365 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1366 struct nv50_reg *at =
1367 alloc_temp(pc, NULL);
1368 pc->attr[i*4+c].type = at->type;
1369 pc->attr[i*4+c].hw = at->hw;
1370 pc->attr[i*4+c].index = at->index;
1371 } else {
1372 pc->p->cfg.vp.attr[aid/32] |=
1373 (1 << (aid % 32));
1374 pc->attr[i*4+c].type = P_ATTR;
1375 pc->attr[i*4+c].hw = aid++;
1376 pc->attr[i*4+c].index = i;
1377 }
1378 }
1379
1380 if (pc->p->type != PIPE_SHADER_FRAGMENT)
1381 continue;
1382
1383 emit_interp(pc, &a[0], &a[0], iv);
1384 emit_interp(pc, &a[1], &a[1], iv);
1385 emit_interp(pc, &a[2], &a[2], iv);
1386 emit_interp(pc, &a[3], &a[3], iv);
1387 }
1388
1389 if (iv)
1390 free_temp(pc, iv);
1391 }
1392
1393 if (pc->result_nr) {
1394 int rid = 0;
1395
1396 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg));
1397 if (!pc->result)
1398 goto out_err;
1399
1400 for (i = 0; i < pc->result_nr; i++) {
1401 for (c = 0; c < 4; c++) {
1402 if (pc->p->type == PIPE_SHADER_FRAGMENT) {
1403 pc->result[i*4+c].type = P_TEMP;
1404 pc->result[i*4+c].hw = -1;
1405 } else {
1406 pc->result[i*4+c].type = P_RESULT;
1407 pc->result[i*4+c].hw = rid++;
1408 }
1409 pc->result[i*4+c].index = i;
1410 }
1411 }
1412 }
1413
1414 if (pc->param_nr) {
1415 int rid = 0;
1416
1417 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg));
1418 if (!pc->param)
1419 goto out_err;
1420
1421 for (i = 0; i < pc->param_nr; i++) {
1422 for (c = 0; c < 4; c++) {
1423 pc->param[i*4+c].type = P_CONST;
1424 pc->param[i*4+c].hw = rid++;
1425 pc->param[i*4+c].index = i;
1426 }
1427 }
1428 }
1429
1430 if (pc->immd_nr) {
1431 int rid = pc->param_nr * 4;
1432
1433 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
1434 if (!pc->immd)
1435 goto out_err;
1436
1437 for (i = 0; i < pc->immd_nr; i++) {
1438 for (c = 0; c < 4; c++) {
1439 pc->immd[i*4+c].type = P_IMMD;
1440 pc->immd[i*4+c].hw = rid++;
1441 pc->immd[i*4+c].index = i;
1442 }
1443 }
1444 }
1445
1446 ret = TRUE;
1447 out_err:
1448 tgsi_parse_free(&p);
1449 return ret;
1450 }
1451
1452 static boolean
1453 nv50_program_tx(struct nv50_program *p)
1454 {
1455 struct tgsi_parse_context parse;
1456 struct nv50_pc *pc;
1457 boolean ret;
1458
1459 pc = CALLOC_STRUCT(nv50_pc);
1460 if (!pc)
1461 return FALSE;
1462 pc->p = p;
1463 pc->p->cfg.high_temp = 4;
1464
1465 ret = nv50_program_tx_prep(pc);
1466 if (ret == FALSE)
1467 goto out_cleanup;
1468
1469 tgsi_parse_init(&parse, pc->p->pipe.tokens);
1470 while (!tgsi_parse_end_of_tokens(&parse)) {
1471 const union tgsi_full_token *tok = &parse.FullToken;
1472
1473 tgsi_parse_token(&parse);
1474
1475 switch (tok->Token.Type) {
1476 case TGSI_TOKEN_TYPE_INSTRUCTION:
1477 ret = nv50_program_tx_insn(pc, tok);
1478 if (ret == FALSE)
1479 goto out_err;
1480 break;
1481 default:
1482 break;
1483 }
1484 }
1485
1486 if (p->type == PIPE_SHADER_FRAGMENT) {
1487 struct nv50_reg out;
1488
1489 out.type = P_TEMP;
1490 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
1491 emit_mov(pc, &out, &pc->result[out.hw]);
1492 }
1493
1494 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
1495 pc->p->exec_tail->inst[1] |= 0x00000001;
1496
1497 p->param_nr = pc->param_nr * 4;
1498 p->immd_nr = pc->immd_nr * 4;
1499 p->immd = pc->immd_buf;
1500
1501 out_err:
1502 tgsi_parse_free(&parse);
1503
1504 out_cleanup:
1505 return ret;
1506 }
1507
1508 static void
1509 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
1510 {
1511 if (nv50_program_tx(p) == FALSE)
1512 assert(0);
1513 p->translated = TRUE;
1514 }
1515
1516 static void
1517 nv50_program_upload_data(struct nv50_context *nv50, float *map,
1518 unsigned start, unsigned count)
1519 {
1520 while (count) {
1521 unsigned nr = count > 2047 ? 2047 : count;
1522
1523 BEGIN_RING(tesla, 0x00000f00, 1);
1524 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8));
1525 BEGIN_RING(tesla, 0x40000f04, nr);
1526 OUT_RINGp (map, nr);
1527
1528 map += nr;
1529 start += nr;
1530 count -= nr;
1531 }
1532 }
1533
1534 static void
1535 nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
1536 {
1537 struct nouveau_winsys *nvws = nv50->screen->nvws;
1538 struct pipe_winsys *ws = nv50->pipe.winsys;
1539 unsigned nr = p->param_nr + p->immd_nr;
1540
1541 if (!p->data && nr) {
1542 struct nouveau_resource *heap = nv50->screen->vp_data_heap;
1543
1544 if (nvws->res_alloc(heap, nr, p, &p->data)) {
1545 while (heap->next && heap->size < nr) {
1546 struct nv50_program *evict = heap->next->priv;
1547 nvws->res_free(&evict->data);
1548 }
1549
1550 if (nvws->res_alloc(heap, nr, p, &p->data))
1551 assert(0);
1552 }
1553 }
1554
1555 if (p->param_nr) {
1556 float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
1557 PIPE_BUFFER_USAGE_CPU_READ);
1558 nv50_program_upload_data(nv50, map, p->data->start,
1559 p->param_nr);
1560 ws->buffer_unmap(ws, nv50->constbuf[p->type]);
1561 }
1562
1563 if (p->immd_nr) {
1564 nv50_program_upload_data(nv50, p->immd,
1565 p->data->start + p->param_nr,
1566 p->immd_nr);
1567 }
1568 }
1569
1570 static void
1571 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
1572 {
1573 struct pipe_winsys *ws = nv50->pipe.winsys;
1574 struct nv50_program_exec *e;
1575 struct nouveau_stateobj *so;
1576 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
1577 unsigned start, count, *up, *ptr;
1578 boolean upload = FALSE;
1579
1580 if (!p->buffer) {
1581 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4);
1582 upload = TRUE;
1583 }
1584
1585 if (p->data && p->data->start != p->data_start) {
1586 for (e = p->exec_head; e; e = e->next) {
1587 unsigned ei, ci;
1588
1589 if (e->param.index < 0)
1590 continue;
1591 ei = e->param.shift >> 5;
1592 ci = e->param.index + p->data->start;
1593
1594 e->inst[ei] &= ~e->param.mask;
1595 e->inst[ei] |= (ci << e->param.shift);
1596 }
1597
1598 p->data_start = p->data->start;
1599 upload = TRUE;
1600 }
1601
1602 if (!upload)
1603 return;
1604
1605 #ifdef NV50_PROGRAM_DUMP
1606 NOUVEAU_ERR("-------\n");
1607 up = ptr = MALLOC(p->exec_size * 4);
1608 for (e = p->exec_head; e; e = e->next) {
1609 NOUVEAU_ERR("0x%08x\n", e->inst[0]);
1610 if (is_long(e))
1611 NOUVEAU_ERR("0x%08x\n", e->inst[1]);
1612 }
1613
1614 #endif
1615
1616 up = ptr = MALLOC(p->exec_size * 4);
1617 for (e = p->exec_head; e; e = e->next) {
1618 *(ptr++) = e->inst[0];
1619 if (is_long(e))
1620 *(ptr++) = e->inst[1];
1621 }
1622
1623 so = so_new(4,2);
1624 so_method(so, nv50->screen->tesla, 0x1280, 3);
1625 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
1626 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
1627 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
1628
1629 start = 0; count = p->exec_size;
1630 while (count) {
1631 struct nouveau_winsys *nvws = nv50->screen->nvws;
1632 unsigned nr;
1633
1634 so_emit(nvws, so);
1635
1636 nr = MIN2(count, 2047);
1637 nr = MIN2(nvws->channel->pushbuf->remaining, nr);
1638 if (nvws->channel->pushbuf->remaining < (nr + 3)) {
1639 FIRE_RING(NULL);
1640 continue;
1641 }
1642
1643 BEGIN_RING(tesla, 0x0f00, 1);
1644 OUT_RING ((start << 8) | NV50_CB_PUPLOAD);
1645 BEGIN_RING(tesla, 0x40000f04, nr);
1646 OUT_RINGp (up + start, nr);
1647
1648 start += nr;
1649 count -= nr;
1650 }
1651
1652 FREE(up);
1653 so_ref(NULL, &so);
1654 }
1655
1656 void
1657 nv50_vertprog_validate(struct nv50_context *nv50)
1658 {
1659 struct nouveau_grobj *tesla = nv50->screen->tesla;
1660 struct nv50_program *p = nv50->vertprog;
1661 struct nouveau_stateobj *so;
1662
1663 if (!p->translated) {
1664 nv50_program_validate(nv50, p);
1665 if (!p->translated)
1666 assert(0);
1667 }
1668
1669 nv50_program_validate_data(nv50, p);
1670 nv50_program_validate_code(nv50, p);
1671
1672 so = so_new(13, 2);
1673 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
1674 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1675 NOUVEAU_BO_HIGH, 0, 0);
1676 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1677 NOUVEAU_BO_LOW, 0, 0);
1678 so_method(so, tesla, 0x1650, 2);
1679 so_data (so, p->cfg.vp.attr[0]);
1680 so_data (so, p->cfg.vp.attr[1]);
1681 so_method(so, tesla, 0x16b8, 1);
1682 so_data (so, p->cfg.high_result);
1683 so_method(so, tesla, 0x16ac, 2);
1684 so_data (so, p->cfg.high_result); //8);
1685 so_data (so, p->cfg.high_temp);
1686 so_method(so, tesla, 0x140c, 1);
1687 so_data (so, 0); /* program start offset */
1688 so_ref(so, &nv50->state.vertprog);
1689 }
1690
1691 void
1692 nv50_fragprog_validate(struct nv50_context *nv50)
1693 {
1694 struct nouveau_grobj *tesla = nv50->screen->tesla;
1695 struct nv50_program *p = nv50->fragprog;
1696 struct nouveau_stateobj *so;
1697
1698 if (!p->translated) {
1699 nv50_program_validate(nv50, p);
1700 if (!p->translated)
1701 assert(0);
1702 }
1703
1704 nv50_program_validate_data(nv50, p);
1705 nv50_program_validate_code(nv50, p);
1706
1707 so = so_new(64, 2);
1708 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
1709 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1710 NOUVEAU_BO_HIGH, 0, 0);
1711 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
1712 NOUVEAU_BO_LOW, 0, 0);
1713 so_method(so, tesla, 0x1904, 4);
1714 so_data (so, 0x01040404); /* p: 0x01000404 */
1715 so_data (so, 0x00000004);
1716 so_data (so, 0x00000000);
1717 so_data (so, 0x00000000);
1718 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
1719 so_data (so, 0x03020100);
1720 so_data (so, 0x07060504);
1721 so_data (so, 0x0b0a0908);
1722 so_method(so, tesla, 0x1988, 2);
1723 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
1724 so_data (so, p->cfg.high_temp);
1725 so_method(so, tesla, 0x1414, 1);
1726 so_data (so, 0); /* program start offset */
1727 so_ref(so, &nv50->state.fragprog);
1728 }
1729
1730 void
1731 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
1732 {
1733 struct pipe_screen *pscreen = nv50->pipe.screen;
1734
1735 while (p->exec_head) {
1736 struct nv50_program_exec *e = p->exec_head;
1737
1738 p->exec_head = e->next;
1739 FREE(e);
1740 }
1741 p->exec_tail = NULL;
1742 p->exec_size = 0;
1743
1744 if (p->buffer)
1745 pipe_buffer_reference(pscreen, &p->buffer, NULL);
1746
1747 nv50->screen->nvws->res_free(&p->data);
1748
1749 p->translated = 0;
1750 }
1751