1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
11 #include "nv50_state.h"
13 #define NV50_SU_MAX_TEMP 64
15 /* ARL - gallium craps itself on progs/vp/arl.txt
17 * MSB - Like MAD, but MUL+SUB
18 * - Fuck it off, introduce a way to negate args for ops that
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
25 * Verify half-insns work where expected - and force disable them where they
26 * don't work - MUL has it forcibly disabled atm as it fixes POW..
28 * FUCK! watch dst==src vectors, can overwrite components that are needed.
29 * ie. SUB R0, R0.yzxw, R0
32 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
35 * Things to check with renouveau:
36 * FP attr/result assignment - how?
38 * - 0x16bc maps vp output onto fp hpos
39 * - 0x16c0 maps vp output onto fp col0
43 * 0x16bc->0x16e8 --> some binding between vp/fp regs
44 * 0x16b8 --> VP output count
46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
47 * "MOV rcol.x, fcol.y" = 0x00000004
48 * 0x19a8 --> as above but 0x00000100 and 0x00000000
49 * - 0x00100000 used when KIL used
50 * 0x196c --> as above but 0x00000011 and 0x00000000
52 * 0x1988 --> 0xXXNNNNNN
53 * - XX == FP high something
70 struct nv50_program
*p
;
73 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
76 struct nv50_reg
*temp
;
78 struct nv50_reg
*attr
;
80 struct nv50_reg
*result
;
82 struct nv50_reg
*param
;
84 struct nv50_reg
*immd
;
88 struct nv50_reg
*temp_temp
[16];
89 unsigned temp_temp_nr
;
93 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
97 if (reg
->type
!= P_TEMP
)
101 /*XXX: do this here too to catch FP temp-as-attr usage..
102 * not clean, but works */
103 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
104 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
108 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
109 if (!(pc
->r_temp
[i
])) {
112 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
113 pc
->p
->cfg
.high_temp
= i
+ 1;
121 static struct nv50_reg
*
122 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
127 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
130 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
131 if (!pc
->r_temp
[i
]) {
132 r
= CALLOC_STRUCT(nv50_reg
);
146 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
148 if (r
->index
== -1) {
149 FREE(pc
->r_temp
[r
->hw
]);
150 pc
->r_temp
[r
->hw
] = NULL
;
154 static struct nv50_reg
*
155 temp_temp(struct nv50_pc
*pc
)
157 if (pc
->temp_temp_nr
>= 16)
160 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
161 return pc
->temp_temp
[pc
->temp_temp_nr
++];
165 kill_temp_temp(struct nv50_pc
*pc
)
169 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
170 free_temp(pc
, pc
->temp_temp
[i
]);
171 pc
->temp_temp_nr
= 0;
175 ctor_immd(struct nv50_pc
*pc
, float x
, float y
, float z
, float w
)
177 pc
->immd_buf
= realloc(pc
->immd_buf
, (pc
->immd_nr
+ 1) * 4 *
179 pc
->immd_buf
[(pc
->immd_nr
* 4) + 0] = x
;
180 pc
->immd_buf
[(pc
->immd_nr
* 4) + 1] = y
;
181 pc
->immd_buf
[(pc
->immd_nr
* 4) + 2] = z
;
182 pc
->immd_buf
[(pc
->immd_nr
* 4) + 3] = w
;
184 return pc
->immd_nr
++;
187 static struct nv50_reg
*
188 alloc_immd(struct nv50_pc
*pc
, float f
)
190 struct nv50_reg
*r
= CALLOC_STRUCT(nv50_reg
);
193 hw
= ctor_immd(pc
, f
, 0, 0, 0) * 4;
201 emit(struct nv50_pc
*pc
, unsigned *inst
)
203 struct nv50_program
*p
= pc
->p
;
207 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
208 memcpy(p
->insns
+ (p
->insns_nr
- 2), inst
, sizeof(unsigned)*2);
211 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
212 memcpy(p
->insns
+ (p
->insns_nr
- 1), inst
, sizeof(unsigned));
216 static INLINE
void set_long(struct nv50_pc
*, unsigned *);
219 is_long(unsigned *inst
)
227 is_immd(unsigned *inst
)
229 if (is_long(inst
) && (inst
[1] & 3) == 3)
235 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
, unsigned *inst
)
238 inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
239 inst
[1] |= (pred
<< 7) | (idx
<< 12);
243 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
, unsigned *inst
)
246 inst
[1] &= ~((0x3 << 4) | (1 << 6));
247 inst
[1] |= (idx
<< 4) | (on
<< 6);
251 set_long(struct nv50_pc
*pc
, unsigned *inst
)
257 set_pred(pc
, 0xf, 0, inst
);
258 set_pred_wr(pc
, 0, 0, inst
);
262 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, unsigned *inst
)
264 if (dst
->type
== P_RESULT
) {
266 inst
[1] |= 0x00000008;
270 inst
[0] |= (dst
->hw
<< 2);
274 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, unsigned *inst
)
276 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
279 /*XXX: can't be predicated - bits overlap.. catch cases where both
280 * are required and avoid them. */
281 set_pred(pc
, 0, 0, inst
);
282 set_pred_wr(pc
, 0, 0, inst
);
284 inst
[1] |= 0x00000002 | 0x00000001;
285 inst
[0] |= (val
& 0x3f) << 16;
286 inst
[1] |= (val
>> 6) << 2;
290 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
291 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
293 unsigned inst
[2] = { 0, 0 };
295 inst
[0] |= 0x80000000;
296 set_dst(pc
, dst
, inst
);
298 inst
[0] |= (iv
->hw
<< 9);
300 inst
[0] |= (src
->hw
<< 16);
302 inst
[0] |= (1 << 25);
308 set_cseg(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
311 if (src
->type
== P_IMMD
) {
312 inst
[1] |= (NV50_CB_PMISC
<< 22);
314 if (pc
->p
->type
== PIPE_SHADER_VERTEX
)
315 inst
[1] |= (NV50_CB_PVP
<< 22);
317 inst
[1] |= (NV50_CB_PFP
<< 22);
322 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
324 unsigned inst
[2] = { 0, 0 };
326 inst
[0] |= 0x10000000;
328 set_dst(pc
, dst
, inst
);
330 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
331 set_immd(pc
, src
, inst
);
332 /*XXX: 32-bit, but steals part of "half" reg space - need to
333 * catch and handle this case if/when we do half-regs
335 inst
[0] |= 0x00008000;
337 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
339 set_cseg(pc
, src
, inst
);
340 inst
[0] |= (src
->hw
<< 9);
341 inst
[1] |= 0x20000000; /* src0 const? */
343 if (src
->type
== P_ATTR
) {
345 inst
[1] |= 0x00200000;
349 inst
[0] |= (src
->hw
<< 9);
352 /* We really should support "half" instructions here at some point,
353 * but I don't feel confident enough about them yet.
356 if (is_long(inst
) && !is_immd(inst
)) {
357 inst
[1] |= 0x04000000; /* 32-bit */
358 inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
365 check_swap_src_0_1(struct nv50_pc
*pc
,
366 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
368 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
370 if (src0
->type
== P_CONST
) {
371 if (src1
->type
!= P_CONST
) {
377 if (src1
->type
== P_ATTR
) {
378 if (src0
->type
!= P_ATTR
) {
389 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
391 if (src
->type
== P_ATTR
) {
393 inst
[1] |= 0x00200000;
395 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
396 struct nv50_reg
*temp
= temp_temp(pc
);
398 emit_mov(pc
, temp
, src
);
403 inst
[0] |= (src
->hw
<< 9);
407 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
409 if (src
->type
== P_ATTR
) {
410 struct nv50_reg
*temp
= temp_temp(pc
);
412 emit_mov(pc
, temp
, src
);
415 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
416 assert(!(inst
[0] & 0x00800000));
417 if (inst
[0] & 0x01000000) {
418 struct nv50_reg
*temp
= temp_temp(pc
);
420 emit_mov(pc
, temp
, src
);
423 set_cseg(pc
, src
, inst
);
424 inst
[0] |= 0x00800000;
429 inst
[0] |= (src
->hw
<< 16);
433 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
437 if (src
->type
== P_ATTR
) {
438 struct nv50_reg
*temp
= temp_temp(pc
);
440 emit_mov(pc
, temp
, src
);
443 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
444 assert(!(inst
[0] & 0x01000000));
445 if (inst
[0] & 0x00800000) {
446 struct nv50_reg
*temp
= temp_temp(pc
);
448 emit_mov(pc
, temp
, src
);
451 set_cseg(pc
, src
, inst
);
452 inst
[0] |= 0x01000000;
457 inst
[1] |= (src
->hw
<< 14);
461 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
462 struct nv50_reg
*src1
)
464 unsigned inst
[2] = { 0, 0 };
466 inst
[0] |= 0xc0000000;
469 check_swap_src_0_1(pc
, &src0
, &src1
);
470 set_dst(pc
, dst
, inst
);
471 set_src_0(pc
, src0
, inst
);
472 set_src_1(pc
, src1
, inst
);
478 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
479 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
481 unsigned inst
[2] = { 0, 0 };
483 inst
[0] |= 0xb0000000;
485 check_swap_src_0_1(pc
, &src0
, &src1
);
486 set_dst(pc
, dst
, inst
);
487 set_src_0(pc
, src0
, inst
);
489 set_src_2(pc
, src1
, inst
);
491 set_src_1(pc
, src1
, inst
);
497 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
498 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
500 unsigned inst
[2] = { 0, 0 };
503 inst
[0] |= 0xb0000000;
504 inst
[1] |= (sub
<< 29);
506 check_swap_src_0_1(pc
, &src0
, &src1
);
507 set_dst(pc
, dst
, inst
);
508 set_src_0(pc
, src0
, inst
);
509 set_src_1(pc
, src1
, inst
);
515 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
516 struct nv50_reg
*src1
)
518 unsigned inst
[2] = { 0, 0 };
520 inst
[0] |= 0xb0000000;
523 if (check_swap_src_0_1(pc
, &src0
, &src1
))
524 inst
[1] |= 0x04000000;
526 inst
[1] |= 0x08000000;
528 set_dst(pc
, dst
, inst
);
529 set_src_0(pc
, src0
, inst
);
530 set_src_2(pc
, src1
, inst
);
536 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
537 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
539 unsigned inst
[2] = { 0, 0 };
541 inst
[0] |= 0xe0000000;
543 check_swap_src_0_1(pc
, &src0
, &src1
);
544 set_dst(pc
, dst
, inst
);
545 set_src_0(pc
, src0
, inst
);
546 set_src_1(pc
, src1
, inst
);
547 set_src_2(pc
, src2
, inst
);
553 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
554 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
556 unsigned inst
[2] = { 0, 0 };
558 inst
[0] |= 0xe0000000;
560 inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
562 check_swap_src_0_1(pc
, &src0
, &src1
);
563 set_dst(pc
, dst
, inst
);
564 set_src_0(pc
, src0
, inst
);
565 set_src_1(pc
, src1
, inst
);
566 set_src_2(pc
, src2
, inst
);
572 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
573 struct nv50_reg
*dst
, struct nv50_reg
*src
)
575 unsigned inst
[2] = { 0, 0 };
577 inst
[0] |= 0x90000000;
580 inst
[1] |= (sub
<< 29);
583 set_dst(pc
, dst
, inst
);
584 set_src_0(pc
, src
, inst
);
590 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
592 unsigned inst
[2] = { 0, 0 };
594 inst
[0] |= 0xb0000000;
596 set_dst(pc
, dst
, inst
);
597 set_src_0(pc
, src
, inst
);
599 inst
[1] |= (6 << 29) | 0x00004000;
605 emit_precossin(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
607 unsigned inst
[2] = { 0, 0 };
609 inst
[0] |= 0xb0000000;
611 set_dst(pc
, dst
, inst
);
612 set_src_0(pc
, src
, inst
);
614 inst
[1] |= (6 << 29);
620 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
621 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
623 unsigned inst
[2] = { 0, 0 };
624 unsigned inv_cop
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
625 struct nv50_reg
*rdst
;
628 if (check_swap_src_0_1(pc
, &src0
, &src1
))
629 c_op
= inv_cop
[c_op
];
632 if (dst
->type
!= P_TEMP
)
633 dst
= alloc_temp(pc
, NULL
);
637 inst
[0] |= 0xb0000000;
638 inst
[1] |= (3 << 29);
639 inst
[1] |= (c_op
<< 14);
640 /*XXX: breaks things, .u32 by default?
641 * decuda will disasm as .u16 and use .lo/.hi regs, but this
642 * doesn't seem to match what the hw actually does.
643 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
645 set_dst(pc
, dst
, inst
);
646 set_src_0(pc
, src0
, inst
);
647 set_src_1(pc
, src1
, inst
);
651 inst
[0] = 0xa0000001;
652 inst
[1] = 0x64014780;
653 set_dst(pc
, rdst
, inst
);
654 set_src_0(pc
, dst
, inst
);
662 emit_flr(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
664 unsigned inst
[2] = { 0, 0 };
666 inst
[0] = 0xa0000000; /* cvt */
668 inst
[1] |= (6 << 29); /* cvt */
669 inst
[1] |= 0x08000000; /* integer mode */
670 inst
[1] |= 0x04000000; /* 32 bit */
671 inst
[1] |= ((0x1 << 3)) << 14; /* .rn */
672 inst
[1] |= (1 << 14); /* src .f32 */
673 set_dst(pc
, dst
, inst
);
674 set_src_0(pc
, src
, inst
);
680 emit_pow(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
681 struct nv50_reg
*v
, struct nv50_reg
*e
)
683 struct nv50_reg
*temp
= alloc_temp(pc
, NULL
);
685 emit_flop(pc
, 3, temp
, v
);
686 emit_mul(pc
, temp
, temp
, e
);
687 emit_preex2(pc
, temp
, temp
);
688 emit_flop(pc
, 6, dst
, temp
);
694 emit_abs(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
696 unsigned inst
[2] = { 0, 0 };
698 inst
[0] = 0xa0000000; /* cvt */
700 inst
[1] |= (6 << 29); /* cvt */
701 inst
[1] |= 0x04000000; /* 32 bit */
702 inst
[1] |= (1 << 14); /* src .f32 */
703 inst
[1] |= ((1 << 6) << 14); /* .abs */
704 set_dst(pc
, dst
, inst
);
705 set_src_0(pc
, src
, inst
);
711 emit_lit(struct nv50_pc
*pc
, struct nv50_reg
**dst
, unsigned mask
,
712 struct nv50_reg
**src
)
714 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
715 struct nv50_reg
*zero
= alloc_immd(pc
, 0.0);
716 struct nv50_reg
*neg128
= alloc_immd(pc
, -127.999999);
717 struct nv50_reg
*pos128
= alloc_immd(pc
, 127.999999);
718 struct nv50_reg
*tmp
[4];
721 emit_mov(pc
, dst
[0], one
);
724 emit_mov(pc
, dst
[3], one
);
726 if (mask
& (3 << 1)) {
730 tmp
[0] = temp_temp(pc
);
731 emit_minmax(pc
, 4, tmp
[0], src
[0], zero
);
734 if (mask
& (1 << 2)) {
735 set_pred_wr(pc
, 1, 0, &pc
->p
->insns
[pc
->p
->insns_nr
- 2]);
737 tmp
[1] = temp_temp(pc
);
738 emit_minmax(pc
, 4, tmp
[1], src
[1], zero
);
740 tmp
[3] = temp_temp(pc
);
741 emit_minmax(pc
, 4, tmp
[3], src
[3], neg128
);
742 emit_minmax(pc
, 5, tmp
[3], tmp
[3], pos128
);
744 emit_pow(pc
, dst
[2], tmp
[1], tmp
[3]);
745 emit_mov(pc
, dst
[2], zero
);
746 set_pred(pc
, 3, 0, &pc
->p
->insns
[pc
->p
->insns_nr
- 2]);
751 emit_neg(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
753 unsigned inst
[2] = { 0, 0 };
756 inst
[0] |= 0xa0000000; /* delta */
757 inst
[1] |= (7 << 29); /* delta */
758 inst
[1] |= 0x04000000; /* negate arg0? probably not */
759 inst
[1] |= (1 << 14); /* src .f32 */
760 set_dst(pc
, dst
, inst
);
761 set_src_0(pc
, src
, inst
);
766 static struct nv50_reg
*
767 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
769 switch (dst
->DstRegister
.File
) {
770 case TGSI_FILE_TEMPORARY
:
771 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
772 case TGSI_FILE_OUTPUT
:
773 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
783 static struct nv50_reg
*
784 tgsi_src(struct nv50_pc
*pc
, int chan
, const struct tgsi_full_src_register
*src
)
786 struct nv50_reg
*r
= NULL
;
787 struct nv50_reg
*temp
;
790 c
= tgsi_util_get_full_src_register_extswizzle(src
, chan
);
792 case TGSI_EXTSWIZZLE_X
:
793 case TGSI_EXTSWIZZLE_Y
:
794 case TGSI_EXTSWIZZLE_Z
:
795 case TGSI_EXTSWIZZLE_W
:
796 switch (src
->SrcRegister
.File
) {
797 case TGSI_FILE_INPUT
:
798 r
= &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
800 case TGSI_FILE_TEMPORARY
:
801 r
= &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
803 case TGSI_FILE_CONSTANT
:
804 r
= &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
806 case TGSI_FILE_IMMEDIATE
:
807 r
= &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
814 case TGSI_EXTSWIZZLE_ZERO
:
815 r
= alloc_immd(pc
, 0.0);
817 case TGSI_EXTSWIZZLE_ONE
:
818 r
= alloc_immd(pc
, 1.0);
825 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
826 case TGSI_UTIL_SIGN_KEEP
:
828 case TGSI_UTIL_SIGN_CLEAR
:
829 temp
= temp_temp(pc
);
830 emit_abs(pc
, temp
, r
);
833 case TGSI_UTIL_SIGN_TOGGLE
:
834 temp
= temp_temp(pc
);
835 emit_neg(pc
, temp
, r
);
838 case TGSI_UTIL_SIGN_SET
:
839 temp
= temp_temp(pc
);
840 emit_abs(pc
, temp
, r
);
841 emit_neg(pc
, temp
, r
);
853 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
855 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
856 struct nv50_reg
*rdst
[4], *dst
[4], *src
[3][4], *temp
;
860 NOUVEAU_ERR("insn %p\n", tok
);
862 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
863 sat
= inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
;
865 for (c
= 0; c
< 4; c
++) {
867 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
872 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
873 for (c
= 0; c
< 4; c
++)
874 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
878 for (c
= 0; c
< 4; c
++) {
880 dst
[c
] = temp_temp(pc
);
884 switch (inst
->Instruction
.Opcode
) {
885 case TGSI_OPCODE_ABS
:
886 for (c
= 0; c
< 4; c
++) {
887 if (!(mask
& (1 << c
)))
889 emit_abs(pc
, dst
[c
], src
[0][c
]);
892 case TGSI_OPCODE_ADD
:
893 for (c
= 0; c
< 4; c
++) {
894 if (!(mask
& (1 << c
)))
896 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
899 case TGSI_OPCODE_COS
:
900 temp
= alloc_temp(pc
, NULL
);
901 emit_precossin(pc
, temp
, src
[0][0]);
902 emit_flop(pc
, 5, temp
, temp
);
903 for (c
= 0; c
< 4; c
++) {
904 if (!(mask
& (1 << c
)))
906 emit_mov(pc
, dst
[c
], temp
);
909 case TGSI_OPCODE_DP3
:
910 temp
= alloc_temp(pc
, NULL
);
911 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
912 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
913 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
914 for (c
= 0; c
< 4; c
++) {
915 if (!(mask
& (1 << c
)))
917 emit_mov(pc
, dst
[c
], temp
);
921 case TGSI_OPCODE_DP4
:
922 temp
= alloc_temp(pc
, NULL
);
923 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
924 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
925 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
926 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
927 for (c
= 0; c
< 4; c
++) {
928 if (!(mask
& (1 << c
)))
930 emit_mov(pc
, dst
[c
], temp
);
934 case TGSI_OPCODE_DPH
:
935 temp
= alloc_temp(pc
, NULL
);
936 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
937 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
938 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
939 emit_add(pc
, temp
, src
[1][3], temp
);
940 for (c
= 0; c
< 4; c
++) {
941 if (!(mask
& (1 << c
)))
943 emit_mov(pc
, dst
[c
], temp
);
947 case TGSI_OPCODE_DST
:
949 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
951 emit_mov(pc
, dst
[0], one
);
953 emit_mul(pc
, dst
[1], src
[0][1], src
[1][1]);
955 emit_mov(pc
, dst
[2], src
[0][2]);
957 emit_mov(pc
, dst
[3], src
[1][3]);
961 case TGSI_OPCODE_EX2
:
962 temp
= alloc_temp(pc
, NULL
);
963 emit_preex2(pc
, temp
, src
[0][0]);
964 emit_flop(pc
, 6, temp
, temp
);
965 for (c
= 0; c
< 4; c
++) {
966 if (!(mask
& (1 << c
)))
968 emit_mov(pc
, dst
[c
], temp
);
972 case TGSI_OPCODE_FLR
:
973 for (c
= 0; c
< 4; c
++) {
974 if (!(mask
& (1 << c
)))
976 emit_flr(pc
, dst
[c
], src
[0][c
]);
979 case TGSI_OPCODE_FRC
:
980 temp
= alloc_temp(pc
, NULL
);
981 for (c
= 0; c
< 4; c
++) {
982 if (!(mask
& (1 << c
)))
984 emit_flr(pc
, temp
, src
[0][c
]);
985 emit_sub(pc
, dst
[c
], src
[0][c
], temp
);
989 case TGSI_OPCODE_LIT
:
990 emit_lit(pc
, &dst
[0], mask
, &src
[0][0]);
992 case TGSI_OPCODE_LG2
:
993 temp
= alloc_temp(pc
, NULL
);
994 emit_flop(pc
, 3, temp
, src
[0][0]);
995 for (c
= 0; c
< 4; c
++) {
996 if (!(mask
& (1 << c
)))
998 emit_mov(pc
, dst
[c
], temp
);
1001 case TGSI_OPCODE_LRP
:
1002 for (c
= 0; c
< 4; c
++) {
1003 if (!(mask
& (1 << c
)))
1005 /*XXX: we can do better than this */
1006 temp
= alloc_temp(pc
, NULL
);
1007 emit_neg(pc
, temp
, src
[0][c
]);
1008 emit_mad(pc
, temp
, temp
, src
[2][c
], src
[2][c
]);
1009 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], temp
);
1010 free_temp(pc
, temp
);
1013 case TGSI_OPCODE_MAD
:
1014 for (c
= 0; c
< 4; c
++) {
1015 if (!(mask
& (1 << c
)))
1017 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
1020 case TGSI_OPCODE_MAX
:
1021 for (c
= 0; c
< 4; c
++) {
1022 if (!(mask
& (1 << c
)))
1024 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
1027 case TGSI_OPCODE_MIN
:
1028 for (c
= 0; c
< 4; c
++) {
1029 if (!(mask
& (1 << c
)))
1031 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
1034 case TGSI_OPCODE_MOV
:
1035 for (c
= 0; c
< 4; c
++) {
1036 if (!(mask
& (1 << c
)))
1038 emit_mov(pc
, dst
[c
], src
[0][c
]);
1041 case TGSI_OPCODE_MUL
:
1042 for (c
= 0; c
< 4; c
++) {
1043 if (!(mask
& (1 << c
)))
1045 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1048 case TGSI_OPCODE_POW
:
1049 temp
= alloc_temp(pc
, NULL
);
1050 emit_pow(pc
, temp
, src
[0][0], src
[1][0]);
1051 for (c
= 0; c
< 4; c
++) {
1052 if (!(mask
& (1 << c
)))
1054 emit_mov(pc
, dst
[c
], temp
);
1056 free_temp(pc
, temp
);
1058 case TGSI_OPCODE_RCP
:
1059 for (c
= 0; c
< 4; c
++) {
1060 if (!(mask
& (1 << c
)))
1062 emit_flop(pc
, 0, dst
[c
], src
[0][0]);
1065 case TGSI_OPCODE_RSQ
:
1066 for (c
= 0; c
< 4; c
++) {
1067 if (!(mask
& (1 << c
)))
1069 emit_flop(pc
, 2, dst
[c
], src
[0][0]);
1072 case TGSI_OPCODE_SCS
:
1073 temp
= alloc_temp(pc
, NULL
);
1074 emit_precossin(pc
, temp
, src
[0][0]);
1075 if (mask
& (1 << 0))
1076 emit_flop(pc
, 5, dst
[0], temp
);
1077 if (mask
& (1 << 1))
1078 emit_flop(pc
, 4, dst
[1], temp
);
1080 case TGSI_OPCODE_SGE
:
1081 for (c
= 0; c
< 4; c
++) {
1082 if (!(mask
& (1 << c
)))
1084 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
1087 case TGSI_OPCODE_SIN
:
1088 temp
= alloc_temp(pc
, NULL
);
1089 emit_precossin(pc
, temp
, src
[0][0]);
1090 emit_flop(pc
, 4, temp
, temp
);
1091 for (c
= 0; c
< 4; c
++) {
1092 if (!(mask
& (1 << c
)))
1094 emit_mov(pc
, dst
[c
], temp
);
1097 case TGSI_OPCODE_SLT
:
1098 for (c
= 0; c
< 4; c
++) {
1099 if (!(mask
& (1 << c
)))
1101 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
1104 case TGSI_OPCODE_SUB
:
1105 for (c
= 0; c
< 4; c
++) {
1106 if (!(mask
& (1 << c
)))
1108 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1111 case TGSI_OPCODE_XPD
:
1112 temp
= alloc_temp(pc
, NULL
);
1113 if (mask
& (1 << 0)) {
1114 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
1115 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
1117 if (mask
& (1 << 1)) {
1118 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
1119 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
1121 if (mask
& (1 << 2)) {
1122 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
1123 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
1125 free_temp(pc
, temp
);
1127 case TGSI_OPCODE_END
:
1130 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
1135 for (c
= 0; c
< 4; c
++) {
1136 unsigned inst
[2] = { 0, 0 };
1138 if (!(mask
& (1 << c
)))
1141 inst
[0] = 0xa0000000; /* cvt */
1143 inst
[1] |= (6 << 29); /* cvt */
1144 inst
[1] |= 0x04000000; /* 32 bit */
1145 inst
[1] |= (1 << 14); /* src .f32 */
1146 inst
[1] |= ((1 << 5) << 14); /* .sat */
1147 set_dst(pc
, rdst
[c
], inst
);
1148 set_src_0(pc
, dst
[c
], inst
);
1158 nv50_program_tx_prep(struct nv50_pc
*pc
)
1160 struct tgsi_parse_context p
;
1161 boolean ret
= FALSE
;
1164 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
1165 while (!tgsi_parse_end_of_tokens(&p
)) {
1166 const union tgsi_full_token
*tok
= &p
.FullToken
;
1168 tgsi_parse_token(&p
);
1169 switch (tok
->Token
.Type
) {
1170 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1172 const struct tgsi_full_immediate
*imm
=
1173 &p
.FullToken
.FullImmediate
;
1175 ctor_immd(pc
, imm
->u
.ImmediateFloat32
[0].Float
,
1176 imm
->u
.ImmediateFloat32
[1].Float
,
1177 imm
->u
.ImmediateFloat32
[2].Float
,
1178 imm
->u
.ImmediateFloat32
[3].Float
);
1181 case TGSI_TOKEN_TYPE_DECLARATION
:
1183 const struct tgsi_full_declaration
*d
;
1186 d
= &p
.FullToken
.FullDeclaration
;
1187 last
= d
->u
.DeclarationRange
.Last
;
1189 switch (d
->Declaration
.File
) {
1190 case TGSI_FILE_TEMPORARY
:
1191 if (pc
->temp_nr
< (last
+ 1))
1192 pc
->temp_nr
= last
+ 1;
1194 case TGSI_FILE_OUTPUT
:
1195 if (pc
->result_nr
< (last
+ 1))
1196 pc
->result_nr
= last
+ 1;
1198 case TGSI_FILE_INPUT
:
1199 if (pc
->attr_nr
< (last
+ 1))
1200 pc
->attr_nr
= last
+ 1;
1202 case TGSI_FILE_CONSTANT
:
1203 if (pc
->param_nr
< (last
+ 1))
1204 pc
->param_nr
= last
+ 1;
1207 NOUVEAU_ERR("bad decl file %d\n",
1208 d
->Declaration
.File
);
1213 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1220 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
1222 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
1226 for (i
= 0; i
< pc
->temp_nr
; i
++) {
1227 for (c
= 0; c
< 4; c
++) {
1228 pc
->temp
[i
*4+c
].type
= P_TEMP
;
1229 pc
->temp
[i
*4+c
].hw
= -1;
1230 pc
->temp
[i
*4+c
].index
= i
;
1235 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
1237 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
1240 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
1244 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1245 iv
= alloc_temp(pc
, NULL
);
1249 for (i
= 0; i
< pc
->attr_nr
; i
++) {
1250 struct nv50_reg
*a
= &pc
->attr
[i
*4];
1252 for (c
= 0; c
< 4; c
++) {
1253 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1254 struct nv50_reg
*at
=
1255 alloc_temp(pc
, NULL
);
1256 pc
->attr
[i
*4+c
].type
= at
->type
;
1257 pc
->attr
[i
*4+c
].hw
= at
->hw
;
1258 pc
->attr
[i
*4+c
].index
= at
->index
;
1260 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
1262 pc
->attr
[i
*4+c
].type
= P_ATTR
;
1263 pc
->attr
[i
*4+c
].hw
= aid
++;
1264 pc
->attr
[i
*4+c
].index
= i
;
1268 if (pc
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1271 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
1272 tmp
= alloc_temp(pc
, NULL
);
1273 emit_flop(pc
, 0, tmp
, iv
);
1274 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
1275 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
1276 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
1277 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
1285 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
1286 if (pc
->result_nr
) {
1289 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
1293 for (i
= 0; i
< pc
->result_nr
; i
++) {
1294 for (c
= 0; c
< 4; c
++) {
1295 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1296 pc
->result
[i
*4+c
].type
= P_TEMP
;
1297 pc
->result
[i
*4+c
].hw
= -1;
1299 pc
->result
[i
*4+c
].type
= P_RESULT
;
1300 pc
->result
[i
*4+c
].hw
= rid
++;
1302 pc
->result
[i
*4+c
].index
= i
;
1307 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
1311 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
1315 for (i
= 0; i
< pc
->param_nr
; i
++) {
1316 for (c
= 0; c
< 4; c
++) {
1317 pc
->param
[i
*4+c
].type
= P_CONST
;
1318 pc
->param
[i
*4+c
].hw
= rid
++;
1319 pc
->param
[i
*4+c
].index
= i
;
1327 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1331 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1332 for (c
= 0; c
< 4; c
++) {
1333 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1334 pc
->immd
[i
*4+c
].hw
= rid
++;
1335 pc
->immd
[i
*4+c
].index
= i
;
1342 tgsi_parse_free(&p
);
1347 nv50_program_tx(struct nv50_program
*p
)
1349 struct tgsi_parse_context parse
;
1353 pc
= CALLOC_STRUCT(nv50_pc
);
1357 pc
->p
->cfg
.high_temp
= 4;
1359 ret
= nv50_program_tx_prep(pc
);
1363 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1364 while (!tgsi_parse_end_of_tokens(&parse
)) {
1365 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1367 tgsi_parse_token(&parse
);
1369 switch (tok
->Token
.Type
) {
1370 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1371 ret
= nv50_program_tx_insn(pc
, tok
);
1380 if (p
->type
== PIPE_SHADER_FRAGMENT
) {
1381 struct nv50_reg out
;
1384 for (out
.hw
= 0; out
.hw
< pc
->result_nr
* 4; out
.hw
++)
1385 emit_mov(pc
, &out
, &pc
->result
[out
.hw
]);
1388 p
->immd_nr
= pc
->immd_nr
* 4;
1389 p
->immd
= pc
->immd_buf
;
1392 tgsi_parse_free(&parse
);
1399 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1403 if (nv50_program_tx(p
) == FALSE
)
1405 /* *not* sufficient, it's fine if last inst is long and
1406 * NOT immd - otherwise it's fucked fucked fucked */
1407 p
->insns
[p
->insns_nr
- 1] |= 0x00000001;
1409 if (p
->type
== PIPE_SHADER_VERTEX
) {
1410 for (i
= 0; i
< p
->insns_nr
; i
++)
1411 NOUVEAU_ERR("VP0x%08x\n", p
->insns
[i
]);
1413 for (i
= 0; i
< p
->insns_nr
; i
++)
1414 NOUVEAU_ERR("FP0x%08x\n", p
->insns
[i
]);
1417 p
->translated
= TRUE
;
1421 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1425 for (i
= 0; i
< p
->immd_nr
; i
++) {
1426 BEGIN_RING(tesla
, 0x0f00, 2);
1427 OUT_RING ((NV50_CB_PMISC
<< 0) | (i
<< 8));
1428 OUT_RING (fui(p
->immd
[i
]));
1433 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1435 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1439 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->insns_nr
* 4);
1440 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
1441 memcpy(map
, p
->insns
, p
->insns_nr
* 4);
1442 ws
->buffer_unmap(ws
, p
->buffer
);
1446 nv50_vertprog_validate(struct nv50_context
*nv50
)
1448 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1449 struct nv50_program
*p
= nv50
->vertprog
;
1450 struct nouveau_stateobj
*so
;
1452 if (!p
->translated
) {
1453 nv50_program_validate(nv50
, p
);
1458 nv50_program_validate_data(nv50
, p
);
1459 nv50_program_validate_code(nv50
, p
);
1462 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1463 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1464 NOUVEAU_BO_HIGH
, 0, 0);
1465 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1466 NOUVEAU_BO_LOW
, 0, 0);
1467 so_method(so
, tesla
, 0x1650, 2);
1468 so_data (so
, p
->cfg
.vp
.attr
[0]);
1469 so_data (so
, p
->cfg
.vp
.attr
[1]);
1470 so_method(so
, tesla
, 0x16ac, 2);
1472 so_data (so
, p
->cfg
.high_temp
);
1473 so_method(so
, tesla
, 0x140c, 1);
1474 so_data (so
, 0); /* program start offset */
1475 so_emit(nv50
->screen
->nvws
, so
);
1480 nv50_fragprog_validate(struct nv50_context
*nv50
)
1482 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1483 struct nv50_program
*p
= nv50
->fragprog
;
1484 struct nouveau_stateobj
*so
;
1486 if (!p
->translated
) {
1487 nv50_program_validate(nv50
, p
);
1492 nv50_program_validate_data(nv50
, p
);
1493 nv50_program_validate_code(nv50
, p
);
1496 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1497 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1498 NOUVEAU_BO_HIGH
, 0, 0);
1499 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1500 NOUVEAU_BO_LOW
, 0, 0);
1501 so_method(so
, tesla
, 0x1904, 4);
1502 so_data (so
, 0x01040404); /* p: 0x01000404 */
1503 so_data (so
, 0x00000004);
1504 so_data (so
, 0x00000000);
1505 so_data (so
, 0x00000000);
1506 so_method(so
, tesla
, 0x16bc, 2);
1507 so_data (so
, 0x03020100);
1508 so_data (so
, 0x07060504);
1509 so_method(so
, tesla
, 0x1988, 2);
1510 so_data (so
, 0x08040404); /* p: 0x0f000401 */
1511 so_data (so
, p
->cfg
.high_temp
);
1512 so_method(so
, tesla
, 0x16ac, 2);
1513 so_data (so
, 0x00000008); /* p: 0x00000004 */
1514 so_data (so
, 0x00000004);
1515 so_method(so
, tesla
, 0x1414, 1);
1516 so_data (so
, 0); /* program start offset */
1517 so_emit(nv50
->screen
->nvws
, so
);
1522 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1524 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1533 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);