1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
11 #include "nv50_state.h"
13 #define NV50_SU_MAX_TEMP 64
18 * MSB - Like MAD, but MUL+SUB
19 * - Fuck it off, introduce a way to negate args for ops that
22 * Look into inlining IMMD for ops other than MOV (make it general?)
23 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
24 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
26 * Verify half-insns work where expected - and force disable them where they
27 * don't work - MUL has it forcibly disabled atm as it fixes POW..
44 struct nv50_program
*p
;
47 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
50 struct nv50_reg
*temp
;
52 struct nv50_reg
*attr
;
54 struct nv50_reg
*result
;
56 struct nv50_reg
*param
;
58 struct nv50_reg
*immd
;
62 struct nv50_reg
*temp_temp
[8];
63 unsigned temp_temp_nr
;
67 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
71 if (reg
->type
!= P_TEMP
)
75 /*XXX: do this here too to catch FP temp-as-attr usage..
76 * not clean, but works */
77 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
78 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
82 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
83 if (!(pc
->r_temp
[i
])) {
86 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
87 pc
->p
->cfg
.high_temp
= i
+ 1;
95 static struct nv50_reg
*
96 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
101 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
104 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
105 if (!pc
->r_temp
[i
]) {
106 r
= CALLOC_STRUCT(nv50_reg
);
120 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
122 if (r
->index
== -1) {
123 FREE(pc
->r_temp
[r
->hw
]);
124 pc
->r_temp
[r
->hw
] = NULL
;
128 static struct nv50_reg
*
129 temp_temp(struct nv50_pc
*pc
)
131 if (pc
->temp_temp_nr
>= 8)
134 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
135 return pc
->temp_temp
[pc
->temp_temp_nr
++];
139 kill_temp_temp(struct nv50_pc
*pc
)
143 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
144 free_temp(pc
, pc
->temp_temp
[i
]);
145 pc
->temp_temp_nr
= 0;
149 ctor_immd(struct nv50_pc
*pc
, float x
, float y
, float z
, float w
)
151 pc
->immd_buf
= realloc(pc
->immd_buf
, (pc
->immd_nr
+ 1) * 4 *
153 pc
->immd_buf
[(pc
->immd_nr
* 4) + 0] = x
;
154 pc
->immd_buf
[(pc
->immd_nr
* 4) + 1] = y
;
155 pc
->immd_buf
[(pc
->immd_nr
* 4) + 2] = z
;
156 pc
->immd_buf
[(pc
->immd_nr
* 4) + 3] = w
;
158 return pc
->immd_nr
++;
161 static struct nv50_reg
*
162 alloc_immd(struct nv50_pc
*pc
, float f
)
164 struct nv50_reg
*r
= CALLOC_STRUCT(nv50_reg
);
167 hw
= ctor_immd(pc
, f
, 0, 0, 0);
174 static struct nv50_reg
*
175 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
177 switch (dst
->DstRegister
.File
) {
178 case TGSI_FILE_TEMPORARY
:
179 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
180 case TGSI_FILE_OUTPUT
:
181 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
191 static struct nv50_reg
*
192 tgsi_src(struct nv50_pc
*pc
, int chan
, const struct tgsi_full_src_register
*src
)
194 struct nv50_reg
*r
= NULL
;
197 c
= tgsi_util_get_full_src_register_extswizzle(src
, chan
);
199 case TGSI_EXTSWIZZLE_X
:
200 case TGSI_EXTSWIZZLE_Y
:
201 case TGSI_EXTSWIZZLE_Z
:
202 case TGSI_EXTSWIZZLE_W
:
203 switch (src
->SrcRegister
.File
) {
204 case TGSI_FILE_INPUT
:
205 r
= &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
207 case TGSI_FILE_TEMPORARY
:
208 r
= &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
210 case TGSI_FILE_CONSTANT
:
211 r
= &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
213 case TGSI_FILE_IMMEDIATE
:
214 r
= &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
221 case TGSI_EXTSWIZZLE_ZERO
:
222 r
= alloc_immd(pc
, 0.0);
224 case TGSI_EXTSWIZZLE_ONE
:
225 r
= alloc_immd(pc
, 1.0);
232 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
233 case TGSI_UTIL_SIGN_KEEP
:
244 emit(struct nv50_pc
*pc
, unsigned *inst
)
246 struct nv50_program
*p
= pc
->p
;
250 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
251 memcpy(p
->insns
+ (p
->insns_nr
- 2), inst
, sizeof(unsigned)*2);
254 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
255 memcpy(p
->insns
+ (p
->insns_nr
- 1), inst
, sizeof(unsigned));
259 static INLINE
void set_long(struct nv50_pc
*, unsigned *);
262 is_long(unsigned *inst
)
270 is_immd(unsigned *inst
)
272 if (is_long(inst
) && (inst
[1] & 3) == 3)
278 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
, unsigned *inst
)
281 inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
282 inst
[1] |= (pred
<< 7) | (idx
<< 12);
286 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
, unsigned *inst
)
289 inst
[1] &= ~((0x3 << 4) | (1 << 6));
290 inst
[1] |= (idx
<< 4) | (on
<< 6);
294 set_long(struct nv50_pc
*pc
, unsigned *inst
)
300 set_pred(pc
, 0xf, 0, inst
);
301 set_pred_wr(pc
, 0, 0, inst
);
305 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, unsigned *inst
)
307 if (dst
->type
== P_RESULT
) {
309 inst
[1] |= 0x00000008;
313 inst
[0] |= (dst
->hw
<< 2);
317 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, unsigned *inst
)
319 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
322 /*XXX: can't be predicated - bits overlap.. catch cases where both
323 * are required and avoid them. */
324 set_pred(pc
, 0, 0, inst
);
325 set_pred_wr(pc
, 0, 0, inst
);
327 inst
[1] |= 0x00000002 | 0x00000001;
328 inst
[0] |= (val
& 0x3f) << 16;
329 inst
[1] |= (val
>> 6) << 2;
333 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
334 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
336 unsigned inst
[2] = { 0, 0 };
338 inst
[0] |= 0x80000000;
339 set_dst(pc
, dst
, inst
);
341 inst
[0] |= (iv
->hw
<< 9);
343 inst
[0] |= (src
->hw
<< 16);
345 inst
[0] |= (1 << 25);
351 set_cseg(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
354 if (src
->type
== P_IMMD
) {
355 inst
[1] |= (NV50_CB_PMISC
<< 22);
357 if (pc
->p
->type
== NV50_PROG_VERTEX
)
358 inst
[1] |= (NV50_CB_PVP
<< 22);
360 inst
[1] |= (NV50_CB_PFP
<< 22);
365 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
367 unsigned inst
[2] = { 0, 0 };
369 inst
[0] |= 0x10000000;
371 set_dst(pc
, dst
, inst
);
373 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
374 set_immd(pc
, src
, inst
);
375 /*XXX: 32-bit, but steals part of "half" reg space - need to
376 * catch and handle this case if/when we do half-regs
378 inst
[0] |= 0x00008000;
380 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
382 set_cseg(pc
, src
, inst
);
383 inst
[0] |= (src
->hw
<< 9);
384 inst
[1] |= 0x20000000; /* src0 const? */
386 if (src
->type
== P_ATTR
) {
388 inst
[1] |= 0x00200000;
392 inst
[0] |= (src
->hw
<< 9);
395 /* We really should support "half" instructions here at some point,
396 * but I don't feel confident enough about them yet.
399 if (is_long(inst
) && !is_immd(inst
)) {
400 inst
[1] |= 0x04000000; /* 32-bit */
401 inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
408 check_swap_src_0_1(struct nv50_pc
*pc
,
409 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
411 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
413 if (src0
->type
== P_CONST
) {
414 if (src1
->type
!= P_CONST
) {
420 if (src1
->type
== P_ATTR
) {
421 if (src0
->type
!= P_ATTR
) {
432 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
434 if (src
->type
== P_ATTR
) {
436 inst
[1] |= 0x00200000;
438 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
439 struct nv50_reg
*temp
= temp_temp(pc
);
441 emit_mov(pc
, temp
, src
);
446 inst
[0] |= (src
->hw
<< 9);
450 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
452 if (src
->type
== P_ATTR
) {
453 struct nv50_reg
*temp
= temp_temp(pc
);
455 emit_mov(pc
, temp
, src
);
458 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
459 set_cseg(pc
, src
, inst
);
460 inst
[0] |= 0x00800000;
464 inst
[0] |= (src
->hw
<< 16);
468 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
472 if (src
->type
== P_ATTR
) {
473 struct nv50_reg
*temp
= temp_temp(pc
);
475 emit_mov(pc
, temp
, src
);
478 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
479 set_cseg(pc
, src
, inst
);
480 inst
[0] |= 0x01000000;
484 inst
[1] |= (src
->hw
<< 14);
488 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
489 struct nv50_reg
*src1
)
491 unsigned inst
[2] = { 0, 0 };
493 inst
[0] |= 0xc0000000;
496 check_swap_src_0_1(pc
, &src0
, &src1
);
497 set_dst(pc
, dst
, inst
);
498 set_src_0(pc
, src0
, inst
);
499 set_src_1(pc
, src1
, inst
);
505 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
506 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
508 unsigned inst
[2] = { 0, 0 };
510 inst
[0] |= 0xb0000000;
512 check_swap_src_0_1(pc
, &src0
, &src1
);
513 set_dst(pc
, dst
, inst
);
514 set_src_0(pc
, src0
, inst
);
516 set_src_2(pc
, src1
, inst
);
518 set_src_1(pc
, src1
, inst
);
524 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
525 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
527 unsigned inst
[2] = { 0, 0 };
530 inst
[0] |= 0xb0000000;
531 inst
[1] |= (sub
<< 29);
533 check_swap_src_0_1(pc
, &src0
, &src1
);
534 set_dst(pc
, dst
, inst
);
535 set_src_0(pc
, src0
, inst
);
536 set_src_1(pc
, src1
, inst
);
542 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
543 struct nv50_reg
*src1
)
545 unsigned inst
[2] = { 0, 0 };
547 inst
[0] |= 0xb0000000;
550 if (check_swap_src_0_1(pc
, &src0
, &src1
))
551 inst
[1] |= 0x04000000;
553 inst
[1] |= 0x08000000;
555 set_dst(pc
, dst
, inst
);
556 set_src_0(pc
, src0
, inst
);
557 set_src_2(pc
, src1
, inst
);
563 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
564 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
566 unsigned inst
[2] = { 0, 0 };
568 inst
[0] |= 0xe0000000;
570 check_swap_src_0_1(pc
, &src0
, &src1
);
571 set_dst(pc
, dst
, inst
);
572 set_src_0(pc
, src0
, inst
);
573 set_src_1(pc
, src1
, inst
);
574 set_src_2(pc
, src2
, inst
);
580 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
581 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
583 unsigned inst
[2] = { 0, 0 };
585 inst
[0] |= 0xe0000000;
587 inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
589 check_swap_src_0_1(pc
, &src0
, &src1
);
590 set_dst(pc
, dst
, inst
);
591 set_src_0(pc
, src0
, inst
);
592 set_src_1(pc
, src1
, inst
);
593 set_src_2(pc
, src2
, inst
);
599 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
600 struct nv50_reg
*dst
, struct nv50_reg
*src
)
602 unsigned inst
[2] = { 0, 0 };
604 inst
[0] |= 0x90000000;
607 inst
[1] |= (sub
<< 29);
610 set_dst(pc
, dst
, inst
);
611 set_src_0(pc
, src
, inst
);
617 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
619 unsigned inst
[2] = { 0, 0 };
621 inst
[0] |= 0xb0000000;
623 set_dst(pc
, dst
, inst
);
624 set_src_0(pc
, src
, inst
);
626 inst
[1] |= (6 << 29) | 0x00004000;
631 /*XXX: inaccurate results.. why? */
632 #define ALLOW_SET_SWAP 0
635 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
636 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
638 unsigned inst
[2] = { 0, 0 };
640 unsigned inv_cop
[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
642 struct nv50_reg
*rdst
;
646 if (check_swap_src_0_1(pc
, &src0
, &src1
))
647 c_op
= inv_cop
[c_op
];
651 if (dst
->type
!= P_TEMP
)
652 dst
= alloc_temp(pc
, NULL
);
656 inst
[0] |= 0xb0000000;
657 inst
[1] |= (3 << 29);
658 inst
[1] |= (c_op
<< 14);
659 /*XXX: breaks things, .u32 by default?
660 * decuda will disasm as .u16 and use .lo/.hi regs, but this
661 * doesn't seem to match what the hw actually does.
662 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
664 set_dst(pc
, dst
, inst
);
665 set_src_0(pc
, src0
, inst
);
666 set_src_1(pc
, src1
, inst
);
670 inst
[0] = 0xa0000001;
671 inst
[1] = 0x64014780;
672 set_dst(pc
, rdst
, inst
);
673 set_src_0(pc
, dst
, inst
);
681 emit_flr(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
683 unsigned inst
[2] = { 0, 0 };
685 inst
[0] = 0xa0000000; /* cvt */
687 inst
[1] |= (6 << 29); /* cvt */
688 inst
[1] |= 0x08000000; /* integer mode */
689 inst
[1] |= 0x04000000; /* 32 bit */
690 inst
[1] |= ((0x1 << 3)) << 14; /* .rn */
691 inst
[1] |= (1 << 14); /* src .f32 */
692 set_dst(pc
, dst
, inst
);
693 set_src_0(pc
, src
, inst
);
699 emit_pow(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
700 struct nv50_reg
*v
, struct nv50_reg
*e
)
702 struct nv50_reg
*temp
= alloc_temp(pc
, NULL
);
704 emit_flop(pc
, 3, temp
, v
);
705 emit_mul(pc
, temp
, temp
, e
);
706 emit_preex2(pc
, temp
, temp
);
707 emit_flop(pc
, 6, dst
, temp
);
713 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
715 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
716 struct nv50_reg
*rdst
[4], *dst
[4], *src
[3][4], *temp
;
720 NOUVEAU_ERR("insn %p\n", tok
);
722 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
723 sat
= inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
;
725 for (c
= 0; c
< 4; c
++) {
727 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
732 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
733 for (c
= 0; c
< 4; c
++)
734 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
738 for (c
= 0; c
< 4; c
++) {
740 dst
[c
] = temp_temp(pc
);
744 switch (inst
->Instruction
.Opcode
) {
745 case TGSI_OPCODE_ABS
:
746 for (c
= 0; c
< 4; c
++) {
747 unsigned inst
[2] = { 0, 0 };
749 inst
[0] = 0xa0000000; /* cvt */
751 inst
[1] |= (6 << 29); /* cvt */
752 inst
[1] |= 0x04000000; /* 32 bit */
753 inst
[1] |= (1 << 14); /* src .f32 */
754 inst
[1] |= ((1 << 6) << 14); /* .abs */
755 set_dst(pc
, dst
[c
], inst
);
756 set_src_0(pc
, src
[0][c
], inst
);
760 case TGSI_OPCODE_ADD
:
761 for (c
= 0; c
< 4; c
++) {
762 if (!(mask
& (1 << c
)))
764 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
767 case TGSI_OPCODE_COS
:
768 for (c
= 0; c
< 4; c
++) {
769 if (!(mask
& (1 << c
)))
771 emit_flop(pc
, 5, dst
[c
], src
[0][c
]);
774 case TGSI_OPCODE_DP3
:
775 temp
= alloc_temp(pc
, NULL
);
776 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
777 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
778 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
779 for (c
= 0; c
< 4; c
++) {
780 if (!(mask
& (1 << c
)))
782 emit_mov(pc
, dst
[c
], temp
);
786 case TGSI_OPCODE_DP4
:
787 temp
= alloc_temp(pc
, NULL
);
788 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
789 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
790 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
791 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
792 for (c
= 0; c
< 4; c
++) {
793 if (!(mask
& (1 << c
)))
795 emit_mov(pc
, dst
[c
], temp
);
799 case TGSI_OPCODE_DPH
:
800 temp
= alloc_temp(pc
, NULL
);
801 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
802 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
803 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
804 emit_add(pc
, temp
, src
[1][3], temp
);
805 for (c
= 0; c
< 4; c
++) {
806 if (!(mask
& (1 << c
)))
808 emit_mov(pc
, dst
[c
], temp
);
812 case TGSI_OPCODE_DST
:
814 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
815 emit_mov(pc
, dst
[0], one
);
816 emit_mul(pc
, dst
[1], src
[0][1], src
[1][1]);
817 emit_mov(pc
, dst
[2], src
[0][2]);
818 emit_mov(pc
, dst
[3], src
[1][3]);
822 case TGSI_OPCODE_EX2
:
823 temp
= alloc_temp(pc
, NULL
);
824 for (c
= 0; c
< 4; c
++) {
825 if (!(mask
& (1 << c
)))
827 emit_preex2(pc
, temp
, src
[0][c
]);
828 emit_flop(pc
, 6, dst
[c
], temp
);
832 case TGSI_OPCODE_FLR
:
833 for (c
= 0; c
< 4; c
++) {
834 if (!(mask
& (1 << c
)))
836 emit_flr(pc
, dst
[c
], src
[0][c
]);
839 case TGSI_OPCODE_FRC
:
840 temp
= alloc_temp(pc
, NULL
);
841 for (c
= 0; c
< 4; c
++) {
842 if (!(mask
& (1 << c
)))
844 emit_flr(pc
, temp
, src
[0][c
]);
845 emit_sub(pc
, dst
[c
], src
[0][c
], temp
);
849 case TGSI_OPCODE_LG2
:
850 for (c
= 0; c
< 4; c
++) {
851 if (!(mask
& (1 << c
)))
853 emit_flop(pc
, 3, dst
[c
], src
[0][c
]);
856 case TGSI_OPCODE_MAD
:
857 for (c
= 0; c
< 4; c
++) {
858 if (!(mask
& (1 << c
)))
860 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
863 case TGSI_OPCODE_MAX
:
864 for (c
= 0; c
< 4; c
++) {
865 if (!(mask
& (1 << c
)))
867 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
870 case TGSI_OPCODE_MIN
:
871 for (c
= 0; c
< 4; c
++) {
872 if (!(mask
& (1 << c
)))
874 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
877 case TGSI_OPCODE_MOV
:
878 for (c
= 0; c
< 4; c
++) {
879 if (!(mask
& (1 << c
)))
881 emit_mov(pc
, dst
[c
], src
[0][c
]);
884 case TGSI_OPCODE_MUL
:
885 for (c
= 0; c
< 4; c
++) {
886 if (!(mask
& (1 << c
)))
888 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
891 case TGSI_OPCODE_POW
:
892 temp
= alloc_temp(pc
, NULL
);
893 emit_pow(pc
, temp
, src
[0][0], src
[1][0]);
894 for (c
= 0; c
< 4; c
++) {
895 if (!(mask
& (1 << c
)))
897 emit_mov(pc
, dst
[c
], temp
);
901 case TGSI_OPCODE_RCP
:
902 for (c
= 0; c
< 4; c
++) {
903 if (!(mask
& (1 << c
)))
905 emit_flop(pc
, 0, dst
[c
], src
[0][c
]);
908 case TGSI_OPCODE_RSQ
:
909 for (c
= 0; c
< 4; c
++) {
910 if (!(mask
& (1 << c
)))
912 emit_flop(pc
, 2, dst
[c
], src
[0][c
]);
915 case TGSI_OPCODE_SGE
:
916 for (c
= 0; c
< 4; c
++) {
917 if (!(mask
& (1 << c
)))
919 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
922 case TGSI_OPCODE_SIN
:
923 for (c
= 0; c
< 4; c
++) {
924 if (!(mask
& (1 << c
)))
926 emit_flop(pc
, 4, dst
[c
], src
[0][c
]);
929 case TGSI_OPCODE_SLT
:
930 for (c
= 0; c
< 4; c
++) {
931 if (!(mask
& (1 << c
)))
933 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
936 case TGSI_OPCODE_SUB
:
937 for (c
= 0; c
< 4; c
++) {
938 if (!(mask
& (1 << c
)))
940 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
943 case TGSI_OPCODE_XPD
:
944 temp
= alloc_temp(pc
, NULL
);
945 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
946 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
947 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
948 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
949 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
950 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
953 case TGSI_OPCODE_END
:
956 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
961 for (c
= 0; c
< 4; c
++) {
962 unsigned inst
[2] = { 0, 0 };
964 if (!(mask
& (1 << c
)))
967 inst
[0] = 0xa0000000; /* cvt */
969 inst
[1] |= (6 << 29); /* cvt */
970 inst
[1] |= 0x04000000; /* 32 bit */
971 inst
[1] |= (1 << 14); /* src .f32 */
972 inst
[1] |= ((1 << 5) << 14); /* .sat */
973 set_dst(pc
, rdst
[c
], inst
);
974 set_src_0(pc
, dst
[c
], inst
);
984 nv50_program_tx_prep(struct nv50_pc
*pc
)
986 struct tgsi_parse_context p
;
990 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
991 while (!tgsi_parse_end_of_tokens(&p
)) {
992 const union tgsi_full_token
*tok
= &p
.FullToken
;
994 tgsi_parse_token(&p
);
995 switch (tok
->Token
.Type
) {
996 case TGSI_TOKEN_TYPE_IMMEDIATE
:
998 const struct tgsi_full_immediate
*imm
=
999 &p
.FullToken
.FullImmediate
;
1001 ctor_immd(pc
, imm
->u
.ImmediateFloat32
[0].Float
,
1002 imm
->u
.ImmediateFloat32
[1].Float
,
1003 imm
->u
.ImmediateFloat32
[2].Float
,
1004 imm
->u
.ImmediateFloat32
[3].Float
);
1007 case TGSI_TOKEN_TYPE_DECLARATION
:
1009 const struct tgsi_full_declaration
*d
;
1012 d
= &p
.FullToken
.FullDeclaration
;
1013 last
= d
->u
.DeclarationRange
.Last
;
1015 switch (d
->Declaration
.File
) {
1016 case TGSI_FILE_TEMPORARY
:
1017 if (pc
->temp_nr
< (last
+ 1))
1018 pc
->temp_nr
= last
+ 1;
1020 case TGSI_FILE_OUTPUT
:
1021 if (pc
->result_nr
< (last
+ 1))
1022 pc
->result_nr
= last
+ 1;
1024 case TGSI_FILE_INPUT
:
1025 if (pc
->attr_nr
< (last
+ 1))
1026 pc
->attr_nr
= last
+ 1;
1028 case TGSI_FILE_CONSTANT
:
1029 if (pc
->param_nr
< (last
+ 1))
1030 pc
->param_nr
= last
+ 1;
1033 NOUVEAU_ERR("bad decl file %d\n",
1034 d
->Declaration
.File
);
1039 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1046 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
1048 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
1052 for (i
= 0; i
< pc
->temp_nr
; i
++) {
1053 for (c
= 0; c
< 4; c
++) {
1054 pc
->temp
[i
*4+c
].type
= P_TEMP
;
1055 pc
->temp
[i
*4+c
].hw
= -1;
1056 pc
->temp
[i
*4+c
].index
= i
;
1061 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
1063 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
1066 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
1070 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
1071 iv
= alloc_temp(pc
, NULL
);
1075 for (i
= 0; i
< pc
->attr_nr
; i
++) {
1076 struct nv50_reg
*a
= &pc
->attr
[i
*4];
1078 for (c
= 0; c
< 4; c
++) {
1079 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
1080 struct nv50_reg
*at
=
1081 alloc_temp(pc
, NULL
);
1082 pc
->attr
[i
*4+c
].type
= at
->type
;
1083 pc
->attr
[i
*4+c
].hw
= at
->hw
;
1084 pc
->attr
[i
*4+c
].index
= at
->index
;
1086 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
1088 pc
->attr
[i
*4+c
].type
= P_ATTR
;
1089 pc
->attr
[i
*4+c
].hw
= aid
++;
1090 pc
->attr
[i
*4+c
].index
= i
;
1094 if (pc
->p
->type
!= NV50_PROG_FRAGMENT
)
1097 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
1098 tmp
= alloc_temp(pc
, NULL
);
1100 unsigned inst
[2] = { 0, 0 };
1101 inst
[0] = 0x90000000;
1102 inst
[0] |= (tmp
->hw
<< 2);
1105 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
1106 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
1107 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
1108 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
1116 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
1117 if (pc
->result_nr
) {
1120 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
1124 for (i
= 0; i
< pc
->result_nr
; i
++) {
1125 for (c
= 0; c
< 4; c
++) {
1126 if (pc
->p
->type
== NV50_PROG_FRAGMENT
)
1127 pc
->result
[i
*4+c
].type
= P_TEMP
;
1129 pc
->result
[i
*4+c
].type
= P_RESULT
;
1130 pc
->result
[i
*4+c
].hw
= rid
++;
1131 pc
->result
[i
*4+c
].index
= i
;
1136 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
1140 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
1144 for (i
= 0; i
< pc
->param_nr
; i
++) {
1145 for (c
= 0; c
< 4; c
++) {
1146 pc
->param
[i
*4+c
].type
= P_CONST
;
1147 pc
->param
[i
*4+c
].hw
= rid
++;
1148 pc
->param
[i
*4+c
].index
= i
;
1156 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1160 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1161 for (c
= 0; c
< 4; c
++) {
1162 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1163 pc
->immd
[i
*4+c
].hw
= rid
++;
1164 pc
->immd
[i
*4+c
].index
= i
;
1171 tgsi_parse_free(&p
);
1176 nv50_program_tx(struct nv50_program
*p
)
1178 struct tgsi_parse_context parse
;
1182 pc
= CALLOC_STRUCT(nv50_pc
);
1186 pc
->p
->cfg
.high_temp
= 4;
1188 ret
= nv50_program_tx_prep(pc
);
1192 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1193 while (!tgsi_parse_end_of_tokens(&parse
)) {
1194 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1196 tgsi_parse_token(&parse
);
1198 switch (tok
->Token
.Type
) {
1199 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1200 ret
= nv50_program_tx_insn(pc
, tok
);
1209 p
->immd_nr
= pc
->immd_nr
* 4;
1210 p
->immd
= pc
->immd_buf
;
1213 tgsi_parse_free(&parse
);
1220 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1224 if (nv50_program_tx(p
) == FALSE
)
1226 /* *not* sufficient, it's fine if last inst is long and
1227 * NOT immd - otherwise it's fucked fucked fucked */
1228 p
->insns
[p
->insns_nr
- 1] |= 0x00000001;
1230 if (p
->type
== NV50_PROG_VERTEX
) {
1231 for (i
= 0; i
< p
->insns_nr
; i
++)
1232 NOUVEAU_ERR("VP0x%08x\n", p
->insns
[i
]);
1234 for (i
= 0; i
< p
->insns_nr
; i
++)
1235 NOUVEAU_ERR("FP0x%08x\n", p
->insns
[i
]);
1238 p
->translated
= TRUE
;
1242 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1246 for (i
= 0; i
< p
->immd_nr
; i
++) {
1247 BEGIN_RING(tesla
, 0x0f00, 2);
1248 OUT_RING ((NV50_CB_PMISC
<< 16) | (i
<< 8));
1249 OUT_RING (fui(p
->immd
[i
]));
1254 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1256 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1260 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->insns_nr
* 4);
1261 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
1262 memcpy(map
, p
->insns
, p
->insns_nr
* 4);
1263 ws
->buffer_unmap(ws
, p
->buffer
);
1267 nv50_vertprog_validate(struct nv50_context
*nv50
)
1269 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1270 struct nv50_program
*p
= nv50
->vertprog
;
1271 struct nouveau_stateobj
*so
;
1273 if (!p
->translated
) {
1274 nv50_program_validate(nv50
, p
);
1279 nv50_program_validate_data(nv50
, p
);
1280 nv50_program_validate_code(nv50
, p
);
1283 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1284 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1285 NOUVEAU_BO_HIGH
, 0, 0);
1286 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1287 NOUVEAU_BO_LOW
, 0, 0);
1288 so_method(so
, tesla
, 0x1650, 2);
1289 so_data (so
, p
->cfg
.vp
.attr
[0]);
1290 so_data (so
, p
->cfg
.vp
.attr
[1]);
1291 so_method(so
, tesla
, 0x16ac, 2);
1293 so_data (so
, p
->cfg
.high_temp
);
1294 so_method(so
, tesla
, 0x140c, 1);
1295 so_data (so
, 0); /* program start offset */
1296 so_emit(nv50
->screen
->nvws
, so
);
1301 nv50_fragprog_validate(struct nv50_context
*nv50
)
1303 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1304 struct nv50_program
*p
= nv50
->fragprog
;
1305 struct nouveau_stateobj
*so
;
1307 if (!p
->translated
) {
1308 nv50_program_validate(nv50
, p
);
1313 nv50_program_validate_data(nv50
, p
);
1314 nv50_program_validate_code(nv50
, p
);
1317 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1318 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1319 NOUVEAU_BO_HIGH
, 0, 0);
1320 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1321 NOUVEAU_BO_LOW
, 0, 0);
1322 so_method(so
, tesla
, 0x198c, 1);
1323 so_data (so
, p
->cfg
.high_temp
);
1324 so_method(so
, tesla
, 0x1414, 1);
1325 so_data (so
, 0); /* program start offset */
1326 so_emit(nv50
->screen
->nvws
, so
);
1331 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1333 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1342 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);