1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
11 #include "nv50_state.h"
13 #define NV50_SU_MAX_TEMP 64
18 * MSB - Like MAD, but MUL+SUB
19 * - Fuck it off, introduce a way to negate args for ops that
22 * Look into inlining IMMD for ops other than MOV (make it general?)
23 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
24 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
26 * Verify half-insns work where expected - and force disable them where they
27 * don't work - MUL has it forcibly disabled atm as it fixes POW..
44 struct nv50_program
*p
;
47 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
50 struct nv50_reg
*temp
;
52 struct nv50_reg
*attr
;
54 struct nv50_reg
*result
;
56 struct nv50_reg
*param
;
58 struct nv50_reg
*immd
;
62 struct nv50_reg
*temp_temp
[8];
63 unsigned temp_temp_nr
;
67 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
71 if (reg
->type
!= P_TEMP
)
75 /*XXX: do this here too to catch FP temp-as-attr usage..
76 * not clean, but works */
77 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
78 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
82 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
83 if (!(pc
->r_temp
[i
])) {
86 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
87 pc
->p
->cfg
.high_temp
= i
+ 1;
95 static struct nv50_reg
*
96 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
101 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
104 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
105 if (!pc
->r_temp
[i
]) {
106 r
= CALLOC_STRUCT(nv50_reg
);
120 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
122 if (r
->index
== -1) {
123 FREE(pc
->r_temp
[r
->hw
]);
124 pc
->r_temp
[r
->hw
] = NULL
;
128 static struct nv50_reg
*
129 temp_temp(struct nv50_pc
*pc
)
131 if (pc
->temp_temp_nr
>= 8)
134 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
135 return pc
->temp_temp
[pc
->temp_temp_nr
++];
139 kill_temp_temp(struct nv50_pc
*pc
)
143 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
144 free_temp(pc
, pc
->temp_temp
[i
]);
145 pc
->temp_temp_nr
= 0;
149 ctor_immd(struct nv50_pc
*pc
, float x
, float y
, float z
, float w
)
151 pc
->immd_buf
= realloc(pc
->immd_buf
, (pc
->immd_nr
+ 1) * 4 *
153 pc
->immd_buf
[(pc
->immd_nr
* 4) + 0] = x
;
154 pc
->immd_buf
[(pc
->immd_nr
* 4) + 1] = y
;
155 pc
->immd_buf
[(pc
->immd_nr
* 4) + 2] = z
;
156 pc
->immd_buf
[(pc
->immd_nr
* 4) + 3] = w
;
158 return pc
->immd_nr
++;
161 static struct nv50_reg
*
162 alloc_immd(struct nv50_pc
*pc
, float f
)
164 struct nv50_reg
*r
= CALLOC_STRUCT(nv50_reg
);
167 hw
= ctor_immd(pc
, f
, 0, 0, 0);
175 emit(struct nv50_pc
*pc
, unsigned *inst
)
177 struct nv50_program
*p
= pc
->p
;
181 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
182 memcpy(p
->insns
+ (p
->insns_nr
- 2), inst
, sizeof(unsigned)*2);
185 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
186 memcpy(p
->insns
+ (p
->insns_nr
- 1), inst
, sizeof(unsigned));
190 static INLINE
void set_long(struct nv50_pc
*, unsigned *);
193 is_long(unsigned *inst
)
201 is_immd(unsigned *inst
)
203 if (is_long(inst
) && (inst
[1] & 3) == 3)
209 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
, unsigned *inst
)
212 inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
213 inst
[1] |= (pred
<< 7) | (idx
<< 12);
217 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
, unsigned *inst
)
220 inst
[1] &= ~((0x3 << 4) | (1 << 6));
221 inst
[1] |= (idx
<< 4) | (on
<< 6);
225 set_long(struct nv50_pc
*pc
, unsigned *inst
)
231 set_pred(pc
, 0xf, 0, inst
);
232 set_pred_wr(pc
, 0, 0, inst
);
236 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, unsigned *inst
)
238 if (dst
->type
== P_RESULT
) {
240 inst
[1] |= 0x00000008;
244 inst
[0] |= (dst
->hw
<< 2);
248 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, unsigned *inst
)
250 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
253 /*XXX: can't be predicated - bits overlap.. catch cases where both
254 * are required and avoid them. */
255 set_pred(pc
, 0, 0, inst
);
256 set_pred_wr(pc
, 0, 0, inst
);
258 inst
[1] |= 0x00000002 | 0x00000001;
259 inst
[0] |= (val
& 0x3f) << 16;
260 inst
[1] |= (val
>> 6) << 2;
264 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
265 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
267 unsigned inst
[2] = { 0, 0 };
269 inst
[0] |= 0x80000000;
270 set_dst(pc
, dst
, inst
);
272 inst
[0] |= (iv
->hw
<< 9);
274 inst
[0] |= (src
->hw
<< 16);
276 inst
[0] |= (1 << 25);
282 set_cseg(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
285 if (src
->type
== P_IMMD
) {
286 inst
[1] |= (NV50_CB_PMISC
<< 22);
288 if (pc
->p
->type
== NV50_PROG_VERTEX
)
289 inst
[1] |= (NV50_CB_PVP
<< 22);
291 inst
[1] |= (NV50_CB_PFP
<< 22);
296 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
298 unsigned inst
[2] = { 0, 0 };
300 inst
[0] |= 0x10000000;
302 set_dst(pc
, dst
, inst
);
304 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
305 set_immd(pc
, src
, inst
);
306 /*XXX: 32-bit, but steals part of "half" reg space - need to
307 * catch and handle this case if/when we do half-regs
309 inst
[0] |= 0x00008000;
311 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
313 set_cseg(pc
, src
, inst
);
314 inst
[0] |= (src
->hw
<< 9);
315 inst
[1] |= 0x20000000; /* src0 const? */
317 if (src
->type
== P_ATTR
) {
319 inst
[1] |= 0x00200000;
323 inst
[0] |= (src
->hw
<< 9);
326 /* We really should support "half" instructions here at some point,
327 * but I don't feel confident enough about them yet.
330 if (is_long(inst
) && !is_immd(inst
)) {
331 inst
[1] |= 0x04000000; /* 32-bit */
332 inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
339 check_swap_src_0_1(struct nv50_pc
*pc
,
340 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
342 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
344 if (src0
->type
== P_CONST
) {
345 if (src1
->type
!= P_CONST
) {
351 if (src1
->type
== P_ATTR
) {
352 if (src0
->type
!= P_ATTR
) {
363 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
365 if (src
->type
== P_ATTR
) {
367 inst
[1] |= 0x00200000;
369 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
370 struct nv50_reg
*temp
= temp_temp(pc
);
372 emit_mov(pc
, temp
, src
);
377 inst
[0] |= (src
->hw
<< 9);
381 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
383 if (src
->type
== P_ATTR
) {
384 struct nv50_reg
*temp
= temp_temp(pc
);
386 emit_mov(pc
, temp
, src
);
389 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
390 set_cseg(pc
, src
, inst
);
391 inst
[0] |= 0x00800000;
395 inst
[0] |= (src
->hw
<< 16);
399 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
403 if (src
->type
== P_ATTR
) {
404 struct nv50_reg
*temp
= temp_temp(pc
);
406 emit_mov(pc
, temp
, src
);
409 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
410 set_cseg(pc
, src
, inst
);
411 inst
[0] |= 0x01000000;
415 inst
[1] |= (src
->hw
<< 14);
419 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
420 struct nv50_reg
*src1
)
422 unsigned inst
[2] = { 0, 0 };
424 inst
[0] |= 0xc0000000;
427 check_swap_src_0_1(pc
, &src0
, &src1
);
428 set_dst(pc
, dst
, inst
);
429 set_src_0(pc
, src0
, inst
);
430 set_src_1(pc
, src1
, inst
);
436 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
437 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
439 unsigned inst
[2] = { 0, 0 };
441 inst
[0] |= 0xb0000000;
443 check_swap_src_0_1(pc
, &src0
, &src1
);
444 set_dst(pc
, dst
, inst
);
445 set_src_0(pc
, src0
, inst
);
447 set_src_2(pc
, src1
, inst
);
449 set_src_1(pc
, src1
, inst
);
455 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
456 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
458 unsigned inst
[2] = { 0, 0 };
461 inst
[0] |= 0xb0000000;
462 inst
[1] |= (sub
<< 29);
464 check_swap_src_0_1(pc
, &src0
, &src1
);
465 set_dst(pc
, dst
, inst
);
466 set_src_0(pc
, src0
, inst
);
467 set_src_1(pc
, src1
, inst
);
473 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
474 struct nv50_reg
*src1
)
476 unsigned inst
[2] = { 0, 0 };
478 inst
[0] |= 0xb0000000;
481 if (check_swap_src_0_1(pc
, &src0
, &src1
))
482 inst
[1] |= 0x04000000;
484 inst
[1] |= 0x08000000;
486 set_dst(pc
, dst
, inst
);
487 set_src_0(pc
, src0
, inst
);
488 set_src_2(pc
, src1
, inst
);
494 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
495 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
497 unsigned inst
[2] = { 0, 0 };
499 inst
[0] |= 0xe0000000;
501 check_swap_src_0_1(pc
, &src0
, &src1
);
502 set_dst(pc
, dst
, inst
);
503 set_src_0(pc
, src0
, inst
);
504 set_src_1(pc
, src1
, inst
);
505 set_src_2(pc
, src2
, inst
);
511 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
512 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
514 unsigned inst
[2] = { 0, 0 };
516 inst
[0] |= 0xe0000000;
518 inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
520 check_swap_src_0_1(pc
, &src0
, &src1
);
521 set_dst(pc
, dst
, inst
);
522 set_src_0(pc
, src0
, inst
);
523 set_src_1(pc
, src1
, inst
);
524 set_src_2(pc
, src2
, inst
);
530 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
531 struct nv50_reg
*dst
, struct nv50_reg
*src
)
533 unsigned inst
[2] = { 0, 0 };
535 inst
[0] |= 0x90000000;
538 inst
[1] |= (sub
<< 29);
541 set_dst(pc
, dst
, inst
);
542 set_src_0(pc
, src
, inst
);
548 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
550 unsigned inst
[2] = { 0, 0 };
552 inst
[0] |= 0xb0000000;
554 set_dst(pc
, dst
, inst
);
555 set_src_0(pc
, src
, inst
);
557 inst
[1] |= (6 << 29) | 0x00004000;
562 /*XXX: inaccurate results.. why? */
563 #define ALLOW_SET_SWAP 0
566 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
567 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
569 unsigned inst
[2] = { 0, 0 };
571 unsigned inv_cop
[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
573 struct nv50_reg
*rdst
;
577 if (check_swap_src_0_1(pc
, &src0
, &src1
))
578 c_op
= inv_cop
[c_op
];
582 if (dst
->type
!= P_TEMP
)
583 dst
= alloc_temp(pc
, NULL
);
587 inst
[0] |= 0xb0000000;
588 inst
[1] |= (3 << 29);
589 inst
[1] |= (c_op
<< 14);
590 /*XXX: breaks things, .u32 by default?
591 * decuda will disasm as .u16 and use .lo/.hi regs, but this
592 * doesn't seem to match what the hw actually does.
593 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
595 set_dst(pc
, dst
, inst
);
596 set_src_0(pc
, src0
, inst
);
597 set_src_1(pc
, src1
, inst
);
601 inst
[0] = 0xa0000001;
602 inst
[1] = 0x64014780;
603 set_dst(pc
, rdst
, inst
);
604 set_src_0(pc
, dst
, inst
);
612 emit_flr(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
614 unsigned inst
[2] = { 0, 0 };
616 inst
[0] = 0xa0000000; /* cvt */
618 inst
[1] |= (6 << 29); /* cvt */
619 inst
[1] |= 0x08000000; /* integer mode */
620 inst
[1] |= 0x04000000; /* 32 bit */
621 inst
[1] |= ((0x1 << 3)) << 14; /* .rn */
622 inst
[1] |= (1 << 14); /* src .f32 */
623 set_dst(pc
, dst
, inst
);
624 set_src_0(pc
, src
, inst
);
630 emit_pow(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
631 struct nv50_reg
*v
, struct nv50_reg
*e
)
633 struct nv50_reg
*temp
= alloc_temp(pc
, NULL
);
635 emit_flop(pc
, 3, temp
, v
);
636 emit_mul(pc
, temp
, temp
, e
);
637 emit_preex2(pc
, temp
, temp
);
638 emit_flop(pc
, 6, dst
, temp
);
644 emit_abs(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
646 unsigned inst
[2] = { 0, 0 };
648 inst
[0] = 0xa0000000; /* cvt */
650 inst
[1] |= (6 << 29); /* cvt */
651 inst
[1] |= 0x04000000; /* 32 bit */
652 inst
[1] |= (1 << 14); /* src .f32 */
653 inst
[1] |= ((1 << 6) << 14); /* .abs */
654 set_dst(pc
, dst
, inst
);
655 set_src_0(pc
, src
, inst
);
660 static struct nv50_reg
*
661 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
663 switch (dst
->DstRegister
.File
) {
664 case TGSI_FILE_TEMPORARY
:
665 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
666 case TGSI_FILE_OUTPUT
:
667 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
677 static struct nv50_reg
*
678 tgsi_src(struct nv50_pc
*pc
, int chan
, const struct tgsi_full_src_register
*src
)
680 struct nv50_reg
*r
= NULL
;
681 struct nv50_reg
*temp
;
684 c
= tgsi_util_get_full_src_register_extswizzle(src
, chan
);
686 case TGSI_EXTSWIZZLE_X
:
687 case TGSI_EXTSWIZZLE_Y
:
688 case TGSI_EXTSWIZZLE_Z
:
689 case TGSI_EXTSWIZZLE_W
:
690 switch (src
->SrcRegister
.File
) {
691 case TGSI_FILE_INPUT
:
692 r
= &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
694 case TGSI_FILE_TEMPORARY
:
695 r
= &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
697 case TGSI_FILE_CONSTANT
:
698 r
= &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
700 case TGSI_FILE_IMMEDIATE
:
701 r
= &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
708 case TGSI_EXTSWIZZLE_ZERO
:
709 r
= alloc_immd(pc
, 0.0);
711 case TGSI_EXTSWIZZLE_ONE
:
712 r
= alloc_immd(pc
, 1.0);
719 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
720 case TGSI_UTIL_SIGN_KEEP
:
722 case TGSI_UTIL_SIGN_CLEAR
:
723 temp
= temp_temp(pc
);
724 emit_abs(pc
, temp
, r
);
736 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
738 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
739 struct nv50_reg
*rdst
[4], *dst
[4], *src
[3][4], *temp
;
743 NOUVEAU_ERR("insn %p\n", tok
);
745 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
746 sat
= inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
;
748 for (c
= 0; c
< 4; c
++) {
750 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
755 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
756 for (c
= 0; c
< 4; c
++)
757 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
761 for (c
= 0; c
< 4; c
++) {
763 dst
[c
] = temp_temp(pc
);
767 switch (inst
->Instruction
.Opcode
) {
768 case TGSI_OPCODE_ABS
:
769 for (c
= 0; c
< 4; c
++) {
770 if (!(mask
& (1 << c
)))
772 emit_abs(pc
, dst
[c
], src
[0][c
]);
775 case TGSI_OPCODE_ADD
:
776 for (c
= 0; c
< 4; c
++) {
777 if (!(mask
& (1 << c
)))
779 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
782 case TGSI_OPCODE_COS
:
783 for (c
= 0; c
< 4; c
++) {
784 if (!(mask
& (1 << c
)))
786 emit_flop(pc
, 5, dst
[c
], src
[0][c
]);
789 case TGSI_OPCODE_DP3
:
790 temp
= alloc_temp(pc
, NULL
);
791 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
792 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
793 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
794 for (c
= 0; c
< 4; c
++) {
795 if (!(mask
& (1 << c
)))
797 emit_mov(pc
, dst
[c
], temp
);
801 case TGSI_OPCODE_DP4
:
802 temp
= alloc_temp(pc
, NULL
);
803 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
804 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
805 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
806 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
807 for (c
= 0; c
< 4; c
++) {
808 if (!(mask
& (1 << c
)))
810 emit_mov(pc
, dst
[c
], temp
);
814 case TGSI_OPCODE_DPH
:
815 temp
= alloc_temp(pc
, NULL
);
816 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
817 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
818 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
819 emit_add(pc
, temp
, src
[1][3], temp
);
820 for (c
= 0; c
< 4; c
++) {
821 if (!(mask
& (1 << c
)))
823 emit_mov(pc
, dst
[c
], temp
);
827 case TGSI_OPCODE_DST
:
829 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
830 emit_mov(pc
, dst
[0], one
);
831 emit_mul(pc
, dst
[1], src
[0][1], src
[1][1]);
832 emit_mov(pc
, dst
[2], src
[0][2]);
833 emit_mov(pc
, dst
[3], src
[1][3]);
837 case TGSI_OPCODE_EX2
:
838 temp
= alloc_temp(pc
, NULL
);
839 for (c
= 0; c
< 4; c
++) {
840 if (!(mask
& (1 << c
)))
842 emit_preex2(pc
, temp
, src
[0][c
]);
843 emit_flop(pc
, 6, dst
[c
], temp
);
847 case TGSI_OPCODE_FLR
:
848 for (c
= 0; c
< 4; c
++) {
849 if (!(mask
& (1 << c
)))
851 emit_flr(pc
, dst
[c
], src
[0][c
]);
854 case TGSI_OPCODE_FRC
:
855 temp
= alloc_temp(pc
, NULL
);
856 for (c
= 0; c
< 4; c
++) {
857 if (!(mask
& (1 << c
)))
859 emit_flr(pc
, temp
, src
[0][c
]);
860 emit_sub(pc
, dst
[c
], src
[0][c
], temp
);
864 case TGSI_OPCODE_LG2
:
865 for (c
= 0; c
< 4; c
++) {
866 if (!(mask
& (1 << c
)))
868 emit_flop(pc
, 3, dst
[c
], src
[0][c
]);
871 case TGSI_OPCODE_MAD
:
872 for (c
= 0; c
< 4; c
++) {
873 if (!(mask
& (1 << c
)))
875 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
878 case TGSI_OPCODE_MAX
:
879 for (c
= 0; c
< 4; c
++) {
880 if (!(mask
& (1 << c
)))
882 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
885 case TGSI_OPCODE_MIN
:
886 for (c
= 0; c
< 4; c
++) {
887 if (!(mask
& (1 << c
)))
889 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
892 case TGSI_OPCODE_MOV
:
893 for (c
= 0; c
< 4; c
++) {
894 if (!(mask
& (1 << c
)))
896 emit_mov(pc
, dst
[c
], src
[0][c
]);
899 case TGSI_OPCODE_MUL
:
900 for (c
= 0; c
< 4; c
++) {
901 if (!(mask
& (1 << c
)))
903 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
906 case TGSI_OPCODE_POW
:
907 temp
= alloc_temp(pc
, NULL
);
908 emit_pow(pc
, temp
, src
[0][0], src
[1][0]);
909 for (c
= 0; c
< 4; c
++) {
910 if (!(mask
& (1 << c
)))
912 emit_mov(pc
, dst
[c
], temp
);
916 case TGSI_OPCODE_RCP
:
917 for (c
= 0; c
< 4; c
++) {
918 if (!(mask
& (1 << c
)))
920 emit_flop(pc
, 0, dst
[c
], src
[0][c
]);
923 case TGSI_OPCODE_RSQ
:
924 for (c
= 0; c
< 4; c
++) {
925 if (!(mask
& (1 << c
)))
927 emit_flop(pc
, 2, dst
[c
], src
[0][c
]);
930 case TGSI_OPCODE_SGE
:
931 for (c
= 0; c
< 4; c
++) {
932 if (!(mask
& (1 << c
)))
934 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
937 case TGSI_OPCODE_SIN
:
938 for (c
= 0; c
< 4; c
++) {
939 if (!(mask
& (1 << c
)))
941 emit_flop(pc
, 4, dst
[c
], src
[0][c
]);
944 case TGSI_OPCODE_SLT
:
945 for (c
= 0; c
< 4; c
++) {
946 if (!(mask
& (1 << c
)))
948 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
951 case TGSI_OPCODE_SUB
:
952 for (c
= 0; c
< 4; c
++) {
953 if (!(mask
& (1 << c
)))
955 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
958 case TGSI_OPCODE_XPD
:
959 temp
= alloc_temp(pc
, NULL
);
960 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
961 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
962 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
963 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
964 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
965 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
968 case TGSI_OPCODE_END
:
971 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
976 for (c
= 0; c
< 4; c
++) {
977 unsigned inst
[2] = { 0, 0 };
979 if (!(mask
& (1 << c
)))
982 inst
[0] = 0xa0000000; /* cvt */
984 inst
[1] |= (6 << 29); /* cvt */
985 inst
[1] |= 0x04000000; /* 32 bit */
986 inst
[1] |= (1 << 14); /* src .f32 */
987 inst
[1] |= ((1 << 5) << 14); /* .sat */
988 set_dst(pc
, rdst
[c
], inst
);
989 set_src_0(pc
, dst
[c
], inst
);
999 nv50_program_tx_prep(struct nv50_pc
*pc
)
1001 struct tgsi_parse_context p
;
1002 boolean ret
= FALSE
;
1005 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
1006 while (!tgsi_parse_end_of_tokens(&p
)) {
1007 const union tgsi_full_token
*tok
= &p
.FullToken
;
1009 tgsi_parse_token(&p
);
1010 switch (tok
->Token
.Type
) {
1011 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1013 const struct tgsi_full_immediate
*imm
=
1014 &p
.FullToken
.FullImmediate
;
1016 ctor_immd(pc
, imm
->u
.ImmediateFloat32
[0].Float
,
1017 imm
->u
.ImmediateFloat32
[1].Float
,
1018 imm
->u
.ImmediateFloat32
[2].Float
,
1019 imm
->u
.ImmediateFloat32
[3].Float
);
1022 case TGSI_TOKEN_TYPE_DECLARATION
:
1024 const struct tgsi_full_declaration
*d
;
1027 d
= &p
.FullToken
.FullDeclaration
;
1028 last
= d
->u
.DeclarationRange
.Last
;
1030 switch (d
->Declaration
.File
) {
1031 case TGSI_FILE_TEMPORARY
:
1032 if (pc
->temp_nr
< (last
+ 1))
1033 pc
->temp_nr
= last
+ 1;
1035 case TGSI_FILE_OUTPUT
:
1036 if (pc
->result_nr
< (last
+ 1))
1037 pc
->result_nr
= last
+ 1;
1039 case TGSI_FILE_INPUT
:
1040 if (pc
->attr_nr
< (last
+ 1))
1041 pc
->attr_nr
= last
+ 1;
1043 case TGSI_FILE_CONSTANT
:
1044 if (pc
->param_nr
< (last
+ 1))
1045 pc
->param_nr
= last
+ 1;
1048 NOUVEAU_ERR("bad decl file %d\n",
1049 d
->Declaration
.File
);
1054 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1061 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
1063 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
1067 for (i
= 0; i
< pc
->temp_nr
; i
++) {
1068 for (c
= 0; c
< 4; c
++) {
1069 pc
->temp
[i
*4+c
].type
= P_TEMP
;
1070 pc
->temp
[i
*4+c
].hw
= -1;
1071 pc
->temp
[i
*4+c
].index
= i
;
1076 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
1078 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
1081 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
1085 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
1086 iv
= alloc_temp(pc
, NULL
);
1090 for (i
= 0; i
< pc
->attr_nr
; i
++) {
1091 struct nv50_reg
*a
= &pc
->attr
[i
*4];
1093 for (c
= 0; c
< 4; c
++) {
1094 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
1095 struct nv50_reg
*at
=
1096 alloc_temp(pc
, NULL
);
1097 pc
->attr
[i
*4+c
].type
= at
->type
;
1098 pc
->attr
[i
*4+c
].hw
= at
->hw
;
1099 pc
->attr
[i
*4+c
].index
= at
->index
;
1101 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
1103 pc
->attr
[i
*4+c
].type
= P_ATTR
;
1104 pc
->attr
[i
*4+c
].hw
= aid
++;
1105 pc
->attr
[i
*4+c
].index
= i
;
1109 if (pc
->p
->type
!= NV50_PROG_FRAGMENT
)
1112 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
1113 tmp
= alloc_temp(pc
, NULL
);
1115 unsigned inst
[2] = { 0, 0 };
1116 inst
[0] = 0x90000000;
1117 inst
[0] |= (tmp
->hw
<< 2);
1120 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
1121 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
1122 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
1123 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
1131 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
1132 if (pc
->result_nr
) {
1135 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
1139 for (i
= 0; i
< pc
->result_nr
; i
++) {
1140 for (c
= 0; c
< 4; c
++) {
1141 if (pc
->p
->type
== NV50_PROG_FRAGMENT
)
1142 pc
->result
[i
*4+c
].type
= P_TEMP
;
1144 pc
->result
[i
*4+c
].type
= P_RESULT
;
1145 pc
->result
[i
*4+c
].hw
= rid
++;
1146 pc
->result
[i
*4+c
].index
= i
;
1151 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
1155 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
1159 for (i
= 0; i
< pc
->param_nr
; i
++) {
1160 for (c
= 0; c
< 4; c
++) {
1161 pc
->param
[i
*4+c
].type
= P_CONST
;
1162 pc
->param
[i
*4+c
].hw
= rid
++;
1163 pc
->param
[i
*4+c
].index
= i
;
1171 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1175 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1176 for (c
= 0; c
< 4; c
++) {
1177 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1178 pc
->immd
[i
*4+c
].hw
= rid
++;
1179 pc
->immd
[i
*4+c
].index
= i
;
1186 tgsi_parse_free(&p
);
1191 nv50_program_tx(struct nv50_program
*p
)
1193 struct tgsi_parse_context parse
;
1197 pc
= CALLOC_STRUCT(nv50_pc
);
1201 pc
->p
->cfg
.high_temp
= 4;
1203 ret
= nv50_program_tx_prep(pc
);
1207 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1208 while (!tgsi_parse_end_of_tokens(&parse
)) {
1209 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1211 tgsi_parse_token(&parse
);
1213 switch (tok
->Token
.Type
) {
1214 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1215 ret
= nv50_program_tx_insn(pc
, tok
);
1224 p
->immd_nr
= pc
->immd_nr
* 4;
1225 p
->immd
= pc
->immd_buf
;
1228 tgsi_parse_free(&parse
);
1235 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1239 if (nv50_program_tx(p
) == FALSE
)
1241 /* *not* sufficient, it's fine if last inst is long and
1242 * NOT immd - otherwise it's fucked fucked fucked */
1243 p
->insns
[p
->insns_nr
- 1] |= 0x00000001;
1245 if (p
->type
== NV50_PROG_VERTEX
) {
1246 for (i
= 0; i
< p
->insns_nr
; i
++)
1247 NOUVEAU_ERR("VP0x%08x\n", p
->insns
[i
]);
1249 for (i
= 0; i
< p
->insns_nr
; i
++)
1250 NOUVEAU_ERR("FP0x%08x\n", p
->insns
[i
]);
1253 p
->translated
= TRUE
;
1257 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1261 for (i
= 0; i
< p
->immd_nr
; i
++) {
1262 BEGIN_RING(tesla
, 0x0f00, 2);
1263 OUT_RING ((NV50_CB_PMISC
<< 16) | (i
<< 8));
1264 OUT_RING (fui(p
->immd
[i
]));
1269 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1271 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1275 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->insns_nr
* 4);
1276 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
1277 memcpy(map
, p
->insns
, p
->insns_nr
* 4);
1278 ws
->buffer_unmap(ws
, p
->buffer
);
1282 nv50_vertprog_validate(struct nv50_context
*nv50
)
1284 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1285 struct nv50_program
*p
= nv50
->vertprog
;
1286 struct nouveau_stateobj
*so
;
1288 if (!p
->translated
) {
1289 nv50_program_validate(nv50
, p
);
1294 nv50_program_validate_data(nv50
, p
);
1295 nv50_program_validate_code(nv50
, p
);
1298 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1299 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1300 NOUVEAU_BO_HIGH
, 0, 0);
1301 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1302 NOUVEAU_BO_LOW
, 0, 0);
1303 so_method(so
, tesla
, 0x1650, 2);
1304 so_data (so
, p
->cfg
.vp
.attr
[0]);
1305 so_data (so
, p
->cfg
.vp
.attr
[1]);
1306 so_method(so
, tesla
, 0x16ac, 2);
1308 so_data (so
, p
->cfg
.high_temp
);
1309 so_method(so
, tesla
, 0x140c, 1);
1310 so_data (so
, 0); /* program start offset */
1311 so_emit(nv50
->screen
->nvws
, so
);
1316 nv50_fragprog_validate(struct nv50_context
*nv50
)
1318 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1319 struct nv50_program
*p
= nv50
->fragprog
;
1320 struct nouveau_stateobj
*so
;
1322 if (!p
->translated
) {
1323 nv50_program_validate(nv50
, p
);
1328 nv50_program_validate_data(nv50
, p
);
1329 nv50_program_validate_code(nv50
, p
);
1332 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1333 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1334 NOUVEAU_BO_HIGH
, 0, 0);
1335 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1336 NOUVEAU_BO_LOW
, 0, 0);
1337 so_method(so
, tesla
, 0x198c, 1);
1338 so_data (so
, p
->cfg
.high_temp
);
1339 so_method(so
, tesla
, 0x1414, 1);
1340 so_data (so
, 0); /* program start offset */
1341 so_emit(nv50
->screen
->nvws
, so
);
1346 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1348 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1357 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);