1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
11 #include "nv50_state.h"
13 #define NV50_SU_MAX_TEMP 64
24 * MSB - Like MAD, but MUL+SUB
25 * - Fuck it off, introduce a way to negate args for ops that
28 * Need ability to specify driver IMMD values, like nv40 constant()
30 * Look into inlining IMMD for ops other than MOV
47 struct nv50_program
*p
;
50 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
53 struct nv50_reg
*temp
;
55 struct nv50_reg
*attr
;
57 struct nv50_reg
*result
;
59 struct nv50_reg
*param
;
61 struct nv50_reg
*immd
;
65 struct nv50_reg
*temp_temp
[8];
66 unsigned temp_temp_nr
;
70 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
74 if (reg
->type
!= P_TEMP
)
78 /*XXX: do this here too to catch FP temp-as-attr usage..
79 * not clean, but works */
80 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
81 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
85 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
86 if (!(pc
->r_temp
[i
])) {
89 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
90 pc
->p
->cfg
.high_temp
= i
+ 1;
98 static struct nv50_reg
*
99 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
104 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
107 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
108 if (!pc
->r_temp
[i
]) {
109 r
= CALLOC_STRUCT(nv50_reg
);
123 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
125 if (r
->index
== -1) {
126 FREE(pc
->r_temp
[r
->hw
]);
127 pc
->r_temp
[r
->hw
] = NULL
;
131 static struct nv50_reg
*
132 temp_temp(struct nv50_pc
*pc
)
134 if (pc
->temp_temp_nr
>= 8)
137 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
138 return pc
->temp_temp
[pc
->temp_temp_nr
++];
142 kill_temp_temp(struct nv50_pc
*pc
)
146 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
147 free_temp(pc
, pc
->temp_temp
[i
]);
148 pc
->temp_temp_nr
= 0;
151 static struct nv50_reg
*
152 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
154 switch (dst
->DstRegister
.File
) {
155 case TGSI_FILE_TEMPORARY
:
156 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
157 case TGSI_FILE_OUTPUT
:
158 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
168 static struct nv50_reg
*
169 tgsi_src(struct nv50_pc
*pc
, int c
, const struct tgsi_full_src_register
*src
)
171 /* Handle swizzling */
173 case 0: c
= src
->SrcRegister
.SwizzleX
; break;
174 case 1: c
= src
->SrcRegister
.SwizzleY
; break;
175 case 2: c
= src
->SrcRegister
.SwizzleZ
; break;
176 case 3: c
= src
->SrcRegister
.SwizzleW
; break;
181 switch (src
->SrcRegister
.File
) {
182 case TGSI_FILE_INPUT
:
183 return &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
184 case TGSI_FILE_TEMPORARY
:
185 return &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
186 case TGSI_FILE_CONSTANT
:
187 return &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
188 case TGSI_FILE_IMMEDIATE
:
189 return &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
198 emit(struct nv50_pc
*pc
, unsigned *inst
)
200 struct nv50_program
*p
= pc
->p
;
204 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
205 memcpy(p
->insns
+ (p
->insns_nr
- 2), inst
, sizeof(unsigned)*2);
208 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
209 memcpy(p
->insns
+ (p
->insns_nr
- 1), inst
, sizeof(unsigned));
213 static INLINE
void set_long(struct nv50_pc
*, unsigned *);
216 is_long(unsigned *inst
)
224 is_immd(unsigned *inst
)
226 if (is_long(inst
) && (inst
[1] & 3) == 3)
232 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
, unsigned *inst
)
235 inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
236 inst
[1] |= (pred
<< 7) | (idx
<< 12);
240 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
, unsigned *inst
)
243 inst
[1] &= ~((0x3 << 4) | (1 << 6));
244 inst
[1] |= (idx
<< 4) | (on
<< 6);
248 set_long(struct nv50_pc
*pc
, unsigned *inst
)
254 set_pred(pc
, 0xf, 0, inst
);
255 set_pred_wr(pc
, 0, 0, inst
);
259 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, unsigned *inst
)
261 if (dst
->type
== P_RESULT
) {
263 inst
[1] |= 0x00000008;
267 inst
[0] |= (dst
->hw
<< 2);
271 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, unsigned *inst
)
273 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
276 /*XXX: can't be predicated - bits overlap.. catch cases where both
277 * are required and avoid them. */
278 set_pred(pc
, 0, 0, inst
);
279 set_pred_wr(pc
, 0, 0, inst
);
281 inst
[1] |= 0x00000002 | 0x00000001;
282 inst
[0] |= (val
& 0x3f) << 16;
283 inst
[1] |= (val
>> 6) << 2;
287 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
288 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
290 unsigned inst
[2] = { 0, 0 };
292 inst
[0] |= 0x80000000;
293 set_dst(pc
, dst
, inst
);
295 inst
[0] |= (iv
->hw
<< 9);
297 inst
[0] |= (src
->hw
<< 16);
299 inst
[0] |= (1 << 25);
305 set_cseg(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
308 if (src
->type
== P_IMMD
) {
309 inst
[1] |= (NV50_CB_PMISC
<< 22);
311 if (pc
->p
->type
== NV50_PROG_VERTEX
)
312 inst
[1] |= (NV50_CB_PVP
<< 22);
314 inst
[1] |= (NV50_CB_PFP
<< 22);
319 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
321 unsigned inst
[2] = { 0, 0 };
323 inst
[0] |= 0x10000000;
325 set_dst(pc
, dst
, inst
);
327 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
328 set_immd(pc
, src
, inst
);
329 /*XXX: 32-bit, but steals part of "half" reg space - need to
330 * catch and handle this case if/when we do half-regs
332 inst
[0] |= 0x00008000;
334 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
336 set_cseg(pc
, src
, inst
);
337 inst
[0] |= (src
->hw
<< 9);
338 inst
[1] |= 0x20000000; /* src0 const? */
340 if (src
->type
== P_ATTR
) {
342 inst
[1] |= 0x00200000;
346 inst
[0] |= (src
->hw
<< 9);
349 /* We really should support "half" instructions here at some point,
350 * but I don't feel confident enough about them yet.
353 if (is_long(inst
) && !is_immd(inst
)) {
354 inst
[1] |= 0x04000000; /* 32-bit */
355 inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
362 check_swap_src_0_1(struct nv50_pc
*pc
,
363 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
365 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
367 if (src0
->type
== P_CONST
) {
368 if (src1
->type
!= P_CONST
) {
374 if (src1
->type
== P_ATTR
) {
375 if (src0
->type
!= P_ATTR
) {
386 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
388 if (src
->type
== P_ATTR
) {
390 inst
[1] |= 0x00200000;
392 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
393 struct nv50_reg
*temp
= temp_temp(pc
);
395 emit_mov(pc
, temp
, src
);
400 inst
[0] |= (src
->hw
<< 9);
404 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
406 if (src
->type
== P_ATTR
) {
407 struct nv50_reg
*temp
= temp_temp(pc
);
409 emit_mov(pc
, temp
, src
);
412 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
413 set_cseg(pc
, src
, inst
);
414 inst
[0] |= 0x00800000;
418 inst
[0] |= (src
->hw
<< 16);
422 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned *inst
)
426 if (src
->type
== P_ATTR
) {
427 struct nv50_reg
*temp
= temp_temp(pc
);
429 emit_mov(pc
, temp
, src
);
432 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
433 set_cseg(pc
, src
, inst
);
434 inst
[0] |= 0x01000000;
438 inst
[1] |= (src
->hw
<< 14);
442 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
443 struct nv50_reg
*src1
)
445 unsigned inst
[2] = { 0, 0 };
447 inst
[0] |= 0xc0000000;
449 check_swap_src_0_1(pc
, &src0
, &src1
);
450 set_dst(pc
, dst
, inst
);
451 set_src_0(pc
, src0
, inst
);
452 set_src_1(pc
, src1
, inst
);
458 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
459 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
461 unsigned inst
[2] = { 0, 0 };
463 inst
[0] |= 0xb0000000;
465 check_swap_src_0_1(pc
, &src0
, &src1
);
466 set_dst(pc
, dst
, inst
);
467 set_src_0(pc
, src0
, inst
);
469 set_src_2(pc
, src1
, inst
);
471 set_src_1(pc
, src1
, inst
);
477 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
478 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
480 unsigned inst
[2] = { 0, 0 };
483 inst
[0] |= 0xb0000000;
484 inst
[1] |= (sub
<< 29);
486 check_swap_src_0_1(pc
, &src0
, &src1
);
487 set_dst(pc
, dst
, inst
);
488 set_src_0(pc
, src0
, inst
);
489 set_src_1(pc
, src1
, inst
);
495 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
496 struct nv50_reg
*src1
)
498 unsigned inst
[2] = { 0, 0 };
500 inst
[0] |= 0xb0000000;
503 if (check_swap_src_0_1(pc
, &src0
, &src1
))
504 inst
[1] |= 0x04000000;
506 inst
[1] |= 0x08000000;
508 set_dst(pc
, dst
, inst
);
509 set_src_0(pc
, src0
, inst
);
510 set_src_2(pc
, src1
, inst
);
516 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
517 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
519 unsigned inst
[2] = { 0, 0 };
521 inst
[0] |= 0xe0000000;
523 check_swap_src_0_1(pc
, &src0
, &src1
);
524 set_dst(pc
, dst
, inst
);
525 set_src_0(pc
, src0
, inst
);
526 set_src_1(pc
, src1
, inst
);
527 set_src_2(pc
, src2
, inst
);
533 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
534 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
536 unsigned inst
[2] = { 0, 0 };
538 inst
[0] |= 0xe0000000;
540 inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
542 check_swap_src_0_1(pc
, &src0
, &src1
);
543 set_dst(pc
, dst
, inst
);
544 set_src_0(pc
, src0
, inst
);
545 set_src_1(pc
, src1
, inst
);
546 set_src_2(pc
, src2
, inst
);
552 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
553 struct nv50_reg
*dst
, struct nv50_reg
*src
)
555 unsigned inst
[2] = { 0, 0 };
557 inst
[0] |= 0x90000000;
560 inst
[1] |= (sub
<< 29);
563 set_dst(pc
, dst
, inst
);
564 set_src_0(pc
, src
, inst
);
570 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
572 unsigned inst
[2] = { 0, 0 };
574 inst
[0] |= 0xb0000000;
576 set_dst(pc
, dst
, inst
);
577 set_src_0(pc
, src
, inst
);
579 inst
[1] |= (6 << 29) | 0x00004000;
583 /*XXX: inaccurate results.. why? */
584 #define ALLOW_SET_SWAP 0
587 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
588 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
590 unsigned inst
[2] = { 0, 0 };
592 unsigned inv_cop
[8] = { 0, 6, 2, 4, 3, 5, 1, 7 };
594 struct nv50_reg
*rdst
;
598 if (check_swap_src_0_1(pc
, &src0
, &src1
))
599 c_op
= inv_cop
[c_op
];
603 if (dst
->type
!= P_TEMP
)
604 dst
= alloc_temp(pc
, NULL
);
608 inst
[0] |= 0xb0000000;
609 inst
[1] |= (3 << 29);
610 inst
[1] |= (c_op
<< 14);
611 /*XXX: breaks things, .u32 by default?
612 * decuda will disasm as .u16 and use .lo/.hi regs, but this
613 * doesn't seem to match what the hw actually does.
614 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
616 set_dst(pc
, dst
, inst
);
617 set_src_0(pc
, src0
, inst
);
618 set_src_1(pc
, src1
, inst
);
622 inst
[0] = 0xa0000001;
623 inst
[1] = 0x64014780;
624 set_dst(pc
, rdst
, inst
);
625 set_src_0(pc
, dst
, inst
);
633 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
635 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
636 struct nv50_reg
*dst
[4], *src
[3][4], *temp
;
640 NOUVEAU_ERR("insn %p\n", tok
);
642 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
644 for (c
= 0; c
< 4; c
++) {
646 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
651 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
652 for (c
= 0; c
< 4; c
++)
653 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
656 switch (inst
->Instruction
.Opcode
) {
657 case TGSI_OPCODE_ADD
:
658 for (c
= 0; c
< 4; c
++) {
659 if (!(mask
& (1 << c
)))
661 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
664 case TGSI_OPCODE_COS
:
665 for (c
= 0; c
< 4; c
++) {
666 if (!(mask
& (1 << c
)))
668 emit_flop(pc
, 5, dst
[c
], src
[0][c
]);
671 case TGSI_OPCODE_DP3
:
672 temp
= alloc_temp(pc
, NULL
);
673 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
674 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
675 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
676 for (c
= 0; c
< 4; c
++) {
677 if (!(mask
& (1 << c
)))
679 emit_mov(pc
, dst
[c
], temp
);
683 case TGSI_OPCODE_DP4
:
684 temp
= alloc_temp(pc
, NULL
);
685 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
686 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
687 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
688 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
689 for (c
= 0; c
< 4; c
++) {
690 if (!(mask
& (1 << c
)))
692 emit_mov(pc
, dst
[c
], temp
);
696 case TGSI_OPCODE_DPH
:
697 temp
= alloc_temp(pc
, NULL
);
698 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
699 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
700 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
701 emit_add(pc
, temp
, src
[1][3], temp
);
702 for (c
= 0; c
< 4; c
++) {
703 if (!(mask
& (1 << c
)))
705 emit_mov(pc
, dst
[c
], temp
);
709 case TGSI_OPCODE_EX2
:
710 temp
= alloc_temp(pc
, NULL
);
711 for (c
= 0; c
< 4; c
++) {
712 if (!(mask
& (1 << c
)))
714 emit_preex2(pc
, temp
, src
[0][c
]);
715 emit_flop(pc
, 6, dst
[c
], temp
);
719 case TGSI_OPCODE_LG2
:
720 for (c
= 0; c
< 4; c
++) {
721 if (!(mask
& (1 << c
)))
723 emit_flop(pc
, 3, dst
[c
], src
[0][c
]);
726 case TGSI_OPCODE_MAD
:
727 for (c
= 0; c
< 4; c
++) {
728 if (!(mask
& (1 << c
)))
730 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
733 case TGSI_OPCODE_MAX
:
734 for (c
= 0; c
< 4; c
++) {
735 if (!(mask
& (1 << c
)))
737 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
740 case TGSI_OPCODE_MIN
:
741 for (c
= 0; c
< 4; c
++) {
742 if (!(mask
& (1 << c
)))
744 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
747 case TGSI_OPCODE_MOV
:
748 for (c
= 0; c
< 4; c
++) {
749 if (!(mask
& (1 << c
)))
751 emit_mov(pc
, dst
[c
], src
[0][c
]);
754 case TGSI_OPCODE_MUL
:
755 for (c
= 0; c
< 4; c
++) {
756 if (!(mask
& (1 << c
)))
758 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
761 case TGSI_OPCODE_RCP
:
762 for (c
= 0; c
< 4; c
++) {
763 if (!(mask
& (1 << c
)))
765 emit_flop(pc
, 0, dst
[c
], src
[0][c
]);
768 case TGSI_OPCODE_RSQ
:
769 for (c
= 0; c
< 4; c
++) {
770 if (!(mask
& (1 << c
)))
772 emit_flop(pc
, 2, dst
[c
], src
[0][c
]);
775 case TGSI_OPCODE_SGE
:
776 for (c
= 0; c
< 4; c
++) {
777 if (!(mask
& (1 << c
)))
779 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
782 case TGSI_OPCODE_SIN
:
783 for (c
= 0; c
< 4; c
++) {
784 if (!(mask
& (1 << c
)))
786 emit_flop(pc
, 4, dst
[c
], src
[0][c
]);
789 case TGSI_OPCODE_SLT
:
790 for (c
= 0; c
< 4; c
++) {
791 if (!(mask
& (1 << c
)))
793 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
796 case TGSI_OPCODE_SUB
:
797 for (c
= 0; c
< 4; c
++) {
798 if (!(mask
& (1 << c
)))
800 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
803 case TGSI_OPCODE_XPD
:
804 temp
= alloc_temp(pc
, NULL
);
805 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
806 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
807 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
808 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
809 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
810 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
813 case TGSI_OPCODE_END
:
816 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
825 nv50_program_tx_prep(struct nv50_pc
*pc
)
827 struct tgsi_parse_context p
;
831 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
832 while (!tgsi_parse_end_of_tokens(&p
)) {
833 const union tgsi_full_token
*tok
= &p
.FullToken
;
835 tgsi_parse_token(&p
);
836 switch (tok
->Token
.Type
) {
837 case TGSI_TOKEN_TYPE_IMMEDIATE
:
839 const struct tgsi_full_immediate
*imm
=
840 &p
.FullToken
.FullImmediate
;
843 pc
->immd_buf
= realloc(pc
->immd_buf
, 4 * pc
->immd_nr
*
845 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 0] =
846 imm
->u
.ImmediateFloat32
[0].Float
;
847 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 1] =
848 imm
->u
.ImmediateFloat32
[1].Float
;
849 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 2] =
850 imm
->u
.ImmediateFloat32
[2].Float
;
851 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 3] =
852 imm
->u
.ImmediateFloat32
[3].Float
;
855 case TGSI_TOKEN_TYPE_DECLARATION
:
857 const struct tgsi_full_declaration
*d
;
860 d
= &p
.FullToken
.FullDeclaration
;
861 last
= d
->u
.DeclarationRange
.Last
;
863 switch (d
->Declaration
.File
) {
864 case TGSI_FILE_TEMPORARY
:
865 if (pc
->temp_nr
< (last
+ 1))
866 pc
->temp_nr
= last
+ 1;
868 case TGSI_FILE_OUTPUT
:
869 if (pc
->result_nr
< (last
+ 1))
870 pc
->result_nr
= last
+ 1;
872 case TGSI_FILE_INPUT
:
873 if (pc
->attr_nr
< (last
+ 1))
874 pc
->attr_nr
= last
+ 1;
876 case TGSI_FILE_CONSTANT
:
877 if (pc
->param_nr
< (last
+ 1))
878 pc
->param_nr
= last
+ 1;
881 NOUVEAU_ERR("bad decl file %d\n",
882 d
->Declaration
.File
);
887 case TGSI_TOKEN_TYPE_INSTRUCTION
:
894 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
896 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
900 for (i
= 0; i
< pc
->temp_nr
; i
++) {
901 for (c
= 0; c
< 4; c
++) {
902 pc
->temp
[i
*4+c
].type
= P_TEMP
;
903 pc
->temp
[i
*4+c
].hw
= -1;
904 pc
->temp
[i
*4+c
].index
= i
;
909 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
911 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
914 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
918 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
919 iv
= alloc_temp(pc
, NULL
);
923 for (i
= 0; i
< pc
->attr_nr
; i
++) {
924 struct nv50_reg
*a
= &pc
->attr
[i
*4];
926 for (c
= 0; c
< 4; c
++) {
927 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
928 struct nv50_reg
*at
=
929 alloc_temp(pc
, NULL
);
930 pc
->attr
[i
*4+c
].type
= at
->type
;
931 pc
->attr
[i
*4+c
].hw
= at
->hw
;
932 pc
->attr
[i
*4+c
].index
= at
->index
;
934 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
936 pc
->attr
[i
*4+c
].type
= P_ATTR
;
937 pc
->attr
[i
*4+c
].hw
= aid
++;
938 pc
->attr
[i
*4+c
].index
= i
;
942 if (pc
->p
->type
!= NV50_PROG_FRAGMENT
)
945 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
946 tmp
= alloc_temp(pc
, NULL
);
948 unsigned inst
[2] = { 0, 0 };
949 inst
[0] = 0x90000000;
950 inst
[0] |= (tmp
->hw
<< 2);
953 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
954 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
955 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
956 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
964 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
968 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
972 for (i
= 0; i
< pc
->result_nr
; i
++) {
973 for (c
= 0; c
< 4; c
++) {
974 if (pc
->p
->type
== NV50_PROG_FRAGMENT
)
975 pc
->result
[i
*4+c
].type
= P_TEMP
;
977 pc
->result
[i
*4+c
].type
= P_RESULT
;
978 pc
->result
[i
*4+c
].hw
= rid
++;
979 pc
->result
[i
*4+c
].index
= i
;
984 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
988 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
992 for (i
= 0; i
< pc
->param_nr
; i
++) {
993 for (c
= 0; c
< 4; c
++) {
994 pc
->param
[i
*4+c
].type
= P_CONST
;
995 pc
->param
[i
*4+c
].hw
= rid
++;
996 pc
->param
[i
*4+c
].index
= i
;
1004 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1008 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1009 for (c
= 0; c
< 4; c
++) {
1010 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1011 pc
->immd
[i
*4+c
].hw
= rid
++;
1012 pc
->immd
[i
*4+c
].index
= i
;
1019 tgsi_parse_free(&p
);
1024 nv50_program_tx(struct nv50_program
*p
)
1026 struct tgsi_parse_context parse
;
1030 pc
= CALLOC_STRUCT(nv50_pc
);
1034 pc
->p
->cfg
.high_temp
= 4;
1036 ret
= nv50_program_tx_prep(pc
);
1040 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1041 while (!tgsi_parse_end_of_tokens(&parse
)) {
1042 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1044 tgsi_parse_token(&parse
);
1046 switch (tok
->Token
.Type
) {
1047 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1048 ret
= nv50_program_tx_insn(pc
, tok
);
1057 p
->immd_nr
= pc
->immd_nr
* 4;
1058 p
->immd
= pc
->immd_buf
;
1061 tgsi_parse_free(&parse
);
1068 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1072 if (nv50_program_tx(p
) == FALSE
)
1074 /* *not* sufficient, it's fine if last inst is long and
1075 * NOT immd - otherwise it's fucked fucked fucked */
1076 p
->insns
[p
->insns_nr
- 1] |= 0x00000001;
1078 if (p
->type
== NV50_PROG_VERTEX
) {
1079 for (i
= 0; i
< p
->insns_nr
; i
++)
1080 NOUVEAU_ERR("VP0x%08x\n", p
->insns
[i
]);
1082 for (i
= 0; i
< p
->insns_nr
; i
++)
1083 NOUVEAU_ERR("FP0x%08x\n", p
->insns
[i
]);
1086 p
->translated
= TRUE
;
1090 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1094 for (i
= 0; i
< p
->immd_nr
; i
++) {
1095 BEGIN_RING(tesla
, 0x0f00, 2);
1096 OUT_RING ((NV50_CB_PMISC
<< 16) | (i
<< 8));
1097 OUT_RING (fui(p
->immd
[i
]));
1102 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1104 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1108 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->insns_nr
* 4);
1109 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
1110 memcpy(map
, p
->insns
, p
->insns_nr
* 4);
1111 ws
->buffer_unmap(ws
, p
->buffer
);
1115 nv50_vertprog_validate(struct nv50_context
*nv50
)
1117 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1118 struct nv50_program
*p
= nv50
->vertprog
;
1119 struct nouveau_stateobj
*so
;
1121 if (!p
->translated
) {
1122 nv50_program_validate(nv50
, p
);
1127 nv50_program_validate_data(nv50
, p
);
1128 nv50_program_validate_code(nv50
, p
);
1131 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1132 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1133 NOUVEAU_BO_HIGH
, 0, 0);
1134 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1135 NOUVEAU_BO_LOW
, 0, 0);
1136 so_method(so
, tesla
, 0x1650, 2);
1137 so_data (so
, p
->cfg
.vp
.attr
[0]);
1138 so_data (so
, p
->cfg
.vp
.attr
[1]);
1139 so_method(so
, tesla
, 0x16ac, 2);
1141 so_data (so
, p
->cfg
.high_temp
);
1142 so_method(so
, tesla
, 0x140c, 1);
1143 so_data (so
, 0); /* program start offset */
1144 so_emit(nv50
->screen
->nvws
, so
);
1149 nv50_fragprog_validate(struct nv50_context
*nv50
)
1151 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1152 struct nv50_program
*p
= nv50
->fragprog
;
1153 struct nouveau_stateobj
*so
;
1155 if (!p
->translated
) {
1156 nv50_program_validate(nv50
, p
);
1161 nv50_program_validate_data(nv50
, p
);
1162 nv50_program_validate_code(nv50
, p
);
1165 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1166 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1167 NOUVEAU_BO_HIGH
, 0, 0);
1168 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1169 NOUVEAU_BO_LOW
, 0, 0);
1170 so_method(so
, tesla
, 0x198c, 1);
1171 so_data (so
, p
->cfg
.high_temp
);
1172 so_method(so
, tesla
, 0x1414, 1);
1173 so_data (so
, 0); /* program start offset */
1174 so_emit(nv50
->screen
->nvws
, so
);
1179 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1181 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1190 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);