1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
12 #define NV50_SU_MAX_TEMP 64
13 #define NV50_PROGRAM_DUMP
15 /* ARL - gallium craps itself on progs/vp/arl.txt
17 * MSB - Like MAD, but MUL+SUB
18 * - Fuck it off, introduce a way to negate args for ops that
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
25 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
26 * case, if the emit_src() causes the inst to suddenly become long.
28 * Verify half-insns work where expected - and force disable them where they
29 * don't work - MUL has it forcibly disabled atm as it fixes POW..
31 * FUCK! watch dst==src vectors, can overwrite components that are needed.
32 * ie. SUB R0, R0.yzxw, R0
34 * Things to check with renouveau:
35 * FP attr/result assignment - how?
37 * - 0x16bc maps vp output onto fp hpos
38 * - 0x16c0 maps vp output onto fp col0
42 * 0x16bc->0x16e8 --> some binding between vp/fp regs
43 * 0x16b8 --> VP output count
45 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
46 * "MOV rcol.x, fcol.y" = 0x00000004
47 * 0x19a8 --> as above but 0x00000100 and 0x00000000
48 * - 0x00100000 used when KIL used
49 * 0x196c --> as above but 0x00000011 and 0x00000000
51 * 0x1988 --> 0xXXNNNNNN
52 * - XX == FP high something
69 struct nv50_program
*p
;
72 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
75 struct nv50_reg
*temp
;
77 struct nv50_reg
*attr
;
79 struct nv50_reg
*result
;
81 struct nv50_reg
*param
;
83 struct nv50_reg
*immd
;
87 struct nv50_reg
*temp_temp
[16];
88 unsigned temp_temp_nr
;
92 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
96 if (reg
->type
== P_RESULT
) {
97 if (pc
->p
->cfg
.high_result
< (reg
->hw
+ 1))
98 pc
->p
->cfg
.high_result
= reg
->hw
+ 1;
101 if (reg
->type
!= P_TEMP
)
105 /*XXX: do this here too to catch FP temp-as-attr usage..
106 * not clean, but works */
107 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
108 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
112 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
113 if (!(pc
->r_temp
[i
])) {
116 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
117 pc
->p
->cfg
.high_temp
= i
+ 1;
125 static struct nv50_reg
*
126 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
131 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
134 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
135 if (!pc
->r_temp
[i
]) {
136 r
= CALLOC_STRUCT(nv50_reg
);
150 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
152 if (r
->index
== -1) {
155 FREE(pc
->r_temp
[hw
]);
156 pc
->r_temp
[hw
] = NULL
;
160 static struct nv50_reg
*
161 temp_temp(struct nv50_pc
*pc
)
163 if (pc
->temp_temp_nr
>= 16)
166 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
167 return pc
->temp_temp
[pc
->temp_temp_nr
++];
171 kill_temp_temp(struct nv50_pc
*pc
)
175 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
176 free_temp(pc
, pc
->temp_temp
[i
]);
177 pc
->temp_temp_nr
= 0;
181 ctor_immd(struct nv50_pc
*pc
, float x
, float y
, float z
, float w
)
183 pc
->immd_buf
= realloc(pc
->immd_buf
, (pc
->immd_nr
+ 1) * 4 *
185 pc
->immd_buf
[(pc
->immd_nr
* 4) + 0] = x
;
186 pc
->immd_buf
[(pc
->immd_nr
* 4) + 1] = y
;
187 pc
->immd_buf
[(pc
->immd_nr
* 4) + 2] = z
;
188 pc
->immd_buf
[(pc
->immd_nr
* 4) + 3] = w
;
190 return pc
->immd_nr
++;
193 static struct nv50_reg
*
194 alloc_immd(struct nv50_pc
*pc
, float f
)
196 struct nv50_reg
*r
= CALLOC_STRUCT(nv50_reg
);
199 hw
= ctor_immd(pc
, f
, 0, 0, 0) * 4;
206 static struct nv50_program_exec
*
207 exec(struct nv50_pc
*pc
)
209 struct nv50_program_exec
*e
= CALLOC_STRUCT(nv50_program_exec
);
216 emit(struct nv50_pc
*pc
, struct nv50_program_exec
*e
)
218 struct nv50_program
*p
= pc
->p
;
221 p
->exec_tail
->next
= e
;
225 p
->exec_size
+= (e
->inst
[0] & 1) ? 2 : 1;
228 static INLINE
void set_long(struct nv50_pc
*, struct nv50_program_exec
*);
231 is_long(struct nv50_program_exec
*e
)
239 is_immd(struct nv50_program_exec
*e
)
241 if (is_long(e
) && (e
->inst
[1] & 3) == 3)
247 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
,
248 struct nv50_program_exec
*e
)
251 e
->inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
252 e
->inst
[1] |= (pred
<< 7) | (idx
<< 12);
256 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
,
257 struct nv50_program_exec
*e
)
260 e
->inst
[1] &= ~((0x3 << 4) | (1 << 6));
261 e
->inst
[1] |= (idx
<< 4) | (on
<< 6);
265 set_long(struct nv50_pc
*pc
, struct nv50_program_exec
*e
)
271 set_pred(pc
, 0xf, 0, e
);
272 set_pred_wr(pc
, 0, 0, e
);
276 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_program_exec
*e
)
278 if (dst
->type
== P_RESULT
) {
280 e
->inst
[1] |= 0x00000008;
284 e
->inst
[0] |= (dst
->hw
<< 2);
288 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, struct nv50_program_exec
*e
)
290 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
293 /*XXX: can't be predicated - bits overlap.. catch cases where both
294 * are required and avoid them. */
295 set_pred(pc
, 0, 0, e
);
296 set_pred_wr(pc
, 0, 0, e
);
298 e
->inst
[1] |= 0x00000002 | 0x00000001;
299 e
->inst
[0] |= (val
& 0x3f) << 16;
300 e
->inst
[1] |= (val
>> 6) << 2;
304 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
305 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
307 struct nv50_program_exec
*e
= exec(pc
);
309 e
->inst
[0] |= 0x80000000;
312 e
->inst
[0] |= (iv
->hw
<< 9);
314 e
->inst
[0] |= (src
->hw
<< 16);
316 e
->inst
[0] |= (1 << 25);
322 set_data(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned m
, unsigned s
,
323 struct nv50_program_exec
*e
)
327 e
->inst
[1] |= (1 << 22);
329 if (src
->type
== P_IMMD
) {
330 e
->inst
[1] |= (NV50_CB_PMISC
<< 22);
332 if (pc
->p
->type
== PIPE_SHADER_VERTEX
)
333 e
->inst
[1] |= (NV50_CB_PVP
<< 22);
335 e
->inst
[1] |= (NV50_CB_PFP
<< 22);
339 e
->param
.index
= src
->hw
;
341 e
->param
.mask
= m
<< (s
% 32);
345 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
347 struct nv50_program_exec
*e
= exec(pc
);
349 e
->inst
[0] |= 0x10000000;
353 if (0 && dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
354 set_immd(pc
, src
, e
);
355 /*XXX: 32-bit, but steals part of "half" reg space - need to
356 * catch and handle this case if/when we do half-regs
358 e
->inst
[0] |= 0x00008000;
360 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
362 set_data(pc
, src
, 0x7f, 9, e
);
363 e
->inst
[1] |= 0x20000000; /* src0 const? */
365 if (src
->type
== P_ATTR
) {
367 e
->inst
[1] |= 0x00200000;
371 e
->inst
[0] |= (src
->hw
<< 9);
374 /* We really should support "half" instructions here at some point,
375 * but I don't feel confident enough about them yet.
378 if (is_long(e
) && !is_immd(e
)) {
379 e
->inst
[1] |= 0x04000000; /* 32-bit */
380 e
->inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
387 check_swap_src_0_1(struct nv50_pc
*pc
,
388 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
390 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
392 if (src0
->type
== P_CONST
) {
393 if (src1
->type
!= P_CONST
) {
399 if (src1
->type
== P_ATTR
) {
400 if (src0
->type
!= P_ATTR
) {
411 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
413 if (src
->type
== P_ATTR
) {
415 e
->inst
[1] |= 0x00200000;
417 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
418 struct nv50_reg
*temp
= temp_temp(pc
);
420 emit_mov(pc
, temp
, src
);
425 e
->inst
[0] |= (src
->hw
<< 9);
429 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
431 if (src
->type
== P_ATTR
) {
432 struct nv50_reg
*temp
= temp_temp(pc
);
434 emit_mov(pc
, temp
, src
);
437 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
438 assert(!(e
->inst
[0] & 0x00800000));
439 if (e
->inst
[0] & 0x01000000) {
440 struct nv50_reg
*temp
= temp_temp(pc
);
442 emit_mov(pc
, temp
, src
);
445 set_data(pc
, src
, 0x7f, 16, e
);
446 e
->inst
[0] |= 0x00800000;
451 e
->inst
[0] |= (src
->hw
<< 16);
455 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
459 if (src
->type
== P_ATTR
) {
460 struct nv50_reg
*temp
= temp_temp(pc
);
462 emit_mov(pc
, temp
, src
);
465 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
466 assert(!(e
->inst
[0] & 0x01000000));
467 if (e
->inst
[0] & 0x00800000) {
468 struct nv50_reg
*temp
= temp_temp(pc
);
470 emit_mov(pc
, temp
, src
);
473 set_data(pc
, src
, 0x7f, 32+14, e
);
474 e
->inst
[0] |= 0x01000000;
479 e
->inst
[1] |= (src
->hw
<< 14);
483 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
484 struct nv50_reg
*src1
)
486 struct nv50_program_exec
*e
= exec(pc
);
488 e
->inst
[0] |= 0xc0000000;
491 check_swap_src_0_1(pc
, &src0
, &src1
);
493 set_src_0(pc
, src0
, e
);
494 set_src_1(pc
, src1
, e
);
500 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
501 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
503 struct nv50_program_exec
*e
= exec(pc
);
505 e
->inst
[0] |= 0xb0000000;
507 check_swap_src_0_1(pc
, &src0
, &src1
);
509 set_src_0(pc
, src0
, e
);
511 set_src_2(pc
, src1
, e
);
513 set_src_1(pc
, src1
, e
);
519 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
520 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
522 struct nv50_program_exec
*e
= exec(pc
);
525 e
->inst
[0] |= 0xb0000000;
526 e
->inst
[1] |= (sub
<< 29);
528 check_swap_src_0_1(pc
, &src0
, &src1
);
530 set_src_0(pc
, src0
, e
);
531 set_src_1(pc
, src1
, e
);
537 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
538 struct nv50_reg
*src1
)
540 struct nv50_program_exec
*e
= exec(pc
);
542 e
->inst
[0] |= 0xb0000000;
545 if (check_swap_src_0_1(pc
, &src0
, &src1
))
546 e
->inst
[1] |= 0x04000000;
548 e
->inst
[1] |= 0x08000000;
551 set_src_0(pc
, src0
, e
);
552 set_src_2(pc
, src1
, e
);
558 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
559 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
561 struct nv50_program_exec
*e
= exec(pc
);
563 e
->inst
[0] |= 0xe0000000;
565 check_swap_src_0_1(pc
, &src0
, &src1
);
567 set_src_0(pc
, src0
, e
);
568 set_src_1(pc
, src1
, e
);
569 set_src_2(pc
, src2
, e
);
575 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
576 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
578 struct nv50_program_exec
*e
= exec(pc
);
580 e
->inst
[0] |= 0xe0000000;
582 e
->inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
584 check_swap_src_0_1(pc
, &src0
, &src1
);
586 set_src_0(pc
, src0
, e
);
587 set_src_1(pc
, src1
, e
);
588 set_src_2(pc
, src2
, e
);
594 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
595 struct nv50_reg
*dst
, struct nv50_reg
*src
)
597 struct nv50_program_exec
*e
= exec(pc
);
599 e
->inst
[0] |= 0x90000000;
602 e
->inst
[1] |= (sub
<< 29);
606 set_src_0(pc
, src
, e
);
612 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
614 struct nv50_program_exec
*e
= exec(pc
);
616 e
->inst
[0] |= 0xb0000000;
619 set_src_0(pc
, src
, e
);
621 e
->inst
[1] |= (6 << 29) | 0x00004000;
627 emit_precossin(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
629 struct nv50_program_exec
*e
= exec(pc
);
631 e
->inst
[0] |= 0xb0000000;
634 set_src_0(pc
, src
, e
);
636 e
->inst
[1] |= (6 << 29);
642 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
643 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
645 struct nv50_program_exec
*e
= exec(pc
);
646 unsigned inv_cop
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
647 struct nv50_reg
*rdst
;
650 if (check_swap_src_0_1(pc
, &src0
, &src1
))
651 c_op
= inv_cop
[c_op
];
654 if (dst
->type
!= P_TEMP
)
655 dst
= alloc_temp(pc
, NULL
);
659 e
->inst
[0] |= 0xb0000000;
660 e
->inst
[1] |= (3 << 29);
661 e
->inst
[1] |= (c_op
<< 14);
662 /*XXX: breaks things, .u32 by default?
663 * decuda will disasm as .u16 and use .lo/.hi regs, but this
664 * doesn't seem to match what the hw actually does.
665 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
668 set_src_0(pc
, src0
, e
);
669 set_src_1(pc
, src1
, e
);
674 e
->inst
[0] = 0xa0000001;
675 e
->inst
[1] = 0x64014780;
676 set_dst(pc
, rdst
, e
);
677 set_src_0(pc
, dst
, e
);
685 emit_flr(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
687 struct nv50_program_exec
*e
= exec(pc
);
689 e
->inst
[0] = 0xa0000000; /* cvt */
691 e
->inst
[1] |= (6 << 29); /* cvt */
692 e
->inst
[1] |= 0x08000000; /* integer mode */
693 e
->inst
[1] |= 0x04000000; /* 32 bit */
694 e
->inst
[1] |= ((0x1 << 3)) << 14; /* .rn */
695 e
->inst
[1] |= (1 << 14); /* src .f32 */
697 set_src_0(pc
, src
, e
);
703 emit_pow(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
704 struct nv50_reg
*v
, struct nv50_reg
*e
)
706 struct nv50_reg
*temp
= alloc_temp(pc
, NULL
);
708 emit_flop(pc
, 3, temp
, v
);
709 emit_mul(pc
, temp
, temp
, e
);
710 emit_preex2(pc
, temp
, temp
);
711 emit_flop(pc
, 6, dst
, temp
);
717 emit_abs(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
719 struct nv50_program_exec
*e
= exec(pc
);
721 e
->inst
[0] = 0xa0000000; /* cvt */
723 e
->inst
[1] |= (6 << 29); /* cvt */
724 e
->inst
[1] |= 0x04000000; /* 32 bit */
725 e
->inst
[1] |= (1 << 14); /* src .f32 */
726 e
->inst
[1] |= ((1 << 6) << 14); /* .abs */
728 set_src_0(pc
, src
, e
);
734 emit_lit(struct nv50_pc
*pc
, struct nv50_reg
**dst
, unsigned mask
,
735 struct nv50_reg
**src
)
737 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
738 struct nv50_reg
*zero
= alloc_immd(pc
, 0.0);
739 struct nv50_reg
*neg128
= alloc_immd(pc
, -127.999999);
740 struct nv50_reg
*pos128
= alloc_immd(pc
, 127.999999);
741 struct nv50_reg
*tmp
[4];
744 emit_mov(pc
, dst
[0], one
);
747 emit_mov(pc
, dst
[3], one
);
749 if (mask
& (3 << 1)) {
753 tmp
[0] = temp_temp(pc
);
754 emit_minmax(pc
, 4, tmp
[0], src
[0], zero
);
757 if (mask
& (1 << 2)) {
758 set_pred_wr(pc
, 1, 0, pc
->p
->exec_tail
);
760 tmp
[1] = temp_temp(pc
);
761 emit_minmax(pc
, 4, tmp
[1], src
[1], zero
);
763 tmp
[3] = temp_temp(pc
);
764 emit_minmax(pc
, 4, tmp
[3], src
[3], neg128
);
765 emit_minmax(pc
, 5, tmp
[3], tmp
[3], pos128
);
767 emit_pow(pc
, dst
[2], tmp
[1], tmp
[3]);
768 emit_mov(pc
, dst
[2], zero
);
769 set_pred(pc
, 3, 0, pc
->p
->exec_tail
);
774 emit_neg(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
776 struct nv50_program_exec
*e
= exec(pc
);
779 e
->inst
[0] |= 0xa0000000; /* delta */
780 e
->inst
[1] |= (7 << 29); /* delta */
781 e
->inst
[1] |= 0x04000000; /* negate arg0? probably not */
782 e
->inst
[1] |= (1 << 14); /* src .f32 */
784 set_src_0(pc
, src
, e
);
789 static struct nv50_reg
*
790 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
792 switch (dst
->DstRegister
.File
) {
793 case TGSI_FILE_TEMPORARY
:
794 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
795 case TGSI_FILE_OUTPUT
:
796 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
806 static struct nv50_reg
*
807 tgsi_src(struct nv50_pc
*pc
, int chan
, const struct tgsi_full_src_register
*src
)
809 struct nv50_reg
*r
= NULL
;
810 struct nv50_reg
*temp
;
813 c
= tgsi_util_get_full_src_register_extswizzle(src
, chan
);
815 case TGSI_EXTSWIZZLE_X
:
816 case TGSI_EXTSWIZZLE_Y
:
817 case TGSI_EXTSWIZZLE_Z
:
818 case TGSI_EXTSWIZZLE_W
:
819 switch (src
->SrcRegister
.File
) {
820 case TGSI_FILE_INPUT
:
821 r
= &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
823 case TGSI_FILE_TEMPORARY
:
824 r
= &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
826 case TGSI_FILE_CONSTANT
:
827 r
= &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
829 case TGSI_FILE_IMMEDIATE
:
830 r
= &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
832 case TGSI_FILE_SAMPLER
:
839 case TGSI_EXTSWIZZLE_ZERO
:
840 r
= alloc_immd(pc
, 0.0);
842 case TGSI_EXTSWIZZLE_ONE
:
843 r
= alloc_immd(pc
, 1.0);
850 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
851 case TGSI_UTIL_SIGN_KEEP
:
853 case TGSI_UTIL_SIGN_CLEAR
:
854 temp
= temp_temp(pc
);
855 emit_abs(pc
, temp
, r
);
858 case TGSI_UTIL_SIGN_TOGGLE
:
859 temp
= temp_temp(pc
);
860 emit_neg(pc
, temp
, r
);
863 case TGSI_UTIL_SIGN_SET
:
864 temp
= temp_temp(pc
);
865 emit_abs(pc
, temp
, r
);
866 emit_neg(pc
, temp
, r
);
878 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
880 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
881 struct nv50_reg
*rdst
[4], *dst
[4], *src
[3][4], *temp
;
885 NOUVEAU_ERR("insn %p\n", tok
);
887 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
888 sat
= inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
;
890 for (c
= 0; c
< 4; c
++) {
892 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
897 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
898 for (c
= 0; c
< 4; c
++)
899 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
903 for (c
= 0; c
< 4; c
++) {
905 dst
[c
] = temp_temp(pc
);
909 switch (inst
->Instruction
.Opcode
) {
910 case TGSI_OPCODE_ABS
:
911 for (c
= 0; c
< 4; c
++) {
912 if (!(mask
& (1 << c
)))
914 emit_abs(pc
, dst
[c
], src
[0][c
]);
917 case TGSI_OPCODE_ADD
:
918 for (c
= 0; c
< 4; c
++) {
919 if (!(mask
& (1 << c
)))
921 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
924 case TGSI_OPCODE_COS
:
925 temp
= alloc_temp(pc
, NULL
);
926 emit_precossin(pc
, temp
, src
[0][0]);
927 emit_flop(pc
, 5, temp
, temp
);
928 for (c
= 0; c
< 4; c
++) {
929 if (!(mask
& (1 << c
)))
931 emit_mov(pc
, dst
[c
], temp
);
934 case TGSI_OPCODE_DP3
:
935 temp
= alloc_temp(pc
, NULL
);
936 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
937 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
938 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
939 for (c
= 0; c
< 4; c
++) {
940 if (!(mask
& (1 << c
)))
942 emit_mov(pc
, dst
[c
], temp
);
946 case TGSI_OPCODE_DP4
:
947 temp
= alloc_temp(pc
, NULL
);
948 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
949 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
950 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
951 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
952 for (c
= 0; c
< 4; c
++) {
953 if (!(mask
& (1 << c
)))
955 emit_mov(pc
, dst
[c
], temp
);
959 case TGSI_OPCODE_DPH
:
960 temp
= alloc_temp(pc
, NULL
);
961 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
962 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
963 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
964 emit_add(pc
, temp
, src
[1][3], temp
);
965 for (c
= 0; c
< 4; c
++) {
966 if (!(mask
& (1 << c
)))
968 emit_mov(pc
, dst
[c
], temp
);
972 case TGSI_OPCODE_DST
:
974 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
976 emit_mov(pc
, dst
[0], one
);
978 emit_mul(pc
, dst
[1], src
[0][1], src
[1][1]);
980 emit_mov(pc
, dst
[2], src
[0][2]);
982 emit_mov(pc
, dst
[3], src
[1][3]);
986 case TGSI_OPCODE_EX2
:
987 temp
= alloc_temp(pc
, NULL
);
988 emit_preex2(pc
, temp
, src
[0][0]);
989 emit_flop(pc
, 6, temp
, temp
);
990 for (c
= 0; c
< 4; c
++) {
991 if (!(mask
& (1 << c
)))
993 emit_mov(pc
, dst
[c
], temp
);
997 case TGSI_OPCODE_FLR
:
998 for (c
= 0; c
< 4; c
++) {
999 if (!(mask
& (1 << c
)))
1001 emit_flr(pc
, dst
[c
], src
[0][c
]);
1004 case TGSI_OPCODE_FRC
:
1005 temp
= alloc_temp(pc
, NULL
);
1006 for (c
= 0; c
< 4; c
++) {
1007 if (!(mask
& (1 << c
)))
1009 emit_flr(pc
, temp
, src
[0][c
]);
1010 emit_sub(pc
, dst
[c
], src
[0][c
], temp
);
1012 free_temp(pc
, temp
);
1014 case TGSI_OPCODE_LIT
:
1015 emit_lit(pc
, &dst
[0], mask
, &src
[0][0]);
1017 case TGSI_OPCODE_LG2
:
1018 temp
= alloc_temp(pc
, NULL
);
1019 emit_flop(pc
, 3, temp
, src
[0][0]);
1020 for (c
= 0; c
< 4; c
++) {
1021 if (!(mask
& (1 << c
)))
1023 emit_mov(pc
, dst
[c
], temp
);
1026 case TGSI_OPCODE_LRP
:
1027 for (c
= 0; c
< 4; c
++) {
1028 if (!(mask
& (1 << c
)))
1030 /*XXX: we can do better than this */
1031 temp
= alloc_temp(pc
, NULL
);
1032 emit_neg(pc
, temp
, src
[0][c
]);
1033 emit_mad(pc
, temp
, temp
, src
[2][c
], src
[2][c
]);
1034 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], temp
);
1035 free_temp(pc
, temp
);
1038 case TGSI_OPCODE_MAD
:
1039 for (c
= 0; c
< 4; c
++) {
1040 if (!(mask
& (1 << c
)))
1042 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
1045 case TGSI_OPCODE_MAX
:
1046 for (c
= 0; c
< 4; c
++) {
1047 if (!(mask
& (1 << c
)))
1049 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
1052 case TGSI_OPCODE_MIN
:
1053 for (c
= 0; c
< 4; c
++) {
1054 if (!(mask
& (1 << c
)))
1056 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
1059 case TGSI_OPCODE_MOV
:
1060 for (c
= 0; c
< 4; c
++) {
1061 if (!(mask
& (1 << c
)))
1063 emit_mov(pc
, dst
[c
], src
[0][c
]);
1066 case TGSI_OPCODE_MUL
:
1067 for (c
= 0; c
< 4; c
++) {
1068 if (!(mask
& (1 << c
)))
1070 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1073 case TGSI_OPCODE_POW
:
1074 temp
= alloc_temp(pc
, NULL
);
1075 emit_pow(pc
, temp
, src
[0][0], src
[1][0]);
1076 for (c
= 0; c
< 4; c
++) {
1077 if (!(mask
& (1 << c
)))
1079 emit_mov(pc
, dst
[c
], temp
);
1081 free_temp(pc
, temp
);
1083 case TGSI_OPCODE_RCP
:
1084 for (c
= 0; c
< 4; c
++) {
1085 if (!(mask
& (1 << c
)))
1087 emit_flop(pc
, 0, dst
[c
], src
[0][0]);
1090 case TGSI_OPCODE_RSQ
:
1091 for (c
= 0; c
< 4; c
++) {
1092 if (!(mask
& (1 << c
)))
1094 emit_flop(pc
, 2, dst
[c
], src
[0][0]);
1097 case TGSI_OPCODE_SCS
:
1098 temp
= alloc_temp(pc
, NULL
);
1099 emit_precossin(pc
, temp
, src
[0][0]);
1100 if (mask
& (1 << 0))
1101 emit_flop(pc
, 5, dst
[0], temp
);
1102 if (mask
& (1 << 1))
1103 emit_flop(pc
, 4, dst
[1], temp
);
1105 case TGSI_OPCODE_SGE
:
1106 for (c
= 0; c
< 4; c
++) {
1107 if (!(mask
& (1 << c
)))
1109 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
1112 case TGSI_OPCODE_SIN
:
1113 temp
= alloc_temp(pc
, NULL
);
1114 emit_precossin(pc
, temp
, src
[0][0]);
1115 emit_flop(pc
, 4, temp
, temp
);
1116 for (c
= 0; c
< 4; c
++) {
1117 if (!(mask
& (1 << c
)))
1119 emit_mov(pc
, dst
[c
], temp
);
1122 case TGSI_OPCODE_SLT
:
1123 for (c
= 0; c
< 4; c
++) {
1124 if (!(mask
& (1 << c
)))
1126 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
1129 case TGSI_OPCODE_SUB
:
1130 for (c
= 0; c
< 4; c
++) {
1131 if (!(mask
& (1 << c
)))
1133 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1136 case TGSI_OPCODE_TEX
:
1138 case TGSI_OPCODE_XPD
:
1139 temp
= alloc_temp(pc
, NULL
);
1140 if (mask
& (1 << 0)) {
1141 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
1142 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
1144 if (mask
& (1 << 1)) {
1145 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
1146 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
1148 if (mask
& (1 << 2)) {
1149 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
1150 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
1152 free_temp(pc
, temp
);
1154 case TGSI_OPCODE_END
:
1157 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
1162 for (c
= 0; c
< 4; c
++) {
1163 struct nv50_program_exec
*e
;
1165 if (!(mask
& (1 << c
)))
1169 e
->inst
[0] = 0xa0000000; /* cvt */
1171 e
->inst
[1] |= (6 << 29); /* cvt */
1172 e
->inst
[1] |= 0x04000000; /* 32 bit */
1173 e
->inst
[1] |= (1 << 14); /* src .f32 */
1174 e
->inst
[1] |= ((1 << 5) << 14); /* .sat */
1175 set_dst(pc
, rdst
[c
], e
);
1176 set_src_0(pc
, dst
[c
], e
);
1186 nv50_program_tx_prep(struct nv50_pc
*pc
)
1188 struct tgsi_parse_context p
;
1189 boolean ret
= FALSE
;
1192 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
1193 while (!tgsi_parse_end_of_tokens(&p
)) {
1194 const union tgsi_full_token
*tok
= &p
.FullToken
;
1196 tgsi_parse_token(&p
);
1197 switch (tok
->Token
.Type
) {
1198 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1200 const struct tgsi_full_immediate
*imm
=
1201 &p
.FullToken
.FullImmediate
;
1203 ctor_immd(pc
, imm
->u
.ImmediateFloat32
[0].Float
,
1204 imm
->u
.ImmediateFloat32
[1].Float
,
1205 imm
->u
.ImmediateFloat32
[2].Float
,
1206 imm
->u
.ImmediateFloat32
[3].Float
);
1209 case TGSI_TOKEN_TYPE_DECLARATION
:
1211 const struct tgsi_full_declaration
*d
;
1214 d
= &p
.FullToken
.FullDeclaration
;
1215 last
= d
->u
.DeclarationRange
.Last
;
1217 switch (d
->Declaration
.File
) {
1218 case TGSI_FILE_TEMPORARY
:
1219 if (pc
->temp_nr
< (last
+ 1))
1220 pc
->temp_nr
= last
+ 1;
1222 case TGSI_FILE_OUTPUT
:
1223 if (pc
->result_nr
< (last
+ 1))
1224 pc
->result_nr
= last
+ 1;
1226 case TGSI_FILE_INPUT
:
1227 if (pc
->attr_nr
< (last
+ 1))
1228 pc
->attr_nr
= last
+ 1;
1230 case TGSI_FILE_CONSTANT
:
1231 if (pc
->param_nr
< (last
+ 1))
1232 pc
->param_nr
= last
+ 1;
1234 case TGSI_FILE_SAMPLER
:
1237 NOUVEAU_ERR("bad decl file %d\n",
1238 d
->Declaration
.File
);
1243 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1250 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
1252 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
1256 for (i
= 0; i
< pc
->temp_nr
; i
++) {
1257 for (c
= 0; c
< 4; c
++) {
1258 pc
->temp
[i
*4+c
].type
= P_TEMP
;
1259 pc
->temp
[i
*4+c
].hw
= -1;
1260 pc
->temp
[i
*4+c
].index
= i
;
1265 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
1267 struct nv50_reg
*iv
= NULL
;
1270 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
1274 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1275 iv
= alloc_temp(pc
, NULL
);
1276 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
1277 emit_flop(pc
, 0, iv
, iv
);
1281 for (i
= 0; i
< pc
->attr_nr
; i
++) {
1282 struct nv50_reg
*a
= &pc
->attr
[i
*4];
1284 for (c
= 0; c
< 4; c
++) {
1285 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1286 struct nv50_reg
*at
=
1287 alloc_temp(pc
, NULL
);
1288 pc
->attr
[i
*4+c
].type
= at
->type
;
1289 pc
->attr
[i
*4+c
].hw
= at
->hw
;
1290 pc
->attr
[i
*4+c
].index
= at
->index
;
1292 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
1294 pc
->attr
[i
*4+c
].type
= P_ATTR
;
1295 pc
->attr
[i
*4+c
].hw
= aid
++;
1296 pc
->attr
[i
*4+c
].index
= i
;
1300 if (pc
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1303 emit_interp(pc
, &a
[0], &a
[0], iv
, TRUE
);
1304 emit_interp(pc
, &a
[1], &a
[1], iv
, TRUE
);
1305 emit_interp(pc
, &a
[2], &a
[2], iv
, TRUE
);
1306 emit_interp(pc
, &a
[3], &a
[3], iv
, TRUE
);
1313 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
1314 if (pc
->result_nr
) {
1317 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
1321 for (i
= 0; i
< pc
->result_nr
; i
++) {
1322 for (c
= 0; c
< 4; c
++) {
1323 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1324 pc
->result
[i
*4+c
].type
= P_TEMP
;
1325 pc
->result
[i
*4+c
].hw
= -1;
1327 pc
->result
[i
*4+c
].type
= P_RESULT
;
1328 pc
->result
[i
*4+c
].hw
= rid
++;
1330 pc
->result
[i
*4+c
].index
= i
;
1335 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
1339 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
1343 for (i
= 0; i
< pc
->param_nr
; i
++) {
1344 for (c
= 0; c
< 4; c
++) {
1345 pc
->param
[i
*4+c
].type
= P_CONST
;
1346 pc
->param
[i
*4+c
].hw
= rid
++;
1347 pc
->param
[i
*4+c
].index
= i
;
1353 int rid
= pc
->param_nr
* 4;
1355 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1359 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1360 for (c
= 0; c
< 4; c
++) {
1361 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1362 pc
->immd
[i
*4+c
].hw
= rid
++;
1363 pc
->immd
[i
*4+c
].index
= i
;
1370 tgsi_parse_free(&p
);
1375 nv50_program_tx(struct nv50_program
*p
)
1377 struct tgsi_parse_context parse
;
1381 pc
= CALLOC_STRUCT(nv50_pc
);
1385 pc
->p
->cfg
.high_temp
= 4;
1387 ret
= nv50_program_tx_prep(pc
);
1391 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1392 while (!tgsi_parse_end_of_tokens(&parse
)) {
1393 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1395 tgsi_parse_token(&parse
);
1397 switch (tok
->Token
.Type
) {
1398 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1399 ret
= nv50_program_tx_insn(pc
, tok
);
1408 if (p
->type
== PIPE_SHADER_FRAGMENT
) {
1409 struct nv50_reg out
;
1412 for (out
.hw
= 0; out
.hw
< pc
->result_nr
* 4; out
.hw
++)
1413 emit_mov(pc
, &out
, &pc
->result
[out
.hw
]);
1416 assert(is_long(pc
->p
->exec_tail
) && !is_immd(pc
->p
->exec_head
));
1417 pc
->p
->exec_tail
->inst
[1] |= 0x00000001;
1419 p
->param_nr
= pc
->param_nr
* 4;
1420 p
->immd_nr
= pc
->immd_nr
* 4;
1421 p
->immd
= pc
->immd_buf
;
1424 tgsi_parse_free(&parse
);
1431 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1433 if (nv50_program_tx(p
) == FALSE
)
1435 p
->translated
= TRUE
;
1439 nv50_program_upload_data(struct nv50_context
*nv50
, float *map
,
1440 unsigned start
, unsigned count
)
1443 unsigned nr
= count
> 2047 ? 2047 : count
;
1445 BEGIN_RING(tesla
, 0x00000f00, 1);
1446 OUT_RING ((NV50_CB_PMISC
<< 0) | (start
<< 8));
1447 BEGIN_RING(tesla
, 0x40000f04, nr
);
1448 OUT_RINGp (map
, nr
);
1457 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1459 struct nouveau_winsys
*nvws
= nv50
->screen
->nvws
;
1460 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1461 unsigned nr
= p
->param_nr
+ p
->immd_nr
;
1463 if (!p
->data
&& nr
) {
1464 struct nouveau_resource
*heap
= nv50
->screen
->vp_data_heap
;
1466 if (nvws
->res_alloc(heap
, nr
, p
, &p
->data
)) {
1467 while (heap
->next
&& heap
->size
< nr
) {
1468 struct nv50_program
*evict
= heap
->next
->priv
;
1469 nvws
->res_free(&evict
->data
);
1472 if (nvws
->res_alloc(heap
, nr
, p
, &p
->data
))
1478 float *map
= ws
->buffer_map(ws
, nv50
->constbuf
[p
->type
],
1479 PIPE_BUFFER_USAGE_CPU_READ
);
1480 nv50_program_upload_data(nv50
, map
, p
->data
->start
,
1482 ws
->buffer_unmap(ws
, nv50
->constbuf
[p
->type
]);
1486 nv50_program_upload_data(nv50
, p
->immd
,
1487 p
->data
->start
+ p
->param_nr
,
1493 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1495 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1496 struct nv50_program_exec
*e
;
1497 struct nouveau_stateobj
*so
;
1498 const unsigned flags
= NOUVEAU_BO_VRAM
| NOUVEAU_BO_WR
;
1499 unsigned start
, count
, *up
, *ptr
;
1500 boolean upload
= FALSE
;
1503 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->exec_size
* 4);
1507 if (p
->data
&& p
->data
->start
!= p
->data_start
) {
1508 for (e
= p
->exec_head
; e
; e
= e
->next
) {
1511 if (e
->param
.index
< 0)
1513 ei
= e
->param
.shift
>> 5;
1514 ci
= e
->param
.index
+ p
->data
->start
;
1516 e
->inst
[ei
] &= ~e
->param
.mask
;
1517 e
->inst
[ei
] |= (ci
<< e
->param
.shift
);
1520 p
->data_start
= p
->data
->start
;
1527 up
= ptr
= MALLOC(p
->exec_size
* 4);
1528 for (e
= p
->exec_head
; e
; e
= e
->next
) {
1529 *(ptr
++) = e
->inst
[0];
1531 *(ptr
++) = e
->inst
[1];
1535 so_method(so
, nv50
->screen
->tesla
, 0x1280, 3);
1536 so_reloc (so
, p
->buffer
, 0, flags
| NOUVEAU_BO_HIGH
, 0, 0);
1537 so_reloc (so
, p
->buffer
, 0, flags
| NOUVEAU_BO_LOW
, 0, 0);
1538 so_data (so
, (NV50_CB_PUPLOAD
<< 16) | 0x0800); //(p->exec_size * 4));
1540 start
= 0; count
= p
->exec_size
;
1542 struct nouveau_winsys
*nvws
= nv50
->screen
->nvws
;
1547 nr
= MIN2(count
, 2047);
1548 nr
= MIN2(nvws
->channel
->pushbuf
->remaining
, nr
);
1549 if (nvws
->channel
->pushbuf
->remaining
< (nr
+ 3)) {
1554 BEGIN_RING(tesla
, 0x0f00, 1);
1555 OUT_RING ((start
<< 8) | NV50_CB_PUPLOAD
);
1556 BEGIN_RING(tesla
, 0x40000f04, nr
);
1557 OUT_RINGp (up
+ start
, nr
);
1568 nv50_vertprog_validate(struct nv50_context
*nv50
)
1570 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1571 struct nv50_program
*p
= nv50
->vertprog
;
1572 struct nouveau_stateobj
*so
;
1574 if (!p
->translated
) {
1575 nv50_program_validate(nv50
, p
);
1580 nv50_program_validate_data(nv50
, p
);
1581 nv50_program_validate_code(nv50
, p
);
1584 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1585 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1586 NOUVEAU_BO_HIGH
, 0, 0);
1587 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1588 NOUVEAU_BO_LOW
, 0, 0);
1589 so_method(so
, tesla
, 0x1650, 2);
1590 so_data (so
, p
->cfg
.vp
.attr
[0]);
1591 so_data (so
, p
->cfg
.vp
.attr
[1]);
1592 so_method(so
, tesla
, 0x16b8, 1);
1593 so_data (so
, p
->cfg
.high_result
);
1594 so_method(so
, tesla
, 0x16ac, 2);
1596 so_data (so
, p
->cfg
.high_temp
);
1597 so_method(so
, tesla
, 0x140c, 1);
1598 so_data (so
, 0); /* program start offset */
1599 so_emit(nv50
->screen
->nvws
, so
);
1604 nv50_fragprog_validate(struct nv50_context
*nv50
)
1606 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1607 struct nv50_program
*p
= nv50
->fragprog
;
1608 struct nouveau_stateobj
*so
;
1610 if (!p
->translated
) {
1611 nv50_program_validate(nv50
, p
);
1616 nv50_program_validate_data(nv50
, p
);
1617 nv50_program_validate_code(nv50
, p
);
1620 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1621 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1622 NOUVEAU_BO_HIGH
, 0, 0);
1623 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1624 NOUVEAU_BO_LOW
, 0, 0);
1625 so_method(so
, tesla
, 0x1904, 4);
1626 so_data (so
, 0x01040404); /* p: 0x01000404 */
1627 so_data (so
, 0x00000004);
1628 so_data (so
, 0x00000000);
1629 so_data (so
, 0x00000000);
1630 so_method(so
, tesla
, 0x16bc, 3); /*XXX: fixme */
1631 so_data (so
, 0x03020100);
1632 so_data (so
, 0x07060504);
1633 so_data (so
, 0x0b0a0908);
1634 so_method(so
, tesla
, 0x1988, 2);
1635 so_data (so
, 0x08040404); /* p: 0x0f000401 */
1636 so_data (so
, p
->cfg
.high_temp
);
1637 so_method(so
, tesla
, 0x1414, 1);
1638 so_data (so
, 0); /* program start offset */
1639 so_emit(nv50
->screen
->nvws
, so
);
1644 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1646 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1648 while (p
->exec_head
) {
1649 struct nv50_program_exec
*e
= p
->exec_head
;
1651 p
->exec_head
= e
->next
;
1654 p
->exec_tail
= NULL
;
1658 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);