1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
12 #define NV50_SU_MAX_TEMP 64
13 #define NV50_PROGRAM_DUMP
15 /* ARL - gallium craps itself on progs/vp/arl.txt
17 * MSB - Like MAD, but MUL+SUB
18 * - Fuck it off, introduce a way to negate args for ops that
21 * Look into inlining IMMD for ops other than MOV (make it general?)
22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this
25 * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
26 * case, if the emit_src() causes the inst to suddenly become long.
28 * Verify half-insns work where expected - and force disable them where they
29 * don't work - MUL has it forcibly disabled atm as it fixes POW..
31 * FUCK! watch dst==src vectors, can overwrite components that are needed.
32 * ie. SUB R0, R0.yzxw, R0
35 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0)
38 * Things to check with renouveau:
39 * FP attr/result assignment - how?
41 * - 0x16bc maps vp output onto fp hpos
42 * - 0x16c0 maps vp output onto fp col0
46 * 0x16bc->0x16e8 --> some binding between vp/fp regs
47 * 0x16b8 --> VP output count
49 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
50 * "MOV rcol.x, fcol.y" = 0x00000004
51 * 0x19a8 --> as above but 0x00000100 and 0x00000000
52 * - 0x00100000 used when KIL used
53 * 0x196c --> as above but 0x00000011 and 0x00000000
55 * 0x1988 --> 0xXXNNNNNN
56 * - XX == FP high something
73 struct nv50_program
*p
;
76 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
79 struct nv50_reg
*temp
;
81 struct nv50_reg
*attr
;
83 struct nv50_reg
*result
;
85 struct nv50_reg
*param
;
87 struct nv50_reg
*immd
;
91 struct nv50_reg
*temp_temp
[16];
92 unsigned temp_temp_nr
;
96 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
100 if (reg
->type
!= P_TEMP
)
104 /*XXX: do this here too to catch FP temp-as-attr usage..
105 * not clean, but works */
106 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
107 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
111 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
112 if (!(pc
->r_temp
[i
])) {
115 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
116 pc
->p
->cfg
.high_temp
= i
+ 1;
124 static struct nv50_reg
*
125 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
130 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
133 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
134 if (!pc
->r_temp
[i
]) {
135 r
= CALLOC_STRUCT(nv50_reg
);
149 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
151 if (r
->index
== -1) {
154 FREE(pc
->r_temp
[hw
]);
155 pc
->r_temp
[hw
] = NULL
;
159 static struct nv50_reg
*
160 temp_temp(struct nv50_pc
*pc
)
162 if (pc
->temp_temp_nr
>= 16)
165 pc
->temp_temp
[pc
->temp_temp_nr
] = alloc_temp(pc
, NULL
);
166 return pc
->temp_temp
[pc
->temp_temp_nr
++];
170 kill_temp_temp(struct nv50_pc
*pc
)
174 for (i
= 0; i
< pc
->temp_temp_nr
; i
++)
175 free_temp(pc
, pc
->temp_temp
[i
]);
176 pc
->temp_temp_nr
= 0;
180 ctor_immd(struct nv50_pc
*pc
, float x
, float y
, float z
, float w
)
182 pc
->immd_buf
= realloc(pc
->immd_buf
, (pc
->immd_nr
+ 1) * 4 *
184 pc
->immd_buf
[(pc
->immd_nr
* 4) + 0] = x
;
185 pc
->immd_buf
[(pc
->immd_nr
* 4) + 1] = y
;
186 pc
->immd_buf
[(pc
->immd_nr
* 4) + 2] = z
;
187 pc
->immd_buf
[(pc
->immd_nr
* 4) + 3] = w
;
189 return pc
->immd_nr
++;
192 static struct nv50_reg
*
193 alloc_immd(struct nv50_pc
*pc
, float f
)
195 struct nv50_reg
*r
= CALLOC_STRUCT(nv50_reg
);
198 hw
= ctor_immd(pc
, f
, 0, 0, 0) * 4;
205 static struct nv50_program_exec
*
206 exec(struct nv50_pc
*pc
)
208 struct nv50_program_exec
*e
= CALLOC_STRUCT(nv50_program_exec
);
215 emit(struct nv50_pc
*pc
, struct nv50_program_exec
*e
)
217 struct nv50_program
*p
= pc
->p
;
220 p
->exec_tail
->next
= e
;
224 p
->exec_size
+= (e
->inst
[0] & 1) ? 2 : 1;
227 static INLINE
void set_long(struct nv50_pc
*, struct nv50_program_exec
*);
230 is_long(struct nv50_program_exec
*e
)
238 is_immd(struct nv50_program_exec
*e
)
240 if (is_long(e
) && (e
->inst
[1] & 3) == 3)
246 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
,
247 struct nv50_program_exec
*e
)
250 e
->inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
251 e
->inst
[1] |= (pred
<< 7) | (idx
<< 12);
255 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
,
256 struct nv50_program_exec
*e
)
259 e
->inst
[1] &= ~((0x3 << 4) | (1 << 6));
260 e
->inst
[1] |= (idx
<< 4) | (on
<< 6);
264 set_long(struct nv50_pc
*pc
, struct nv50_program_exec
*e
)
270 set_pred(pc
, 0xf, 0, e
);
271 set_pred_wr(pc
, 0, 0, e
);
275 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_program_exec
*e
)
277 if (dst
->type
== P_RESULT
) {
279 e
->inst
[1] |= 0x00000008;
283 e
->inst
[0] |= (dst
->hw
<< 2);
287 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, struct nv50_program_exec
*e
)
289 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
292 /*XXX: can't be predicated - bits overlap.. catch cases where both
293 * are required and avoid them. */
294 set_pred(pc
, 0, 0, e
);
295 set_pred_wr(pc
, 0, 0, e
);
297 e
->inst
[1] |= 0x00000002 | 0x00000001;
298 e
->inst
[0] |= (val
& 0x3f) << 16;
299 e
->inst
[1] |= (val
>> 6) << 2;
303 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
304 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
306 struct nv50_program_exec
*e
= exec(pc
);
308 e
->inst
[0] |= 0x80000000;
311 e
->inst
[0] |= (iv
->hw
<< 9);
313 e
->inst
[0] |= (src
->hw
<< 16);
315 e
->inst
[0] |= (1 << 25);
321 set_data(struct nv50_pc
*pc
, struct nv50_reg
*src
, unsigned m
, unsigned s
,
322 struct nv50_program_exec
*e
)
326 e
->inst
[1] |= (1 << 22);
328 if (src
->type
== P_IMMD
) {
329 e
->inst
[1] |= (NV50_CB_PMISC
<< 22);
331 if (pc
->p
->type
== PIPE_SHADER_VERTEX
)
332 e
->inst
[1] |= (NV50_CB_PVP
<< 22);
334 e
->inst
[1] |= (NV50_CB_PFP
<< 22);
338 e
->param
.index
= src
->hw
;
340 e
->param
.mask
= m
<< (s
% 32);
344 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
346 struct nv50_program_exec
*e
= exec(pc
);
348 e
->inst
[0] |= 0x10000000;
352 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
353 set_immd(pc
, src
, e
);
354 /*XXX: 32-bit, but steals part of "half" reg space - need to
355 * catch and handle this case if/when we do half-regs
357 e
->inst
[0] |= 0x00008000;
359 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
361 set_data(pc
, src
, 0x7f, 9, e
);
362 e
->inst
[1] |= 0x20000000; /* src0 const? */
364 if (src
->type
== P_ATTR
) {
366 e
->inst
[1] |= 0x00200000;
370 e
->inst
[0] |= (src
->hw
<< 9);
373 /* We really should support "half" instructions here at some point,
374 * but I don't feel confident enough about them yet.
377 if (is_long(e
) && !is_immd(e
)) {
378 e
->inst
[1] |= 0x04000000; /* 32-bit */
379 e
->inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
386 check_swap_src_0_1(struct nv50_pc
*pc
,
387 struct nv50_reg
**s0
, struct nv50_reg
**s1
)
389 struct nv50_reg
*src0
= *s0
, *src1
= *s1
;
391 if (src0
->type
== P_CONST
) {
392 if (src1
->type
!= P_CONST
) {
398 if (src1
->type
== P_ATTR
) {
399 if (src0
->type
!= P_ATTR
) {
410 set_src_0(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
412 if (src
->type
== P_ATTR
) {
414 e
->inst
[1] |= 0x00200000;
416 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
417 struct nv50_reg
*temp
= temp_temp(pc
);
419 emit_mov(pc
, temp
, src
);
424 e
->inst
[0] |= (src
->hw
<< 9);
428 set_src_1(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
430 if (src
->type
== P_ATTR
) {
431 struct nv50_reg
*temp
= temp_temp(pc
);
433 emit_mov(pc
, temp
, src
);
436 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
437 assert(!(e
->inst
[0] & 0x00800000));
438 if (e
->inst
[0] & 0x01000000) {
439 struct nv50_reg
*temp
= temp_temp(pc
);
441 emit_mov(pc
, temp
, src
);
444 set_data(pc
, src
, 0x7f, 16, e
);
445 e
->inst
[0] |= 0x00800000;
450 e
->inst
[0] |= (src
->hw
<< 16);
454 set_src_2(struct nv50_pc
*pc
, struct nv50_reg
*src
, struct nv50_program_exec
*e
)
458 if (src
->type
== P_ATTR
) {
459 struct nv50_reg
*temp
= temp_temp(pc
);
461 emit_mov(pc
, temp
, src
);
464 if (src
->type
== P_CONST
|| src
->type
== P_IMMD
) {
465 assert(!(e
->inst
[0] & 0x01000000));
466 if (e
->inst
[0] & 0x00800000) {
467 struct nv50_reg
*temp
= temp_temp(pc
);
469 emit_mov(pc
, temp
, src
);
472 set_data(pc
, src
, 0x7f, 32+14, e
);
473 e
->inst
[0] |= 0x01000000;
478 e
->inst
[1] |= (src
->hw
<< 14);
482 emit_mul(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
483 struct nv50_reg
*src1
)
485 struct nv50_program_exec
*e
= exec(pc
);
487 e
->inst
[0] |= 0xc0000000;
490 check_swap_src_0_1(pc
, &src0
, &src1
);
492 set_src_0(pc
, src0
, e
);
493 set_src_1(pc
, src1
, e
);
499 emit_add(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
500 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
502 struct nv50_program_exec
*e
= exec(pc
);
504 e
->inst
[0] |= 0xb0000000;
506 check_swap_src_0_1(pc
, &src0
, &src1
);
508 set_src_0(pc
, src0
, e
);
510 set_src_2(pc
, src1
, e
);
512 set_src_1(pc
, src1
, e
);
518 emit_minmax(struct nv50_pc
*pc
, unsigned sub
, struct nv50_reg
*dst
,
519 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
521 struct nv50_program_exec
*e
= exec(pc
);
524 e
->inst
[0] |= 0xb0000000;
525 e
->inst
[1] |= (sub
<< 29);
527 check_swap_src_0_1(pc
, &src0
, &src1
);
529 set_src_0(pc
, src0
, e
);
530 set_src_1(pc
, src1
, e
);
536 emit_sub(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
537 struct nv50_reg
*src1
)
539 struct nv50_program_exec
*e
= exec(pc
);
541 e
->inst
[0] |= 0xb0000000;
544 if (check_swap_src_0_1(pc
, &src0
, &src1
))
545 e
->inst
[1] |= 0x04000000;
547 e
->inst
[1] |= 0x08000000;
550 set_src_0(pc
, src0
, e
);
551 set_src_2(pc
, src1
, e
);
557 emit_mad(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
558 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
560 struct nv50_program_exec
*e
= exec(pc
);
562 e
->inst
[0] |= 0xe0000000;
564 check_swap_src_0_1(pc
, &src0
, &src1
);
566 set_src_0(pc
, src0
, e
);
567 set_src_1(pc
, src1
, e
);
568 set_src_2(pc
, src2
, e
);
574 emit_msb(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src0
,
575 struct nv50_reg
*src1
, struct nv50_reg
*src2
)
577 struct nv50_program_exec
*e
= exec(pc
);
579 e
->inst
[0] |= 0xe0000000;
581 e
->inst
[1] |= 0x08000000; /* src0 * src1 - src2 */
583 check_swap_src_0_1(pc
, &src0
, &src1
);
585 set_src_0(pc
, src0
, e
);
586 set_src_1(pc
, src1
, e
);
587 set_src_2(pc
, src2
, e
);
593 emit_flop(struct nv50_pc
*pc
, unsigned sub
,
594 struct nv50_reg
*dst
, struct nv50_reg
*src
)
596 struct nv50_program_exec
*e
= exec(pc
);
598 e
->inst
[0] |= 0x90000000;
601 e
->inst
[1] |= (sub
<< 29);
605 set_src_0(pc
, src
, e
);
611 emit_preex2(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
613 struct nv50_program_exec
*e
= exec(pc
);
615 e
->inst
[0] |= 0xb0000000;
618 set_src_0(pc
, src
, e
);
620 e
->inst
[1] |= (6 << 29) | 0x00004000;
626 emit_precossin(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
628 struct nv50_program_exec
*e
= exec(pc
);
630 e
->inst
[0] |= 0xb0000000;
633 set_src_0(pc
, src
, e
);
635 e
->inst
[1] |= (6 << 29);
641 emit_set(struct nv50_pc
*pc
, unsigned c_op
, struct nv50_reg
*dst
,
642 struct nv50_reg
*src0
, struct nv50_reg
*src1
)
644 struct nv50_program_exec
*e
= exec(pc
);
645 unsigned inv_cop
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
646 struct nv50_reg
*rdst
;
649 if (check_swap_src_0_1(pc
, &src0
, &src1
))
650 c_op
= inv_cop
[c_op
];
653 if (dst
->type
!= P_TEMP
)
654 dst
= alloc_temp(pc
, NULL
);
658 e
->inst
[0] |= 0xb0000000;
659 e
->inst
[1] |= (3 << 29);
660 e
->inst
[1] |= (c_op
<< 14);
661 /*XXX: breaks things, .u32 by default?
662 * decuda will disasm as .u16 and use .lo/.hi regs, but this
663 * doesn't seem to match what the hw actually does.
664 inst[1] |= 0x04000000; << breaks things.. .u32 by default?
667 set_src_0(pc
, src0
, e
);
668 set_src_1(pc
, src1
, e
);
673 e
->inst
[0] = 0xa0000001;
674 e
->inst
[1] = 0x64014780;
675 set_dst(pc
, rdst
, e
);
676 set_src_0(pc
, dst
, e
);
684 emit_flr(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
686 struct nv50_program_exec
*e
= exec(pc
);
688 e
->inst
[0] = 0xa0000000; /* cvt */
690 e
->inst
[1] |= (6 << 29); /* cvt */
691 e
->inst
[1] |= 0x08000000; /* integer mode */
692 e
->inst
[1] |= 0x04000000; /* 32 bit */
693 e
->inst
[1] |= ((0x1 << 3)) << 14; /* .rn */
694 e
->inst
[1] |= (1 << 14); /* src .f32 */
696 set_src_0(pc
, src
, e
);
702 emit_pow(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
703 struct nv50_reg
*v
, struct nv50_reg
*e
)
705 struct nv50_reg
*temp
= alloc_temp(pc
, NULL
);
707 emit_flop(pc
, 3, temp
, v
);
708 emit_mul(pc
, temp
, temp
, e
);
709 emit_preex2(pc
, temp
, temp
);
710 emit_flop(pc
, 6, dst
, temp
);
716 emit_abs(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
718 struct nv50_program_exec
*e
= exec(pc
);
720 e
->inst
[0] = 0xa0000000; /* cvt */
722 e
->inst
[1] |= (6 << 29); /* cvt */
723 e
->inst
[1] |= 0x04000000; /* 32 bit */
724 e
->inst
[1] |= (1 << 14); /* src .f32 */
725 e
->inst
[1] |= ((1 << 6) << 14); /* .abs */
727 set_src_0(pc
, src
, e
);
733 emit_lit(struct nv50_pc
*pc
, struct nv50_reg
**dst
, unsigned mask
,
734 struct nv50_reg
**src
)
736 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
737 struct nv50_reg
*zero
= alloc_immd(pc
, 0.0);
738 struct nv50_reg
*neg128
= alloc_immd(pc
, -127.999999);
739 struct nv50_reg
*pos128
= alloc_immd(pc
, 127.999999);
740 struct nv50_reg
*tmp
[4];
743 emit_mov(pc
, dst
[0], one
);
746 emit_mov(pc
, dst
[3], one
);
748 if (mask
& (3 << 1)) {
752 tmp
[0] = temp_temp(pc
);
753 emit_minmax(pc
, 4, tmp
[0], src
[0], zero
);
756 if (mask
& (1 << 2)) {
757 set_pred_wr(pc
, 1, 0, pc
->p
->exec_tail
);
759 tmp
[1] = temp_temp(pc
);
760 emit_minmax(pc
, 4, tmp
[1], src
[1], zero
);
762 tmp
[3] = temp_temp(pc
);
763 emit_minmax(pc
, 4, tmp
[3], src
[3], neg128
);
764 emit_minmax(pc
, 5, tmp
[3], tmp
[3], pos128
);
766 emit_pow(pc
, dst
[2], tmp
[1], tmp
[3]);
767 emit_mov(pc
, dst
[2], zero
);
768 set_pred(pc
, 3, 0, pc
->p
->exec_tail
);
773 emit_neg(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
775 struct nv50_program_exec
*e
= exec(pc
);
778 e
->inst
[0] |= 0xa0000000; /* delta */
779 e
->inst
[1] |= (7 << 29); /* delta */
780 e
->inst
[1] |= 0x04000000; /* negate arg0? probably not */
781 e
->inst
[1] |= (1 << 14); /* src .f32 */
783 set_src_0(pc
, src
, e
);
788 static struct nv50_reg
*
789 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
791 switch (dst
->DstRegister
.File
) {
792 case TGSI_FILE_TEMPORARY
:
793 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
794 case TGSI_FILE_OUTPUT
:
795 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
805 static struct nv50_reg
*
806 tgsi_src(struct nv50_pc
*pc
, int chan
, const struct tgsi_full_src_register
*src
)
808 struct nv50_reg
*r
= NULL
;
809 struct nv50_reg
*temp
;
812 c
= tgsi_util_get_full_src_register_extswizzle(src
, chan
);
814 case TGSI_EXTSWIZZLE_X
:
815 case TGSI_EXTSWIZZLE_Y
:
816 case TGSI_EXTSWIZZLE_Z
:
817 case TGSI_EXTSWIZZLE_W
:
818 switch (src
->SrcRegister
.File
) {
819 case TGSI_FILE_INPUT
:
820 r
= &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
822 case TGSI_FILE_TEMPORARY
:
823 r
= &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
825 case TGSI_FILE_CONSTANT
:
826 r
= &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
828 case TGSI_FILE_IMMEDIATE
:
829 r
= &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
836 case TGSI_EXTSWIZZLE_ZERO
:
837 r
= alloc_immd(pc
, 0.0);
839 case TGSI_EXTSWIZZLE_ONE
:
840 r
= alloc_immd(pc
, 1.0);
847 switch (tgsi_util_get_full_src_register_sign_mode(src
, chan
)) {
848 case TGSI_UTIL_SIGN_KEEP
:
850 case TGSI_UTIL_SIGN_CLEAR
:
851 temp
= temp_temp(pc
);
852 emit_abs(pc
, temp
, r
);
855 case TGSI_UTIL_SIGN_TOGGLE
:
856 temp
= temp_temp(pc
);
857 emit_neg(pc
, temp
, r
);
860 case TGSI_UTIL_SIGN_SET
:
861 temp
= temp_temp(pc
);
862 emit_abs(pc
, temp
, r
);
863 emit_neg(pc
, temp
, r
);
875 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
877 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
878 struct nv50_reg
*rdst
[4], *dst
[4], *src
[3][4], *temp
;
882 NOUVEAU_ERR("insn %p\n", tok
);
884 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
885 sat
= inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
;
887 for (c
= 0; c
< 4; c
++) {
889 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
894 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
895 for (c
= 0; c
< 4; c
++)
896 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
900 for (c
= 0; c
< 4; c
++) {
902 dst
[c
] = temp_temp(pc
);
906 switch (inst
->Instruction
.Opcode
) {
907 case TGSI_OPCODE_ABS
:
908 for (c
= 0; c
< 4; c
++) {
909 if (!(mask
& (1 << c
)))
911 emit_abs(pc
, dst
[c
], src
[0][c
]);
914 case TGSI_OPCODE_ADD
:
915 for (c
= 0; c
< 4; c
++) {
916 if (!(mask
& (1 << c
)))
918 emit_add(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
921 case TGSI_OPCODE_COS
:
922 temp
= alloc_temp(pc
, NULL
);
923 emit_precossin(pc
, temp
, src
[0][0]);
924 emit_flop(pc
, 5, temp
, temp
);
925 for (c
= 0; c
< 4; c
++) {
926 if (!(mask
& (1 << c
)))
928 emit_mov(pc
, dst
[c
], temp
);
931 case TGSI_OPCODE_DP3
:
932 temp
= alloc_temp(pc
, NULL
);
933 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
934 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
935 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
936 for (c
= 0; c
< 4; c
++) {
937 if (!(mask
& (1 << c
)))
939 emit_mov(pc
, dst
[c
], temp
);
943 case TGSI_OPCODE_DP4
:
944 temp
= alloc_temp(pc
, NULL
);
945 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
946 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
947 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
948 emit_mad(pc
, temp
, src
[0][3], src
[1][3], temp
);
949 for (c
= 0; c
< 4; c
++) {
950 if (!(mask
& (1 << c
)))
952 emit_mov(pc
, dst
[c
], temp
);
956 case TGSI_OPCODE_DPH
:
957 temp
= alloc_temp(pc
, NULL
);
958 emit_mul(pc
, temp
, src
[0][0], src
[1][0]);
959 emit_mad(pc
, temp
, src
[0][1], src
[1][1], temp
);
960 emit_mad(pc
, temp
, src
[0][2], src
[1][2], temp
);
961 emit_add(pc
, temp
, src
[1][3], temp
);
962 for (c
= 0; c
< 4; c
++) {
963 if (!(mask
& (1 << c
)))
965 emit_mov(pc
, dst
[c
], temp
);
969 case TGSI_OPCODE_DST
:
971 struct nv50_reg
*one
= alloc_immd(pc
, 1.0);
973 emit_mov(pc
, dst
[0], one
);
975 emit_mul(pc
, dst
[1], src
[0][1], src
[1][1]);
977 emit_mov(pc
, dst
[2], src
[0][2]);
979 emit_mov(pc
, dst
[3], src
[1][3]);
983 case TGSI_OPCODE_EX2
:
984 temp
= alloc_temp(pc
, NULL
);
985 emit_preex2(pc
, temp
, src
[0][0]);
986 emit_flop(pc
, 6, temp
, temp
);
987 for (c
= 0; c
< 4; c
++) {
988 if (!(mask
& (1 << c
)))
990 emit_mov(pc
, dst
[c
], temp
);
994 case TGSI_OPCODE_FLR
:
995 for (c
= 0; c
< 4; c
++) {
996 if (!(mask
& (1 << c
)))
998 emit_flr(pc
, dst
[c
], src
[0][c
]);
1001 case TGSI_OPCODE_FRC
:
1002 temp
= alloc_temp(pc
, NULL
);
1003 for (c
= 0; c
< 4; c
++) {
1004 if (!(mask
& (1 << c
)))
1006 emit_flr(pc
, temp
, src
[0][c
]);
1007 emit_sub(pc
, dst
[c
], src
[0][c
], temp
);
1009 free_temp(pc
, temp
);
1011 case TGSI_OPCODE_LIT
:
1012 emit_lit(pc
, &dst
[0], mask
, &src
[0][0]);
1014 case TGSI_OPCODE_LG2
:
1015 temp
= alloc_temp(pc
, NULL
);
1016 emit_flop(pc
, 3, temp
, src
[0][0]);
1017 for (c
= 0; c
< 4; c
++) {
1018 if (!(mask
& (1 << c
)))
1020 emit_mov(pc
, dst
[c
], temp
);
1023 case TGSI_OPCODE_LRP
:
1024 for (c
= 0; c
< 4; c
++) {
1025 if (!(mask
& (1 << c
)))
1027 /*XXX: we can do better than this */
1028 temp
= alloc_temp(pc
, NULL
);
1029 emit_neg(pc
, temp
, src
[0][c
]);
1030 emit_mad(pc
, temp
, temp
, src
[2][c
], src
[2][c
]);
1031 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], temp
);
1032 free_temp(pc
, temp
);
1035 case TGSI_OPCODE_MAD
:
1036 for (c
= 0; c
< 4; c
++) {
1037 if (!(mask
& (1 << c
)))
1039 emit_mad(pc
, dst
[c
], src
[0][c
], src
[1][c
], src
[2][c
]);
1042 case TGSI_OPCODE_MAX
:
1043 for (c
= 0; c
< 4; c
++) {
1044 if (!(mask
& (1 << c
)))
1046 emit_minmax(pc
, 4, dst
[c
], src
[0][c
], src
[1][c
]);
1049 case TGSI_OPCODE_MIN
:
1050 for (c
= 0; c
< 4; c
++) {
1051 if (!(mask
& (1 << c
)))
1053 emit_minmax(pc
, 5, dst
[c
], src
[0][c
], src
[1][c
]);
1056 case TGSI_OPCODE_MOV
:
1057 for (c
= 0; c
< 4; c
++) {
1058 if (!(mask
& (1 << c
)))
1060 emit_mov(pc
, dst
[c
], src
[0][c
]);
1063 case TGSI_OPCODE_MUL
:
1064 for (c
= 0; c
< 4; c
++) {
1065 if (!(mask
& (1 << c
)))
1067 emit_mul(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1070 case TGSI_OPCODE_POW
:
1071 temp
= alloc_temp(pc
, NULL
);
1072 emit_pow(pc
, temp
, src
[0][0], src
[1][0]);
1073 for (c
= 0; c
< 4; c
++) {
1074 if (!(mask
& (1 << c
)))
1076 emit_mov(pc
, dst
[c
], temp
);
1078 free_temp(pc
, temp
);
1080 case TGSI_OPCODE_RCP
:
1081 for (c
= 0; c
< 4; c
++) {
1082 if (!(mask
& (1 << c
)))
1084 emit_flop(pc
, 0, dst
[c
], src
[0][0]);
1087 case TGSI_OPCODE_RSQ
:
1088 for (c
= 0; c
< 4; c
++) {
1089 if (!(mask
& (1 << c
)))
1091 emit_flop(pc
, 2, dst
[c
], src
[0][0]);
1094 case TGSI_OPCODE_SCS
:
1095 temp
= alloc_temp(pc
, NULL
);
1096 emit_precossin(pc
, temp
, src
[0][0]);
1097 if (mask
& (1 << 0))
1098 emit_flop(pc
, 5, dst
[0], temp
);
1099 if (mask
& (1 << 1))
1100 emit_flop(pc
, 4, dst
[1], temp
);
1102 case TGSI_OPCODE_SGE
:
1103 for (c
= 0; c
< 4; c
++) {
1104 if (!(mask
& (1 << c
)))
1106 emit_set(pc
, 6, dst
[c
], src
[0][c
], src
[1][c
]);
1109 case TGSI_OPCODE_SIN
:
1110 temp
= alloc_temp(pc
, NULL
);
1111 emit_precossin(pc
, temp
, src
[0][0]);
1112 emit_flop(pc
, 4, temp
, temp
);
1113 for (c
= 0; c
< 4; c
++) {
1114 if (!(mask
& (1 << c
)))
1116 emit_mov(pc
, dst
[c
], temp
);
1119 case TGSI_OPCODE_SLT
:
1120 for (c
= 0; c
< 4; c
++) {
1121 if (!(mask
& (1 << c
)))
1123 emit_set(pc
, 1, dst
[c
], src
[0][c
], src
[1][c
]);
1126 case TGSI_OPCODE_SUB
:
1127 for (c
= 0; c
< 4; c
++) {
1128 if (!(mask
& (1 << c
)))
1130 emit_sub(pc
, dst
[c
], src
[0][c
], src
[1][c
]);
1133 case TGSI_OPCODE_XPD
:
1134 temp
= alloc_temp(pc
, NULL
);
1135 if (mask
& (1 << 0)) {
1136 emit_mul(pc
, temp
, src
[0][2], src
[1][1]);
1137 emit_msb(pc
, dst
[0], src
[0][1], src
[1][2], temp
);
1139 if (mask
& (1 << 1)) {
1140 emit_mul(pc
, temp
, src
[0][0], src
[1][2]);
1141 emit_msb(pc
, dst
[1], src
[0][2], src
[1][0], temp
);
1143 if (mask
& (1 << 2)) {
1144 emit_mul(pc
, temp
, src
[0][1], src
[1][0]);
1145 emit_msb(pc
, dst
[2], src
[0][0], src
[1][1], temp
);
1147 free_temp(pc
, temp
);
1149 case TGSI_OPCODE_END
:
1152 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
1157 for (c
= 0; c
< 4; c
++) {
1158 struct nv50_program_exec
*e
;
1160 if (!(mask
& (1 << c
)))
1164 e
->inst
[0] = 0xa0000000; /* cvt */
1166 e
->inst
[1] |= (6 << 29); /* cvt */
1167 e
->inst
[1] |= 0x04000000; /* 32 bit */
1168 e
->inst
[1] |= (1 << 14); /* src .f32 */
1169 e
->inst
[1] |= ((1 << 5) << 14); /* .sat */
1170 set_dst(pc
, rdst
[c
], e
);
1171 set_src_0(pc
, dst
[c
], e
);
1181 nv50_program_tx_prep(struct nv50_pc
*pc
)
1183 struct tgsi_parse_context p
;
1184 boolean ret
= FALSE
;
1187 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
1188 while (!tgsi_parse_end_of_tokens(&p
)) {
1189 const union tgsi_full_token
*tok
= &p
.FullToken
;
1191 tgsi_parse_token(&p
);
1192 switch (tok
->Token
.Type
) {
1193 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1195 const struct tgsi_full_immediate
*imm
=
1196 &p
.FullToken
.FullImmediate
;
1198 ctor_immd(pc
, imm
->u
.ImmediateFloat32
[0].Float
,
1199 imm
->u
.ImmediateFloat32
[1].Float
,
1200 imm
->u
.ImmediateFloat32
[2].Float
,
1201 imm
->u
.ImmediateFloat32
[3].Float
);
1204 case TGSI_TOKEN_TYPE_DECLARATION
:
1206 const struct tgsi_full_declaration
*d
;
1209 d
= &p
.FullToken
.FullDeclaration
;
1210 last
= d
->u
.DeclarationRange
.Last
;
1212 switch (d
->Declaration
.File
) {
1213 case TGSI_FILE_TEMPORARY
:
1214 if (pc
->temp_nr
< (last
+ 1))
1215 pc
->temp_nr
= last
+ 1;
1217 case TGSI_FILE_OUTPUT
:
1218 if (pc
->result_nr
< (last
+ 1))
1219 pc
->result_nr
= last
+ 1;
1221 case TGSI_FILE_INPUT
:
1222 if (pc
->attr_nr
< (last
+ 1))
1223 pc
->attr_nr
= last
+ 1;
1225 case TGSI_FILE_CONSTANT
:
1226 if (pc
->param_nr
< (last
+ 1))
1227 pc
->param_nr
= last
+ 1;
1230 NOUVEAU_ERR("bad decl file %d\n",
1231 d
->Declaration
.File
);
1236 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1243 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
1245 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
1249 for (i
= 0; i
< pc
->temp_nr
; i
++) {
1250 for (c
= 0; c
< 4; c
++) {
1251 pc
->temp
[i
*4+c
].type
= P_TEMP
;
1252 pc
->temp
[i
*4+c
].hw
= -1;
1253 pc
->temp
[i
*4+c
].index
= i
;
1258 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
1260 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
1263 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
1267 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1268 iv
= alloc_temp(pc
, NULL
);
1272 for (i
= 0; i
< pc
->attr_nr
; i
++) {
1273 struct nv50_reg
*a
= &pc
->attr
[i
*4];
1275 for (c
= 0; c
< 4; c
++) {
1276 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1277 struct nv50_reg
*at
=
1278 alloc_temp(pc
, NULL
);
1279 pc
->attr
[i
*4+c
].type
= at
->type
;
1280 pc
->attr
[i
*4+c
].hw
= at
->hw
;
1281 pc
->attr
[i
*4+c
].index
= at
->index
;
1283 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
1285 pc
->attr
[i
*4+c
].type
= P_ATTR
;
1286 pc
->attr
[i
*4+c
].hw
= aid
++;
1287 pc
->attr
[i
*4+c
].index
= i
;
1291 if (pc
->p
->type
!= PIPE_SHADER_FRAGMENT
)
1294 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
1295 tmp
= alloc_temp(pc
, NULL
);
1296 emit_flop(pc
, 0, tmp
, iv
);
1297 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
1298 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
1299 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
1300 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
1308 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
1309 if (pc
->result_nr
) {
1312 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
1316 for (i
= 0; i
< pc
->result_nr
; i
++) {
1317 for (c
= 0; c
< 4; c
++) {
1318 if (pc
->p
->type
== PIPE_SHADER_FRAGMENT
) {
1319 pc
->result
[i
*4+c
].type
= P_TEMP
;
1320 pc
->result
[i
*4+c
].hw
= -1;
1322 pc
->result
[i
*4+c
].type
= P_RESULT
;
1323 pc
->result
[i
*4+c
].hw
= rid
++;
1325 pc
->result
[i
*4+c
].index
= i
;
1330 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
1334 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
1338 for (i
= 0; i
< pc
->param_nr
; i
++) {
1339 for (c
= 0; c
< 4; c
++) {
1340 pc
->param
[i
*4+c
].type
= P_CONST
;
1341 pc
->param
[i
*4+c
].hw
= rid
++;
1342 pc
->param
[i
*4+c
].index
= i
;
1348 int rid
= pc
->param_nr
* 4;
1350 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
1354 for (i
= 0; i
< pc
->immd_nr
; i
++) {
1355 for (c
= 0; c
< 4; c
++) {
1356 pc
->immd
[i
*4+c
].type
= P_IMMD
;
1357 pc
->immd
[i
*4+c
].hw
= rid
++;
1358 pc
->immd
[i
*4+c
].index
= i
;
1365 tgsi_parse_free(&p
);
1370 nv50_program_tx(struct nv50_program
*p
)
1372 struct tgsi_parse_context parse
;
1376 pc
= CALLOC_STRUCT(nv50_pc
);
1380 pc
->p
->cfg
.high_temp
= 4;
1382 ret
= nv50_program_tx_prep(pc
);
1386 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
1387 while (!tgsi_parse_end_of_tokens(&parse
)) {
1388 const union tgsi_full_token
*tok
= &parse
.FullToken
;
1390 tgsi_parse_token(&parse
);
1392 switch (tok
->Token
.Type
) {
1393 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1394 ret
= nv50_program_tx_insn(pc
, tok
);
1403 if (p
->type
== PIPE_SHADER_FRAGMENT
) {
1404 struct nv50_reg out
;
1407 for (out
.hw
= 0; out
.hw
< pc
->result_nr
* 4; out
.hw
++)
1408 emit_mov(pc
, &out
, &pc
->result
[out
.hw
]);
1411 assert(is_long(pc
->p
->exec_tail
) && !is_immd(pc
->p
->exec_head
));
1412 pc
->p
->exec_tail
->inst
[1] |= 0x00000001;
1414 p
->param_nr
= pc
->param_nr
* 4;
1415 p
->immd_nr
= pc
->immd_nr
* 4;
1416 p
->immd
= pc
->immd_buf
;
1419 tgsi_parse_free(&parse
);
1426 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
1428 if (nv50_program_tx(p
) == FALSE
)
1430 p
->translated
= TRUE
;
1434 nv50_program_upload_data(struct nv50_context
*nv50
, float *map
,
1435 unsigned start
, unsigned count
)
1438 unsigned nr
= count
> 2047 ? 2047 : count
;
1440 BEGIN_RING(tesla
, 0x00000f00, 1);
1441 OUT_RING ((NV50_CB_PMISC
<< 0) | (start
<< 8));
1442 BEGIN_RING(tesla
, 0x40000f04, nr
);
1443 OUT_RINGp (map
, nr
);
1452 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
1454 struct nouveau_winsys
*nvws
= nv50
->screen
->nvws
;
1455 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1456 unsigned nr
= p
->param_nr
+ p
->immd_nr
;
1458 if (!p
->data
&& nr
) {
1459 struct nouveau_resource
*heap
= nv50
->screen
->vp_data_heap
;
1461 if (nvws
->res_alloc(heap
, nr
, p
, &p
->data
)) {
1462 while (heap
->next
&& heap
->size
< nr
) {
1463 struct nv50_program
*evict
= heap
->next
->priv
;
1464 nvws
->res_free(&evict
->data
);
1467 if (nvws
->res_alloc(heap
, nr
, p
, &p
->data
))
1473 float *map
= ws
->buffer_map(ws
, nv50
->constbuf
[p
->type
],
1474 PIPE_BUFFER_USAGE_CPU_READ
);
1475 nv50_program_upload_data(nv50
, map
, p
->data
->start
,
1477 ws
->buffer_unmap(ws
, nv50
->constbuf
[p
->type
]);
1481 nv50_program_upload_data(nv50
, p
->immd
,
1482 p
->data
->start
+ p
->param_nr
,
1488 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
1490 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1491 struct nv50_program_exec
*e
;
1492 boolean upload
= FALSE
;
1496 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->exec_size
* 4);
1500 if (p
->data
&& p
->data
->start
!= p
->data_start
) {
1501 for (e
= p
->exec_head
; e
; e
= e
->next
) {
1504 if (e
->param
.index
< 0)
1506 ei
= e
->param
.shift
>> 5;
1507 ci
= e
->param
.index
+ p
->data
->start
;
1509 e
->inst
[ei
] &= ~e
->param
.mask
;
1510 e
->inst
[ei
] |= (ci
<< e
->param
.shift
);
1513 p
->data_start
= p
->data
->start
;
1520 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
1521 for (e
= p
->exec_head
; e
; e
= e
->next
) {
1522 #ifdef NV50_PROGRAM_DUMP
1523 NOUVEAU_ERR("0x%08x\n", e
->inst
[0]);
1525 *(map
++) = e
->inst
[0];
1527 #ifdef NV50_PROGRAM_DUMP
1528 NOUVEAU_ERR("0x%08x\n", e
->inst
[1]);
1530 *(map
++) = e
->inst
[1];
1533 ws
->buffer_unmap(ws
, p
->buffer
);
1537 nv50_vertprog_validate(struct nv50_context
*nv50
)
1539 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1540 struct nv50_program
*p
= nv50
->vertprog
;
1541 struct nouveau_stateobj
*so
;
1543 if (!p
->translated
) {
1544 nv50_program_validate(nv50
, p
);
1549 nv50_program_validate_data(nv50
, p
);
1550 nv50_program_validate_code(nv50
, p
);
1553 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
1554 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1555 NOUVEAU_BO_HIGH
, 0, 0);
1556 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1557 NOUVEAU_BO_LOW
, 0, 0);
1558 so_method(so
, tesla
, 0x1650, 2);
1559 so_data (so
, p
->cfg
.vp
.attr
[0]);
1560 so_data (so
, p
->cfg
.vp
.attr
[1]);
1561 so_method(so
, tesla
, 0x16ac, 2);
1563 so_data (so
, p
->cfg
.high_temp
);
1564 so_method(so
, tesla
, 0x140c, 1);
1565 so_data (so
, 0); /* program start offset */
1566 so_emit(nv50
->screen
->nvws
, so
);
1571 nv50_fragprog_validate(struct nv50_context
*nv50
)
1573 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
1574 struct nv50_program
*p
= nv50
->fragprog
;
1575 struct nouveau_stateobj
*so
;
1577 if (!p
->translated
) {
1578 nv50_program_validate(nv50
, p
);
1583 nv50_program_validate_data(nv50
, p
);
1584 nv50_program_validate_code(nv50
, p
);
1587 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
1588 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1589 NOUVEAU_BO_HIGH
, 0, 0);
1590 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
1591 NOUVEAU_BO_LOW
, 0, 0);
1592 so_method(so
, tesla
, 0x1904, 4);
1593 so_data (so
, 0x01040404); /* p: 0x01000404 */
1594 so_data (so
, 0x00000004);
1595 so_data (so
, 0x00000000);
1596 so_data (so
, 0x00000000);
1597 so_method(so
, tesla
, 0x16bc, 2); /*XXX: fixme */
1598 so_data (so
, 0x03020100);
1599 so_data (so
, 0x07060504);
1600 so_method(so
, tesla
, 0x1988, 2);
1601 so_data (so
, 0x08040404); /* p: 0x0f000401 */
1602 so_data (so
, p
->cfg
.high_temp
);
1603 so_method(so
, tesla
, 0x1414, 1);
1604 so_data (so
, 0); /* program start offset */
1605 so_emit(nv50
->screen
->nvws
, so
);
1610 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
1612 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
1614 while (p
->exec_head
) {
1615 struct nv50_program_exec
*e
= p
->exec_head
;
1617 p
->exec_head
= e
->next
;
1620 p
->exec_tail
= NULL
;
1624 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);