1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_dump.h"
8 #include "tgsi/tgsi_parse.h"
9 #include "tgsi/tgsi_util.h"
11 #include "nv30_context.h"
21 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
22 #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
23 #define DEF_CTEST NV30_FP_OP_COND_TR
24 #include "nv30_shader.h"
26 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
27 #define neg(s) nv30_sr_neg((s))
28 #define abs(s) nv30_sr_abs((s))
29 #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
31 #define MAX_CONSTS 128
34 struct nv30_fragment_program
*fp
;
36 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
53 struct nv30_sreg imm
[MAX_IMM
];
57 static INLINE
struct nv30_sreg
58 temp(struct nv30_fpc
*fpc
)
62 idx
= fpc
->temp_temp_count
++;
63 idx
+= fpc
->high_temp
+ 1;
64 return nv30_sr(NV30SR_TEMP
, idx
);
67 static INLINE
struct nv30_sreg
68 constant(struct nv30_fpc
*fpc
, int pipe
, float vals
[4])
72 if (fpc
->nr_consts
== MAX_CONSTS
)
74 idx
= fpc
->nr_consts
++;
76 fpc
->consts
[idx
].pipe
= pipe
;
78 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
79 return nv30_sr(NV30SR_CONST
, idx
);
82 #define arith(cc,s,o,d,m,s0,s1,s2) \
83 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
84 (d), (m), (s0), (s1), (s2))
85 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
86 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
87 (d), (m), (s0), none, none)
90 grow_insns(struct nv30_fpc
*fpc
, int size
)
92 struct nv30_fragment_program
*fp
= fpc
->fp
;
95 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
99 emit_src(struct nv30_fpc
*fpc
, int pos
, struct nv30_sreg src
)
101 struct nv30_fragment_program
*fp
= fpc
->fp
;
102 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
107 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
108 hw
[0] |= (src
.index
<< NV30_FP_OP_INPUT_SRC_SHIFT
);
111 sr
|= NV30_FP_REG_SRC_HALF
;
114 sr
|= (NV30_FP_REG_TYPE_TEMP
<< NV30_FP_REG_TYPE_SHIFT
);
115 sr
|= (src
.index
<< NV30_FP_REG_SRC_SHIFT
);
119 hw
= &fp
->insn
[fpc
->inst_offset
];
120 if (fpc
->consts
[src
.index
].pipe
>= 0) {
121 struct nv30_fragment_program_data
*fpd
;
123 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
125 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
126 fpd
->offset
= fpc
->inst_offset
+ 4;
127 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
128 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
130 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
131 fpc
->consts
[src
.index
].vals
,
132 sizeof(uint32_t) * 4);
135 sr
|= (NV30_FP_REG_TYPE_CONST
<< NV30_FP_REG_TYPE_SHIFT
);
138 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
145 sr
|= NV30_FP_REG_NEGATE
;
148 hw
[1] |= (1 << (29 + pos
));
150 sr
|= ((src
.swz
[0] << NV30_FP_REG_SWZ_X_SHIFT
) |
151 (src
.swz
[1] << NV30_FP_REG_SWZ_Y_SHIFT
) |
152 (src
.swz
[2] << NV30_FP_REG_SWZ_Z_SHIFT
) |
153 (src
.swz
[3] << NV30_FP_REG_SWZ_W_SHIFT
));
159 emit_dst(struct nv30_fpc
*fpc
, struct nv30_sreg dst
)
161 struct nv30_fragment_program
*fp
= fpc
->fp
;
162 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
166 if (fpc
->num_regs
< (dst
.index
+ 1))
167 fpc
->num_regs
= dst
.index
+ 1;
170 if (dst
.index
== 1) {
171 fp
->fp_control
|= 0xe;
173 hw
[0] |= NV30_FP_OP_OUT_REG_HALF
;
183 hw
[0] |= (dst
.index
<< NV30_FP_OP_OUT_REG_SHIFT
);
187 nv30_fp_arith(struct nv30_fpc
*fpc
, int sat
, int op
,
188 struct nv30_sreg dst
, int mask
,
189 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
191 struct nv30_fragment_program
*fp
= fpc
->fp
;
194 fpc
->inst_offset
= fp
->insn_len
;
196 hw
= &fp
->insn
[fpc
->inst_offset
];
197 memset(hw
, 0, sizeof(uint32_t) * 4);
199 if (op
== NV30_FP_OP_OPCODE_KIL
)
200 fp
->fp_control
|= NV34TCL_FP_CONTROL_USES_KIL
;
201 hw
[0] |= (op
<< NV30_FP_OP_OPCODE_SHIFT
);
202 hw
[0] |= (mask
<< NV30_FP_OP_OUTMASK_SHIFT
);
203 hw
[2] |= (dst
.dst_scale
<< NV30_FP_OP_DST_SCALE_SHIFT
);
206 hw
[0] |= NV30_FP_OP_OUT_SAT
;
209 hw
[0] |= NV30_FP_OP_COND_WRITE_ENABLE
;
210 hw
[1] |= (dst
.cc_test
<< NV30_FP_OP_COND_SHIFT
);
211 hw
[1] |= ((dst
.cc_swz
[0] << NV30_FP_OP_COND_SWZ_X_SHIFT
) |
212 (dst
.cc_swz
[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT
) |
213 (dst
.cc_swz
[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT
) |
214 (dst
.cc_swz
[3] << NV30_FP_OP_COND_SWZ_W_SHIFT
));
217 emit_src(fpc
, 0, s0
);
218 emit_src(fpc
, 1, s1
);
219 emit_src(fpc
, 2, s2
);
223 nv30_fp_tex(struct nv30_fpc
*fpc
, int sat
, int op
, int unit
,
224 struct nv30_sreg dst
, int mask
,
225 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
227 struct nv30_fragment_program
*fp
= fpc
->fp
;
229 nv30_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
231 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV30_FP_OP_TEX_UNIT_SHIFT
);
232 fp
->samplers
|= (1 << unit
);
235 static INLINE
struct nv30_sreg
236 tgsi_src(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
238 struct nv30_sreg src
;
240 switch (fsrc
->SrcRegister
.File
) {
241 case TGSI_FILE_INPUT
:
242 src
= nv30_sr(NV30SR_INPUT
,
243 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
245 case TGSI_FILE_CONSTANT
:
246 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
248 case TGSI_FILE_IMMEDIATE
:
249 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
250 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
252 case TGSI_FILE_TEMPORARY
:
253 src
= nv30_sr(NV30SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
254 if (fpc
->high_temp
< src
.index
)
255 fpc
->high_temp
= src
.index
;
257 /* This is clearly insane, but gallium hands us shaders like this.
258 * Luckily fragprog results are just temp regs..
260 case TGSI_FILE_OUTPUT
:
261 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
262 return nv30_sr(NV30SR_OUTPUT
, 0);
264 return nv30_sr(NV30SR_OUTPUT
, 1);
267 NOUVEAU_ERR("bad src file\n");
271 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
272 src
.negate
= fsrc
->SrcRegister
.Negate
;
273 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
274 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
275 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
276 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
280 static INLINE
struct nv30_sreg
281 tgsi_dst(struct nv30_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
284 switch (fdst
->DstRegister
.File
) {
285 case TGSI_FILE_OUTPUT
:
286 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
287 return nv30_sr(NV30SR_OUTPUT
, 0);
289 return nv30_sr(NV30SR_OUTPUT
, 1);
291 case TGSI_FILE_TEMPORARY
:
292 idx
= fdst
->DstRegister
.Index
+ 1;
293 if (fpc
->high_temp
< idx
)
294 fpc
->high_temp
= idx
;
295 return nv30_sr(NV30SR_TEMP
, idx
);
297 return nv30_sr(NV30SR_NONE
, 0);
299 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
300 return nv30_sr(NV30SR_NONE
, 0);
309 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
310 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
311 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
312 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
317 src_native_swz(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
318 struct nv30_sreg
*src
)
320 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
321 struct nv30_sreg tgsi
= tgsi_src(fpc
, fsrc
);
325 for (c
= 0; c
< 4; c
++) {
326 switch (tgsi_util_get_full_src_register_swizzle(fsrc
, c
)) {
338 if (mask
== MASK_ALL
)
344 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
350 nv30_fragprog_parse_instruction(struct nv30_fpc
*fpc
,
351 const struct tgsi_full_instruction
*finst
)
353 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
354 struct nv30_sreg src
[3], dst
, tmp
;
355 int mask
, sat
, unit
= 0;
356 int ai
= -1, ci
= -1;
359 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
362 fpc
->temp_temp_count
= 0;
363 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
364 const struct tgsi_full_src_register
*fsrc
;
366 fsrc
= &finst
->FullSrcRegisters
[i
];
367 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
368 src
[i
] = tgsi_src(fpc
, fsrc
);
372 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
373 const struct tgsi_full_src_register
*fsrc
;
375 fsrc
= &finst
->FullSrcRegisters
[i
];
377 switch (fsrc
->SrcRegister
.File
) {
378 case TGSI_FILE_INPUT
:
379 case TGSI_FILE_CONSTANT
:
380 case TGSI_FILE_TEMPORARY
:
381 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
388 switch (fsrc
->SrcRegister
.File
) {
389 case TGSI_FILE_INPUT
:
390 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
391 ai
= fsrc
->SrcRegister
.Index
;
392 src
[i
] = tgsi_src(fpc
, fsrc
);
394 NOUVEAU_MSG("extra src attr %d\n",
395 fsrc
->SrcRegister
.Index
);
397 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
398 tgsi_src(fpc
, fsrc
), none
, none
);
401 case TGSI_FILE_CONSTANT
:
402 case TGSI_FILE_IMMEDIATE
:
403 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
404 ci
= fsrc
->SrcRegister
.Index
;
405 src
[i
] = tgsi_src(fpc
, fsrc
);
408 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
409 tgsi_src(fpc
, fsrc
), none
, none
);
412 case TGSI_FILE_TEMPORARY
:
415 case TGSI_FILE_SAMPLER
:
416 unit
= fsrc
->SrcRegister
.Index
;
418 case TGSI_FILE_OUTPUT
:
421 NOUVEAU_ERR("bad src file\n");
426 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
427 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
428 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
430 switch (finst
->Instruction
.Opcode
) {
431 case TGSI_OPCODE_ABS
:
432 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
434 case TGSI_OPCODE_ADD
:
435 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
437 case TGSI_OPCODE_CMP
:
439 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
441 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
442 dst
.cc_test
= NV30_VP_INST_COND_LT
;
443 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
445 case TGSI_OPCODE_COS
:
446 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
448 case TGSI_OPCODE_DP3
:
449 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
451 case TGSI_OPCODE_DP4
:
452 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
454 case TGSI_OPCODE_DPH
:
456 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
457 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
458 swz(src
[1], W
, W
, W
, W
), none
);
460 case TGSI_OPCODE_DST
:
461 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
463 case TGSI_OPCODE_EX2
:
464 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
466 case TGSI_OPCODE_FLR
:
467 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
469 case TGSI_OPCODE_FRC
:
470 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
472 case TGSI_OPCODE_KILP
:
473 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
475 case TGSI_OPCODE_KIL
:
476 dst
= nv30_sr(NV30SR_NONE
, 0);
478 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
479 dst
.cc_update
= 0; dst
.cc_test
= NV30_FP_OP_COND_LT
;
480 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
482 case TGSI_OPCODE_LG2
:
483 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
485 // case TGSI_OPCODE_LIT:
486 case TGSI_OPCODE_LRP
:
487 arith(fpc
, sat
, LRP
, dst
, mask
, src
[0], src
[1], src
[2]);
489 case TGSI_OPCODE_MAD
:
490 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
492 case TGSI_OPCODE_MAX
:
493 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
495 case TGSI_OPCODE_MIN
:
496 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
498 case TGSI_OPCODE_MOV
:
499 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
501 case TGSI_OPCODE_MUL
:
502 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
504 case TGSI_OPCODE_POW
:
505 arith(fpc
, sat
, POW
, dst
, mask
, src
[0], src
[1], none
);
507 case TGSI_OPCODE_RCP
:
508 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
510 case TGSI_OPCODE_RET
:
513 case TGSI_OPCODE_RFL
:
514 arith(fpc
, 0, RFL
, dst
, mask
, src
[0], src
[1], none
);
516 case TGSI_OPCODE_RSQ
:
517 arith(fpc
, sat
, RSQ
, dst
, mask
, abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
519 case TGSI_OPCODE_SCS
:
521 arith(fpc
, sat
, COS
, dst
, MASK_X
,
522 swz(src
[0], X
, X
, X
, X
), none
, none
);
525 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
526 swz(src
[0], X
, X
, X
, X
), none
, none
);
529 case TGSI_OPCODE_SIN
:
530 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
532 case TGSI_OPCODE_SGE
:
533 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
535 case TGSI_OPCODE_SGT
:
536 arith(fpc
, sat
, SGT
, dst
, mask
, src
[0], src
[1], none
);
538 case TGSI_OPCODE_SLT
:
539 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
541 case TGSI_OPCODE_SUB
:
542 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
544 case TGSI_OPCODE_TEX
:
545 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
547 case TGSI_OPCODE_TXB
:
548 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
550 case TGSI_OPCODE_TXP
:
551 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
553 case TGSI_OPCODE_XPD
:
555 arith(fpc
, 0, MUL
, tmp
, mask
,
556 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
557 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
558 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
562 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
570 nv30_fragprog_parse_decl_attrib(struct nv30_fpc
*fpc
,
571 const struct tgsi_full_declaration
*fdec
)
575 switch (fdec
->Semantic
.SemanticName
) {
576 case TGSI_SEMANTIC_POSITION
:
577 hw
= NV30_FP_OP_INPUT_SRC_POSITION
;
579 case TGSI_SEMANTIC_COLOR
:
580 if (fdec
->Semantic
.SemanticIndex
== 0) {
581 hw
= NV30_FP_OP_INPUT_SRC_COL0
;
583 if (fdec
->Semantic
.SemanticIndex
== 1) {
584 hw
= NV30_FP_OP_INPUT_SRC_COL1
;
586 NOUVEAU_ERR("bad colour semantic index\n");
590 case TGSI_SEMANTIC_FOG
:
591 hw
= NV30_FP_OP_INPUT_SRC_FOGC
;
593 case TGSI_SEMANTIC_GENERIC
:
594 if (fdec
->Semantic
.SemanticIndex
<= 7) {
595 hw
= NV30_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
598 NOUVEAU_ERR("bad generic semantic index\n");
603 NOUVEAU_ERR("bad input semantic\n");
607 fpc
->attrib_map
[fdec
->DeclarationRange
.First
] = hw
;
612 nv30_fragprog_parse_decl_output(struct nv30_fpc
*fpc
,
613 const struct tgsi_full_declaration
*fdec
)
615 switch (fdec
->Semantic
.SemanticName
) {
616 case TGSI_SEMANTIC_POSITION
:
617 fpc
->depth_id
= fdec
->DeclarationRange
.First
;
619 case TGSI_SEMANTIC_COLOR
:
620 fpc
->colour_id
= fdec
->DeclarationRange
.First
;
623 NOUVEAU_ERR("bad output semantic\n");
631 nv30_fragprog_prepare(struct nv30_fpc
*fpc
)
633 struct tgsi_parse_context p
;
634 /*int high_temp = -1, i;*/
636 tgsi_parse_init(&p
, fpc
->fp
->pipe
.tokens
);
637 while (!tgsi_parse_end_of_tokens(&p
)) {
638 const union tgsi_full_token
*tok
= &p
.FullToken
;
640 tgsi_parse_token(&p
);
641 switch(tok
->Token
.Type
) {
642 case TGSI_TOKEN_TYPE_DECLARATION
:
644 const struct tgsi_full_declaration
*fdec
;
645 fdec
= &p
.FullToken
.FullDeclaration
;
646 switch (fdec
->Declaration
.File
) {
647 case TGSI_FILE_INPUT
:
648 if (!nv30_fragprog_parse_decl_attrib(fpc
, fdec
))
651 case TGSI_FILE_OUTPUT
:
652 if (!nv30_fragprog_parse_decl_output(fpc
, fdec
))
655 /*case TGSI_FILE_TEMPORARY:
656 if (fdec->DeclarationRange.Last > high_temp) {
658 fdec->DeclarationRange.Last;
666 case TGSI_TOKEN_TYPE_IMMEDIATE
:
668 struct tgsi_full_immediate
*imm
;
671 imm
= &p
.FullToken
.FullImmediate
;
672 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
673 assert(fpc
->nr_imm
< MAX_IMM
);
675 vals
[0] = imm
->u
[0].Float
;
676 vals
[1] = imm
->u
[1].Float
;
677 vals
[2] = imm
->u
[2].Float
;
678 vals
[3] = imm
->u
[3].Float
;
679 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
689 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
690 for (i = 0; i < high_temp; i++)
691 fpc->r_temp[i] = temp(fpc);
692 fpc->r_temps_discard = 0;
705 nv30_fragprog_translate(struct nv30_context
*nv30
,
706 struct nv30_fragment_program
*fp
)
708 struct tgsi_parse_context parse
;
709 struct nv30_fpc
*fpc
= NULL
;
711 tgsi_dump(fp
->pipe
.tokens
,0);
713 fpc
= CALLOC(1, sizeof(struct nv30_fpc
));
720 if (!nv30_fragprog_prepare(fpc
)) {
725 tgsi_parse_init(&parse
, fp
->pipe
.tokens
);
727 while (!tgsi_parse_end_of_tokens(&parse
)) {
728 tgsi_parse_token(&parse
);
730 switch (parse
.FullToken
.Token
.Type
) {
731 case TGSI_TOKEN_TYPE_INSTRUCTION
:
733 const struct tgsi_full_instruction
*finst
;
735 finst
= &parse
.FullToken
.FullInstruction
;
736 if (!nv30_fragprog_parse_instruction(fpc
, finst
))
745 fp
->fp_control
|= (fpc
->num_regs
-1)/2;
746 fp
->fp_reg_control
= (1<<16)|0x4;
748 /* Terminate final instruction */
749 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
751 /* Append NOP + END instruction, may or may not be necessary. */
752 fpc
->inst_offset
= fp
->insn_len
;
754 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
755 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
756 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
757 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
759 fp
->translated
= TRUE
;
762 tgsi_parse_free(&parse
);
767 nv30_fragprog_upload(struct nv30_context
*nv30
,
768 struct nv30_fragment_program
*fp
)
770 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
771 const uint32_t le
= 1;
775 map
= pipe_buffer_map(pscreen
, fp
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
778 for (i
= 0; i
< fp
->insn_len
; i
++) {
779 fflush(stdout
); fflush(stderr
);
780 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
781 fflush(stdout
); fflush(stderr
);
785 if ((*(const uint8_t *)&le
)) {
786 for (i
= 0; i
< fp
->insn_len
; i
++) {
787 map
[i
] = fp
->insn
[i
];
790 /* Weird swapping for big-endian chips */
791 for (i
= 0; i
< fp
->insn_len
; i
++) {
792 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
793 ((fp
->insn
[i
] >> 16) & 0xffff);
797 pipe_buffer_unmap(pscreen
, fp
->buffer
);
801 nv30_fragprog_validate(struct nv30_context
*nv30
)
803 struct nv30_fragment_program
*fp
= nv30
->fragprog
;
804 struct pipe_buffer
*constbuf
=
805 nv30
->constbuf
[PIPE_SHADER_FRAGMENT
];
806 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
807 struct nouveau_stateobj
*so
;
808 boolean new_consts
= FALSE
;
812 goto update_constants
;
814 /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
815 nv30_fragprog_translate(nv30
, fp
);
816 if (!fp
->translated
) {
817 /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
821 fp
->buffer
= pscreen
->buffer_create(pscreen
, 0x100, 0, fp
->insn_len
* 4);
822 nv30_fragprog_upload(nv30
, fp
);
825 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_ACTIVE_PROGRAM
, 1);
826 so_reloc (so
, nouveau_bo(fp
->buffer
), 0, NOUVEAU_BO_VRAM
|
827 NOUVEAU_BO_GART
| NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
|
828 NOUVEAU_BO_OR
, NV34TCL_FP_ACTIVE_PROGRAM_DMA0
,
829 NV34TCL_FP_ACTIVE_PROGRAM_DMA1
);
830 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_CONTROL
, 1);
831 so_data (so
, fp
->fp_control
);
832 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_REG_CONTROL
, 1);
833 so_data (so
, fp
->fp_reg_control
);
834 so_method(so
, nv30
->screen
->rankine
, NV34TCL_TX_UNITS_ENABLE
, 1);
835 so_data (so
, fp
->samplers
);
843 map
= pipe_buffer_map(pscreen
, constbuf
,
844 PIPE_BUFFER_USAGE_CPU_READ
);
845 for (i
= 0; i
< fp
->nr_consts
; i
++) {
846 struct nv30_fragment_program_data
*fpd
= &fp
->consts
[i
];
847 uint32_t *p
= &fp
->insn
[fpd
->offset
];
848 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
850 if (!memcmp(p
, cb
, 4 * sizeof(float)))
852 memcpy(p
, cb
, 4 * sizeof(float));
855 pipe_buffer_unmap(pscreen
, constbuf
);
858 nv30_fragprog_upload(nv30
, fp
);
861 if (new_consts
|| fp
->so
!= nv30
->state
.hw
[NV30_STATE_FRAGPROG
]) {
862 so_ref(fp
->so
, &nv30
->state
.hw
[NV30_STATE_FRAGPROG
]);
870 nv30_fragprog_destroy(struct nv30_context
*nv30
,
871 struct nv30_fragment_program
*fp
)
877 struct nv30_state_entry nv30_state_fragprog
= {
878 .validate
= nv30_fragprog_validate
,
880 .pipe
= NV30_NEW_FRAGPROG
,
881 .hw
= NV30_STATE_FRAGPROG