1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
5 #include "pipe/p_shader_tokens.h"
6 #include "tgsi/util/tgsi_parse.h"
7 #include "tgsi/util/tgsi_util.h"
9 #include "nv30_context.h"
19 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
20 #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
21 #define DEF_CTEST NV30_FP_OP_COND_TR
22 #include "nv30_shader.h"
24 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
25 #define neg(s) nv30_sr_neg((s))
26 #define abs(s) nv30_sr_abs((s))
27 #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
29 #define MAX_CONSTS 128
32 struct nv30_fragment_program
*fp
;
34 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
51 struct nv30_sreg imm
[MAX_IMM
];
55 static INLINE
struct nv30_sreg
56 temp(struct nv30_fpc
*fpc
)
60 idx
= fpc
->temp_temp_count
++;
61 idx
+= fpc
->high_temp
+ 1;
62 return nv30_sr(NV30SR_TEMP
, idx
);
65 static INLINE
struct nv30_sreg
66 constant(struct nv30_fpc
*fpc
, int pipe
, float vals
[4])
70 if (fpc
->nr_consts
== MAX_CONSTS
)
72 idx
= fpc
->nr_consts
++;
74 fpc
->consts
[idx
].pipe
= pipe
;
76 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
77 return nv30_sr(NV30SR_CONST
, idx
);
/* Emit an arithmetic instruction; `o` is the opcode name without the
 * NV30_FP_OP_OPCODE_ prefix. */
#define arith(cc,s,o,d,m,s0,s1,s2) \
	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
			(d), (m), (s0), (s1), (s2))
/* Emit a texture instruction on unit `u`. The s1/s2 arguments are accepted
 * but ignored: `none` is always passed instead, so a variable named `none`
 * must be in scope wherever this macro is used. */
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
		    (d), (m), (s0), none, none)
88 grow_insns(struct nv30_fpc
*fpc
, int size
)
90 struct nv30_fragment_program
*fp
= fpc
->fp
;
93 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
97 emit_src(struct nv30_fpc
*fpc
, int pos
, struct nv30_sreg src
)
99 struct nv30_fragment_program
*fp
= fpc
->fp
;
100 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
105 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
106 hw
[0] |= (src
.index
<< NV30_FP_OP_INPUT_SRC_SHIFT
);
109 sr
|= NV30_FP_REG_SRC_HALF
;
112 sr
|= (NV30_FP_REG_TYPE_TEMP
<< NV30_FP_REG_TYPE_SHIFT
);
113 sr
|= (src
.index
<< NV30_FP_REG_SRC_SHIFT
);
117 hw
= &fp
->insn
[fpc
->inst_offset
];
118 if (fpc
->consts
[src
.index
].pipe
>= 0) {
119 struct nv30_fragment_program_data
*fpd
;
121 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
123 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
124 fpd
->offset
= fpc
->inst_offset
+ 4;
125 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
126 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
128 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
129 fpc
->consts
[src
.index
].vals
,
130 sizeof(uint32_t) * 4);
133 sr
|= (NV30_FP_REG_TYPE_CONST
<< NV30_FP_REG_TYPE_SHIFT
);
136 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
143 sr
|= NV30_FP_REG_NEGATE
;
146 hw
[1] |= (1 << (29 + pos
));
148 sr
|= ((src
.swz
[0] << NV30_FP_REG_SWZ_X_SHIFT
) |
149 (src
.swz
[1] << NV30_FP_REG_SWZ_Y_SHIFT
) |
150 (src
.swz
[2] << NV30_FP_REG_SWZ_Z_SHIFT
) |
151 (src
.swz
[3] << NV30_FP_REG_SWZ_W_SHIFT
));
157 emit_dst(struct nv30_fpc
*fpc
, struct nv30_sreg dst
)
159 struct nv30_fragment_program
*fp
= fpc
->fp
;
160 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
164 if (fpc
->num_regs
< (dst
.index
+ 1))
165 fpc
->num_regs
= dst
.index
+ 1;
168 if (dst
.index
== 1) {
169 fp
->fp_control
|= 0xe;
171 hw
[0] |= NV30_FP_OP_OUT_REG_HALF
;
181 hw
[0] |= (dst
.index
<< NV30_FP_OP_OUT_REG_SHIFT
);
185 nv30_fp_arith(struct nv30_fpc
*fpc
, int sat
, int op
,
186 struct nv30_sreg dst
, int mask
,
187 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
189 struct nv30_fragment_program
*fp
= fpc
->fp
;
192 fpc
->inst_offset
= fp
->insn_len
;
194 hw
= &fp
->insn
[fpc
->inst_offset
];
195 memset(hw
, 0, sizeof(uint32_t) * 4);
197 if (op
== NV30_FP_OP_OPCODE_KIL
)
198 fp
->fp_control
|= NV34TCL_FP_CONTROL_USES_KIL
;
199 hw
[0] |= (op
<< NV30_FP_OP_OPCODE_SHIFT
);
200 hw
[0] |= (mask
<< NV30_FP_OP_OUTMASK_SHIFT
);
201 hw
[2] |= (dst
.dst_scale
<< NV30_FP_OP_DST_SCALE_SHIFT
);
204 hw
[0] |= NV30_FP_OP_OUT_SAT
;
207 hw
[0] |= NV30_FP_OP_COND_WRITE_ENABLE
;
208 hw
[1] |= (dst
.cc_test
<< NV30_FP_OP_COND_SHIFT
);
209 hw
[1] |= ((dst
.cc_swz
[0] << NV30_FP_OP_COND_SWZ_X_SHIFT
) |
210 (dst
.cc_swz
[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT
) |
211 (dst
.cc_swz
[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT
) |
212 (dst
.cc_swz
[3] << NV30_FP_OP_COND_SWZ_W_SHIFT
));
215 emit_src(fpc
, 0, s0
);
216 emit_src(fpc
, 1, s1
);
217 emit_src(fpc
, 2, s2
);
221 nv30_fp_tex(struct nv30_fpc
*fpc
, int sat
, int op
, int unit
,
222 struct nv30_sreg dst
, int mask
,
223 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
225 struct nv30_fragment_program
*fp
= fpc
->fp
;
227 nv30_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
229 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV30_FP_OP_TEX_UNIT_SHIFT
);
230 fp
->samplers
|= (1 << unit
);
233 static INLINE
struct nv30_sreg
234 tgsi_src(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
236 struct nv30_sreg src
;
238 switch (fsrc
->SrcRegister
.File
) {
239 case TGSI_FILE_INPUT
:
240 src
= nv30_sr(NV30SR_INPUT
,
241 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
243 case TGSI_FILE_CONSTANT
:
244 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
246 case TGSI_FILE_IMMEDIATE
:
247 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
248 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
250 case TGSI_FILE_TEMPORARY
:
251 src
= nv30_sr(NV30SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
252 if (fpc
->high_temp
< src
.index
)
253 fpc
->high_temp
= src
.index
;
255 /* This is clearly insane, but gallium hands us shaders like this.
256 * Luckily fragprog results are just temp regs..
258 case TGSI_FILE_OUTPUT
:
259 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
260 return nv30_sr(NV30SR_OUTPUT
, 0);
262 return nv30_sr(NV30SR_OUTPUT
, 1);
265 NOUVEAU_ERR("bad src file\n");
269 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
270 src
.negate
= fsrc
->SrcRegister
.Negate
;
271 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
272 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
273 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
274 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
278 static INLINE
struct nv30_sreg
279 tgsi_dst(struct nv30_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
282 switch (fdst
->DstRegister
.File
) {
283 case TGSI_FILE_OUTPUT
:
284 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
285 return nv30_sr(NV30SR_OUTPUT
, 0);
287 return nv30_sr(NV30SR_OUTPUT
, 1);
289 case TGSI_FILE_TEMPORARY
:
290 idx
= fdst
->DstRegister
.Index
+ 1;
291 if (fpc
->high_temp
< idx
)
292 fpc
->high_temp
= idx
;
293 return nv30_sr(NV30SR_TEMP
, idx
);
295 return nv30_sr(NV30SR_NONE
, 0);
297 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
298 return nv30_sr(NV30SR_NONE
, 0);
307 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
308 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
309 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
310 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
315 src_native_swz(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
316 struct nv30_sreg
*src
)
318 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
319 struct nv30_sreg tgsi
= tgsi_src(fpc
, fsrc
);
320 uint mask
= 0, zero_mask
= 0, one_mask
= 0, neg_mask
= 0;
321 uint neg
[4] = { fsrc
->SrcRegisterExtSwz
.NegateX
,
322 fsrc
->SrcRegisterExtSwz
.NegateY
,
323 fsrc
->SrcRegisterExtSwz
.NegateZ
,
324 fsrc
->SrcRegisterExtSwz
.NegateW
};
327 for (c
= 0; c
< 4; c
++) {
328 switch (tgsi_util_get_full_src_register_extswizzle(fsrc
, c
)) {
329 case TGSI_EXTSWIZZLE_X
:
330 case TGSI_EXTSWIZZLE_Y
:
331 case TGSI_EXTSWIZZLE_Z
:
332 case TGSI_EXTSWIZZLE_W
:
335 case TGSI_EXTSWIZZLE_ZERO
:
336 zero_mask
|= (1 << c
);
339 case TGSI_EXTSWIZZLE_ONE
:
340 one_mask
|= (1 << c
);
347 if (!tgsi
.negate
&& neg
[c
])
348 neg_mask
|= (1 << c
);
351 if (mask
== MASK_ALL
&& !neg_mask
)
357 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
360 arith(fpc
, 0, SFL
, *src
, zero_mask
, *src
, none
, none
);
363 arith(fpc
, 0, STR
, *src
, one_mask
, *src
, none
, none
);
366 struct nv30_sreg one
= temp(fpc
);
367 arith(fpc
, 0, STR
, one
, neg_mask
, one
, none
, none
);
368 arith(fpc
, 0, MUL
, *src
, neg_mask
, *src
, neg(one
), none
);
375 nv30_fragprog_parse_instruction(struct nv30_fpc
*fpc
,
376 const struct tgsi_full_instruction
*finst
)
378 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
379 struct nv30_sreg src
[3], dst
, tmp
;
380 int mask
, sat
, unit
= 0;
381 int ai
= -1, ci
= -1;
384 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
387 fpc
->temp_temp_count
= 0;
388 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
389 const struct tgsi_full_src_register
*fsrc
;
391 fsrc
= &finst
->FullSrcRegisters
[i
];
392 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
393 src
[i
] = tgsi_src(fpc
, fsrc
);
397 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
398 const struct tgsi_full_src_register
*fsrc
;
400 fsrc
= &finst
->FullSrcRegisters
[i
];
402 switch (fsrc
->SrcRegister
.File
) {
403 case TGSI_FILE_INPUT
:
404 case TGSI_FILE_CONSTANT
:
405 case TGSI_FILE_TEMPORARY
:
406 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
413 switch (fsrc
->SrcRegister
.File
) {
414 case TGSI_FILE_INPUT
:
415 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
416 ai
= fsrc
->SrcRegister
.Index
;
417 src
[i
] = tgsi_src(fpc
, fsrc
);
419 NOUVEAU_MSG("extra src attr %d\n",
420 fsrc
->SrcRegister
.Index
);
422 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
423 tgsi_src(fpc
, fsrc
), none
, none
);
426 case TGSI_FILE_CONSTANT
:
427 case TGSI_FILE_IMMEDIATE
:
428 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
429 ci
= fsrc
->SrcRegister
.Index
;
430 src
[i
] = tgsi_src(fpc
, fsrc
);
433 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
434 tgsi_src(fpc
, fsrc
), none
, none
);
437 case TGSI_FILE_TEMPORARY
:
440 case TGSI_FILE_SAMPLER
:
441 unit
= fsrc
->SrcRegister
.Index
;
443 case TGSI_FILE_OUTPUT
:
446 NOUVEAU_ERR("bad src file\n");
451 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
452 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
453 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
455 switch (finst
->Instruction
.Opcode
) {
456 case TGSI_OPCODE_ABS
:
457 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
459 case TGSI_OPCODE_ADD
:
460 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
462 case TGSI_OPCODE_CMP
:
464 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
466 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
467 dst
.cc_test
= NV30_VP_INST_COND_LT
;
468 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
470 case TGSI_OPCODE_COS
:
471 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
473 case TGSI_OPCODE_DP3
:
474 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
476 case TGSI_OPCODE_DP4
:
477 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
479 case TGSI_OPCODE_DPH
:
481 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
482 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
483 swz(src
[1], W
, W
, W
, W
), none
);
485 case TGSI_OPCODE_DST
:
486 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
488 case TGSI_OPCODE_EX2
:
489 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
491 case TGSI_OPCODE_FLR
:
492 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
494 case TGSI_OPCODE_FRC
:
495 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
497 case TGSI_OPCODE_KIL
:
498 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
500 case TGSI_OPCODE_KILP
:
501 dst
= nv30_sr(NV30SR_NONE
, 0);
503 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
504 dst
.cc_update
= 0; dst
.cc_test
= NV30_FP_OP_COND_LT
;
505 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
507 case TGSI_OPCODE_LG2
:
508 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
510 // case TGSI_OPCODE_LIT:
511 case TGSI_OPCODE_LRP
:
512 arith(fpc
, sat
, LRP
, dst
, mask
, src
[0], src
[1], src
[2]);
514 case TGSI_OPCODE_MAD
:
515 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
517 case TGSI_OPCODE_MAX
:
518 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
520 case TGSI_OPCODE_MIN
:
521 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
523 case TGSI_OPCODE_MOV
:
524 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
526 case TGSI_OPCODE_MUL
:
527 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
529 case TGSI_OPCODE_POW
:
530 arith(fpc
, sat
, POW
, dst
, mask
, src
[0], src
[1], none
);
532 case TGSI_OPCODE_RCP
:
533 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
535 case TGSI_OPCODE_RET
:
538 case TGSI_OPCODE_RFL
:
539 arith(fpc
, 0, RFL
, dst
, mask
, src
[0], src
[1], none
);
541 case TGSI_OPCODE_RSQ
:
542 arith(fpc
, sat
, RSQ
, dst
, mask
, abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
544 case TGSI_OPCODE_SCS
:
546 arith(fpc
, sat
, COS
, dst
, MASK_X
,
547 swz(src
[0], X
, X
, X
, X
), none
, none
);
550 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
551 swz(src
[0], X
, X
, X
, X
), none
, none
);
554 case TGSI_OPCODE_SIN
:
555 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
557 case TGSI_OPCODE_SGE
:
558 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
560 case TGSI_OPCODE_SLT
:
561 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
563 case TGSI_OPCODE_SUB
:
564 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
566 case TGSI_OPCODE_TEX
:
567 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
569 case TGSI_OPCODE_TXB
:
570 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
572 case TGSI_OPCODE_TXP
:
573 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
575 case TGSI_OPCODE_XPD
:
577 arith(fpc
, 0, MUL
, tmp
, mask
,
578 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
579 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
580 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
584 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
592 nv30_fragprog_parse_decl_attrib(struct nv30_fpc
*fpc
,
593 const struct tgsi_full_declaration
*fdec
)
597 switch (fdec
->Semantic
.SemanticName
) {
598 case TGSI_SEMANTIC_POSITION
:
599 hw
= NV30_FP_OP_INPUT_SRC_POSITION
;
601 case TGSI_SEMANTIC_COLOR
:
602 if (fdec
->Semantic
.SemanticIndex
== 0) {
603 hw
= NV30_FP_OP_INPUT_SRC_COL0
;
605 if (fdec
->Semantic
.SemanticIndex
== 1) {
606 hw
= NV30_FP_OP_INPUT_SRC_COL1
;
608 NOUVEAU_ERR("bad colour semantic index\n");
612 case TGSI_SEMANTIC_FOG
:
613 hw
= NV30_FP_OP_INPUT_SRC_FOGC
;
615 case TGSI_SEMANTIC_GENERIC
:
616 if (fdec
->Semantic
.SemanticIndex
<= 7) {
617 hw
= NV30_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
620 NOUVEAU_ERR("bad generic semantic index\n");
625 NOUVEAU_ERR("bad input semantic\n");
629 fpc
->attrib_map
[fdec
->DeclarationRange
.First
] = hw
;
634 nv30_fragprog_parse_decl_output(struct nv30_fpc
*fpc
,
635 const struct tgsi_full_declaration
*fdec
)
637 switch (fdec
->Semantic
.SemanticName
) {
638 case TGSI_SEMANTIC_POSITION
:
639 fpc
->depth_id
= fdec
->DeclarationRange
.First
;
641 case TGSI_SEMANTIC_COLOR
:
642 fpc
->colour_id
= fdec
->DeclarationRange
.First
;
645 NOUVEAU_ERR("bad output semantic\n");
653 nv30_fragprog_prepare(struct nv30_fpc
*fpc
)
655 struct tgsi_parse_context p
;
656 /*int high_temp = -1, i;*/
658 tgsi_parse_init(&p
, fpc
->fp
->pipe
.tokens
);
659 while (!tgsi_parse_end_of_tokens(&p
)) {
660 const union tgsi_full_token
*tok
= &p
.FullToken
;
662 tgsi_parse_token(&p
);
663 switch(tok
->Token
.Type
) {
664 case TGSI_TOKEN_TYPE_DECLARATION
:
666 const struct tgsi_full_declaration
*fdec
;
667 fdec
= &p
.FullToken
.FullDeclaration
;
668 switch (fdec
->Declaration
.File
) {
669 case TGSI_FILE_INPUT
:
670 if (!nv30_fragprog_parse_decl_attrib(fpc
, fdec
))
673 case TGSI_FILE_OUTPUT
:
674 if (!nv30_fragprog_parse_decl_output(fpc
, fdec
))
677 /*case TGSI_FILE_TEMPORARY:
678 if (fdec->DeclarationRange.Last > high_temp) {
680 fdec->DeclarationRange.Last;
688 case TGSI_TOKEN_TYPE_IMMEDIATE
:
690 struct tgsi_full_immediate
*imm
;
693 imm
= &p
.FullToken
.FullImmediate
;
694 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
695 assert(fpc
->nr_imm
< MAX_IMM
);
697 vals
[0] = imm
->u
.ImmediateFloat32
[0].Float
;
698 vals
[1] = imm
->u
.ImmediateFloat32
[1].Float
;
699 vals
[2] = imm
->u
.ImmediateFloat32
[2].Float
;
700 vals
[3] = imm
->u
.ImmediateFloat32
[3].Float
;
701 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
711 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
712 for (i = 0; i < high_temp; i++)
713 fpc->r_temp[i] = temp(fpc);
714 fpc->r_temps_discard = 0;
727 nv30_fragprog_translate(struct nv30_context
*nv30
,
728 struct nv30_fragment_program
*fp
)
730 struct tgsi_parse_context parse
;
731 struct nv30_fpc
*fpc
= NULL
;
733 fpc
= CALLOC(1, sizeof(struct nv30_fpc
));
740 if (!nv30_fragprog_prepare(fpc
)) {
745 tgsi_parse_init(&parse
, fp
->pipe
.tokens
);
747 while (!tgsi_parse_end_of_tokens(&parse
)) {
748 tgsi_parse_token(&parse
);
750 switch (parse
.FullToken
.Token
.Type
) {
751 case TGSI_TOKEN_TYPE_INSTRUCTION
:
753 const struct tgsi_full_instruction
*finst
;
755 finst
= &parse
.FullToken
.FullInstruction
;
756 if (!nv30_fragprog_parse_instruction(fpc
, finst
))
765 fp
->fp_control
|= (fpc
->num_regs
-1)/2;
766 fp
->fp_reg_control
= (1<<16)|0x4;
768 /* Terminate final instruction */
769 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
771 /* Append NOP + END instruction, may or may not be necessary. */
772 fpc
->inst_offset
= fp
->insn_len
;
774 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
775 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
776 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
777 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
779 fp
->translated
= TRUE
;
782 tgsi_parse_free(&parse
);
787 nv30_fragprog_upload(struct nv30_context
*nv30
,
788 struct nv30_fragment_program
*fp
)
790 struct pipe_winsys
*ws
= nv30
->pipe
.winsys
;
791 const uint32_t le
= 1;
795 map
= ws
->buffer_map(ws
, fp
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
798 for (i
= 0; i
< fp
->insn_len
; i
++) {
799 fflush(stdout
); fflush(stderr
);
800 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
801 fflush(stdout
); fflush(stderr
);
805 if ((*(const uint8_t *)&le
)) {
806 for (i
= 0; i
< fp
->insn_len
; i
++) {
807 map
[i
] = fp
->insn
[i
];
810 /* Weird swapping for big-endian chips */
811 for (i
= 0; i
< fp
->insn_len
; i
++) {
812 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
813 ((fp
->insn
[i
] >> 16) & 0xffff);
817 ws
->buffer_unmap(ws
, fp
->buffer
);
821 nv30_fragprog_bind(struct nv30_context
*nv30
, struct nv30_fragment_program
*fp
)
823 struct pipe_buffer
*constbuf
=
824 nv30
->constbuf
[PIPE_SHADER_FRAGMENT
];
825 struct pipe_winsys
*ws
= nv30
->pipe
.winsys
;
828 if (!fp
->translated
) {
829 nv30_fragprog_translate(nv30
, fp
);
837 map
= ws
->buffer_map(ws
, constbuf
, PIPE_BUFFER_USAGE_CPU_READ
);
838 for (i
= 0; i
< fp
->nr_consts
; i
++) {
839 struct nv30_fragment_program_data
*fpd
= &fp
->consts
[i
];
840 uint32_t *p
= &fp
->insn
[fpd
->offset
];
841 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
843 if (!memcmp(p
, cb
, 4 * sizeof(float)))
845 memcpy(p
, cb
, 4 * sizeof(float));
848 ws
->buffer_unmap(ws
, constbuf
);
853 fp
->buffer
= ws
->buffer_create(ws
, 0x100, 0,
855 nv30_fragprog_upload(nv30
, fp
);
859 BEGIN_RING(rankine
, NV34TCL_FP_CONTROL
, 1);
860 OUT_RING (fp
->fp_control
);
861 BEGIN_RING(rankine
, NV34TCL_FP_REG_CONTROL
, 1);
862 OUT_RING (fp
->fp_reg_control
);
864 nv30
->fragprog
.active
= fp
;
868 nv30_fragprog_destroy(struct nv30_context
*nv30
,
869 struct nv30_fragment_program
*fp
)