1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_util.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv30_context.h"
20 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
21 #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
22 #define DEF_CTEST NV30_FP_OP_COND_TR
23 #include "nv30_shader.h"
25 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
26 #define neg(s) nv30_sr_neg((s))
27 #define abs(s) nv30_sr_abs((s))
28 #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
30 #define MAX_CONSTS 128
33 struct nv30_fragment_program
*fp
;
35 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
52 struct nv30_sreg imm
[MAX_IMM
];
56 static INLINE
struct nv30_sreg
57 temp(struct nv30_fpc
*fpc
)
61 idx
= fpc
->temp_temp_count
++;
62 idx
+= fpc
->high_temp
+ 1;
63 return nv30_sr(NV30SR_TEMP
, idx
);
66 static INLINE
struct nv30_sreg
67 constant(struct nv30_fpc
*fpc
, int pipe
, float vals
[4])
71 if (fpc
->nr_consts
== MAX_CONSTS
)
73 idx
= fpc
->nr_consts
++;
75 fpc
->consts
[idx
].pipe
= pipe
;
77 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
78 return nv30_sr(NV30SR_CONST
, idx
);
81 #define arith(cc,s,o,d,m,s0,s1,s2) \
82 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
83 (d), (m), (s0), (s1), (s2))
84 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
85 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
86 (d), (m), (s0), none, none)
89 grow_insns(struct nv30_fpc
*fpc
, int size
)
91 struct nv30_fragment_program
*fp
= fpc
->fp
;
94 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
98 emit_src(struct nv30_fpc
*fpc
, int pos
, struct nv30_sreg src
)
100 struct nv30_fragment_program
*fp
= fpc
->fp
;
101 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
106 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
107 hw
[0] |= (src
.index
<< NV30_FP_OP_INPUT_SRC_SHIFT
);
110 sr
|= NV30_FP_REG_SRC_HALF
;
113 sr
|= (NV30_FP_REG_TYPE_TEMP
<< NV30_FP_REG_TYPE_SHIFT
);
114 sr
|= (src
.index
<< NV30_FP_REG_SRC_SHIFT
);
118 hw
= &fp
->insn
[fpc
->inst_offset
];
119 if (fpc
->consts
[src
.index
].pipe
>= 0) {
120 struct nv30_fragment_program_data
*fpd
;
122 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
124 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
125 fpd
->offset
= fpc
->inst_offset
+ 4;
126 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
127 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
129 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
130 fpc
->consts
[src
.index
].vals
,
131 sizeof(uint32_t) * 4);
134 sr
|= (NV30_FP_REG_TYPE_CONST
<< NV30_FP_REG_TYPE_SHIFT
);
137 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
144 sr
|= NV30_FP_REG_NEGATE
;
147 hw
[1] |= (1 << (29 + pos
));
149 sr
|= ((src
.swz
[0] << NV30_FP_REG_SWZ_X_SHIFT
) |
150 (src
.swz
[1] << NV30_FP_REG_SWZ_Y_SHIFT
) |
151 (src
.swz
[2] << NV30_FP_REG_SWZ_Z_SHIFT
) |
152 (src
.swz
[3] << NV30_FP_REG_SWZ_W_SHIFT
));
158 emit_dst(struct nv30_fpc
*fpc
, struct nv30_sreg dst
)
160 struct nv30_fragment_program
*fp
= fpc
->fp
;
161 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
165 if (fpc
->num_regs
< (dst
.index
+ 1))
166 fpc
->num_regs
= dst
.index
+ 1;
169 if (dst
.index
== 1) {
170 fp
->fp_control
|= 0xe;
172 hw
[0] |= NV30_FP_OP_OUT_REG_HALF
;
182 hw
[0] |= (dst
.index
<< NV30_FP_OP_OUT_REG_SHIFT
);
186 nv30_fp_arith(struct nv30_fpc
*fpc
, int sat
, int op
,
187 struct nv30_sreg dst
, int mask
,
188 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
190 struct nv30_fragment_program
*fp
= fpc
->fp
;
193 fpc
->inst_offset
= fp
->insn_len
;
195 hw
= &fp
->insn
[fpc
->inst_offset
];
196 memset(hw
, 0, sizeof(uint32_t) * 4);
198 if (op
== NV30_FP_OP_OPCODE_KIL
)
199 fp
->fp_control
|= NV34TCL_FP_CONTROL_USES_KIL
;
200 hw
[0] |= (op
<< NV30_FP_OP_OPCODE_SHIFT
);
201 hw
[0] |= (mask
<< NV30_FP_OP_OUTMASK_SHIFT
);
202 hw
[2] |= (dst
.dst_scale
<< NV30_FP_OP_DST_SCALE_SHIFT
);
205 hw
[0] |= NV30_FP_OP_OUT_SAT
;
208 hw
[0] |= NV30_FP_OP_COND_WRITE_ENABLE
;
209 hw
[1] |= (dst
.cc_test
<< NV30_FP_OP_COND_SHIFT
);
210 hw
[1] |= ((dst
.cc_swz
[0] << NV30_FP_OP_COND_SWZ_X_SHIFT
) |
211 (dst
.cc_swz
[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT
) |
212 (dst
.cc_swz
[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT
) |
213 (dst
.cc_swz
[3] << NV30_FP_OP_COND_SWZ_W_SHIFT
));
216 emit_src(fpc
, 0, s0
);
217 emit_src(fpc
, 1, s1
);
218 emit_src(fpc
, 2, s2
);
222 nv30_fp_tex(struct nv30_fpc
*fpc
, int sat
, int op
, int unit
,
223 struct nv30_sreg dst
, int mask
,
224 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
226 struct nv30_fragment_program
*fp
= fpc
->fp
;
228 nv30_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
230 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV30_FP_OP_TEX_UNIT_SHIFT
);
231 fp
->samplers
|= (1 << unit
);
234 static INLINE
struct nv30_sreg
235 tgsi_src(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
237 struct nv30_sreg src
;
239 switch (fsrc
->SrcRegister
.File
) {
240 case TGSI_FILE_INPUT
:
241 src
= nv30_sr(NV30SR_INPUT
,
242 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
244 case TGSI_FILE_CONSTANT
:
245 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
247 case TGSI_FILE_IMMEDIATE
:
248 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
249 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
251 case TGSI_FILE_TEMPORARY
:
252 src
= nv30_sr(NV30SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
253 if (fpc
->high_temp
< src
.index
)
254 fpc
->high_temp
= src
.index
;
256 /* This is clearly insane, but gallium hands us shaders like this.
257 * Luckily fragprog results are just temp regs..
259 case TGSI_FILE_OUTPUT
:
260 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
261 return nv30_sr(NV30SR_OUTPUT
, 0);
263 return nv30_sr(NV30SR_OUTPUT
, 1);
266 NOUVEAU_ERR("bad src file\n");
270 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
271 src
.negate
= fsrc
->SrcRegister
.Negate
;
272 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
273 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
274 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
275 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
279 static INLINE
struct nv30_sreg
280 tgsi_dst(struct nv30_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
283 switch (fdst
->DstRegister
.File
) {
284 case TGSI_FILE_OUTPUT
:
285 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
286 return nv30_sr(NV30SR_OUTPUT
, 0);
288 return nv30_sr(NV30SR_OUTPUT
, 1);
290 case TGSI_FILE_TEMPORARY
:
291 idx
= fdst
->DstRegister
.Index
+ 1;
292 if (fpc
->high_temp
< idx
)
293 fpc
->high_temp
= idx
;
294 return nv30_sr(NV30SR_TEMP
, idx
);
296 return nv30_sr(NV30SR_NONE
, 0);
298 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
299 return nv30_sr(NV30SR_NONE
, 0);
308 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
309 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
310 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
311 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
316 src_native_swz(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
317 struct nv30_sreg
*src
)
319 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
320 struct nv30_sreg tgsi
= tgsi_src(fpc
, fsrc
);
321 uint mask
= 0, zero_mask
= 0, one_mask
= 0, neg_mask
= 0;
322 uint neg
[4] = { fsrc
->SrcRegisterExtSwz
.NegateX
,
323 fsrc
->SrcRegisterExtSwz
.NegateY
,
324 fsrc
->SrcRegisterExtSwz
.NegateZ
,
325 fsrc
->SrcRegisterExtSwz
.NegateW
};
328 for (c
= 0; c
< 4; c
++) {
329 switch (tgsi_util_get_full_src_register_extswizzle(fsrc
, c
)) {
330 case TGSI_EXTSWIZZLE_X
:
331 case TGSI_EXTSWIZZLE_Y
:
332 case TGSI_EXTSWIZZLE_Z
:
333 case TGSI_EXTSWIZZLE_W
:
336 case TGSI_EXTSWIZZLE_ZERO
:
337 zero_mask
|= (1 << c
);
340 case TGSI_EXTSWIZZLE_ONE
:
341 one_mask
|= (1 << c
);
348 if (!tgsi
.negate
&& neg
[c
])
349 neg_mask
|= (1 << c
);
352 if (mask
== MASK_ALL
&& !neg_mask
)
358 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
361 arith(fpc
, 0, SFL
, *src
, zero_mask
, *src
, none
, none
);
364 arith(fpc
, 0, STR
, *src
, one_mask
, *src
, none
, none
);
367 struct nv30_sreg one
= temp(fpc
);
368 arith(fpc
, 0, STR
, one
, neg_mask
, one
, none
, none
);
369 arith(fpc
, 0, MUL
, *src
, neg_mask
, *src
, neg(one
), none
);
376 nv30_fragprog_parse_instruction(struct nv30_fpc
*fpc
,
377 const struct tgsi_full_instruction
*finst
)
379 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
380 struct nv30_sreg src
[3], dst
, tmp
;
381 int mask
, sat
, unit
= 0;
382 int ai
= -1, ci
= -1;
385 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
388 fpc
->temp_temp_count
= 0;
389 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
390 const struct tgsi_full_src_register
*fsrc
;
392 fsrc
= &finst
->FullSrcRegisters
[i
];
393 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
394 src
[i
] = tgsi_src(fpc
, fsrc
);
398 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
399 const struct tgsi_full_src_register
*fsrc
;
401 fsrc
= &finst
->FullSrcRegisters
[i
];
403 switch (fsrc
->SrcRegister
.File
) {
404 case TGSI_FILE_INPUT
:
405 case TGSI_FILE_CONSTANT
:
406 case TGSI_FILE_TEMPORARY
:
407 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
414 switch (fsrc
->SrcRegister
.File
) {
415 case TGSI_FILE_INPUT
:
416 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
417 ai
= fsrc
->SrcRegister
.Index
;
418 src
[i
] = tgsi_src(fpc
, fsrc
);
420 NOUVEAU_MSG("extra src attr %d\n",
421 fsrc
->SrcRegister
.Index
);
423 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
424 tgsi_src(fpc
, fsrc
), none
, none
);
427 case TGSI_FILE_CONSTANT
:
428 case TGSI_FILE_IMMEDIATE
:
429 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
430 ci
= fsrc
->SrcRegister
.Index
;
431 src
[i
] = tgsi_src(fpc
, fsrc
);
434 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
435 tgsi_src(fpc
, fsrc
), none
, none
);
438 case TGSI_FILE_TEMPORARY
:
441 case TGSI_FILE_SAMPLER
:
442 unit
= fsrc
->SrcRegister
.Index
;
444 case TGSI_FILE_OUTPUT
:
447 NOUVEAU_ERR("bad src file\n");
452 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
453 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
454 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
456 switch (finst
->Instruction
.Opcode
) {
457 case TGSI_OPCODE_ABS
:
458 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
460 case TGSI_OPCODE_ADD
:
461 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
463 case TGSI_OPCODE_CMP
:
465 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
467 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
468 dst
.cc_test
= NV30_VP_INST_COND_LT
;
469 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
471 case TGSI_OPCODE_COS
:
472 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
474 case TGSI_OPCODE_DP3
:
475 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
477 case TGSI_OPCODE_DP4
:
478 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
480 case TGSI_OPCODE_DPH
:
482 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
483 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
484 swz(src
[1], W
, W
, W
, W
), none
);
486 case TGSI_OPCODE_DST
:
487 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
489 case TGSI_OPCODE_EX2
:
490 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
492 case TGSI_OPCODE_FLR
:
493 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
495 case TGSI_OPCODE_FRC
:
496 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
498 case TGSI_OPCODE_KIL
:
499 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
501 case TGSI_OPCODE_KILP
:
502 dst
= nv30_sr(NV30SR_NONE
, 0);
504 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
505 dst
.cc_update
= 0; dst
.cc_test
= NV30_FP_OP_COND_LT
;
506 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
508 case TGSI_OPCODE_LG2
:
509 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
511 // case TGSI_OPCODE_LIT:
512 case TGSI_OPCODE_LRP
:
513 arith(fpc
, sat
, LRP
, dst
, mask
, src
[0], src
[1], src
[2]);
515 case TGSI_OPCODE_MAD
:
516 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
518 case TGSI_OPCODE_MAX
:
519 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
521 case TGSI_OPCODE_MIN
:
522 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
524 case TGSI_OPCODE_MOV
:
525 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
527 case TGSI_OPCODE_MUL
:
528 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
530 case TGSI_OPCODE_POW
:
531 arith(fpc
, sat
, POW
, dst
, mask
, src
[0], src
[1], none
);
533 case TGSI_OPCODE_RCP
:
534 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
536 case TGSI_OPCODE_RET
:
539 case TGSI_OPCODE_RFL
:
540 arith(fpc
, 0, RFL
, dst
, mask
, src
[0], src
[1], none
);
542 case TGSI_OPCODE_RSQ
:
543 arith(fpc
, sat
, RSQ
, dst
, mask
, abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
545 case TGSI_OPCODE_SCS
:
547 arith(fpc
, sat
, COS
, dst
, MASK_X
,
548 swz(src
[0], X
, X
, X
, X
), none
, none
);
551 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
552 swz(src
[0], X
, X
, X
, X
), none
, none
);
555 case TGSI_OPCODE_SIN
:
556 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
558 case TGSI_OPCODE_SGE
:
559 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
561 case TGSI_OPCODE_SLT
:
562 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
564 case TGSI_OPCODE_SUB
:
565 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
567 case TGSI_OPCODE_TEX
:
568 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
570 case TGSI_OPCODE_TXB
:
571 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
573 case TGSI_OPCODE_TXP
:
574 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
576 case TGSI_OPCODE_XPD
:
578 arith(fpc
, 0, MUL
, tmp
, mask
,
579 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
580 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
581 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
585 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
593 nv30_fragprog_parse_decl_attrib(struct nv30_fpc
*fpc
,
594 const struct tgsi_full_declaration
*fdec
)
598 switch (fdec
->Semantic
.SemanticName
) {
599 case TGSI_SEMANTIC_POSITION
:
600 hw
= NV30_FP_OP_INPUT_SRC_POSITION
;
602 case TGSI_SEMANTIC_COLOR
:
603 if (fdec
->Semantic
.SemanticIndex
== 0) {
604 hw
= NV30_FP_OP_INPUT_SRC_COL0
;
606 if (fdec
->Semantic
.SemanticIndex
== 1) {
607 hw
= NV30_FP_OP_INPUT_SRC_COL1
;
609 NOUVEAU_ERR("bad colour semantic index\n");
613 case TGSI_SEMANTIC_FOG
:
614 hw
= NV30_FP_OP_INPUT_SRC_FOGC
;
616 case TGSI_SEMANTIC_GENERIC
:
617 if (fdec
->Semantic
.SemanticIndex
<= 7) {
618 hw
= NV30_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
621 NOUVEAU_ERR("bad generic semantic index\n");
626 NOUVEAU_ERR("bad input semantic\n");
630 fpc
->attrib_map
[fdec
->DeclarationRange
.First
] = hw
;
635 nv30_fragprog_parse_decl_output(struct nv30_fpc
*fpc
,
636 const struct tgsi_full_declaration
*fdec
)
638 switch (fdec
->Semantic
.SemanticName
) {
639 case TGSI_SEMANTIC_POSITION
:
640 fpc
->depth_id
= fdec
->DeclarationRange
.First
;
642 case TGSI_SEMANTIC_COLOR
:
643 fpc
->colour_id
= fdec
->DeclarationRange
.First
;
646 NOUVEAU_ERR("bad output semantic\n");
654 nv30_fragprog_translate(struct nv30_context
*nv30
,
655 struct nv30_fragment_program
*fp
)
657 struct tgsi_parse_context parse
;
658 struct nv30_fpc
*fpc
= NULL
;
660 fpc
= CALLOC(1, sizeof(struct nv30_fpc
));
667 tgsi_parse_init(&parse
, fp
->pipe
.tokens
);
669 while (!tgsi_parse_end_of_tokens(&parse
)) {
670 tgsi_parse_token(&parse
);
672 switch (parse
.FullToken
.Token
.Type
) {
673 case TGSI_TOKEN_TYPE_DECLARATION
:
675 const struct tgsi_full_declaration
*fdec
;
676 fdec
= &parse
.FullToken
.FullDeclaration
;
677 switch (fdec
->Declaration
.File
) {
678 case TGSI_FILE_INPUT
:
679 if (!nv30_fragprog_parse_decl_attrib(fpc
, fdec
))
682 case TGSI_FILE_OUTPUT
:
683 if (!nv30_fragprog_parse_decl_output(fpc
, fdec
))
691 case TGSI_TOKEN_TYPE_IMMEDIATE
:
693 struct tgsi_full_immediate
*imm
;
697 imm
= &parse
.FullToken
.FullImmediate
;
698 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
699 assert(fpc
->nr_imm
< MAX_IMM
);
701 for (i
= 0; i
< 4; i
++)
702 vals
[i
] = imm
->u
.ImmediateFloat32
[i
].Float
;
703 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
706 case TGSI_TOKEN_TYPE_INSTRUCTION
:
708 const struct tgsi_full_instruction
*finst
;
710 finst
= &parse
.FullToken
.FullInstruction
;
711 if (!nv30_fragprog_parse_instruction(fpc
, finst
))
720 fp
->fp_control
|= (fpc
->num_regs
-1)/2;
721 fp
->fp_reg_control
= (1<<16)|0x4;
723 /* Terminate final instruction */
724 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
726 /* Append NOP + END instruction, may or may not be necessary. */
727 fpc
->inst_offset
= fp
->insn_len
;
729 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
730 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
731 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
732 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
734 fp
->translated
= TRUE
;
737 tgsi_parse_free(&parse
);
742 nv30_fragprog_bind(struct nv30_context
*nv30
, struct nv30_fragment_program
*fp
)
744 struct pipe_winsys
*ws
= nv30
->pipe
.winsys
;
747 if (!fp
->translated
) {
748 nv30_fragprog_translate(nv30
, fp
);
754 float *map
= ws
->buffer_map(ws
, nv30
->fragprog
.constant_buf
,
755 PIPE_BUFFER_USAGE_CPU_READ
);
756 for (i
= 0; i
< fp
->nr_consts
; i
++) {
757 struct nv30_fragment_program_data
*fpd
= &fp
->consts
[i
];
758 uint32_t *p
= &fp
->insn
[fpd
->offset
];
759 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
761 if (!memcmp(p
, cb
, 4 * sizeof(float)))
763 memcpy(p
, cb
, 4 * sizeof(float));
766 ws
->buffer_unmap(ws
, nv30
->fragprog
.constant_buf
);
770 const uint32_t le
= 1;
774 fp
->buffer
= ws
->buffer_create(ws
, 0x100, 0,
776 map
= ws
->buffer_map(ws
, fp
->buffer
,
777 PIPE_BUFFER_USAGE_CPU_WRITE
);
780 for (i
= 0; i
< fp
->insn_len
; i
++) {
781 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
785 if ((*(const uint8_t *)&le
)) {
786 for (i
= 0; i
< fp
->insn_len
; i
++) {
787 map
[i
] = fp
->insn
[i
];
790 /* Weird swapping for big-endian chips */
791 for (i
= 0; i
< fp
->insn_len
; i
++) {
792 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
793 ((fp
->insn
[i
] >> 16) & 0xffff);
797 ws
->buffer_unmap(ws
, fp
->buffer
);
801 BEGIN_RING(rankine
, NV34TCL_FP_CONTROL
, 1);
802 OUT_RING (fp
->fp_control
);
803 BEGIN_RING(rankine
, NV34TCL_FP_REG_CONTROL
, 1);
804 OUT_RING (fp
->fp_reg_control
);
806 nv30
->fragprog
.active
= fp
;
810 nv30_fragprog_destroy(struct nv30_context
*nv30
,
811 struct nv30_fragment_program
*fp
)