1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_parse.h"
8 #include "tgsi/tgsi_util.h"
10 #include "nv30_context.h"
/* All four component write-mask bits combined. */
20 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
/* DEF_SCALE and DEF_CTEST are defined ahead of the nv30_shader.h include
 * below; presumably that header consumes them as defaults - TODO confirm. */
21 #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
22 #define DEF_CTEST NV30_FP_OP_COND_TR
23 #include "nv30_shader.h"
/* Shorthand for the nv30_sr_* helpers. swz() pastes SWZ_ onto each
 * component name; scale() pastes NV30_FP_OP_DST_SCALE_ onto its second
 * argument. */
25 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
26 #define neg(s) nv30_sr_neg((s))
27 #define abs(s) nv30_sr_abs((s))
28 #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
/* Capacity limit that constant() compares fpc->nr_consts against. */
30 #define MAX_CONSTS 128
/* Fragment program being compiled; the emit helpers reach insn, consts,
 * fp_control and samplers through this pointer. */
33 struct nv30_fragment_program
*fp
;
/* Indexed by TGSI input register index; filled in by
 * nv30_fragprog_parse_decl_attrib() and read back in tgsi_src(). */
35 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
/* Immediates in the order they are declared, each held as the sreg returned
 * by constant(); looked up by index in tgsi_src(). */
52 struct nv30_sreg imm
[MAX_IMM
];
/* Hand out the next scratch temporary for the current instruction.
 * The index is fpc->temp_temp_count (post-incremented) plus
 * fpc->high_temp + 1, which places it above the highest temporary index
 * that tgsi_src()/tgsi_dst() have recorded in high_temp so far.
 * Returns an NV30SR_TEMP sreg for that index. */
56 static INLINE
struct nv30_sreg
57 temp(struct nv30_fpc
*fpc
)
61 idx
= fpc
->temp_temp_count
++;
62 idx
+= fpc
->high_temp
+ 1;
63 return nv30_sr(NV30SR_TEMP
, idx
);
/* Append an entry to fpc->consts and return an NV30SR_CONST sreg for it.
 *   pipe: stored in the entry; emit_src() tests it with pipe >= 0 to decide
 *         between recording a fixup and copying vals into the program.
 *   vals: four floats copied into the entry.
 * NOTE(review): tgsi_src() calls this with vals == NULL; the guard that
 * presumably protects the memcpy below is not visible in this extract -
 * confirm before relying on it. */
66 static INLINE
struct nv30_sreg
67 constant(struct nv30_fpc
*fpc
, int pipe
, float vals
[4])
/* Table-full check; the statement it guards is not visible here. */
71 if (fpc
->nr_consts
== MAX_CONSTS
)
73 idx
= fpc
->nr_consts
++;
75 fpc
->consts
[idx
].pipe
= pipe
;
77 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
78 return nv30_sr(NV30SR_CONST
, idx
);
/* arith(): calls nv30_fp_arith() with the opcode name pasted onto
 * NV30_FP_OP_OPCODE_. */
81 #define arith(cc,s,o,d,m,s0,s1,s2) \
82 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
83 (d), (m), (s0), (s1), (s2))
/* tex(): same for nv30_fp_tex(), with the texture unit as extra argument.
 * NOTE(review): the s1 and s2 parameters are ignored - the expansion passes
 * the identifier none twice, so a variable of that name must be in scope at
 * every call site. */
84 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
85 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
86 (d), (m), (s0), none, none)
/* Resize fp->insn so that it holds fp->insn_len 32-bit words.
 *   size: presumably the number of words being added; the statement that
 *         uses it is not visible in this extract - TODO confirm.
 * NOTE(review): the realloc result is assigned straight back to fp->insn and
 * no failure check is visible; on failure the previous buffer pointer would
 * be overwritten with NULL. */
89 grow_insns(struct nv30_fpc
*fpc
, int size
)
91 struct nv30_fragment_program
*fp
= fpc
->fp
;
94 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
/* Encode source operand number pos of the instruction at fpc->inst_offset.
 * The visible code accumulates register type, index, half/negate flags and
 * the swizzle into sr, and ORs side-band bits into words 0 and 1 of the
 * instruction. Constant sources additionally use the four words at
 * inst_offset + 4. The case labels selecting between the register kinds are
 * not visible in this extract. */
98 emit_src(struct nv30_fpc
*fpc
, int pos
, struct nv30_sreg src
)
100 struct nv30_fragment_program
*fp
= fpc
->fp
;
101 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
/* Input register: the type goes into sr, the input index into word 0. */
106 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
107 hw
[0] |= (src
.index
<< NV30_FP_OP_INPUT_SRC_SHIFT
);
110 sr
|= NV30_FP_REG_SRC_HALF
;
113 sr
|= (NV30_FP_REG_TYPE_TEMP
<< NV30_FP_REG_TYPE_SHIFT
);
114 sr
|= (src
.index
<< NV30_FP_REG_SRC_SHIFT
);
/* hw is derived again from fp->insn here; presumably the buffer was just
 * regrown and may have moved - TODO confirm. */
118 hw
= &fp
->insn
[fpc
->inst_offset
];
/* pipe >= 0: append a fixup to fp->consts recording the word offset
 * (inst_offset + 4) and the pipe index, and zero the four words there.
 * nv30_fragprog_validate() later fills them from the constant buffer. */
119 if (fpc
->consts
[src
.index
].pipe
>= 0) {
120 struct nv30_fragment_program_data
*fpd
;
122 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
124 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
125 fpd
->offset
= fpc
->inst_offset
+ 4;
126 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
127 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
/* Copy the stored vals into the four words at inst_offset + 4 (presumably
 * the pipe < 0 branch; the else itself is not visible here). */
129 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
130 fpc
->consts
[src
.index
].vals
,
131 sizeof(uint32_t) * 4);
134 sr
|= (NV30_FP_REG_TYPE_CONST
<< NV30_FP_REG_TYPE_SHIFT
);
137 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
144 sr
|= NV30_FP_REG_NEGATE
;
/* Set bit (29 + pos) of word 1; the guarding condition is not visible in
 * this extract. */
147 hw
[1] |= (1 << (29 + pos
));
/* Pack the four swizzle selectors into sr. */
149 sr
|= ((src
.swz
[0] << NV30_FP_REG_SWZ_X_SHIFT
) |
150 (src
.swz
[1] << NV30_FP_REG_SWZ_Y_SHIFT
) |
151 (src
.swz
[2] << NV30_FP_REG_SWZ_Z_SHIFT
) |
152 (src
.swz
[3] << NV30_FP_REG_SWZ_W_SHIFT
));
/* Encode destination register dst into word 0 of the instruction at
 * fpc->inst_offset. The case labels selecting between register kinds are
 * not visible in this extract. */
158 emit_dst(struct nv30_fpc
*fpc
, struct nv30_sreg dst
)
160 struct nv30_fragment_program
*fp
= fpc
->fp
;
161 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
/* Keep num_regs at least dst.index + 1; nv30_fragprog_translate() later
 * folds it into fp_control. */
165 if (fpc
->num_regs
< (dst
.index
+ 1))
166 fpc
->num_regs
= dst
.index
+ 1;
/* Index 1 additionally ORs 0xe into fp_control. */
169 if (dst
.index
== 1) {
170 fp
->fp_control
|= 0xe;
172 hw
[0] |= NV30_FP_OP_OUT_REG_HALF
;
182 hw
[0] |= (dst
.index
<< NV30_FP_OP_OUT_REG_SHIFT
);
/* Emit one arithmetic instruction at the current end of the program.
 * Sets fpc->inst_offset to fp->insn_len, clears the four instruction words,
 * then fills in opcode, write mask, destination scale and the condition
 * test/swizzle from dst, and finally encodes s0..s2 through emit_src().
 *   sat:  presumably gates NV30_FP_OP_OUT_SAT; the condition is not visible
 *         in this extract.
 *   op:   hardware opcode; KIL also flags USES_KIL in fp->fp_control.
 *   mask: hardware write mask, shifted into word 0. */
186 nv30_fp_arith(struct nv30_fpc
*fpc
, int sat
, int op
,
187 struct nv30_sreg dst
, int mask
,
188 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
190 struct nv30_fragment_program
*fp
= fpc
->fp
;
193 fpc
->inst_offset
= fp
->insn_len
;
195 hw
= &fp
->insn
[fpc
->inst_offset
];
196 memset(hw
, 0, sizeof(uint32_t) * 4);
198 if (op
== NV30_FP_OP_OPCODE_KIL
)
199 fp
->fp_control
|= NV34TCL_FP_CONTROL_USES_KIL
;
200 hw
[0] |= (op
<< NV30_FP_OP_OPCODE_SHIFT
);
201 hw
[0] |= (mask
<< NV30_FP_OP_OUTMASK_SHIFT
);
202 hw
[2] |= (dst
.dst_scale
<< NV30_FP_OP_DST_SCALE_SHIFT
);
205 hw
[0] |= NV30_FP_OP_OUT_SAT
;
208 hw
[0] |= NV30_FP_OP_COND_WRITE_ENABLE
;
/* Condition test and condition-code swizzle both live in word 1. */
209 hw
[1] |= (dst
.cc_test
<< NV30_FP_OP_COND_SHIFT
);
210 hw
[1] |= ((dst
.cc_swz
[0] << NV30_FP_OP_COND_SWZ_X_SHIFT
) |
211 (dst
.cc_swz
[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT
) |
212 (dst
.cc_swz
[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT
) |
213 (dst
.cc_swz
[3] << NV30_FP_OP_COND_SWZ_W_SHIFT
));
216 emit_src(fpc
, 0, s0
);
217 emit_src(fpc
, 1, s1
);
218 emit_src(fpc
, 2, s2
);
/* Emit a texture instruction.
 * Delegates the common encoding to nv30_fp_arith(), then ORs the texture
 * unit into word 0 of the instruction just emitted and sets bit unit in
 * the fp->samplers mask. */
222 nv30_fp_tex(struct nv30_fpc
*fpc
, int sat
, int op
, int unit
,
223 struct nv30_sreg dst
, int mask
,
224 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
226 struct nv30_fragment_program
*fp
= fpc
->fp
;
228 nv30_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
230 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV30_FP_OP_TEX_UNIT_SHIFT
);
231 fp
->samplers
|= (1 << unit
);
/* Translate a TGSI source operand into an nv30_sreg.
 *   INPUT     -> NV30SR_INPUT, index looked up in fpc->attrib_map.
 *   CONSTANT  -> a new constant() entry with pipe set to the TGSI register
 *                index and vals NULL (one entry per reference).
 *   IMMEDIATE -> the sreg recorded in fpc->imm (index asserted < nr_imm).
 *   TEMPORARY -> NV30SR_TEMP at index + 1; raises fpc->high_temp if needed.
 *   OUTPUT    -> returns NV30SR_OUTPUT 0 when the index equals
 *                fpc->colour_id, NV30SR_OUTPUT 1 otherwise.
 * Any other file is reported through NOUVEAU_ERR. For the paths that do not
 * return early, the absolute/negate flags and the four swizzle selectors are
 * then copied from fsrc into the result. */
234 static INLINE
struct nv30_sreg
235 tgsi_src(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
237 struct nv30_sreg src
;
239 switch (fsrc
->SrcRegister
.File
) {
240 case TGSI_FILE_INPUT
:
241 src
= nv30_sr(NV30SR_INPUT
,
242 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
244 case TGSI_FILE_CONSTANT
:
245 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
247 case TGSI_FILE_IMMEDIATE
:
248 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
249 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
251 case TGSI_FILE_TEMPORARY
:
252 src
= nv30_sr(NV30SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
253 if (fpc
->high_temp
< src
.index
)
254 fpc
->high_temp
= src
.index
;
256 /* This is clearly insane, but gallium hands us shaders like this.
257 * Luckily fragprog results are just temp regs..
259 case TGSI_FILE_OUTPUT
:
260 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
261 return nv30_sr(NV30SR_OUTPUT
, 0);
263 return nv30_sr(NV30SR_OUTPUT
, 1);
266 NOUVEAU_ERR("bad src file\n");
270 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
271 src
.negate
= fsrc
->SrcRegister
.Negate
;
272 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
273 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
274 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
275 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
// Translate a TGSI destination operand into an nv30_sreg.
//   OUTPUT    -> NV30SR_OUTPUT 0 when the index equals fpc->colour_id,
//                NV30SR_OUTPUT 1 otherwise.
//   TEMPORARY -> NV30SR_TEMP at index + 1; raises fpc->high_temp if needed.
// An unhandled file is reported through NOUVEAU_ERR (with the file number)
// and yields NV30SR_NONE.
279 static INLINE
struct nv30_sreg
280 tgsi_dst(struct nv30_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
283 switch (fdst
->DstRegister
.File
) {
284 case TGSI_FILE_OUTPUT
:
285 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
286 return nv30_sr(NV30SR_OUTPUT
, 0);
288 return nv30_sr(NV30SR_OUTPUT
, 1);
290 case TGSI_FILE_TEMPORARY
:
291 idx
= fdst
->DstRegister
.Index
+ 1;
292 if (fpc
->high_temp
< idx
)
293 fpc
->high_temp
= idx
;
294 return nv30_sr(NV30SR_TEMP
, idx
);
296 return nv30_sr(NV30SR_NONE
, 0);
298 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
299 return nv30_sr(NV30SR_NONE
, 0);
// Write-mask conversion: every TGSI_WRITEMASK_* bit set in tgsi turns on the
// matching MASK_* bit in mask. The enclosing function header is not visible
// in this extract; presumably this is the tgsi_mask() helper that
// nv30_fragprog_parse_instruction() calls - TODO confirm.
308 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
309 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
310 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
311 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
// Inspect the per-component swizzle of fsrc and, on the path visible here,
// emit a MOV of the translated source (tgsi) into *src under mask.
// The caller tests the return value with the ! operator.
// NOTE(review): the switch cases, the way mask is built and the statement
// guarded by the mask == MASK_ALL test are not visible in this extract.
316 src_native_swz(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
317 struct nv30_sreg
*src
)
319 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
320 struct nv30_sreg tgsi
= tgsi_src(fpc
, fsrc
);
324 for (c
= 0; c
< 4; c
++) {
325 switch (tgsi_util_get_full_src_register_swizzle(fsrc
, c
)) {
337 if (mask
== MASK_ALL
)
343 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
// Translate one TGSI instruction into hardware instruction(s).
// Visible flow:
//   1. fpc->temp_temp_count is reset, so scratch temporaries are numbered
//      afresh for every instruction.
//   2. TEMPORARY sources are translated in a first pass.
//   3. Every source is then handled by register file. Only one distinct
//      INPUT index (ai) and one distinct CONSTANT/IMMEDIATE index (ci) are
//      used directly; further ones are MOVed into src[i] first. SAMPLER
//      sources only select the texture unit.
//   4. dst, mask and sat are derived from the first destination register and
//      the opcode switch emits the instruction(s).
// The caller tests the return value with the ! operator.
349 nv30_fragprog_parse_instruction(struct nv30_fpc
*fpc
,
350 const struct tgsi_full_instruction
*finst
)
352 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
353 struct nv30_sreg src
[3], dst
, tmp
;
354 int mask
, sat
, unit
= 0;
355 int ai
= -1, ci
= -1;
358 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
361 fpc
->temp_temp_count
= 0;
// Pass 1: TEMPORARY sources. tgsi_src() also raises fpc->high_temp for
// them - presumably the reason this runs before any temp() allocation.
362 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
363 const struct tgsi_full_src_register
*fsrc
;
365 fsrc
= &finst
->FullSrcRegisters
[i
];
366 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
367 src
[i
] = tgsi_src(fpc
, fsrc
);
// Pass 2: handle every source according to its register file.
371 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
372 const struct tgsi_full_src_register
*fsrc
;
374 fsrc
= &finst
->FullSrcRegisters
[i
];
376 switch (fsrc
->SrcRegister
.File
) {
377 case TGSI_FILE_INPUT
:
378 case TGSI_FILE_CONSTANT
:
379 case TGSI_FILE_TEMPORARY
:
380 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
387 switch (fsrc
->SrcRegister
.File
) {
388 case TGSI_FILE_INPUT
:
// The first INPUT index seen (ai) is used directly; a different one is
// logged through NOUVEAU_MSG and MOVed into src[i] first.
389 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
390 ai
= fsrc
->SrcRegister
.Index
;
391 src
[i
] = tgsi_src(fpc
, fsrc
);
393 NOUVEAU_MSG("extra src attr %d\n",
394 fsrc
->SrcRegister
.Index
);
396 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
397 tgsi_src(fpc
, fsrc
), none
, none
);
400 case TGSI_FILE_CONSTANT
:
401 case TGSI_FILE_IMMEDIATE
:
// Same scheme for CONSTANT and IMMEDIATE sources, tracked through ci.
402 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
403 ci
= fsrc
->SrcRegister
.Index
;
404 src
[i
] = tgsi_src(fpc
, fsrc
);
407 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
408 tgsi_src(fpc
, fsrc
), none
, none
);
411 case TGSI_FILE_TEMPORARY
:
414 case TGSI_FILE_SAMPLER
:
// A SAMPLER source only selects the texture unit.
415 unit
= fsrc
->SrcRegister
.Index
;
417 case TGSI_FILE_OUTPUT
:
420 NOUVEAU_ERR("bad src file\n");
// Destination, hardware write mask and saturate flag.
425 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
426 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
427 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
429 switch (finst
->Instruction
.Opcode
) {
430 case TGSI_OPCODE_ABS
:
// ABS is a MOV with the absolute-value modifier on the source.
431 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
433 case TGSI_OPCODE_ADD
:
434 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
436 case TGSI_OPCODE_CMP
:
// CMP: MOV src[2] to dst, MOV src[0] to tmp, then a MOV of src[1] to dst
// with the LT condition test set on dst.
438 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
440 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
// NOTE(review): this uses the NV30_VP_INST_COND_LT name, whereas the KIL
// case below uses NV30_FP_OP_COND_LT - confirm the two values are equal.
441 dst
.cc_test
= NV30_VP_INST_COND_LT
;
442 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
444 case TGSI_OPCODE_COS
:
445 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
447 case TGSI_OPCODE_DP3
:
448 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
450 case TGSI_OPCODE_DP4
:
451 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
453 case TGSI_OPCODE_DPH
:
// DPH: DP3 into tmp.x, then add the W component of src[1].
455 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
456 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
457 swz(src
[1], W
, W
, W
, W
), none
);
459 case TGSI_OPCODE_DST
:
460 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
462 case TGSI_OPCODE_EX2
:
463 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
465 case TGSI_OPCODE_FLR
:
466 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
468 case TGSI_OPCODE_FRC
:
469 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
471 case TGSI_OPCODE_KILP
:
472 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
474 case TGSI_OPCODE_KIL
:
// KIL: MOV src[0] to the NONE register, then a KIL whose condition test
// is LT.
475 dst
= nv30_sr(NV30SR_NONE
, 0);
477 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
478 dst
.cc_update
= 0; dst
.cc_test
= NV30_FP_OP_COND_LT
;
479 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
481 case TGSI_OPCODE_LG2
:
482 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
484 // case TGSI_OPCODE_LIT:
485 case TGSI_OPCODE_LRP
:
486 arith(fpc
, sat
, LRP
, dst
, mask
, src
[0], src
[1], src
[2]);
488 case TGSI_OPCODE_MAD
:
489 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
491 case TGSI_OPCODE_MAX
:
492 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
494 case TGSI_OPCODE_MIN
:
495 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
497 case TGSI_OPCODE_MOV
:
498 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
500 case TGSI_OPCODE_MUL
:
501 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
503 case TGSI_OPCODE_POW
:
504 arith(fpc
, sat
, POW
, dst
, mask
, src
[0], src
[1], none
);
506 case TGSI_OPCODE_RCP
:
507 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
509 case TGSI_OPCODE_RET
:
512 case TGSI_OPCODE_RFL
:
// NOTE(review): RFL passes 0 for sat, unlike the neighbouring opcodes -
// confirm this is intended.
513 arith(fpc
, 0, RFL
, dst
, mask
, src
[0], src
[1], none
);
515 case TGSI_OPCODE_RSQ
:
// RSQ operates on the absolute value of the X component.
516 arith(fpc
, sat
, RSQ
, dst
, mask
, abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
518 case TGSI_OPCODE_SCS
:
// SCS: COS of src.x into dst.x and SIN of src.x into dst.y; the guards
// around the two emits are not visible in this extract.
520 arith(fpc
, sat
, COS
, dst
, MASK_X
,
521 swz(src
[0], X
, X
, X
, X
), none
, none
);
524 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
525 swz(src
[0], X
, X
, X
, X
), none
, none
);
528 case TGSI_OPCODE_SIN
:
529 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
531 case TGSI_OPCODE_SGE
:
532 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
534 case TGSI_OPCODE_SGT
:
535 arith(fpc
, sat
, SGT
, dst
, mask
, src
[0], src
[1], none
);
537 case TGSI_OPCODE_SLT
:
538 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
540 case TGSI_OPCODE_SUB
:
// SUB is emitted as ADD with the second operand negated.
541 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
543 case TGSI_OPCODE_TEX
:
544 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
546 case TGSI_OPCODE_TXB
:
547 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
549 case TGSI_OPCODE_TXP
:
550 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
552 case TGSI_OPCODE_XPD
:
// XPD: a MUL of swizzled operands into tmp followed by a MAD that never
// writes W (mask & ~MASK_W). The third MAD operand is not visible here.
554 arith(fpc
, 0, MUL
, tmp
, mask
,
555 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
556 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
557 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
561 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
// Map the semantic of an input declaration to a hardware input source and
// store it in fpc->attrib_map at DeclarationRange.First.
//   POSITION          -> NV30_FP_OP_INPUT_SRC_POSITION
//   COLOR index 0 / 1 -> NV30_FP_OP_INPUT_SRC_COL0 / COL1
//   FOG               -> NV30_FP_OP_INPUT_SRC_FOGC
//   GENERIC index 0-7 -> NV30_FP_OP_INPUT_SRC_TC(...)
// Other semantics or indices are reported through NOUVEAU_ERR.
// The caller tests the return value with the ! operator.
// NOTE(review): only DeclarationRange.First is mapped; a declaration that
// spans several registers would leave the others unmapped - confirm that
// ranges are always a single register here.
569 nv30_fragprog_parse_decl_attrib(struct nv30_fpc
*fpc
,
570 const struct tgsi_full_declaration
*fdec
)
574 switch (fdec
->Semantic
.SemanticName
) {
575 case TGSI_SEMANTIC_POSITION
:
576 hw
= NV30_FP_OP_INPUT_SRC_POSITION
;
578 case TGSI_SEMANTIC_COLOR
:
579 if (fdec
->Semantic
.SemanticIndex
== 0) {
580 hw
= NV30_FP_OP_INPUT_SRC_COL0
;
582 if (fdec
->Semantic
.SemanticIndex
== 1) {
583 hw
= NV30_FP_OP_INPUT_SRC_COL1
;
585 NOUVEAU_ERR("bad colour semantic index\n");
589 case TGSI_SEMANTIC_FOG
:
590 hw
= NV30_FP_OP_INPUT_SRC_FOGC
;
592 case TGSI_SEMANTIC_GENERIC
:
593 if (fdec
->Semantic
.SemanticIndex
<= 7) {
594 hw
= NV30_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
597 NOUVEAU_ERR("bad generic semantic index\n");
602 NOUVEAU_ERR("bad input semantic\n");
606 fpc
->attrib_map
[fdec
->DeclarationRange
.First
] = hw
;
// Record which TGSI output register carries depth and which carries colour:
// a POSITION semantic sets fpc->depth_id and a COLOR semantic sets
// fpc->colour_id, both to DeclarationRange.First. Any other semantic is
// reported through NOUVEAU_ERR. tgsi_src()/tgsi_dst() compare against
// colour_id to pick the hardware output register.
// The caller tests the return value with the ! operator.
611 nv30_fragprog_parse_decl_output(struct nv30_fpc
*fpc
,
612 const struct tgsi_full_declaration
*fdec
)
614 switch (fdec
->Semantic
.SemanticName
) {
615 case TGSI_SEMANTIC_POSITION
:
616 fpc
->depth_id
= fdec
->DeclarationRange
.First
;
618 case TGSI_SEMANTIC_COLOR
:
619 fpc
->colour_id
= fdec
->DeclarationRange
.First
;
622 NOUVEAU_ERR("bad output semantic\n");
// First pass over the TGSI token stream of fpc->fp.
// INPUT and OUTPUT declarations are handed to the parse_decl helpers, and
// every immediate is registered through constant() with pipe == -1, the
// returned sreg being appended to fpc->imm. Immediates are asserted to be
// FLOAT32 and to fit within MAX_IMM.
// The caller tests the return value with the ! operator.
630 nv30_fragprog_prepare(struct nv30_fpc
*fpc
)
632 struct tgsi_parse_context p
;
633 /*int high_temp = -1, i;*/
635 tgsi_parse_init(&p
, fpc
->fp
->pipe
.tokens
);
636 while (!tgsi_parse_end_of_tokens(&p
)) {
637 const union tgsi_full_token
*tok
= &p
.FullToken
;
639 tgsi_parse_token(&p
);
640 switch(tok
->Token
.Type
) {
641 case TGSI_TOKEN_TYPE_DECLARATION
:
643 const struct tgsi_full_declaration
*fdec
;
644 fdec
= &p
.FullToken
.FullDeclaration
;
645 switch (fdec
->Declaration
.File
) {
646 case TGSI_FILE_INPUT
:
647 if (!nv30_fragprog_parse_decl_attrib(fpc
, fdec
))
650 case TGSI_FILE_OUTPUT
:
651 if (!nv30_fragprog_parse_decl_output(fpc
, fdec
))
654 /*case TGSI_FILE_TEMPORARY:
655 if (fdec->DeclarationRange.Last > high_temp) {
657 fdec->DeclarationRange.Last;
665 case TGSI_TOKEN_TYPE_IMMEDIATE
:
667 struct tgsi_full_immediate
*imm
;
670 imm
= &p
.FullToken
.FullImmediate
;
671 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
672 assert(fpc
->nr_imm
< MAX_IMM
);
674 vals
[0] = imm
->u
[0].Float
;
675 vals
[1] = imm
->u
[1].Float
;
676 vals
[2] = imm
->u
[2].Float
;
677 vals
[3] = imm
->u
[3].Float
;
678 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
688 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
689 for (i = 0; i < high_temp; i++)
690 fpc->r_temp[i] = temp(fpc);
691 fpc->r_temps_discard = 0;
// Translate fp->pipe.tokens into hardware instructions stored in fp->insn.
// Visible flow: dump the tokens with tgsi_dump, allocate a zeroed compile
// context, run nv30_fragprog_prepare(), then walk the tokens a second time
// and feed each instruction token to nv30_fragprog_parse_instruction().
// Afterwards (num_regs - 1) / 2 is ORed into fp_control, fp_reg_control is
// set, bit 0 of the last emitted instruction is set, one further
// instruction whose first word is 1 is appended, and fp->translated is set
// to TRUE. Error paths are not visible in this extract.
704 nv30_fragprog_translate(struct nv30_context
*nv30
,
705 struct nv30_fragment_program
*fp
)
707 struct tgsi_parse_context parse
;
708 struct nv30_fpc
*fpc
= NULL
;
710 tgsi_dump(fp
->pipe
.tokens
,0);
712 fpc
= CALLOC(1, sizeof(struct nv30_fpc
));
719 if (!nv30_fragprog_prepare(fpc
)) {
724 tgsi_parse_init(&parse
, fp
->pipe
.tokens
);
726 while (!tgsi_parse_end_of_tokens(&parse
)) {
727 tgsi_parse_token(&parse
);
729 switch (parse
.FullToken
.Token
.Type
) {
730 case TGSI_TOKEN_TYPE_INSTRUCTION
:
732 const struct tgsi_full_instruction
*finst
;
734 finst
= &parse
.FullToken
.FullInstruction
;
735 if (!nv30_fragprog_parse_instruction(fpc
, finst
))
744 fp
->fp_control
|= (fpc
->num_regs
-1)/2;
745 fp
->fp_reg_control
= (1<<16)|0x4;
747 /* Terminate final instruction */
748 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
750 /* Append NOP + END instruction, may or may not be necessary. */
751 fpc
->inst_offset
= fp
->insn_len
;
753 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
754 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
755 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
756 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
758 fp
->translated
= TRUE
;
761 tgsi_parse_free(&parse
);
/* Copy the fp->insn_len instruction words of fp->insn into fp->buffer.
 * The buffer is mapped for CPU write and unmapped afterwards. Host byte
 * order is probed by reading the first byte of le (== 1): when it is
 * non-zero the words are copied verbatim, otherwise the two 16-bit halves
 * of every word are swapped.
 * NOTE(review): the first loop prints every word through NOUVEAU_ERR; any
 * guard around this dump is not visible in this extract - confirm it is not
 * active in normal builds. */
766 nv30_fragprog_upload(struct nv30_context
*nv30
,
767 struct nv30_fragment_program
*fp
)
769 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
770 const uint32_t le
= 1;
774 map
= pipe_buffer_map(pscreen
, fp
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
777 for (i
= 0; i
< fp
->insn_len
; i
++) {
778 fflush(stdout
); fflush(stderr
);
779 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
780 fflush(stdout
); fflush(stderr
);
784 if ((*(const uint8_t *)&le
)) {
785 for (i
= 0; i
< fp
->insn_len
; i
++) {
786 map
[i
] = fp
->insn
[i
];
789 /* Weird swapping for big-endian chips */
790 for (i
= 0; i
< fp
->insn_len
; i
++) {
791 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
792 ((fp
->insn
[i
] >> 16) & 0xffff);
796 pipe_buffer_unmap(pscreen
, fp
->buffer
);
/* State validation hook for the bound fragment program (nv30->fragprog).
 * Visible flow:
 *   - A goto skips straight to the constant update; its condition is not
 *     visible in this extract.
 *   - Otherwise the program is translated, a buffer of insn_len * 4 bytes
 *     is created and filled by nv30_fragprog_upload(), and a state object
 *     is built with FP_ACTIVE_PROGRAM (relocated to the buffer),
 *     FP_CONTROL, FP_REG_CONTROL and TX_UNITS_ENABLE.
 *   - Constant update: the fragment constant buffer is mapped for reading
 *     and, for every fixup in fp->consts, the four words at
 *     fp->insn[offset] are compared against constant index * 4 and
 *     overwritten when they differ; the program is then uploaded again
 *     (guard not visible).
 *   - When new_consts is set or fp->so is not the currently referenced
 *     hardware state, fp->so is referenced into
 *     nv30->state.hw[NV30_STATE_FRAGPROG]. */
800 nv30_fragprog_validate(struct nv30_context
*nv30
)
802 struct nv30_fragment_program
*fp
= nv30
->fragprog
;
803 struct pipe_buffer
*constbuf
=
804 nv30
->constbuf
[PIPE_SHADER_FRAGMENT
];
805 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
806 struct nouveau_stateobj
*so
;
807 boolean new_consts
= FALSE
;
811 goto update_constants
;
813 /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
814 nv30_fragprog_translate(nv30
, fp
);
815 if (!fp
->translated
) {
816 /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
820 fp
->buffer
= pscreen
->buffer_create(pscreen
, 0x100, 0, fp
->insn_len
* 4);
821 nv30_fragprog_upload(nv30
, fp
);
824 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_ACTIVE_PROGRAM
, 1);
825 so_reloc (so
, nouveau_bo(fp
->buffer
), 0, NOUVEAU_BO_VRAM
|
826 NOUVEAU_BO_GART
| NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
|
827 NOUVEAU_BO_OR
, NV34TCL_FP_ACTIVE_PROGRAM_DMA0
,
828 NV34TCL_FP_ACTIVE_PROGRAM_DMA1
);
829 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_CONTROL
, 1);
830 so_data (so
, fp
->fp_control
);
831 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_REG_CONTROL
, 1);
832 so_data (so
, fp
->fp_reg_control
);
833 so_method(so
, nv30
->screen
->rankine
, NV34TCL_TX_UNITS_ENABLE
, 1);
834 so_data (so
, fp
->samplers
);
842 map
= pipe_buffer_map(pscreen
, constbuf
,
843 PIPE_BUFFER_USAGE_CPU_READ
);
844 for (i
= 0; i
< fp
->nr_consts
; i
++) {
845 struct nv30_fragment_program_data
*fpd
= &fp
->consts
[i
];
846 uint32_t *p
= &fp
->insn
[fpd
->offset
];
847 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
/* Only patch the program when the constant actually changed. */
849 if (!memcmp(p
, cb
, 4 * sizeof(float)))
851 memcpy(p
, cb
, 4 * sizeof(float));
854 pipe_buffer_unmap(pscreen
, constbuf
);
857 nv30_fragprog_upload(nv30
, fp
);
860 if (new_consts
|| fp
->so
!= nv30
->state
.hw
[NV30_STATE_FRAGPROG
]) {
861 so_ref(fp
->so
, &nv30
->state
.hw
[NV30_STATE_FRAGPROG
]);
869 nv30_fragprog_destroy(struct nv30_context
*nv30
,
870 struct nv30_fragment_program
*fp
)
876 struct nv30_state_entry nv30_state_fragprog
= {
877 .validate
= nv30_fragprog_validate
,
879 .pipe
= NV30_NEW_FRAGPROG
,
880 .hw
= NV30_STATE_FRAGPROG