1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_parse.h"
8 #include "tgsi/tgsi_util.h"
10 #include "nv30_context.h"
20 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
21 #define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
22 #define DEF_CTEST NV30_FP_OP_COND_TR
23 #include "nv30_shader.h"
25 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
26 #define neg(s) nv30_sr_neg((s))
27 #define abs(s) nv30_sr_abs((s))
28 #define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
30 #define MAX_CONSTS 128
33 struct nv30_fragment_program
*fp
;
35 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
52 struct nv30_sreg imm
[MAX_IMM
];
56 static INLINE
struct nv30_sreg
57 temp(struct nv30_fpc
*fpc
)
61 idx
= fpc
->temp_temp_count
++;
62 idx
+= fpc
->high_temp
+ 1;
63 return nv30_sr(NV30SR_TEMP
, idx
);
66 static INLINE
struct nv30_sreg
67 constant(struct nv30_fpc
*fpc
, int pipe
, float vals
[4])
71 if (fpc
->nr_consts
== MAX_CONSTS
)
73 idx
= fpc
->nr_consts
++;
75 fpc
->consts
[idx
].pipe
= pipe
;
77 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
78 return nv30_sr(NV30SR_CONST
, idx
);
81 #define arith(cc,s,o,d,m,s0,s1,s2) \
82 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
83 (d), (m), (s0), (s1), (s2))
84 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
85 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
86 (d), (m), (s0), none, none)
89 grow_insns(struct nv30_fpc
*fpc
, int size
)
91 struct nv30_fragment_program
*fp
= fpc
->fp
;
94 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
98 emit_src(struct nv30_fpc
*fpc
, int pos
, struct nv30_sreg src
)
100 struct nv30_fragment_program
*fp
= fpc
->fp
;
101 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
106 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
107 hw
[0] |= (src
.index
<< NV30_FP_OP_INPUT_SRC_SHIFT
);
110 sr
|= NV30_FP_REG_SRC_HALF
;
113 sr
|= (NV30_FP_REG_TYPE_TEMP
<< NV30_FP_REG_TYPE_SHIFT
);
114 sr
|= (src
.index
<< NV30_FP_REG_SRC_SHIFT
);
118 hw
= &fp
->insn
[fpc
->inst_offset
];
119 if (fpc
->consts
[src
.index
].pipe
>= 0) {
120 struct nv30_fragment_program_data
*fpd
;
122 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
124 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
125 fpd
->offset
= fpc
->inst_offset
+ 4;
126 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
127 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
129 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
130 fpc
->consts
[src
.index
].vals
,
131 sizeof(uint32_t) * 4);
134 sr
|= (NV30_FP_REG_TYPE_CONST
<< NV30_FP_REG_TYPE_SHIFT
);
137 sr
|= (NV30_FP_REG_TYPE_INPUT
<< NV30_FP_REG_TYPE_SHIFT
);
144 sr
|= NV30_FP_REG_NEGATE
;
147 hw
[1] |= (1 << (29 + pos
));
149 sr
|= ((src
.swz
[0] << NV30_FP_REG_SWZ_X_SHIFT
) |
150 (src
.swz
[1] << NV30_FP_REG_SWZ_Y_SHIFT
) |
151 (src
.swz
[2] << NV30_FP_REG_SWZ_Z_SHIFT
) |
152 (src
.swz
[3] << NV30_FP_REG_SWZ_W_SHIFT
));
158 emit_dst(struct nv30_fpc
*fpc
, struct nv30_sreg dst
)
160 struct nv30_fragment_program
*fp
= fpc
->fp
;
161 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
165 if (fpc
->num_regs
< (dst
.index
+ 1))
166 fpc
->num_regs
= dst
.index
+ 1;
169 if (dst
.index
== 1) {
170 fp
->fp_control
|= 0xe;
172 hw
[0] |= NV30_FP_OP_OUT_REG_HALF
;
182 hw
[0] |= (dst
.index
<< NV30_FP_OP_OUT_REG_SHIFT
);
186 nv30_fp_arith(struct nv30_fpc
*fpc
, int sat
, int op
,
187 struct nv30_sreg dst
, int mask
,
188 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
190 struct nv30_fragment_program
*fp
= fpc
->fp
;
193 fpc
->inst_offset
= fp
->insn_len
;
195 hw
= &fp
->insn
[fpc
->inst_offset
];
196 memset(hw
, 0, sizeof(uint32_t) * 4);
198 if (op
== NV30_FP_OP_OPCODE_KIL
)
199 fp
->fp_control
|= NV34TCL_FP_CONTROL_USES_KIL
;
200 hw
[0] |= (op
<< NV30_FP_OP_OPCODE_SHIFT
);
201 hw
[0] |= (mask
<< NV30_FP_OP_OUTMASK_SHIFT
);
202 hw
[2] |= (dst
.dst_scale
<< NV30_FP_OP_DST_SCALE_SHIFT
);
205 hw
[0] |= NV30_FP_OP_OUT_SAT
;
208 hw
[0] |= NV30_FP_OP_COND_WRITE_ENABLE
;
209 hw
[1] |= (dst
.cc_test
<< NV30_FP_OP_COND_SHIFT
);
210 hw
[1] |= ((dst
.cc_swz
[0] << NV30_FP_OP_COND_SWZ_X_SHIFT
) |
211 (dst
.cc_swz
[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT
) |
212 (dst
.cc_swz
[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT
) |
213 (dst
.cc_swz
[3] << NV30_FP_OP_COND_SWZ_W_SHIFT
));
216 emit_src(fpc
, 0, s0
);
217 emit_src(fpc
, 1, s1
);
218 emit_src(fpc
, 2, s2
);
222 nv30_fp_tex(struct nv30_fpc
*fpc
, int sat
, int op
, int unit
,
223 struct nv30_sreg dst
, int mask
,
224 struct nv30_sreg s0
, struct nv30_sreg s1
, struct nv30_sreg s2
)
226 struct nv30_fragment_program
*fp
= fpc
->fp
;
228 nv30_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
230 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV30_FP_OP_TEX_UNIT_SHIFT
);
231 fp
->samplers
|= (1 << unit
);
234 static INLINE
struct nv30_sreg
235 tgsi_src(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
237 struct nv30_sreg src
;
239 switch (fsrc
->SrcRegister
.File
) {
240 case TGSI_FILE_INPUT
:
241 src
= nv30_sr(NV30SR_INPUT
,
242 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
244 case TGSI_FILE_CONSTANT
:
245 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
247 case TGSI_FILE_IMMEDIATE
:
248 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
249 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
251 case TGSI_FILE_TEMPORARY
:
252 src
= nv30_sr(NV30SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
253 if (fpc
->high_temp
< src
.index
)
254 fpc
->high_temp
= src
.index
;
256 /* This is clearly insane, but gallium hands us shaders like this.
257 * Luckily fragprog results are just temp regs..
259 case TGSI_FILE_OUTPUT
:
260 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
261 return nv30_sr(NV30SR_OUTPUT
, 0);
263 return nv30_sr(NV30SR_OUTPUT
, 1);
266 NOUVEAU_ERR("bad src file\n");
270 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
271 src
.negate
= fsrc
->SrcRegister
.Negate
;
272 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
273 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
274 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
275 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
279 static INLINE
struct nv30_sreg
280 tgsi_dst(struct nv30_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
283 switch (fdst
->DstRegister
.File
) {
284 case TGSI_FILE_OUTPUT
:
285 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
286 return nv30_sr(NV30SR_OUTPUT
, 0);
288 return nv30_sr(NV30SR_OUTPUT
, 1);
290 case TGSI_FILE_TEMPORARY
:
291 idx
= fdst
->DstRegister
.Index
+ 1;
292 if (fpc
->high_temp
< idx
)
293 fpc
->high_temp
= idx
;
294 return nv30_sr(NV30SR_TEMP
, idx
);
296 return nv30_sr(NV30SR_NONE
, 0);
298 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
299 return nv30_sr(NV30SR_NONE
, 0);
308 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
309 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
310 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
311 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
316 src_native_swz(struct nv30_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
317 struct nv30_sreg
*src
)
319 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
320 struct nv30_sreg tgsi
= tgsi_src(fpc
, fsrc
);
321 uint mask
= 0, zero_mask
= 0, one_mask
= 0, neg_mask
= 0;
322 uint neg
[4] = { fsrc
->SrcRegisterExtSwz
.NegateX
,
323 fsrc
->SrcRegisterExtSwz
.NegateY
,
324 fsrc
->SrcRegisterExtSwz
.NegateZ
,
325 fsrc
->SrcRegisterExtSwz
.NegateW
};
328 for (c
= 0; c
< 4; c
++) {
329 switch (tgsi_util_get_full_src_register_extswizzle(fsrc
, c
)) {
330 case TGSI_EXTSWIZZLE_X
:
331 case TGSI_EXTSWIZZLE_Y
:
332 case TGSI_EXTSWIZZLE_Z
:
333 case TGSI_EXTSWIZZLE_W
:
336 case TGSI_EXTSWIZZLE_ZERO
:
337 zero_mask
|= (1 << c
);
340 case TGSI_EXTSWIZZLE_ONE
:
341 one_mask
|= (1 << c
);
348 if (!tgsi
.negate
&& neg
[c
])
349 neg_mask
|= (1 << c
);
352 if (mask
== MASK_ALL
&& !neg_mask
)
358 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
361 arith(fpc
, 0, SFL
, *src
, zero_mask
, *src
, none
, none
);
364 arith(fpc
, 0, STR
, *src
, one_mask
, *src
, none
, none
);
367 struct nv30_sreg one
= temp(fpc
);
368 arith(fpc
, 0, STR
, one
, neg_mask
, one
, none
, none
);
369 arith(fpc
, 0, MUL
, *src
, neg_mask
, *src
, neg(one
), none
);
376 nv30_fragprog_parse_instruction(struct nv30_fpc
*fpc
,
377 const struct tgsi_full_instruction
*finst
)
379 const struct nv30_sreg none
= nv30_sr(NV30SR_NONE
, 0);
380 struct nv30_sreg src
[3], dst
, tmp
;
381 int mask
, sat
, unit
= 0;
382 int ai
= -1, ci
= -1;
385 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
388 fpc
->temp_temp_count
= 0;
389 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
390 const struct tgsi_full_src_register
*fsrc
;
392 fsrc
= &finst
->FullSrcRegisters
[i
];
393 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
394 src
[i
] = tgsi_src(fpc
, fsrc
);
398 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
399 const struct tgsi_full_src_register
*fsrc
;
401 fsrc
= &finst
->FullSrcRegisters
[i
];
403 switch (fsrc
->SrcRegister
.File
) {
404 case TGSI_FILE_INPUT
:
405 case TGSI_FILE_CONSTANT
:
406 case TGSI_FILE_TEMPORARY
:
407 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
414 switch (fsrc
->SrcRegister
.File
) {
415 case TGSI_FILE_INPUT
:
416 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
417 ai
= fsrc
->SrcRegister
.Index
;
418 src
[i
] = tgsi_src(fpc
, fsrc
);
420 NOUVEAU_MSG("extra src attr %d\n",
421 fsrc
->SrcRegister
.Index
);
423 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
424 tgsi_src(fpc
, fsrc
), none
, none
);
427 case TGSI_FILE_CONSTANT
:
428 case TGSI_FILE_IMMEDIATE
:
429 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
430 ci
= fsrc
->SrcRegister
.Index
;
431 src
[i
] = tgsi_src(fpc
, fsrc
);
434 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
435 tgsi_src(fpc
, fsrc
), none
, none
);
438 case TGSI_FILE_TEMPORARY
:
441 case TGSI_FILE_SAMPLER
:
442 unit
= fsrc
->SrcRegister
.Index
;
444 case TGSI_FILE_OUTPUT
:
447 NOUVEAU_ERR("bad src file\n");
452 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
453 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
454 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
456 switch (finst
->Instruction
.Opcode
) {
457 case TGSI_OPCODE_ABS
:
458 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
460 case TGSI_OPCODE_ADD
:
461 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
463 case TGSI_OPCODE_CMP
:
465 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
467 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
468 dst
.cc_test
= NV30_VP_INST_COND_LT
;
469 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
471 case TGSI_OPCODE_COS
:
472 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
474 case TGSI_OPCODE_DP3
:
475 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
477 case TGSI_OPCODE_DP4
:
478 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
480 case TGSI_OPCODE_DPH
:
482 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
483 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
484 swz(src
[1], W
, W
, W
, W
), none
);
486 case TGSI_OPCODE_DST
:
487 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
489 case TGSI_OPCODE_EX2
:
490 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
492 case TGSI_OPCODE_FLR
:
493 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
495 case TGSI_OPCODE_FRC
:
496 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
498 case TGSI_OPCODE_KILP
:
499 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
501 case TGSI_OPCODE_KIL
:
502 dst
= nv30_sr(NV30SR_NONE
, 0);
504 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
505 dst
.cc_update
= 0; dst
.cc_test
= NV30_FP_OP_COND_LT
;
506 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
508 case TGSI_OPCODE_LG2
:
509 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
511 // case TGSI_OPCODE_LIT:
512 case TGSI_OPCODE_LRP
:
513 arith(fpc
, sat
, LRP
, dst
, mask
, src
[0], src
[1], src
[2]);
515 case TGSI_OPCODE_MAD
:
516 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
518 case TGSI_OPCODE_MAX
:
519 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
521 case TGSI_OPCODE_MIN
:
522 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
524 case TGSI_OPCODE_MOV
:
525 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
527 case TGSI_OPCODE_MUL
:
528 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
530 case TGSI_OPCODE_NOISE1
:
531 case TGSI_OPCODE_NOISE2
:
532 case TGSI_OPCODE_NOISE3
:
533 case TGSI_OPCODE_NOISE4
:
534 arith(fpc
, sat
, SFL
, dst
, mask
, none
, none
, none
);
536 case TGSI_OPCODE_POW
:
537 arith(fpc
, sat
, POW
, dst
, mask
, src
[0], src
[1], none
);
539 case TGSI_OPCODE_RCP
:
540 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
542 case TGSI_OPCODE_RET
:
545 case TGSI_OPCODE_RFL
:
546 arith(fpc
, 0, RFL
, dst
, mask
, src
[0], src
[1], none
);
548 case TGSI_OPCODE_RSQ
:
549 arith(fpc
, sat
, RSQ
, dst
, mask
, abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
551 case TGSI_OPCODE_SCS
:
553 arith(fpc
, sat
, COS
, dst
, MASK_X
,
554 swz(src
[0], X
, X
, X
, X
), none
, none
);
557 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
558 swz(src
[0], X
, X
, X
, X
), none
, none
);
561 case TGSI_OPCODE_SIN
:
562 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
564 case TGSI_OPCODE_SGE
:
565 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
567 case TGSI_OPCODE_SGT
:
568 arith(fpc
, sat
, SGT
, dst
, mask
, src
[0], src
[1], none
);
570 case TGSI_OPCODE_SLT
:
571 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
573 case TGSI_OPCODE_SUB
:
574 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
576 case TGSI_OPCODE_TEX
:
577 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
579 case TGSI_OPCODE_TXB
:
580 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
582 case TGSI_OPCODE_TXP
:
583 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
585 case TGSI_OPCODE_XPD
:
587 arith(fpc
, 0, MUL
, tmp
, mask
,
588 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
589 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
590 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
594 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
602 nv30_fragprog_parse_decl_attrib(struct nv30_fpc
*fpc
,
603 const struct tgsi_full_declaration
*fdec
)
607 switch (fdec
->Semantic
.SemanticName
) {
608 case TGSI_SEMANTIC_POSITION
:
609 hw
= NV30_FP_OP_INPUT_SRC_POSITION
;
611 case TGSI_SEMANTIC_COLOR
:
612 if (fdec
->Semantic
.SemanticIndex
== 0) {
613 hw
= NV30_FP_OP_INPUT_SRC_COL0
;
615 if (fdec
->Semantic
.SemanticIndex
== 1) {
616 hw
= NV30_FP_OP_INPUT_SRC_COL1
;
618 NOUVEAU_ERR("bad colour semantic index\n");
622 case TGSI_SEMANTIC_FOG
:
623 hw
= NV30_FP_OP_INPUT_SRC_FOGC
;
625 case TGSI_SEMANTIC_GENERIC
:
626 if (fdec
->Semantic
.SemanticIndex
<= 7) {
627 hw
= NV30_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
630 NOUVEAU_ERR("bad generic semantic index\n");
635 NOUVEAU_ERR("bad input semantic\n");
639 fpc
->attrib_map
[fdec
->DeclarationRange
.First
] = hw
;
644 nv30_fragprog_parse_decl_output(struct nv30_fpc
*fpc
,
645 const struct tgsi_full_declaration
*fdec
)
647 switch (fdec
->Semantic
.SemanticName
) {
648 case TGSI_SEMANTIC_POSITION
:
649 fpc
->depth_id
= fdec
->DeclarationRange
.First
;
651 case TGSI_SEMANTIC_COLOR
:
652 fpc
->colour_id
= fdec
->DeclarationRange
.First
;
655 NOUVEAU_ERR("bad output semantic\n");
663 nv30_fragprog_prepare(struct nv30_fpc
*fpc
)
665 struct tgsi_parse_context p
;
666 /*int high_temp = -1, i;*/
668 tgsi_parse_init(&p
, fpc
->fp
->pipe
.tokens
);
669 while (!tgsi_parse_end_of_tokens(&p
)) {
670 const union tgsi_full_token
*tok
= &p
.FullToken
;
672 tgsi_parse_token(&p
);
673 switch(tok
->Token
.Type
) {
674 case TGSI_TOKEN_TYPE_DECLARATION
:
676 const struct tgsi_full_declaration
*fdec
;
677 fdec
= &p
.FullToken
.FullDeclaration
;
678 switch (fdec
->Declaration
.File
) {
679 case TGSI_FILE_INPUT
:
680 if (!nv30_fragprog_parse_decl_attrib(fpc
, fdec
))
683 case TGSI_FILE_OUTPUT
:
684 if (!nv30_fragprog_parse_decl_output(fpc
, fdec
))
687 /*case TGSI_FILE_TEMPORARY:
688 if (fdec->DeclarationRange.Last > high_temp) {
690 fdec->DeclarationRange.Last;
698 case TGSI_TOKEN_TYPE_IMMEDIATE
:
700 struct tgsi_full_immediate
*imm
;
703 imm
= &p
.FullToken
.FullImmediate
;
704 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
705 assert(fpc
->nr_imm
< MAX_IMM
);
707 vals
[0] = imm
->u
[0].Float
;
708 vals
[1] = imm
->u
[1].Float
;
709 vals
[2] = imm
->u
[2].Float
;
710 vals
[3] = imm
->u
[3].Float
;
711 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
721 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
722 for (i = 0; i < high_temp; i++)
723 fpc->r_temp[i] = temp(fpc);
724 fpc->r_temps_discard = 0;
737 nv30_fragprog_translate(struct nv30_context
*nv30
,
738 struct nv30_fragment_program
*fp
)
740 struct tgsi_parse_context parse
;
741 struct nv30_fpc
*fpc
= NULL
;
743 tgsi_dump(fp
->pipe
.tokens
,0);
745 fpc
= CALLOC(1, sizeof(struct nv30_fpc
));
752 if (!nv30_fragprog_prepare(fpc
)) {
757 tgsi_parse_init(&parse
, fp
->pipe
.tokens
);
759 while (!tgsi_parse_end_of_tokens(&parse
)) {
760 tgsi_parse_token(&parse
);
762 switch (parse
.FullToken
.Token
.Type
) {
763 case TGSI_TOKEN_TYPE_INSTRUCTION
:
765 const struct tgsi_full_instruction
*finst
;
767 finst
= &parse
.FullToken
.FullInstruction
;
768 if (!nv30_fragprog_parse_instruction(fpc
, finst
))
777 fp
->fp_control
|= (fpc
->num_regs
-1)/2;
778 fp
->fp_reg_control
= (1<<16)|0x4;
780 /* Terminate final instruction */
781 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
783 /* Append NOP + END instruction, may or may not be necessary. */
784 fpc
->inst_offset
= fp
->insn_len
;
786 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
787 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
788 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
789 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
791 fp
->translated
= TRUE
;
794 tgsi_parse_free(&parse
);
799 nv30_fragprog_upload(struct nv30_context
*nv30
,
800 struct nv30_fragment_program
*fp
)
802 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
803 const uint32_t le
= 1;
807 map
= pipe_buffer_map(pscreen
, fp
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
810 for (i
= 0; i
< fp
->insn_len
; i
++) {
811 fflush(stdout
); fflush(stderr
);
812 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
813 fflush(stdout
); fflush(stderr
);
817 if ((*(const uint8_t *)&le
)) {
818 for (i
= 0; i
< fp
->insn_len
; i
++) {
819 map
[i
] = fp
->insn
[i
];
822 /* Weird swapping for big-endian chips */
823 for (i
= 0; i
< fp
->insn_len
; i
++) {
824 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
825 ((fp
->insn
[i
] >> 16) & 0xffff);
829 pipe_buffer_unmap(pscreen
, fp
->buffer
);
833 nv30_fragprog_validate(struct nv30_context
*nv30
)
835 struct nv30_fragment_program
*fp
= nv30
->fragprog
;
836 struct pipe_buffer
*constbuf
=
837 nv30
->constbuf
[PIPE_SHADER_FRAGMENT
];
838 struct pipe_screen
*pscreen
= nv30
->pipe
.screen
;
839 struct nouveau_stateobj
*so
;
840 boolean new_consts
= FALSE
;
844 goto update_constants
;
846 /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
847 nv30_fragprog_translate(nv30
, fp
);
848 if (!fp
->translated
) {
849 /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
853 fp
->buffer
= pscreen
->buffer_create(pscreen
, 0x100, 0, fp
->insn_len
* 4);
854 nv30_fragprog_upload(nv30
, fp
);
857 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_ACTIVE_PROGRAM
, 1);
858 so_reloc (so
, nouveau_bo(fp
->buffer
), 0, NOUVEAU_BO_VRAM
|
859 NOUVEAU_BO_GART
| NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
|
860 NOUVEAU_BO_OR
, NV34TCL_FP_ACTIVE_PROGRAM_DMA0
,
861 NV34TCL_FP_ACTIVE_PROGRAM_DMA1
);
862 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_CONTROL
, 1);
863 so_data (so
, fp
->fp_control
);
864 so_method(so
, nv30
->screen
->rankine
, NV34TCL_FP_REG_CONTROL
, 1);
865 so_data (so
, fp
->fp_reg_control
);
866 so_method(so
, nv30
->screen
->rankine
, NV34TCL_TX_UNITS_ENABLE
, 1);
867 so_data (so
, fp
->samplers
);
875 map
= pipe_buffer_map(pscreen
, constbuf
,
876 PIPE_BUFFER_USAGE_CPU_READ
);
877 for (i
= 0; i
< fp
->nr_consts
; i
++) {
878 struct nv30_fragment_program_data
*fpd
= &fp
->consts
[i
];
879 uint32_t *p
= &fp
->insn
[fpd
->offset
];
880 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
882 if (!memcmp(p
, cb
, 4 * sizeof(float)))
884 memcpy(p
, cb
, 4 * sizeof(float));
887 pipe_buffer_unmap(pscreen
, constbuf
);
890 nv30_fragprog_upload(nv30
, fp
);
893 if (new_consts
|| fp
->so
!= nv30
->state
.hw
[NV30_STATE_FRAGPROG
]) {
894 so_ref(fp
->so
, &nv30
->state
.hw
[NV30_STATE_FRAGPROG
]);
902 nv30_fragprog_destroy(struct nv30_context
*nv30
,
903 struct nv30_fragment_program
*fp
)
909 struct nv30_state_entry nv30_state_fragprog
= {
910 .validate
= nv30_fragprog_validate
,
912 .pipe
= NV30_NEW_FRAGPROG
,
913 .hw
= NV30_STATE_FRAGPROG