1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
5 #include "pipe/p_shader_tokens.h"
6 #include "tgsi/util/tgsi_parse.h"
7 #include "tgsi/util/tgsi_util.h"
9 #include "nv40_context.h"
19 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
20 #define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
21 #define DEF_CTEST NV40_FP_OP_COND_TR
22 #include "nv40_shader.h"
24 #define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
25 #define neg(s) nv40_sr_neg((s))
26 #define abs(s) nv40_sr_abs((s))
27 #define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
29 #define MAX_CONSTS 128
32 struct nv40_fragment_program
*fp
;
34 uint attrib_map
[PIPE_MAX_SHADER_INPUTS
];
51 struct nv40_sreg imm
[MAX_IMM
];
55 static INLINE
struct nv40_sreg
56 temp(struct nv40_fpc
*fpc
)
60 idx
= fpc
->temp_temp_count
++;
61 idx
+= fpc
->high_temp
+ 1;
62 return nv40_sr(NV40SR_TEMP
, idx
);
65 static INLINE
struct nv40_sreg
66 constant(struct nv40_fpc
*fpc
, int pipe
, float vals
[4])
70 if (fpc
->nr_consts
== MAX_CONSTS
)
72 idx
= fpc
->nr_consts
++;
74 fpc
->consts
[idx
].pipe
= pipe
;
76 memcpy(fpc
->consts
[idx
].vals
, vals
, 4 * sizeof(float));
77 return nv40_sr(NV40SR_CONST
, idx
);
80 #define arith(cc,s,o,d,m,s0,s1,s2) \
81 nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
82 (d), (m), (s0), (s1), (s2))
83 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
84 nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
85 (d), (m), (s0), none, none)
88 grow_insns(struct nv40_fpc
*fpc
, int size
)
90 struct nv40_fragment_program
*fp
= fpc
->fp
;
93 fp
->insn
= realloc(fp
->insn
, sizeof(uint32_t) * fp
->insn_len
);
97 emit_src(struct nv40_fpc
*fpc
, int pos
, struct nv40_sreg src
)
99 struct nv40_fragment_program
*fp
= fpc
->fp
;
100 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
105 sr
|= (NV40_FP_REG_TYPE_INPUT
<< NV40_FP_REG_TYPE_SHIFT
);
106 hw
[0] |= (src
.index
<< NV40_FP_OP_INPUT_SRC_SHIFT
);
109 sr
|= NV40_FP_REG_SRC_HALF
;
112 sr
|= (NV40_FP_REG_TYPE_TEMP
<< NV40_FP_REG_TYPE_SHIFT
);
113 sr
|= (src
.index
<< NV40_FP_REG_SRC_SHIFT
);
117 hw
= &fp
->insn
[fpc
->inst_offset
];
118 if (fpc
->consts
[src
.index
].pipe
>= 0) {
119 struct nv40_fragment_program_data
*fpd
;
121 fp
->consts
= realloc(fp
->consts
, ++fp
->nr_consts
*
123 fpd
= &fp
->consts
[fp
->nr_consts
- 1];
124 fpd
->offset
= fpc
->inst_offset
+ 4;
125 fpd
->index
= fpc
->consts
[src
.index
].pipe
;
126 memset(&fp
->insn
[fpd
->offset
], 0, sizeof(uint32_t) * 4);
128 memcpy(&fp
->insn
[fpc
->inst_offset
+ 4],
129 fpc
->consts
[src
.index
].vals
,
130 sizeof(uint32_t) * 4);
133 sr
|= (NV40_FP_REG_TYPE_CONST
<< NV40_FP_REG_TYPE_SHIFT
);
136 sr
|= (NV40_FP_REG_TYPE_INPUT
<< NV40_FP_REG_TYPE_SHIFT
);
143 sr
|= NV40_FP_REG_NEGATE
;
146 hw
[1] |= (1 << (29 + pos
));
148 sr
|= ((src
.swz
[0] << NV40_FP_REG_SWZ_X_SHIFT
) |
149 (src
.swz
[1] << NV40_FP_REG_SWZ_Y_SHIFT
) |
150 (src
.swz
[2] << NV40_FP_REG_SWZ_Z_SHIFT
) |
151 (src
.swz
[3] << NV40_FP_REG_SWZ_W_SHIFT
));
157 emit_dst(struct nv40_fpc
*fpc
, struct nv40_sreg dst
)
159 struct nv40_fragment_program
*fp
= fpc
->fp
;
160 uint32_t *hw
= &fp
->insn
[fpc
->inst_offset
];
164 if (fpc
->num_regs
< (dst
.index
+ 1))
165 fpc
->num_regs
= dst
.index
+ 1;
168 if (dst
.index
== 1) {
169 fp
->fp_control
|= 0xe;
171 hw
[0] |= NV40_FP_OP_OUT_REG_HALF
;
181 hw
[0] |= (dst
.index
<< NV40_FP_OP_OUT_REG_SHIFT
);
185 nv40_fp_arith(struct nv40_fpc
*fpc
, int sat
, int op
,
186 struct nv40_sreg dst
, int mask
,
187 struct nv40_sreg s0
, struct nv40_sreg s1
, struct nv40_sreg s2
)
189 struct nv40_fragment_program
*fp
= fpc
->fp
;
192 fpc
->inst_offset
= fp
->insn_len
;
194 hw
= &fp
->insn
[fpc
->inst_offset
];
195 memset(hw
, 0, sizeof(uint32_t) * 4);
197 if (op
== NV40_FP_OP_OPCODE_KIL
)
198 fp
->fp_control
|= NV40TCL_FP_CONTROL_KIL
;
199 hw
[0] |= (op
<< NV40_FP_OP_OPCODE_SHIFT
);
200 hw
[0] |= (mask
<< NV40_FP_OP_OUTMASK_SHIFT
);
201 hw
[2] |= (dst
.dst_scale
<< NV40_FP_OP_DST_SCALE_SHIFT
);
204 hw
[0] |= NV40_FP_OP_OUT_SAT
;
207 hw
[0] |= NV40_FP_OP_COND_WRITE_ENABLE
;
208 hw
[1] |= (dst
.cc_test
<< NV40_FP_OP_COND_SHIFT
);
209 hw
[1] |= ((dst
.cc_swz
[0] << NV40_FP_OP_COND_SWZ_X_SHIFT
) |
210 (dst
.cc_swz
[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT
) |
211 (dst
.cc_swz
[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT
) |
212 (dst
.cc_swz
[3] << NV40_FP_OP_COND_SWZ_W_SHIFT
));
215 emit_src(fpc
, 0, s0
);
216 emit_src(fpc
, 1, s1
);
217 emit_src(fpc
, 2, s2
);
221 nv40_fp_tex(struct nv40_fpc
*fpc
, int sat
, int op
, int unit
,
222 struct nv40_sreg dst
, int mask
,
223 struct nv40_sreg s0
, struct nv40_sreg s1
, struct nv40_sreg s2
)
225 struct nv40_fragment_program
*fp
= fpc
->fp
;
227 nv40_fp_arith(fpc
, sat
, op
, dst
, mask
, s0
, s1
, s2
);
229 fp
->insn
[fpc
->inst_offset
] |= (unit
<< NV40_FP_OP_TEX_UNIT_SHIFT
);
230 fp
->samplers
|= (1 << unit
);
233 static INLINE
struct nv40_sreg
234 tgsi_src(struct nv40_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
)
236 struct nv40_sreg src
;
238 switch (fsrc
->SrcRegister
.File
) {
239 case TGSI_FILE_INPUT
:
240 src
= nv40_sr(NV40SR_INPUT
,
241 fpc
->attrib_map
[fsrc
->SrcRegister
.Index
]);
243 case TGSI_FILE_CONSTANT
:
244 src
= constant(fpc
, fsrc
->SrcRegister
.Index
, NULL
);
246 case TGSI_FILE_IMMEDIATE
:
247 assert(fsrc
->SrcRegister
.Index
< fpc
->nr_imm
);
248 src
= fpc
->imm
[fsrc
->SrcRegister
.Index
];
250 case TGSI_FILE_TEMPORARY
:
251 src
= nv40_sr(NV40SR_TEMP
, fsrc
->SrcRegister
.Index
+ 1);
252 if (fpc
->high_temp
< src
.index
)
253 fpc
->high_temp
= src
.index
;
255 /* This is clearly insane, but gallium hands us shaders like this.
256 * Luckily fragprog results are just temp regs..
258 case TGSI_FILE_OUTPUT
:
259 if (fsrc
->SrcRegister
.Index
== fpc
->colour_id
)
260 return nv40_sr(NV40SR_OUTPUT
, 0);
262 return nv40_sr(NV40SR_OUTPUT
, 1);
265 NOUVEAU_ERR("bad src file\n");
269 src
.abs
= fsrc
->SrcRegisterExtMod
.Absolute
;
270 src
.negate
= fsrc
->SrcRegister
.Negate
;
271 src
.swz
[0] = fsrc
->SrcRegister
.SwizzleX
;
272 src
.swz
[1] = fsrc
->SrcRegister
.SwizzleY
;
273 src
.swz
[2] = fsrc
->SrcRegister
.SwizzleZ
;
274 src
.swz
[3] = fsrc
->SrcRegister
.SwizzleW
;
278 static INLINE
struct nv40_sreg
279 tgsi_dst(struct nv40_fpc
*fpc
, const struct tgsi_full_dst_register
*fdst
) {
282 switch (fdst
->DstRegister
.File
) {
283 case TGSI_FILE_OUTPUT
:
284 if (fdst
->DstRegister
.Index
== fpc
->colour_id
)
285 return nv40_sr(NV40SR_OUTPUT
, 0);
287 return nv40_sr(NV40SR_OUTPUT
, 1);
289 case TGSI_FILE_TEMPORARY
:
290 idx
= fdst
->DstRegister
.Index
+ 1;
291 if (fpc
->high_temp
< idx
)
292 fpc
->high_temp
= idx
;
293 return nv40_sr(NV40SR_TEMP
, idx
);
295 return nv40_sr(NV40SR_NONE
, 0);
297 NOUVEAU_ERR("bad dst file %d\n", fdst
->DstRegister
.File
);
298 return nv40_sr(NV40SR_NONE
, 0);
307 if (tgsi
& TGSI_WRITEMASK_X
) mask
|= MASK_X
;
308 if (tgsi
& TGSI_WRITEMASK_Y
) mask
|= MASK_Y
;
309 if (tgsi
& TGSI_WRITEMASK_Z
) mask
|= MASK_Z
;
310 if (tgsi
& TGSI_WRITEMASK_W
) mask
|= MASK_W
;
315 src_native_swz(struct nv40_fpc
*fpc
, const struct tgsi_full_src_register
*fsrc
,
316 struct nv40_sreg
*src
)
318 const struct nv40_sreg none
= nv40_sr(NV40SR_NONE
, 0);
319 struct nv40_sreg tgsi
= tgsi_src(fpc
, fsrc
);
320 uint mask
= 0, zero_mask
= 0, one_mask
= 0, neg_mask
= 0;
321 uint neg
[4] = { fsrc
->SrcRegisterExtSwz
.NegateX
,
322 fsrc
->SrcRegisterExtSwz
.NegateY
,
323 fsrc
->SrcRegisterExtSwz
.NegateZ
,
324 fsrc
->SrcRegisterExtSwz
.NegateW
};
327 for (c
= 0; c
< 4; c
++) {
328 switch (tgsi_util_get_full_src_register_extswizzle(fsrc
, c
)) {
329 case TGSI_EXTSWIZZLE_X
:
330 case TGSI_EXTSWIZZLE_Y
:
331 case TGSI_EXTSWIZZLE_Z
:
332 case TGSI_EXTSWIZZLE_W
:
335 case TGSI_EXTSWIZZLE_ZERO
:
336 zero_mask
|= (1 << c
);
339 case TGSI_EXTSWIZZLE_ONE
:
340 one_mask
|= (1 << c
);
347 if (!tgsi
.negate
&& neg
[c
])
348 neg_mask
|= (1 << c
);
351 if (mask
== MASK_ALL
&& !neg_mask
)
357 arith(fpc
, 0, MOV
, *src
, mask
, tgsi
, none
, none
);
360 arith(fpc
, 0, SFL
, *src
, zero_mask
, *src
, none
, none
);
363 arith(fpc
, 0, STR
, *src
, one_mask
, *src
, none
, none
);
366 struct nv40_sreg one
= temp(fpc
);
367 arith(fpc
, 0, STR
, one
, neg_mask
, one
, none
, none
);
368 arith(fpc
, 0, MUL
, *src
, neg_mask
, *src
, neg(one
), none
);
375 nv40_fragprog_parse_instruction(struct nv40_fpc
*fpc
,
376 const struct tgsi_full_instruction
*finst
)
378 const struct nv40_sreg none
= nv40_sr(NV40SR_NONE
, 0);
379 struct nv40_sreg src
[3], dst
, tmp
;
381 int ai
= -1, ci
= -1;
384 if (finst
->Instruction
.Opcode
== TGSI_OPCODE_END
)
387 fpc
->temp_temp_count
= 0;
388 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
389 const struct tgsi_full_src_register
*fsrc
;
391 fsrc
= &finst
->FullSrcRegisters
[i
];
392 if (fsrc
->SrcRegister
.File
== TGSI_FILE_TEMPORARY
) {
393 src
[i
] = tgsi_src(fpc
, fsrc
);
397 for (i
= 0; i
< finst
->Instruction
.NumSrcRegs
; i
++) {
398 const struct tgsi_full_src_register
*fsrc
;
400 fsrc
= &finst
->FullSrcRegisters
[i
];
402 switch (fsrc
->SrcRegister
.File
) {
403 case TGSI_FILE_INPUT
:
404 case TGSI_FILE_CONSTANT
:
405 case TGSI_FILE_TEMPORARY
:
406 if (!src_native_swz(fpc
, fsrc
, &src
[i
]))
413 switch (fsrc
->SrcRegister
.File
) {
414 case TGSI_FILE_INPUT
:
415 if (ai
== -1 || ai
== fsrc
->SrcRegister
.Index
) {
416 ai
= fsrc
->SrcRegister
.Index
;
417 src
[i
] = tgsi_src(fpc
, fsrc
);
419 NOUVEAU_MSG("extra src attr %d\n",
420 fsrc
->SrcRegister
.Index
);
422 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
423 tgsi_src(fpc
, fsrc
), none
, none
);
426 case TGSI_FILE_CONSTANT
:
427 case TGSI_FILE_IMMEDIATE
:
428 if (ci
== -1 || ci
== fsrc
->SrcRegister
.Index
) {
429 ci
= fsrc
->SrcRegister
.Index
;
430 src
[i
] = tgsi_src(fpc
, fsrc
);
433 arith(fpc
, 0, MOV
, src
[i
], MASK_ALL
,
434 tgsi_src(fpc
, fsrc
), none
, none
);
437 case TGSI_FILE_TEMPORARY
:
440 case TGSI_FILE_SAMPLER
:
441 unit
= fsrc
->SrcRegister
.Index
;
443 case TGSI_FILE_OUTPUT
:
446 NOUVEAU_ERR("bad src file\n");
451 dst
= tgsi_dst(fpc
, &finst
->FullDstRegisters
[0]);
452 mask
= tgsi_mask(finst
->FullDstRegisters
[0].DstRegister
.WriteMask
);
453 sat
= (finst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
);
455 switch (finst
->Instruction
.Opcode
) {
456 case TGSI_OPCODE_ABS
:
457 arith(fpc
, sat
, MOV
, dst
, mask
, abs(src
[0]), none
, none
);
459 case TGSI_OPCODE_ADD
:
460 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], src
[1], none
);
462 case TGSI_OPCODE_CMP
:
464 arith(fpc
, sat
, MOV
, dst
, mask
, src
[2], none
, none
);
466 arith(fpc
, 0, MOV
, tmp
, 0xf, src
[0], none
, none
);
467 dst
.cc_test
= NV40_VP_INST_COND_LT
;
468 arith(fpc
, sat
, MOV
, dst
, mask
, src
[1], none
, none
);
470 case TGSI_OPCODE_COS
:
471 arith(fpc
, sat
, COS
, dst
, mask
, src
[0], none
, none
);
473 case TGSI_OPCODE_DP3
:
474 arith(fpc
, sat
, DP3
, dst
, mask
, src
[0], src
[1], none
);
476 case TGSI_OPCODE_DP4
:
477 arith(fpc
, sat
, DP4
, dst
, mask
, src
[0], src
[1], none
);
479 case TGSI_OPCODE_DPH
:
481 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[1], none
);
482 arith(fpc
, sat
, ADD
, dst
, mask
, swz(tmp
, X
, X
, X
, X
),
483 swz(src
[1], W
, W
, W
, W
), none
);
485 case TGSI_OPCODE_DST
:
486 arith(fpc
, sat
, DST
, dst
, mask
, src
[0], src
[1], none
);
488 case TGSI_OPCODE_EX2
:
489 arith(fpc
, sat
, EX2
, dst
, mask
, src
[0], none
, none
);
491 case TGSI_OPCODE_FLR
:
492 arith(fpc
, sat
, FLR
, dst
, mask
, src
[0], none
, none
);
494 case TGSI_OPCODE_FRC
:
495 arith(fpc
, sat
, FRC
, dst
, mask
, src
[0], none
, none
);
497 case TGSI_OPCODE_KIL
:
498 arith(fpc
, 0, KIL
, none
, 0, none
, none
, none
);
500 case TGSI_OPCODE_KILP
:
501 dst
= nv40_sr(NV40SR_NONE
, 0);
503 arith(fpc
, 0, MOV
, dst
, MASK_ALL
, src
[0], none
, none
);
504 dst
.cc_update
= 0; dst
.cc_test
= NV40_FP_OP_COND_LT
;
505 arith(fpc
, 0, KIL
, dst
, 0, none
, none
, none
);
507 case TGSI_OPCODE_LG2
:
508 arith(fpc
, sat
, LG2
, dst
, mask
, src
[0], none
, none
);
510 // case TGSI_OPCODE_LIT:
511 case TGSI_OPCODE_LRP
:
513 arith(fpc
, 0, MAD
, tmp
, mask
, neg(src
[0]), src
[2], src
[2]);
514 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], tmp
);
516 case TGSI_OPCODE_MAD
:
517 arith(fpc
, sat
, MAD
, dst
, mask
, src
[0], src
[1], src
[2]);
519 case TGSI_OPCODE_MAX
:
520 arith(fpc
, sat
, MAX
, dst
, mask
, src
[0], src
[1], none
);
522 case TGSI_OPCODE_MIN
:
523 arith(fpc
, sat
, MIN
, dst
, mask
, src
[0], src
[1], none
);
525 case TGSI_OPCODE_MOV
:
526 arith(fpc
, sat
, MOV
, dst
, mask
, src
[0], none
, none
);
528 case TGSI_OPCODE_MUL
:
529 arith(fpc
, sat
, MUL
, dst
, mask
, src
[0], src
[1], none
);
531 case TGSI_OPCODE_POW
:
533 arith(fpc
, 0, LG2
, tmp
, MASK_X
,
534 swz(src
[0], X
, X
, X
, X
), none
, none
);
535 arith(fpc
, 0, MUL
, tmp
, MASK_X
, swz(tmp
, X
, X
, X
, X
),
536 swz(src
[1], X
, X
, X
, X
), none
);
537 arith(fpc
, sat
, EX2
, dst
, mask
,
538 swz(tmp
, X
, X
, X
, X
), none
, none
);
540 case TGSI_OPCODE_RCP
:
541 arith(fpc
, sat
, RCP
, dst
, mask
, src
[0], none
, none
);
543 case TGSI_OPCODE_RET
:
546 case TGSI_OPCODE_RFL
:
548 arith(fpc
, 0, DP3
, tmp
, MASK_X
, src
[0], src
[0], none
);
549 arith(fpc
, 0, DP3
, tmp
, MASK_Y
, src
[0], src
[1], none
);
550 arith(fpc
, 0, DIV
, scale(tmp
, 2X
), MASK_Z
,
551 swz(tmp
, Y
, Y
, Y
, Y
), swz(tmp
, X
, X
, X
, X
), none
);
552 arith(fpc
, sat
, MAD
, dst
, mask
,
553 swz(tmp
, Z
, Z
, Z
, Z
), src
[0], neg(src
[1]));
555 case TGSI_OPCODE_RSQ
:
557 arith(fpc
, 0, LG2
, scale(tmp
, INV_2X
), MASK_X
,
558 abs(swz(src
[0], X
, X
, X
, X
)), none
, none
);
559 arith(fpc
, sat
, EX2
, dst
, mask
,
560 neg(swz(tmp
, X
, X
, X
, X
)), none
, none
);
562 case TGSI_OPCODE_SCS
:
564 arith(fpc
, sat
, COS
, dst
, MASK_X
,
565 swz(src
[0], X
, X
, X
, X
), none
, none
);
568 arith(fpc
, sat
, SIN
, dst
, MASK_Y
,
569 swz(src
[0], X
, X
, X
, X
), none
, none
);
572 case TGSI_OPCODE_SIN
:
573 arith(fpc
, sat
, SIN
, dst
, mask
, src
[0], none
, none
);
575 case TGSI_OPCODE_SGE
:
576 arith(fpc
, sat
, SGE
, dst
, mask
, src
[0], src
[1], none
);
578 case TGSI_OPCODE_SLT
:
579 arith(fpc
, sat
, SLT
, dst
, mask
, src
[0], src
[1], none
);
581 case TGSI_OPCODE_SUB
:
582 arith(fpc
, sat
, ADD
, dst
, mask
, src
[0], neg(src
[1]), none
);
584 case TGSI_OPCODE_TEX
:
585 if (finst
->FullSrcRegisters
[0].SrcRegisterExtSwz
.ExtDivide
==
587 tex(fpc
, sat
, TXP
, unit
, dst
, mask
, src
[0], none
, none
);
589 tex(fpc
, sat
, TEX
, unit
, dst
, mask
, src
[0], none
, none
);
591 case TGSI_OPCODE_TXB
:
592 tex(fpc
, sat
, TXB
, unit
, dst
, mask
, src
[0], none
, none
);
594 case TGSI_OPCODE_XPD
:
596 arith(fpc
, 0, MUL
, tmp
, mask
,
597 swz(src
[0], Z
, X
, Y
, Y
), swz(src
[1], Y
, Z
, X
, X
), none
);
598 arith(fpc
, sat
, MAD
, dst
, (mask
& ~MASK_W
),
599 swz(src
[0], Y
, Z
, X
, X
), swz(src
[1], Z
, X
, Y
, Y
),
603 NOUVEAU_ERR("invalid opcode %d\n", finst
->Instruction
.Opcode
);
611 nv40_fragprog_parse_decl_attrib(struct nv40_fpc
*fpc
,
612 const struct tgsi_full_declaration
*fdec
)
616 switch (fdec
->Semantic
.SemanticName
) {
617 case TGSI_SEMANTIC_POSITION
:
618 hw
= NV40_FP_OP_INPUT_SRC_POSITION
;
620 case TGSI_SEMANTIC_COLOR
:
621 if (fdec
->Semantic
.SemanticIndex
== 0) {
622 hw
= NV40_FP_OP_INPUT_SRC_COL0
;
624 if (fdec
->Semantic
.SemanticIndex
== 1) {
625 hw
= NV40_FP_OP_INPUT_SRC_COL1
;
627 NOUVEAU_ERR("bad colour semantic index\n");
631 case TGSI_SEMANTIC_FOG
:
632 hw
= NV40_FP_OP_INPUT_SRC_FOGC
;
634 case TGSI_SEMANTIC_GENERIC
:
635 if (fdec
->Semantic
.SemanticIndex
<= 7) {
636 hw
= NV40_FP_OP_INPUT_SRC_TC(fdec
->Semantic
.
639 NOUVEAU_ERR("bad generic semantic index\n");
644 NOUVEAU_ERR("bad input semantic\n");
648 fpc
->attrib_map
[fdec
->u
.DeclarationRange
.First
] = hw
;
653 nv40_fragprog_parse_decl_output(struct nv40_fpc
*fpc
,
654 const struct tgsi_full_declaration
*fdec
)
656 switch (fdec
->Semantic
.SemanticName
) {
657 case TGSI_SEMANTIC_POSITION
:
658 fpc
->depth_id
= fdec
->u
.DeclarationRange
.First
;
660 case TGSI_SEMANTIC_COLOR
:
661 fpc
->colour_id
= fdec
->u
.DeclarationRange
.First
;
664 NOUVEAU_ERR("bad output semantic\n");
672 nv40_fragprog_translate(struct nv40_context
*nv40
,
673 struct nv40_fragment_program
*fp
)
675 struct tgsi_parse_context parse
;
676 struct nv40_fpc
*fpc
= NULL
;
678 fpc
= CALLOC(1, sizeof(struct nv40_fpc
));
685 tgsi_parse_init(&parse
, fp
->pipe
->tokens
);
687 while (!tgsi_parse_end_of_tokens(&parse
)) {
688 tgsi_parse_token(&parse
);
690 switch (parse
.FullToken
.Token
.Type
) {
691 case TGSI_TOKEN_TYPE_DECLARATION
:
693 const struct tgsi_full_declaration
*fdec
;
694 fdec
= &parse
.FullToken
.FullDeclaration
;
695 switch (fdec
->Declaration
.File
) {
696 case TGSI_FILE_INPUT
:
697 if (!nv40_fragprog_parse_decl_attrib(fpc
, fdec
))
700 case TGSI_FILE_OUTPUT
:
701 if (!nv40_fragprog_parse_decl_output(fpc
, fdec
))
709 case TGSI_TOKEN_TYPE_IMMEDIATE
:
711 struct tgsi_full_immediate
*imm
;
714 imm
= &parse
.FullToken
.FullImmediate
;
715 assert(imm
->Immediate
.DataType
== TGSI_IMM_FLOAT32
);
716 assert(fpc
->nr_imm
< MAX_IMM
);
718 vals
[0] = imm
->u
.ImmediateFloat32
[0].Float
;
719 vals
[1] = imm
->u
.ImmediateFloat32
[1].Float
;
720 vals
[2] = imm
->u
.ImmediateFloat32
[2].Float
;
721 vals
[3] = imm
->u
.ImmediateFloat32
[3].Float
;
722 fpc
->imm
[fpc
->nr_imm
++] = constant(fpc
, -1, vals
);
725 case TGSI_TOKEN_TYPE_INSTRUCTION
:
727 const struct tgsi_full_instruction
*finst
;
729 finst
= &parse
.FullToken
.FullInstruction
;
730 if (!nv40_fragprog_parse_instruction(fpc
, finst
))
739 fp
->fp_control
|= fpc
->num_regs
<< NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT
;
741 /* Terminate final instruction */
742 fp
->insn
[fpc
->inst_offset
] |= 0x00000001;
744 /* Append NOP + END instruction, may or may not be necessary. */
745 fpc
->inst_offset
= fp
->insn_len
;
747 fp
->insn
[fpc
->inst_offset
+ 0] = 0x00000001;
748 fp
->insn
[fpc
->inst_offset
+ 1] = 0x00000000;
749 fp
->insn
[fpc
->inst_offset
+ 2] = 0x00000000;
750 fp
->insn
[fpc
->inst_offset
+ 3] = 0x00000000;
752 fp
->translated
= TRUE
;
755 tgsi_parse_free(&parse
);
760 nv40_fragprog_bind(struct nv40_context
*nv40
, struct nv40_fragment_program
*fp
)
762 struct pipe_winsys
*ws
= nv40
->pipe
.winsys
;
763 struct nouveau_stateobj
*so
;
766 if (!fp
->translated
) {
767 nv40_fragprog_translate(nv40
, fp
);
773 float *map
= ws
->buffer_map(ws
, nv40
->fragprog
.constant_buf
,
774 PIPE_BUFFER_USAGE_CPU_READ
);
775 for (i
= 0; i
< fp
->nr_consts
; i
++) {
776 struct nv40_fragment_program_data
*fpd
= &fp
->consts
[i
];
777 uint32_t *p
= &fp
->insn
[fpd
->offset
];
778 uint32_t *cb
= (uint32_t *)&map
[fpd
->index
* 4];
780 if (!memcmp(p
, cb
, 4 * sizeof(float)))
782 memcpy(p
, cb
, 4 * sizeof(float));
785 ws
->buffer_unmap(ws
, nv40
->fragprog
.constant_buf
);
789 const uint32_t le
= 1;
793 fp
->buffer
= ws
->buffer_create(ws
, 0x100, 0,
795 map
= ws
->buffer_map(ws
, fp
->buffer
,
796 PIPE_BUFFER_USAGE_CPU_WRITE
);
799 for (i
= 0; i
< fp
->insn_len
; i
++) {
800 NOUVEAU_ERR("%d 0x%08x\n", i
, fp
->insn
[i
]);
804 if ((*(const uint8_t *)&le
)) {
805 for (i
= 0; i
< fp
->insn_len
; i
++) {
806 map
[i
] = fp
->insn
[i
];
809 /* Weird swapping for big-endian chips */
810 for (i
= 0; i
< fp
->insn_len
; i
++) {
811 map
[i
] = ((fp
->insn
[i
] & 0xffff) << 16) |
812 ((fp
->insn
[i
] >> 16) & 0xffff);
816 ws
->buffer_unmap(ws
, fp
->buffer
);
821 so_method(so
, nv40
->hw
->curie
, NV40TCL_FP_ADDRESS
, 1);
822 so_reloc (so
, fp
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_GART
|
823 NOUVEAU_BO_RD
| NOUVEAU_BO_LOW
| NOUVEAU_BO_OR
,
824 NV40TCL_FP_ADDRESS_DMA0
, NV40TCL_FP_ADDRESS_DMA1
);
825 so_method(so
, nv40
->hw
->curie
, NV40TCL_FP_CONTROL
, 1);
826 so_data (so
, fp
->fp_control
);
828 so_emit(nv40
->nvws
, so
);
832 nv40
->fragprog
.active
= fp
;
836 nv40_fragprog_destroy(struct nv40_context
*nv40
,
837 struct nv40_fragment_program
*fp
)