Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / nvfx / nvfx_fragprog.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "util/u_inlines.h"
5 #include "util/u_debug.h"
6
7 #include "pipe/p_shader_tokens.h"
8 #include "tgsi/tgsi_parse.h"
9 #include "tgsi/tgsi_util.h"
10 #include "tgsi/tgsi_dump.h"
11 #include "tgsi/tgsi_ureg.h"
12
13 #include "nvfx_context.h"
14 #include "nvfx_shader.h"
15 #include "nvfx_resource.h"
16
/* Size of the per-program constant pool (both user constants and immediates
 * share this pool; see struct nvfx_fpc::consts). */
#define MAX_CONSTS 128
/* Maximum number of TGSI immediates we can remember per program. */
#define MAX_IMM 32
19
/* Per-compilation state for translating one TGSI fragment program into
 * nv30/nv40 hardware instructions. */
struct nvfx_fpc {
	struct nvfx_pipe_fragment_program* pfp; /* pipe-level program (TGSI tokens + scan info) */
	struct nvfx_fragment_program *fp;       /* hardware program being emitted */

	unsigned max_temps;                 /* hw temp budget (48 on nv4x, 32 on nv3x) */
	unsigned long long r_temps;         /* bitmask of currently allocated temps */
	unsigned long long r_temps_discard; /* temps to release after the current TGSI insn */
	struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; /* hw reg per TGSI output index */
	struct nvfx_reg *r_temp;            /* hw reg per TGSI temporary index */
	unsigned sprite_coord_temp;         /* temp substituted for relocated (generic) inputs */

	int num_regs;                       /* highest temp register written + 1 */

	unsigned inst_offset;               /* dword offset of the insn currently being built */
	unsigned have_const;                /* set once the current insn grew its const slot */

	/* Constant pool: pipe >= 0 means "filled from user constant buffer slot
	 * 'pipe' at upload time"; pipe == -1 means an inline immediate in vals. */
	struct {
		int pipe;
		float vals[4];
	} consts[MAX_CONSTS];
	int nr_consts;

	struct nvfx_reg imm[MAX_IMM];       /* const-pool regs backing TGSI immediates */
	unsigned nr_imm;

	unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */

	struct util_dynarray if_stack;      /* offsets of open nv40 IF insns awaiting ELSE/ENDIF */
	//struct util_dynarray loop_stack;
	struct util_dynarray label_relocs;  /* branch targets patched after label resolution */
};
51
52 static INLINE struct nvfx_reg
53 temp(struct nvfx_fpc *fpc)
54 {
55 int idx = __builtin_ctzll(~fpc->r_temps);
56
57 if (idx >= fpc->max_temps) {
58 NOUVEAU_ERR("out of temps!!\n");
59 assert(0);
60 return nvfx_reg(NVFXSR_TEMP, 0);
61 }
62
63 fpc->r_temps |= (1ULL << idx);
64 fpc->r_temps_discard |= (1ULL << idx);
65 return nvfx_reg(NVFXSR_TEMP, idx);
66 }
67
68 static INLINE void
69 release_temps(struct nvfx_fpc *fpc)
70 {
71 fpc->r_temps &= ~fpc->r_temps_discard;
72 fpc->r_temps_discard = 0ULL;
73 }
74
75 static INLINE struct nvfx_reg
76 constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
77 {
78 int idx;
79
80 if (fpc->nr_consts == MAX_CONSTS)
81 assert(0);
82 idx = fpc->nr_consts++;
83
84 fpc->consts[idx].pipe = pipe;
85 if (pipe == -1)
86 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
87 return nvfx_reg(NVFXSR_CONST, idx);
88 }
89
90 static void
91 grow_insns(struct nvfx_fpc *fpc, int size)
92 {
93 struct nvfx_fragment_program *fp = fpc->fp;
94
95 fp->insn_len += size;
96 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
97 }
98
/* Encode source operand 'src' into slot 'pos' (0..2) of the instruction at
 * fpc->inst_offset.  Each source occupies one dword: hw[pos + 1]. */
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];
	uint32_t sr = 0;

	switch (src.reg.type) {
	case NVFXSR_INPUT:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		/* input selector lives in the opcode dword, not the source dword */
		hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
		break;
	case NVFXSR_OUTPUT:
		sr |= NVFX_FP_REG_SRC_HALF;
		/* fall-through: outputs are read back as (half) temps */
	case NVFXSR_TEMP:
		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
		sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
		break;
	case NVFXSR_RELOCATED:
		/* generic input whose final slot isn't known yet: encode the
		 * sprite-coord temp for now and record the dword so it can be
		 * patched once slots are assigned */
		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
		sr |= (fpc->sprite_coord_temp << NVFX_FP_REG_SRC_SHIFT);
		//printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index);
		util_dynarray_append(&fpc->fp->slot_relocations[src.reg.index], unsigned, fpc->inst_offset + pos + 1);
		break;
	case NVFXSR_CONST:
		/* constants ride in 4 extra dwords appended to the insn; grow
		 * the buffer once per instruction */
		if (!fpc->have_const) {
			grow_insns(fpc, 4);
			fpc->have_const = 1;
		}

		/* grow_insns may realloc: re-fetch the insn pointer */
		hw = &fp->insn[fpc->inst_offset];
		if (fpc->consts[src.reg.index].pipe >= 0) {
			/* user constant: record (offset, buffer index) so the
			 * value can be patched in at upload time.
			 * NOTE(review): realloc result is unchecked here —
			 * same OOM policy as the rest of the file. */
			struct nvfx_fragment_program_data *fpd;

			fp->consts = realloc(fp->consts, ++fp->nr_consts *
					     sizeof(*fpd));
			fpd = &fp->consts[fp->nr_consts - 1];
			fpd->offset = fpc->inst_offset + 4;
			fpd->index = fpc->consts[src.reg.index].pipe;
			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
		} else {
			/* inline immediate: copy the value now */
			memcpy(&fp->insn[fpc->inst_offset + 4],
			       fpc->consts[src.reg.index].vals,
			       sizeof(uint32_t) * 4);
		}

		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
		break;
	case NVFXSR_NONE:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		break;
	default:
		assert(0);
	}

	if (src.negate)
		sr |= NVFX_FP_REG_NEGATE;

	/* per-source absolute-value bits live in hw[1] at bits 29..31 */
	if (src.abs)
		hw[1] |= (1 << (29 + pos));

	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

	hw[pos + 1] |= sr;
}
168
/* Encode the destination register of the instruction at fpc->inst_offset. */
static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];

	switch (dst.type) {
	case NVFXSR_TEMP:
		/* track the highest temp written for the register count */
		if (fpc->num_regs < (dst.index + 1))
			fpc->num_regs = dst.index + 1;
		break;
	case NVFXSR_OUTPUT:
		if (dst.index == 1) {
			/* output 1 — presumably depth; 0xe looks like the
			 * depth-output enable bits. TODO(review): confirm
			 * against NV34TCL_FP_CONTROL docs. */
			fp->fp_control |= 0xe;
		} else {
			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
		}
		break;
	case NVFXSR_NONE:
		/* "no destination" bit — condition-code-only instructions */
		hw[0] |= (1 << 30);
		break;
	default:
		assert(0);
	}

	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}
196
/* Append one 4-dword hardware instruction built from 'insn', encoding the
 * opcode, write mask, scale, saturate, condition-code state, texture unit,
 * destination and all three sources. */
static void
nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw;

	fpc->inst_offset = fp->insn_len;
	fpc->have_const = 0; /* const slot (if any) is grown lazily by emit_src */
	grow_insns(fpc, 4);
	hw = &fp->insn[fpc->inst_offset];
	memset(hw, 0, sizeof(uint32_t) * 4);

	if (insn.op == NVFX_FP_OP_OPCODE_KIL)
		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
	hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
	hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
	hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);

	if (insn.sat)
		hw[0] |= NVFX_FP_OP_OUT_SAT;

	/* condition-code update/test state and CC swizzle */
	if (insn.cc_update)
		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
	hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
	hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		  (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
		  (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
		  (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

	/* unit >= 0 marks a texture instruction; record sampler usage */
	if(insn.unit >= 0)
	{
		hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
		fp->samplers |= (1 << insn.unit);
	}

	emit_dst(fpc, insn.dst);
	emit_src(fpc, 0, insn.src[0]);
	emit_src(fpc, 1, insn.src[1]);
	emit_src(fpc, 2, insn.src[2]);
}
237
/* Build an ALU nvfx_insn: saturate flag, opcode suffix, dst, write mask and
 * up to three sources (unit -1 = not a texture op). */
#define arith(s,o,d,m,s0,s1,s2) \
	nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
		(d), (m), (s0), (s1), (s2))

/* Build a texture nvfx_insn for sampler unit u.  NOTE: the s1/s2 arguments
 * are accepted but deliberately ignored (texture ops take one coordinate
 * source), and `none` must be in scope at every call site. */
#define tex(s,o,u,d,m,s0,s1,s2) \
	nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \
		(d), (m), (s0), none, none)
245
/* IF src.x != 0, as TGSI specifies.
 * Emits a CC-updating MOV of src, then an nv40 IF branch instruction whose
 * else/endif offsets are left zero and patched later (at ELSE/ENDIF) via the
 * offset pushed onto if_stack. */
static void
nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
{
	const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
	struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
	uint32_t *hw;
	insn.cc_update = 1;
	nvfx_fp_emit(fpc, insn);

	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
	hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
		NV40_FP_OP_OUT_NONE |
		(NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
	/* Use .xxxx swizzle so that we check only src[0].x*/
	hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		(0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
		(0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
		(0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
		(NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
	hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
	hw[3] = 0; /* | endif_offset */
	util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
}
273
/* Emit an unconditional nv40 CAL to a TGSI label; the call offset is patched
 * later through label_relocs. */
static void
nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
{
	struct nvfx_relocation reloc;
	uint32_t *hw;
	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
	/* always-true condition: CAL is unconditional */
	hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
		(NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
	hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
	hw[3] = 0;
	reloc.target = target;
	reloc.location = fpc->inst_offset + 2;
	util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
294
/* Emit an unconditional nv40 RET (return from subroutine). */
static void
nv40_fp_ret(struct nvfx_fpc *fpc)
{
	uint32_t *hw;
	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
	/* always-true condition: RET is unconditional */
	hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
		(NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
	hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
	hw[3] = 0;
}
310
/* Emit an nv40 REP (hardware loop) executing 'count' iterations; the
 * loop-end offset is patched later through label_relocs. */
static void
nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
{
	struct nvfx_relocation reloc;
	uint32_t *hw;
	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
	hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
		NV40_FP_OP_OUT_NONE |
		(NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
	/* always-true condition: REP itself is unconditional */
	hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
		(NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
	/* the same iteration count is written to all three count fields —
	 * presumably the hardware requires them to agree; TODO confirm */
	hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
		(count << NV40_FP_OP_REP_COUNT1_SHIFT) |
		(count << NV40_FP_OP_REP_COUNT2_SHIFT) |
		(count << NV40_FP_OP_REP_COUNT3_SHIFT);
	hw[3] = 0; /* | end_offset */
	reloc.target = target;
	reloc.location = fpc->inst_offset + 3;
	util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
	//util_dynarray_append(&fpc->loop_stack, unsigned, target);
}
336
/* warning: this only works forward, and probably only if not inside any IF */
/* Emulates an unconditional branch with an always-false IF whose else and
 * endif offsets both point at the target (so either path lands there);
 * both offsets are patched later through label_relocs. */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
	struct nvfx_relocation reloc;
	uint32_t *hw;
	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
	hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
		NV40_FP_OP_OUT_NONE |
		(NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
	/* always-false condition: the "else" path is always taken */
	hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		(NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
	hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
	hw[3] = 0; /* | endif_offset */
	reloc.target = target;
	reloc.location = fpc->inst_offset + 2;
	util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
	reloc.target = target;
	reloc.location = fpc->inst_offset + 3;
	util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
362
/* Emit an unconditional nv40 BRK (break out of the innermost REP loop). */
static void
nv40_fp_brk(struct nvfx_fpc *fpc)
{
	uint32_t *hw;
	fpc->inst_offset = fpc->fp->insn_len;
	grow_insns(fpc, 4);
	hw = &fpc->fp->insn[fpc->inst_offset];
	hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
		NV40_FP_OP_OUT_NONE;
	/* always-true condition: BRK is unconditional */
	hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		(NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
	hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
	hw[3] = 0;
}
379
/* Translate a TGSI source operand into an nvfx_src, mapping inputs by
 * semantic name to fixed hardware input slots (generics go through the
 * relocation machinery since their final slot is assigned later). */
static INLINE struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
	struct nvfx_src src;

	switch (fsrc->Register.File) {
	case TGSI_FILE_INPUT:
		if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) {
			assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION);
		} else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) {
			if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0)
				src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0);
			else if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 1)
				src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1);
			else
				assert(0);
		} else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) {
			assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC);
		} else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) {
			/* TODO: check this has the correct values */
			/* XXX: what do we do for nv30 here (assuming it lacks facing)?! */
			assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src.reg = nvfx_reg(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING);
		} else {
			/* generic: slot resolved later, see NVFXSR_RELOCATED in emit_src */
			assert(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC);
			src.reg = nvfx_reg(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->pfp->info.input_semantic_index[fsrc->Register.Index]]);
		}
		break;
	case TGSI_FILE_CONSTANT:
		/* each use gets a fresh pool entry filled from the constant buffer */
		src.reg = constant(fpc, fsrc->Register.Index, NULL);
		break;
	case TGSI_FILE_IMMEDIATE:
		assert(fsrc->Register.Index < fpc->nr_imm);
		src.reg = fpc->imm[fsrc->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		src.reg = fpc->r_temp[fsrc->Register.Index];
		break;
	/* NV40 fragprog result regs are just temps, so this is simple */
	case TGSI_FILE_OUTPUT:
		src.reg = fpc->r_result[fsrc->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad src file\n");
		src.reg.index = 0;
		src.reg.type = 0;
		break;
	}

	src.abs = fsrc->Register.Absolute;
	src.negate = fsrc->Register.Negate;
	src.swz[0] = fsrc->Register.SwizzleX;
	src.swz[1] = fsrc->Register.SwizzleY;
	src.swz[2] = fsrc->Register.SwizzleZ;
	src.swz[3] = fsrc->Register.SwizzleW;
	return src;
}
439
440 static INLINE struct nvfx_reg
441 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
442 switch (fdst->Register.File) {
443 case TGSI_FILE_OUTPUT:
444 return fpc->r_result[fdst->Register.Index];
445 case TGSI_FILE_TEMPORARY:
446 return fpc->r_temp[fdst->Register.Index];
447 case TGSI_FILE_NULL:
448 return nvfx_reg(NVFXSR_NONE, 0);
449 default:
450 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
451 return nvfx_reg(NVFXSR_NONE, 0);
452 }
453 }
454
455 static INLINE int
456 tgsi_mask(uint tgsi)
457 {
458 int mask = 0;
459
460 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
461 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
462 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
463 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
464 return mask;
465 }
466
467 static boolean
468 nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
469 const struct tgsi_full_instruction *finst)
470 {
471 const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
472 struct nvfx_insn insn;
473 struct nvfx_src src[3], tmp, tmp2;
474 struct nvfx_reg dst;
475 int mask, sat, unit = 0;
476 int ai = -1, ci = -1, ii = -1;
477 int i;
478
479 if (finst->Instruction.Opcode == TGSI_OPCODE_END)
480 return TRUE;
481
482 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
483 const struct tgsi_full_src_register *fsrc;
484
485 fsrc = &finst->Src[i];
486 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
487 src[i] = tgsi_src(fpc, fsrc);
488 }
489 }
490
491 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
492 const struct tgsi_full_src_register *fsrc;
493
494 fsrc = &finst->Src[i];
495
496 switch (fsrc->Register.File) {
497 case TGSI_FILE_INPUT:
498 if (ai == -1 || ai == fsrc->Register.Index) {
499 ai = fsrc->Register.Index;
500 src[i] = tgsi_src(fpc, fsrc);
501 } else {
502 src[i] = nvfx_src(temp(fpc));
503 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
504 }
505 break;
506 case TGSI_FILE_CONSTANT:
507 if ((ci == -1 && ii == -1) ||
508 ci == fsrc->Register.Index) {
509 ci = fsrc->Register.Index;
510 src[i] = tgsi_src(fpc, fsrc);
511 } else {
512 src[i] = nvfx_src(temp(fpc));
513 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
514 }
515 break;
516 case TGSI_FILE_IMMEDIATE:
517 if ((ci == -1 && ii == -1) ||
518 ii == fsrc->Register.Index) {
519 ii = fsrc->Register.Index;
520 src[i] = tgsi_src(fpc, fsrc);
521 } else {
522 src[i] = nvfx_src(temp(fpc));
523 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
524 }
525 break;
526 case TGSI_FILE_TEMPORARY:
527 /* handled above */
528 break;
529 case TGSI_FILE_SAMPLER:
530 unit = fsrc->Register.Index;
531 break;
532 case TGSI_FILE_OUTPUT:
533 break;
534 default:
535 NOUVEAU_ERR("bad src file\n");
536 return FALSE;
537 }
538 }
539
540 dst = tgsi_dst(fpc, &finst->Dst[0]);
541 mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
542 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
543
544 switch (finst->Instruction.Opcode) {
545 case TGSI_OPCODE_ABS:
546 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
547 break;
548 case TGSI_OPCODE_ADD:
549 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
550 break;
551 case TGSI_OPCODE_CMP:
552 insn = arith(0, MOV, none.reg, mask, src[0], none, none);
553 insn.cc_update = 1;
554 nvfx_fp_emit(fpc, insn);
555
556 insn = arith(sat, MOV, dst, mask, src[2], none, none);
557 insn.cc_test = NVFX_COND_GE;
558 nvfx_fp_emit(fpc, insn);
559
560 insn = arith(sat, MOV, dst, mask, src[1], none, none);
561 insn.cc_test = NVFX_COND_LT;
562 nvfx_fp_emit(fpc, insn);
563 break;
564 case TGSI_OPCODE_COS:
565 nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
566 break;
567 case TGSI_OPCODE_DDX:
568 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
569 tmp = nvfx_src(temp(fpc));
570 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
571 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
572 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
573 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
574 } else {
575 nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
576 }
577 break;
578 case TGSI_OPCODE_DDY:
579 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
580 tmp = nvfx_src(temp(fpc));
581 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
582 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
583 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
584 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
585 } else {
586 nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
587 }
588 break;
589 case TGSI_OPCODE_DP2:
590 tmp = nvfx_src(temp(fpc));
591 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
592 nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
593 break;
594 case TGSI_OPCODE_DP3:
595 nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
596 break;
597 case TGSI_OPCODE_DP4:
598 nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
599 break;
600 case TGSI_OPCODE_DPH:
601 tmp = nvfx_src(temp(fpc));
602 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
603 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
604 break;
605 case TGSI_OPCODE_DST:
606 nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
607 break;
608 case TGSI_OPCODE_EX2:
609 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
610 break;
611 case TGSI_OPCODE_FLR:
612 nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
613 break;
614 case TGSI_OPCODE_FRC:
615 nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
616 break;
617 case TGSI_OPCODE_KILP:
618 nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
619 break;
620 case TGSI_OPCODE_KIL:
621 insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
622 insn.cc_update = 1;
623 nvfx_fp_emit(fpc, insn);
624
625 insn = arith(0, KIL, none.reg, 0, none, none, none);
626 insn.cc_test = NVFX_COND_LT;
627 nvfx_fp_emit(fpc, insn);
628 break;
629 case TGSI_OPCODE_LG2:
630 nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
631 break;
632 // case TGSI_OPCODE_LIT:
633 case TGSI_OPCODE_LRP:
634 if(!nvfx->is_nv4x)
635 nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
636 else {
637 tmp = nvfx_src(temp(fpc));
638 nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
639 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
640 }
641 break;
642 case TGSI_OPCODE_MAD:
643 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
644 break;
645 case TGSI_OPCODE_MAX:
646 nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
647 break;
648 case TGSI_OPCODE_MIN:
649 nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
650 break;
651 case TGSI_OPCODE_MOV:
652 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
653 break;
654 case TGSI_OPCODE_MUL:
655 nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
656 break;
657 case TGSI_OPCODE_NOP:
658 break;
659 case TGSI_OPCODE_POW:
660 if(!nvfx->is_nv4x)
661 nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
662 else {
663 tmp = nvfx_src(temp(fpc));
664 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
665 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
666 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
667 }
668 break;
669 case TGSI_OPCODE_RCP:
670 nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
671 break;
672 case TGSI_OPCODE_RFL:
673 if(!nvfx->is_nv4x)
674 nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
675 else {
676 tmp = nvfx_src(temp(fpc));
677 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
678 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
679 insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
680 insn.scale = NVFX_FP_OP_DST_SCALE_2X;
681 nvfx_fp_emit(fpc, insn);
682 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
683 }
684 break;
685 case TGSI_OPCODE_RSQ:
686 if(!nvfx->is_nv4x)
687 nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
688 else {
689 tmp = nvfx_src(temp(fpc));
690 insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
691 insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
692 nvfx_fp_emit(fpc, insn);
693 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
694 }
695 break;
696 case TGSI_OPCODE_SCS:
697 /* avoid overwriting the source */
698 if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
699 {
700 if (mask & NVFX_FP_MASK_X)
701 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
702 if (mask & NVFX_FP_MASK_Y)
703 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
704 }
705 else
706 {
707 if (mask & NVFX_FP_MASK_Y)
708 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
709 if (mask & NVFX_FP_MASK_X)
710 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
711 }
712 break;
713 case TGSI_OPCODE_SEQ:
714 nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
715 break;
716 case TGSI_OPCODE_SFL:
717 nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
718 break;
719 case TGSI_OPCODE_SGE:
720 nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
721 break;
722 case TGSI_OPCODE_SGT:
723 nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
724 break;
725 case TGSI_OPCODE_SIN:
726 nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
727 break;
728 case TGSI_OPCODE_SLE:
729 nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
730 break;
731 case TGSI_OPCODE_SLT:
732 nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
733 break;
734 case TGSI_OPCODE_SNE:
735 nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
736 break;
737 case TGSI_OPCODE_SSG:
738 tmp = nvfx_src(temp(fpc));
739 tmp2 = nvfx_src(temp(fpc));
740 nvfx_fp_emit(fpc, arith(0, SGT, tmp.reg, mask, src[0], nvfx_src(nvfx_reg(NVFXSR_CONST, 0)), none));
741 nvfx_fp_emit(fpc, arith(0, SLT, tmp.reg, mask, src[0], nvfx_src(nvfx_reg(NVFXSR_CONST, 0)), none));
742 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, tmp, neg(tmp2), none));
743 break;
744 case TGSI_OPCODE_STR:
745 nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
746 break;
747 case TGSI_OPCODE_SUB:
748 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
749 break;
750 case TGSI_OPCODE_TEX:
751 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
752 break;
753 case TGSI_OPCODE_TRUNC:
754 tmp = nvfx_src(temp(fpc));
755 insn = arith(0, MOV, none.reg, mask, src[0], none, none);
756 insn.cc_update = 1;
757 nvfx_fp_emit(fpc, insn);
758
759 nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
760 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));
761
762 insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
763 insn.cc_test = NVFX_COND_LT;
764 nvfx_fp_emit(fpc, insn);
765 break;
766 case TGSI_OPCODE_TXB:
767 nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
768 break;
769 case TGSI_OPCODE_TXL:
770 if(nvfx->is_nv4x)
771 nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
772 else /* unsupported on nv30, use TEX and hope they like it */
773 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
774 break;
775 case TGSI_OPCODE_TXP:
776 nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
777 break;
778 case TGSI_OPCODE_XPD:
779 tmp = nvfx_src(temp(fpc));
780 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
781 nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
782 break;
783
784 case TGSI_OPCODE_IF:
785 // MOVRC0 R31 (TR0.xyzw), R<src>:
786 // IF (NE.xxxx) ELSE <else> END <end>
787 if(!nvfx->is_nv4x)
788 goto nv3x_cflow;
789 nv40_fp_if(fpc, src[0]);
790 break;
791
792 case TGSI_OPCODE_ELSE:
793 {
794 uint32_t *hw;
795 if(!nvfx->is_nv4x)
796 goto nv3x_cflow;
797 assert(util_dynarray_contains(&fpc->if_stack, unsigned));
798 hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
799 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
800 break;
801 }
802
803 case TGSI_OPCODE_ENDIF:
804 {
805 uint32_t *hw;
806 if(!nvfx->is_nv4x)
807 goto nv3x_cflow;
808 assert(util_dynarray_contains(&fpc->if_stack, unsigned));
809 hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
810 if(!hw[2])
811 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
812 hw[3] = fpc->fp->insn_len;
813 break;
814 }
815
816 case TGSI_OPCODE_BRA:
817 /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */
818 /* no state tracker uses this, so don't implement this for now */
819 assert(0);
820 nv40_fp_bra(fpc, finst->Label.Label);
821 break;
822
823 case TGSI_OPCODE_BGNSUB:
824 case TGSI_OPCODE_ENDSUB:
825 /* nothing to do here */
826 break;
827
828 case TGSI_OPCODE_CAL:
829 if(!nvfx->is_nv4x)
830 goto nv3x_cflow;
831 nv40_fp_cal(fpc, finst->Label.Label);
832 break;
833
834 case TGSI_OPCODE_RET:
835 if(!nvfx->is_nv4x)
836 goto nv3x_cflow;
837 nv40_fp_ret(fpc);
838 break;
839
840 case TGSI_OPCODE_BGNLOOP:
841 if(!nvfx->is_nv4x)
842 goto nv3x_cflow;
843 /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
844 nv40_fp_rep(fpc, 255, finst->Label.Label);
845 break;
846
847 case TGSI_OPCODE_ENDLOOP:
848 break;
849
850 case TGSI_OPCODE_BRK:
851 if(!nvfx->is_nv4x)
852 goto nv3x_cflow;
853 nv40_fp_brk(fpc);
854 break;
855
856 case TGSI_OPCODE_CONT:
857 {
858 static int warned = 0;
859 if(!warned) {
860 NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
861 warned = 1;
862 }
863 break;
864 }
865
866 default:
867 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
868 return FALSE;
869 }
870
871 out:
872 release_temps(fpc);
873 return TRUE;
874 nv3x_cflow:
875 {
876 static int warned = 0;
877 if(!warned) {
878 NOUVEAU_ERR(
879 "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
880 "If rendering is incorrect, try to disable GLSL support in the application.\n");
881 warned = 1;
882 }
883 }
884 goto out;
885 }
886
/* Map a TGSI output declaration to its hardware register.  Outputs live in
 * temp registers: position (depth) in temp 1, colors in temps 0/2/3/4; the
 * chosen temp is reserved in r_temps so the allocator never hands it out. */
static boolean
nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
				const struct tgsi_full_declaration *fdec)
{
	unsigned idx = fdec->Range.First;
	unsigned hw;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw = 1;
		break;
	case TGSI_SEMANTIC_COLOR:
		hw = ~0; /* stays ~0 for an out-of-range index, caught below */
		switch (fdec->Semantic.Index) {
		case 0: hw = 0; break;
		case 1: hw = 2; break;
		case 2: hw = 3; break;
		case 3: hw = 4; break;
		}
		/* nv3x supports 2 render targets (temps 0, 2), nv4x supports 4 */
		if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
			NOUVEAU_ERR("bad rcol index\n");
			return FALSE;
		}
		break;
	default:
		NOUVEAU_ERR("bad output semantic\n");
		return FALSE;
	}

	fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
	fpc->r_temps |= (1ULL << hw); /* reserve the backing temp */
	return TRUE;
}
920
/* Pre-pass over the TGSI tokens: lay out generic-input slots, reserve const
 * pool entry 0 as the constant zero vector, record output registers and
 * immediates, and allocate one hardware temp per declared TGSI temporary.
 * Returns FALSE if the program uses more than 8 input slots or has a bad
 * output declaration. */
static boolean
nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
	struct tgsi_parse_context p;
	int high_temp = -1, i;
	struct util_semantic_set set;
	float const0v[4] = {0, 0, 0, 0};
	struct nvfx_reg const0;

	/* assign generic input semantics to the (at most 8) hw slots */
	fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->pfp->pipe.tokens, TGSI_FILE_INPUT);
	if(fpc->fp->num_slots > 8)
		return FALSE;
	util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8);
	util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8);

	memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));

	/* pool slot 0 must be the zero constant: SSG relies on it */
	const0 = constant(fpc, -1, const0v);
	assert(const0.index == 0);

	tgsi_parse_init(&p, fpc->pfp->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch(tok->Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;
			fdec = &p.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_OUTPUT:
				if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
					goto out_err;
				break;
			case TGSI_FILE_TEMPORARY:
				/* track the highest declared temporary index */
				if (fdec->Range.Last > high_temp) {
					high_temp =
						fdec->Range.Last;
				}
				break;
			default:
				break;
			}
		}
			break;
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			struct tgsi_full_immediate *imm;
			float vals[4];

			imm = &p.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(fpc->nr_imm < MAX_IMM);

			vals[0] = imm->u[0].Float;
			vals[1] = imm->u[1].Float;
			vals[2] = imm->u[2].Float;
			vals[3] = imm->u[3].Float;
			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
		}
			break;
		default:
			break;
		}
	}
	tgsi_parse_free(&p);

	/* high_temp starts at -1, so ++high_temp is the temporary count.
	 * NOTE(review): CALLOC result is not checked before use. */
	if (++high_temp) {
		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
		for (i = 0; i < high_temp; i++)
			fpc->r_temp[i] = temp(fpc);
		/* these temps are long-lived: don't release them per-insn */
		fpc->r_temps_discard = 0ULL;
	}

	return TRUE;

out_err:
	if (fpc->r_temp) {
		FREE(fpc->r_temp);
		fpc->r_temp = NULL;
	}
	tgsi_parse_free(&p);
	return FALSE;
}
1006
1007 DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
1008
1009 static struct nvfx_fragment_program*
1010 nvfx_fragprog_translate(struct nvfx_context *nvfx,
1011 struct nvfx_pipe_fragment_program *pfp,
1012 boolean emulate_sprite_flipping)
1013 {
1014 struct tgsi_parse_context parse;
1015 struct nvfx_fpc *fpc = NULL;
1016 struct util_dynarray insns;
1017 struct nvfx_fragment_program* fp = NULL;
1018 const int min_size = 4096;
1019
1020 fp = CALLOC_STRUCT(nvfx_fragment_program);
1021 if(!fp)
1022 goto out_err;
1023
1024 fpc = CALLOC_STRUCT(nvfx_fpc);
1025 if (!fpc)
1026 goto out_err;
1027
1028 fpc->max_temps = nvfx->is_nv4x ? 48 : 32;
1029 fpc->pfp = pfp;
1030 fpc->fp = fp;
1031 fpc->num_regs = 2;
1032
1033 if (!nvfx_fragprog_prepare(nvfx, fpc))
1034 goto out_err;
1035
1036 tgsi_parse_init(&parse, pfp->pipe.tokens);
1037 util_dynarray_init(&insns);
1038
1039 if(emulate_sprite_flipping)
1040 {
1041 struct nvfx_reg reg = temp(fpc);
1042 struct nvfx_src sprite_input = nvfx_src(nvfx_reg(NVFXSR_RELOCATED, fp->num_slots));
1043 float v[4] = {1, -1, 0, 0};
1044 struct nvfx_src imm = nvfx_src(constant(fpc, -1, v));
1045
1046 fpc->sprite_coord_temp = reg.index;
1047 fpc->r_temps_discard = 0ULL;
1048 nvfx_fp_emit(fpc, arith(0, MAD, reg, NVFX_FP_MASK_ALL, sprite_input, swz(imm, X, Y, X, X), swz(imm, Z, X, Z, Z)));
1049 }
1050
1051 while (!tgsi_parse_end_of_tokens(&parse)) {
1052 tgsi_parse_token(&parse);
1053
1054 switch (parse.FullToken.Token.Type) {
1055 case TGSI_TOKEN_TYPE_INSTRUCTION:
1056 {
1057 const struct tgsi_full_instruction *finst;
1058
1059 util_dynarray_append(&insns, unsigned, fp->insn_len);
1060 finst = &parse.FullToken.FullInstruction;
1061 if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
1062 goto out_err;
1063 }
1064 break;
1065 default:
1066 break;
1067 }
1068 }
1069 util_dynarray_append(&insns, unsigned, fp->insn_len);
1070
1071 for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
1072 {
1073 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
1074 fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
1075 }
1076 util_dynarray_fini(&insns);
1077
1078 if(!nvfx->is_nv4x)
1079 fp->fp_control |= (fpc->num_regs-1)/2;
1080 else
1081 fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
1082
1083 /* Terminate final instruction */
1084 if(fp->insn)
1085 fp->insn[fpc->inst_offset] |= 0x00000001;
1086
1087 /* Append NOP + END instruction for branches to the end of the program */
1088 fpc->inst_offset = fp->insn_len;
1089 grow_insns(fpc, 4);
1090 fp->insn[fpc->inst_offset + 0] = 0x00000001;
1091 fp->insn[fpc->inst_offset + 1] = 0x00000000;
1092 fp->insn[fpc->inst_offset + 2] = 0x00000000;
1093 fp->insn[fpc->inst_offset + 3] = 0x00000000;
1094
1095 if(debug_get_option_nvfx_dump_fp())
1096 {
1097 debug_printf("\n");
1098 tgsi_dump(pfp->pipe.tokens, 0);
1099
1100 debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
1101 for (unsigned i = 0; i < fp->insn_len; i += 4)
1102 debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
1103 debug_printf("\n");
1104 }
1105
1106 fp->prog_size = (fp->insn_len * 4 + 63) & ~63;
1107
1108 if(fp->prog_size >= min_size)
1109 fp->progs_per_bo = 1;
1110 else
1111 fp->progs_per_bo = min_size / fp->prog_size;
1112 fp->bo_prog_idx = fp->progs_per_bo - 1;
1113
1114 out:
1115 tgsi_parse_free(&parse);
1116 if(fpc)
1117 {
1118 if (fpc->r_temp)
1119 FREE(fpc->r_temp);
1120 util_dynarray_fini(&fpc->if_stack);
1121 util_dynarray_fini(&fpc->label_relocs);
1122 //util_dynarray_fini(&fpc->loop_stack);
1123 FREE(fpc);
1124 }
1125 return fp;
1126
1127 out_err:
1128 _debug_printf("Error: failed to compile this fragment program:\n");
1129 tgsi_dump(pfp->pipe.tokens, 0);
1130
1131 if(fp)
1132 {
1133 FREE(fp);
1134 fp = NULL;
1135 }
1136 goto out;
1137 }
1138
/* Copy fragment program words into (mapped) video memory.
 *
 * The hardware expects each 32-bit instruction word with its 16-bit halves
 * swapped relative to big-endian CPU order, so on big-endian hosts every
 * word is rotated by 16 bits; on little-endian hosts a plain memcpy works.
 *
 * Fix: the big-endian path read the *pointer value* into 'v'
 * ("(uint32_t*)(...)" without dereference), producing garbage (and a
 * compile error on conforming compilers); it now loads the word itself.
 *
 * 'len' is in bytes and must be a multiple of 4.
 */
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
#ifndef WORDS_BIGENDIAN
	memcpy(dst, src, len);
#else
	size_t i;
	for(i = 0; i < len; i += 4) {
		uint32_t v = *(const uint32_t*)((const char*)src + i);
		*(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
	}
#endif
}
1152
1153 /* The hardware only supports immediate constants inside the fragment program,
1154 * and at least on nv30 doesn't support an indirect linkage table.
1155 *
1156 * Hence, we need to patch the fragment program itself both to update constants
1157 * and update linkage.
1158 *
1159 * Using a single fragment program would entail unacceptable stalls if the GPU is
1160 * already rendering with that fragment program.
1161 * Thus, we instead use a "rotating queue" of buffer objects, each of which is
1162 * packed with multiple versions of the same program.
1163 *
1164 * Whenever we need to patch something, we move to the next program and
1165 * patch it. If all buffer objects are in use by the GPU, we allocate another one,
1166 * expanding the queue.
1167 *
1168 * As an additional optimization, we record when all the programs have the
1169 * current input slot configuration, and at that point we stop patching inputs.
1170 * This happens, for instance, if a given fragment program is always used with
1171 * the same vertex program (i.e. always with GLSL), or if the layouts match
1172 * enough (non-GLSL).
1173 *
1174 * Note that instead of using multiple programs, we could push commands
1175 * on the FIFO to patch a single program: it's not fully clear which option is
1176 * faster, but my guess is that the current way is faster.
1177 *
1178 * We also track the previous slot assignments for each version and don't
1179 * patch if they are the same (this could perhaps be removed).
1180 */
1181
/* Validate the current fragment program state and emit it to the hardware.
 *
 * Compiles (and caches) the program on first use, patches input-slot
 * assignments and constants into a rotating queue of buffer-object copies
 * (see the comment block above), and emits the FIFO commands to activate
 * the selected program copy.
 */
void
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
	struct nouveau_channel* chan = nvfx->screen->base.channel;
	struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
	struct nvfx_vertex_program* vp;
	/* Gallium always puts the point coord in GENERIC[0]
	 * TODO: this is wrong, Gallium needs to be fixed
	 */
	unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * (nvfx->rasterizer->pipe.sprite_coord_enable | 1);

	boolean emulate_sprite_flipping = sprite_coord_enable && nvfx->rasterizer->pipe.sprite_coord_mode;
	/* one compiled variant per sprite-flipping mode */
	unsigned key = emulate_sprite_flipping;
	struct nvfx_fragment_program* fp;

	fp = pfp->fps[key];
	if (!fp)
	{
		fp = nvfx_fragprog_translate(nvfx, pfp, emulate_sprite_flipping);

		if(!fp)
		{
			/* Compilation failed: fall back to a lazily-created empty
			 * ("dummy") fragment shader so rendering can continue. */
			if(!nvfx->dummy_fs)
			{
				struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
				if (ureg)
				{
					ureg_END( ureg );
					nvfx->dummy_fs = ureg_create_shader_and_destroy( ureg, &nvfx->pipe );
				}

				if(!nvfx->dummy_fs)
				{
					_debug_printf("Error: unable to create a dummy fragment shader: aborting.");
					abort();
				}
			}

			fp = nvfx_fragprog_translate(nvfx, nvfx->dummy_fs, FALSE);
			emulate_sprite_flipping = FALSE;

			if(!fp)
			{
				_debug_printf("Error: unable to compile even a dummy fragment shader: aborting.");
				abort();
			}
		}

		pfp->fps[key] = fp;
	}

	vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;

	/* Recompute the desired input-slot assignments whenever the linked
	 * vertex program or the sprite-coord configuration changed. */
	if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) {
		int sprite_real_input = -1;
		int sprite_reloc_input;
		unsigned i;
		fp->last_vp_id = vp->id;
		fp->last_sprite_coord_enable = sprite_coord_enable;

		if(sprite_coord_enable)
		{
			/* Pick the texcoord unit that carries the point coord:
			 * either the one the vertex program designated, or the
			 * first texcoord not used by any other input. */
			sprite_real_input = vp->sprite_fp_input;
			if(sprite_real_input < 0)
			{
				unsigned used_texcoords = 0;
				for(unsigned i = 0; i < fp->num_slots; ++i) {
					unsigned generic = fp->slot_to_generic[i];
					if(!((1 << generic) & sprite_coord_enable))
					{
						unsigned char slot_mask = vp->generic_to_fp_input[generic];
						/* >= 0xf0 marks a slot bound to a texcoord input */
						if(slot_mask >= 0xf0)
							used_texcoords |= 1 << ((slot_mask & 0xf) - NVFX_FP_OP_INPUT_SRC_TC0);
					}
				}

				sprite_real_input = NVFX_FP_OP_INPUT_SRC_TC(__builtin_ctz(~used_texcoords));
			}

			fp->point_sprite_control |= (1 << (sprite_real_input - NVFX_FP_OP_INPUT_SRC_TC0 + 8));
		}
		else
			fp->point_sprite_control = 0;

		/* When flipping is emulated, sprite reads go through the prepended
		 * MAD's relocated input (index 0); otherwise straight to the texcoord. */
		if(emulate_sprite_flipping)
			sprite_reloc_input = 0;
		else
			sprite_reloc_input = sprite_real_input;

		/* Compare desired assignments against the current ones; jump into
		 * the update block at the first mismatching slot. */
		for(i = 0; i < fp->num_slots; ++i) {
			unsigned generic = fp->slot_to_generic[i];
			if((1 << generic) & sprite_coord_enable)
			{
				if(fp->slot_to_fp_input[i] != sprite_reloc_input)
					goto update_slots;
			}
			else
			{
				unsigned char slot_mask = vp->generic_to_fp_input[generic];
				if((slot_mask >> 4) & (slot_mask ^ fp->slot_to_fp_input[i]))
					goto update_slots;
			}
		}

		if(emulate_sprite_flipping)
		{
			if(fp->slot_to_fp_input[fp->num_slots] != sprite_real_input)
				goto update_slots;
		}

		/* Only reachable via the gotos above: rewrite the slot table. */
		if(0)
		{
update_slots:
			/* optimization: we start updating from the slot we found the first difference in */
			for(; i < fp->num_slots; ++i)
			{
				unsigned generic = fp->slot_to_generic[i];
				if((1 << generic) & sprite_coord_enable)
					fp->slot_to_fp_input[i] = sprite_reloc_input;
				else
					fp->slot_to_fp_input[i] = vp->generic_to_fp_input[generic] & 0xf;
			}

			/* extra entry: where the emulated sprite coord really comes from */
			fp->slot_to_fp_input[fp->num_slots] = sprite_real_input;

			if(nvfx->is_nv4x)
			{
				/* nv4x output enable bits for texcoords 8/9 and 0-7 */
				fp->or = 0;
				for(i = 0; i <= fp->num_slots; ++i) {
					unsigned fp_input = fp->slot_to_fp_input[i];
					if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(8))
						fp->or |= (1 << 12);
					else if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(9))
						fp->or |= (1 << 13);
					else if(fp_input >= NVFX_FP_OP_INPUT_SRC_TC(0) && fp_input <= NVFX_FP_OP_INPUT_SRC_TC(7))
						fp->or |= (1 << (fp_input - NVFX_FP_OP_INPUT_SRC_TC0 + 14));
				}
			}

			/* every program copy now has stale slots and must be repatched */
			fp->progs_left_with_obsolete_slot_assignments = fp->progs;
			goto update;
		}
	}

	/* We must update constants even on "just" fragprog changes, because
	 * we don't check whether the current constant buffer matches the latest
	 * one bound to this fragment program.
	 * Doing such a check would likely be a pessimization.
	 */
	if ((nvfx->hw_fragprog != fp) || (nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))) {
		int offset;
		uint32_t* fpmap;

update:
		/* Advance to the next program copy; if the current BO is exhausted,
		 * reuse the next BO in the ring when idle, else grow the ring. */
		++fp->bo_prog_idx;
		if(fp->bo_prog_idx >= fp->progs_per_bo)
		{
			if(fp->fpbo && !nouveau_bo_busy(fp->fpbo->next->bo, NOUVEAU_BO_WR))
			{
				fp->fpbo = fp->fpbo->next;
			}
			else
			{
				/* allocate a new BO holder; +8 bytes per program for the
				 * per-copy slot-assignment bookkeeping array */
				struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
				uint8_t* map;
				uint8_t* buf;

				fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
				memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
				/* link into the circular list of BOs */
				if(fp->fpbo)
				{
					fpbo->next = fp->fpbo->next;
					fp->fpbo->next = fpbo;
				}
				else
					fpbo->next = fpbo;
				fp->fpbo = fpbo;
				fpbo->bo = 0;
				fp->progs += fp->progs_per_bo;
				fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo;
				nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
				nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);

				/* seed every copy in the BO with the compiled program:
				 * 'buf' is the CPU shadow, 'map' the (endian-swapped) VRAM copy */
				map = fpbo->bo->map;
				buf = (uint8_t*)fpbo->insn;
				for(unsigned i = 0; i < fp->progs_per_bo; ++i)
				{
					memcpy(buf, fp->insn, fp->insn_len * 4);
					nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
					map += fp->prog_size;
					buf += fp->prog_size;
				}
			}
			fp->bo_prog_idx = 0;
		}

		offset = fp->bo_prog_idx * fp->prog_size;
		fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);

		/* Patch constants into this program copy (constants are inlined
		 * in the instruction stream on this hardware). */
		if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
			struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
			uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
			uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
			uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
			int i;
			for (i = 0; i < fp->nr_consts; ++i) {
				unsigned off = fp->consts[i].offset;
				unsigned idx = fp->consts[i].index * 4;

				/* TODO: is checking a good idea? */
				if(memcmp(&buf[off], &map[idx], 4 * sizeof(uint32_t))) {
					memcpy(&buf[off], &map[idx], 4 * sizeof(uint32_t));
					nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
				}
			}
		}

		/* we only do this if we aren't sure that all program versions have the
		 * current slot assignments, otherwise we just update constants for speed
		 */
		if(fp->progs_left_with_obsolete_slot_assignments) {
			unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8];
			/* also relocate sprite coord slot, if any */
			for(unsigned i = 0; i <= fp->num_slots; ++i) {
				unsigned value = fp->slot_to_fp_input[i];;
				if(value != fpbo_slots[i]) {
					unsigned* p;
					unsigned* begin = (unsigned*)fp->slot_relocations[i].data;
					unsigned* end = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size);
					//printf("fp %p reloc slot %u/%u: %u -> %u\n", fp, i, fp->num_slots, fpbo_slots[i], value);
					if(value == 0)
					{
						/* was relocated to an input, switch type to temporary */
						for(p = begin; p != end; ++p) {
							unsigned off = *p;
							unsigned dw = fp->insn[off];
							dw &=~ NVFX_FP_REG_TYPE_MASK;
							//printf("reloc_tmp at %x\n", off);
							nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
						}
					} else {
						if(!fpbo_slots[i])
						{
							/* was relocated to a temporary, switch type to input */
							for(p= begin; p != end; ++p) {
								unsigned off = *p;
								unsigned dw = fp->insn[off];
								//printf("reloc_in at %x\n", off);
								dw |= NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT;
								nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
							}
						}

						/* set the correct input index */
						for(p = begin; p != end; ++p) {
							unsigned off = *p & ~3;
							unsigned dw = fp->insn[off];
							//printf("reloc&~3 at %x\n", off);
							dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT);
							nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw));
						}
					}
					fpbo_slots[i] = value;
				}
			}
			--fp->progs_left_with_obsolete_slot_assignments;
		}

		nvfx->hw_fragprog = fp;

		/* Emit FIFO commands to activate this program copy. */
		MARK_RING(chan, 8, 1);
		OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1));
		OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
				NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
				NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
				NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
		OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1));
		OUT_RING(chan, fp->fp_control);
		if(!nvfx->is_nv4x) {
			OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1));
			OUT_RING(chan, (1<<16)|0x4);
			OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1));
			OUT_RING(chan, fp->samplers);
		}
	}

	/* Update the point sprite control register only when it changed. */
	{
		unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
		if(pointsprite_control != nvfx->hw_pointsprite_control)
		{
			WAIT_RING(chan, 2);
			OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1));
			OUT_RING(chan, pointsprite_control);
			nvfx->hw_pointsprite_control = pointsprite_control;
		}
	}

	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
}
1481
1482 void
1483 nvfx_fragprog_relocate(struct nvfx_context *nvfx)
1484 {
1485 struct nouveau_channel* chan = nvfx->screen->base.channel;
1486 struct nvfx_fragment_program *fp = nvfx->hw_fragprog;
1487 struct nouveau_bo* bo = fp->fpbo->bo;
1488 int offset = fp->bo_prog_idx * fp->prog_size;
1489 unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART?
1490 fp_flags |= NOUVEAU_BO_DUMMY;
1491 MARK_RING(chan, 2, 2);
1492 OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0);
1493 OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
1494 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
1495 NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
1496 nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
1497 }
1498
1499 void
1500 nvfx_fragprog_destroy(struct nvfx_context *nvfx,
1501 struct nvfx_fragment_program *fp)
1502 {
1503 unsigned i;
1504 struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
1505 if(fpbo)
1506 {
1507 do
1508 {
1509 struct nvfx_fragment_program_bo* next = fpbo->next;
1510 nouveau_bo_unmap(fpbo->bo);
1511 nouveau_bo_ref(0, &fpbo->bo);
1512 free(fpbo);
1513 fpbo = next;
1514 }
1515 while(fpbo != fp->fpbo);
1516 }
1517
1518 for(i = 0; i < Elements(fp->slot_relocations); ++i)
1519 util_dynarray_fini(&fp->slot_relocations[i]);
1520
1521 if (fp->insn_len)
1522 FREE(fp->insn);
1523 }
1524
1525 static void *
1526 nvfx_fp_state_create(struct pipe_context *pipe,
1527 const struct pipe_shader_state *cso)
1528 {
1529 struct nvfx_pipe_fragment_program *pfp;
1530
1531 pfp = CALLOC(1, sizeof(struct nvfx_pipe_fragment_program));
1532 pfp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
1533
1534 tgsi_scan_shader(pfp->pipe.tokens, &pfp->info);
1535
1536 return (void *)pfp;
1537 }
1538
1539 static void
1540 nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso)
1541 {
1542 struct nvfx_context *nvfx = nvfx_context(pipe);
1543
1544 nvfx->fragprog = hwcso;
1545 nvfx->dirty |= NVFX_NEW_FRAGPROG;
1546 }
1547
1548 static void
1549 nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso)
1550 {
1551 struct nvfx_context *nvfx = nvfx_context(pipe);
1552 struct nvfx_pipe_fragment_program *pfp = hwcso;
1553 unsigned i;
1554
1555 for(i = 0; i < Elements(pfp->fps); ++i)
1556 {
1557 if(pfp->fps[i])
1558 {
1559 nvfx_fragprog_destroy(nvfx, pfp->fps[i]);
1560 FREE(pfp->fps[i]);
1561 }
1562 }
1563
1564 FREE((void*)pfp->pipe.tokens);
1565 FREE(pfp);
1566 }
1567
1568 void
1569 nvfx_init_fragprog_functions(struct nvfx_context *nvfx)
1570 {
1571 nvfx->pipe.create_fs_state = nvfx_fp_state_create;
1572 nvfx->pipe.bind_fs_state = nvfx_fp_state_bind;
1573 nvfx->pipe.delete_fs_state = nvfx_fp_state_delete;
1574 }