nvfx: add option to dump shaders in TGSI and native code
[mesa.git] / src / gallium / drivers / nvfx / nvfx_fragprog.c
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"

#include "nvfx_context.h"
#include "nvfx_shader.h"
#include "nvfx_resource.h"

#define MAX_CONSTS 128
#define MAX_IMM 32
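/* Per-program translation state: register allocation, collected constants
 * and immediates, and the mapping from generic semantics to input slots. */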
struct nvfx_fpc {
	struct nvfx_fragment_program *fp;

	unsigned r_temps;
	unsigned r_temps_discard;
	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
	struct nvfx_sreg *r_temp;

	int num_regs;

	unsigned inst_offset;
	unsigned have_const;

	struct {
		int pipe;
		float vals[4];
	} consts[MAX_CONSTS];
	int nr_consts;

	struct nvfx_sreg imm[MAX_IMM];
	unsigned nr_imm;

	unsigned char generic_to_slot[256]; /* input slot for each generic semantic index */
};
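/* Allocate the lowest free temporary register; temps allocated during an
 * instruction are returned to the pool by release_temps(). */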
static INLINE struct nvfx_sreg
temp(struct nvfx_fpc *fpc)
{
	int idx = ffs(~fpc->r_temps) - 1;

	if (idx < 0) {
		NOUVEAU_ERR("out of temps!!\n");
		assert(0);
		return nvfx_sr(NVFXSR_TEMP, 0);
	}

	fpc->r_temps |= (1 << idx);
	fpc->r_temps_discard |= (1 << idx);
	return nvfx_sr(NVFXSR_TEMP, idx);
}
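/* Free all temps allocated since the last call, except those kept out of
 * r_temps_discard (e.g. the registers backing TGSI temporaries). */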
static INLINE void
release_temps(struct nvfx_fpc *fpc)
{
	fpc->r_temps &= ~fpc->r_temps_discard;
	fpc->r_temps_discard = 0;
}
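/* Allocate a constant slot: pipe >= 0 refers to a constant-buffer entry
 * patched in at validate time, pipe == -1 stores an immediate value. */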
static INLINE struct nvfx_sreg
constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
{
	int idx;

	assert(fpc->nr_consts < MAX_CONSTS);
	idx = fpc->nr_consts++;

	fpc->consts[idx].pipe = pipe;
	if (pipe == -1)
		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
	return nvfx_sr(NVFXSR_CONST, idx);
}
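/* Shorthand emitters; the opcode name is pasted onto NVFX_FP_OP_OPCODE_.
 * Note that tex() ignores its s1/s2 arguments and always emits 'none'. */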
#define arith(cc,s,o,d,m,s0,s1,s2) \
	nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
		      (d), (m), (s0), (s1), (s2))
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
	nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
		    (d), (m), (s0), none, none)
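/* Grow the instruction buffer by 'size' 32-bit words. */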
static void
grow_insns(struct nvfx_fpc *fpc, int size)
{
	struct nvfx_fragment_program *fp = fpc->fp;

	fp->insn_len += size;
	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}
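/* Encode source operand 'pos' (0-2) of the instruction at inst_offset.
 * The first constant source appends an extra 4-word slot after the
 * instruction; relocated inputs record their offset for later patching. */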
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];
	uint32_t sr = 0;

	switch (src.type) {
	case NVFXSR_INPUT:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
		break;
	case NVFXSR_OUTPUT:
		sr |= NVFX_FP_REG_SRC_HALF;
		/* fall-through */
	case NVFXSR_TEMP:
		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
		break;
	case NVFXSR_RELOCATED:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		//printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index);
		util_dynarray_append(&fpc->fp->slot_relocations[src.index], unsigned, fpc->inst_offset);
		break;
	case NVFXSR_CONST:
		if (!fpc->have_const) {
			grow_insns(fpc, 4);
			fpc->have_const = 1;
		}

		hw = &fp->insn[fpc->inst_offset];
		if (fpc->consts[src.index].pipe >= 0) {
			struct nvfx_fragment_program_data *fpd;

			fp->consts = realloc(fp->consts, ++fp->nr_consts *
					     sizeof(*fpd));
			fpd = &fp->consts[fp->nr_consts - 1];
			fpd->offset = fpc->inst_offset + 4;
			fpd->index = fpc->consts[src.index].pipe;
			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
		} else {
			memcpy(&fp->insn[fpc->inst_offset + 4],
			       fpc->consts[src.index].vals,
			       sizeof(uint32_t) * 4);
		}

		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
		break;
	case NVFXSR_NONE:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		break;
	default:
		assert(0);
	}

	if (src.negate)
		sr |= NVFX_FP_REG_NEGATE;

	if (src.abs)
		hw[1] |= (1 << (29 + pos));

	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

	hw[pos + 1] |= sr;
}
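/* Encode the destination operand of the instruction at inst_offset. */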
static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];

	switch (dst.type) {
	case NVFXSR_TEMP:
		if (fpc->num_regs < (dst.index + 1))
			fpc->num_regs = dst.index + 1;
		break;
	case NVFXSR_OUTPUT:
		if (dst.index == 1) {
			fp->fp_control |= 0xe;
		} else {
			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
		}
		break;
	case NVFXSR_NONE:
		hw[0] |= (1 << 30);
		break;
	default:
		assert(0);
	}

	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}
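/* Emit a 4-word ALU instruction, including saturation, condition-code
 * update/test state and up to three source operands. */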
static void
nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
	      struct nvfx_sreg dst, int mask,
	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw;

	fpc->inst_offset = fp->insn_len;
	fpc->have_const = 0;
	grow_insns(fpc, 4);
	hw = &fp->insn[fpc->inst_offset];
	memset(hw, 0, sizeof(uint32_t) * 4);

	if (op == NVFX_FP_OP_OPCODE_KIL)
		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);

	if (sat)
		hw[0] |= NVFX_FP_OP_OUT_SAT;

	if (dst.cc_update)
		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

	emit_dst(fpc, dst);
	emit_src(fpc, 0, s0);
	emit_src(fpc, 1, s1);
	emit_src(fpc, 2, s2);
}
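/* Emit a texture instruction: an ALU-encoded op plus a sampler unit. */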
static void
nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
	    struct nvfx_sreg dst, int mask,
	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
{
	struct nvfx_fragment_program *fp = fpc->fp;

	nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);

	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
	fp->samplers |= (1 << unit);
}
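/* Map a TGSI source register onto the corresponding hardware register;
 * GENERIC inputs become relocated slots resolved at validate time. */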
static INLINE struct nvfx_sreg
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
	struct nvfx_sreg src = { 0 };

	switch (fsrc->Register.File) {
	case TGSI_FILE_INPUT:
		if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) {
			assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION);
		} else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) {
			if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0)
				src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0);
			else if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 1)
				src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1);
			else
				assert(0);
		} else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) {
			assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC);
		} else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) {
			/* TODO: check this has the correct values */
			/* XXX: what do we do for nv30 here (assuming it lacks facing)?! */
			assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
			src = nvfx_sr(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING);
		} else {
			assert(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC);
			src = nvfx_sr(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->fp->info.input_semantic_index[fsrc->Register.Index]]);
		}
		break;
	case TGSI_FILE_CONSTANT:
		src = constant(fpc, fsrc->Register.Index, NULL);
		break;
	case TGSI_FILE_IMMEDIATE:
		assert(fsrc->Register.Index < fpc->nr_imm);
		src = fpc->imm[fsrc->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		src = fpc->r_temp[fsrc->Register.Index];
		break;
	/* NV40 fragprog result regs are just temps, so this is simple */
	case TGSI_FILE_OUTPUT:
		src = fpc->r_result[fsrc->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad src file\n");
		break;
	}

	src.abs = fsrc->Register.Absolute;
	src.negate = fsrc->Register.Negate;
	src.swz[0] = fsrc->Register.SwizzleX;
	src.swz[1] = fsrc->Register.SwizzleY;
	src.swz[2] = fsrc->Register.SwizzleZ;
	src.swz[3] = fsrc->Register.SwizzleW;
	return src;
}
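/* Map a TGSI destination register onto the corresponding hardware register. */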
static INLINE struct nvfx_sreg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
	switch (fdst->Register.File) {
	case TGSI_FILE_OUTPUT:
		return fpc->r_result[fdst->Register.Index];
	case TGSI_FILE_TEMPORARY:
		return fpc->r_temp[fdst->Register.Index];
	case TGSI_FILE_NULL:
		return nvfx_sr(NVFXSR_NONE, 0);
	default:
		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
		return nvfx_sr(NVFXSR_NONE, 0);
	}
}
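/* Convert a TGSI write mask to the hardware output mask. */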
static INLINE int
tgsi_mask(uint tgsi)
{
	int mask = 0;

	if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
	if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
	if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
	if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
	return mask;
}
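/* Translate a single TGSI instruction into one or more native instructions. */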
static boolean
nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
				const struct tgsi_full_instruction *finst)
{
	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
	struct nvfx_sreg src[3], dst, tmp;
	int mask, sat, unit = 0;
	int ai = -1, ci = -1, ii = -1;
	int i;

	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
		return TRUE;
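	/* TEMPORARY sources are resolved up front; the remaining files are
	 * limited to one INPUT and one CONST or IMMEDIATE read per hardware
	 * instruction, so any extra reads are first copied through temps. */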
	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];
		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
			src[i] = tgsi_src(fpc, fsrc);
		}
	}

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];

		switch (fsrc->Register.File) {
		case TGSI_FILE_INPUT:
			if (ai == -1 || ai == fsrc->Register.Index) {
				ai = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_CONSTANT:
			if ((ci == -1 && ii == -1) ||
			    ci == fsrc->Register.Index) {
				ci = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_IMMEDIATE:
			if ((ci == -1 && ii == -1) ||
			    ii == fsrc->Register.Index) {
				ii = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_TEMPORARY:
			/* handled above */
			break;
		case TGSI_FILE_SAMPLER:
			unit = fsrc->Register.Index;
			break;
		case TGSI_FILE_OUTPUT:
			break;
		default:
			NOUVEAU_ERR("bad src file\n");
			return FALSE;
		}
	}

	dst = tgsi_dst(fpc, &finst->Dst[0]);
	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
	sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
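	/* Translate the opcode; ops the hardware lacks are expanded into
	 * native sequences, some of which differ between nv30 and nv40. */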
	switch (finst->Instruction.Opcode) {
	case TGSI_OPCODE_ABS:
		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
		break;
	case TGSI_OPCODE_ADD:
		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_CMP:
		tmp = nvfx_sr(NVFXSR_NONE, 0);
		tmp.cc_update = 1;
		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
		dst.cc_test = NVFX_COND_GE;
		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
		dst.cc_test = NVFX_COND_LT;
		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
		break;
	case TGSI_OPCODE_COS:
		arith(fpc, sat, COS, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_DDX:
		if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
			tmp = temp(fpc);
			arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
			      swz(src[0], Z, W, Z, W), none, none);
			arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
			      swz(tmp, X, Y, X, Y), none, none);
			arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
			      none, none);
			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
		} else {
			arith(fpc, sat, DDX, dst, mask, src[0], none, none);
		}
		break;
	case TGSI_OPCODE_DDY:
		if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
			tmp = temp(fpc);
			arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
			      swz(src[0], Z, W, Z, W), none, none);
			arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
			      swz(tmp, X, Y, X, Y), none, none);
			arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
			      none, none);
			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
		} else {
			arith(fpc, sat, DDY, dst, mask, src[0], none, none);
		}
		break;
	case TGSI_OPCODE_DP3:
		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_DP4:
		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_DPH:
		tmp = temp(fpc);
		arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
		      swz(src[1], W, W, W, W), none);
		break;
	case TGSI_OPCODE_DST:
		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_EX2:
		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_FLR:
		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_FRC:
		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_KILP:
		arith(fpc, 0, KIL, none, 0, none, none, none);
		break;
	case TGSI_OPCODE_KIL:
		dst = nvfx_sr(NVFXSR_NONE, 0);
		dst.cc_update = 1;
		arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
		dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
		arith(fpc, 0, KIL, dst, 0, none, none, none);
		break;
	case TGSI_OPCODE_LG2:
		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
		break;
//	case TGSI_OPCODE_LIT:
	case TGSI_OPCODE_LRP:
		if(!nvfx->is_nv4x)
			arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
			arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
		}
		break;
	case TGSI_OPCODE_MAD:
		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
		break;
	case TGSI_OPCODE_MAX:
		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_MIN:
		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_MOV:
		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_MUL:
		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_POW:
		if(!nvfx->is_nv4x)
			arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
			      swz(src[0], X, X, X, X), none, none);
			arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
			      swz(src[1], X, X, X, X), none);
			arith(fpc, sat, EX2, dst, mask,
			      swz(tmp, X, X, X, X), none, none);
		}
		break;
	case TGSI_OPCODE_RCP:
		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_RET:
		assert(0);
		break;
	case TGSI_OPCODE_RFL:
		if(!nvfx->is_nv4x)
			arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
			arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
			arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
			      swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
			arith(fpc, sat, MAD, dst, mask,
			      swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
		}
		break;
	case TGSI_OPCODE_RSQ:
		if(!nvfx->is_nv4x)
			arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
			      abs(swz(src[0], X, X, X, X)), none, none);
			arith(fpc, sat, EX2, dst, mask,
			      neg(swz(tmp, X, X, X, X)), none, none);
		}
		break;
	case TGSI_OPCODE_SCS:
		/* avoid overwriting the source */
		if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
		{
			if (mask & NVFX_FP_MASK_X) {
				arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
				      swz(src[0], X, X, X, X), none, none);
			}
			if (mask & NVFX_FP_MASK_Y) {
				arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
				      swz(src[0], X, X, X, X), none, none);
			}
		}
		else
		{
			if (mask & NVFX_FP_MASK_Y) {
				arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
				      swz(src[0], X, X, X, X), none, none);
			}
			if (mask & NVFX_FP_MASK_X) {
				arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
				      swz(src[0], X, X, X, X), none, none);
			}
		}
		break;
	case TGSI_OPCODE_SEQ:
		arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SFL:
		arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SGE:
		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SGT:
		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SIN:
		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_SLE:
		arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SLT:
		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SNE:
		arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_STR:
		arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SUB:
		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
		break;
	case TGSI_OPCODE_TEX:
		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_TXB:
		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_TXP:
		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_XPD:
		tmp = temp(fpc);
		arith(fpc, 0, MUL, tmp, mask,
		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
		arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
		      neg(tmp));
		break;
	default:
		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
		return FALSE;
	}

	release_temps(fpc);
	return TRUE;
}
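/* Assign an output semantic to its hardware register (depth or color). */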
static boolean
nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
				const struct tgsi_full_declaration *fdec)
{
	unsigned idx = fdec->Range.First;
	unsigned hw;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw = 1;
		break;
	case TGSI_SEMANTIC_COLOR:
		hw = ~0;
		switch (fdec->Semantic.Index) {
		case 0: hw = 0; break;
		case 1: hw = 2; break;
		case 2: hw = 3; break;
		case 3: hw = 4; break;
		}
		if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
			NOUVEAU_ERR("bad rcol index\n");
			return FALSE;
		}
		break;
	default:
		NOUVEAU_ERR("bad output semantic\n");
		return FALSE;
	}

	fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
	fpc->r_temps |= (1 << hw);
	return TRUE;
}
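/* Pre-pass over the tokens: lay out input slots, reserve output registers
 * and TGSI temporaries, and collect immediates. */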
static boolean
nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
	struct tgsi_parse_context p;
	int high_temp = -1, i;
	struct util_semantic_set set;

	fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->fp->pipe.tokens, TGSI_FILE_INPUT);
	if(fpc->fp->num_slots > 8)
		return FALSE;
	util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8);
	util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8);

	memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));

	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch(tok->Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;
			fdec = &p.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_OUTPUT:
				if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
					goto out_err;
				break;
			case TGSI_FILE_TEMPORARY:
				if (fdec->Range.Last > high_temp)
					high_temp = fdec->Range.Last;
				break;
			default:
				break;
			}
		}
			break;
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			struct tgsi_full_immediate *imm;
			float vals[4];

			imm = &p.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(fpc->nr_imm < MAX_IMM);

			vals[0] = imm->u[0].Float;
			vals[1] = imm->u[1].Float;
			vals[2] = imm->u[2].Float;
			vals[3] = imm->u[3].Float;
			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
		}
			break;
		default:
			break;
		}
	}
	tgsi_parse_free(&p);

	if (++high_temp) {
		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
		for (i = 0; i < high_temp; i++)
			fpc->r_temp[i] = temp(fpc);
		fpc->r_temps_discard = 0;
	}

	return TRUE;

out_err:
	if (fpc->r_temp)
		FREE(fpc->r_temp);
	tgsi_parse_free(&p);
	return FALSE;
}
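/* Setting NVFX_DUMP_FP=1 in the environment dumps each translated program,
 * both in TGSI form and as native code. */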
DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
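/* Translate a TGSI fragment program into native code. */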
static void
nvfx_fragprog_translate(struct nvfx_context *nvfx,
			struct nvfx_fragment_program *fp)
{
	struct tgsi_parse_context parse;
	struct nvfx_fpc *fpc = NULL;

	fpc = CALLOC(1, sizeof(struct nvfx_fpc));
	if (!fpc)
		return;
	fpc->fp = fp;
	fpc->num_regs = 2;

	if (!nvfx_fragprog_prepare(nvfx, fpc)) {
		FREE(fpc);
		return;
	}

	tgsi_parse_init(&parse, fp->pipe.tokens);

	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		switch (parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		{
			const struct tgsi_full_instruction *finst;

			finst = &parse.FullToken.FullInstruction;
			if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
				goto out_err;
		}
			break;
		default:
			break;
		}
	}

	if(!nvfx->is_nv4x)
		fp->fp_control |= (fpc->num_regs-1)/2;
	else
		fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;

	/* Terminate final instruction */
	if(fp->insn)
		fp->insn[fpc->inst_offset] |= 0x00000001;

	/* Append NOP + END instruction, may or may not be necessary. */
	fpc->inst_offset = fp->insn_len;
	grow_insns(fpc, 4);
	fp->insn[fpc->inst_offset + 0] = 0x00000001;
	fp->insn[fpc->inst_offset + 1] = 0x00000000;
	fp->insn[fpc->inst_offset + 2] = 0x00000000;
	fp->insn[fpc->inst_offset + 3] = 0x00000000;

	if(debug_get_option_nvfx_dump_fp())
	{
		debug_printf("\n");
		tgsi_dump(fp->pipe.tokens, 0);

		debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
		for (unsigned i = 0; i < fp->insn_len; i += 4)
			debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
		debug_printf("\n");
	}

	fp->translated = TRUE;
out_err:
	tgsi_parse_free(&parse);
	if (fpc->r_temp)
		FREE(fpc->r_temp);
	FREE(fpc);
}
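/* Copy program code into its (mapped) buffer object; on big-endian hosts
 * the 16-bit halves of each word must be swapped for the hardware. */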
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
#ifndef WORDS_BIGENDIAN
	memcpy(dst, src, len);
#else
	size_t i;
	for(i = 0; i < len; i += 4) {
		uint32_t v = *(uint32_t*)((char*)src + i);
		*(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
	}
#endif
}
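/* Validate fragment program state: (re)upload the program if needed, patch
 * constants and input slot assignments, and emit the hardware state. */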
void
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
	struct nouveau_channel* chan = nvfx->screen->base.channel;
	struct nvfx_fragment_program *fp = nvfx->fragprog;
	int update = 0;

	if (!fp->translated)
	{
		const int min_size = 4096;

		nvfx_fragprog_translate(nvfx, fp);
		if (!fp->translated) {
			static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0};
			static int warned = 0;
			if(!warned)
			{
				fprintf(stderr, "nvfx: failed to translate fragment program!\n");
				warned = 1;
			}

			/* use dummy program: we cannot fail here */
			fp->translated = TRUE;
			fp->insn = malloc(sizeof(dummy));
			memcpy(fp->insn, dummy, sizeof(dummy));
			fp->insn_len = sizeof(dummy) / sizeof(dummy[0]);
		}
		update = TRUE;

		fp->prog_size = (fp->insn_len * 4 + 63) & ~63;

		if(fp->prog_size >= min_size)
			fp->progs_per_bo = 1;
		else
			fp->progs_per_bo = min_size / fp->prog_size;
		fp->bo_prog_idx = fp->progs_per_bo - 1;
	}
	/* we must update constants even on "just" fragprog changes, because
	   we don't check whether the current constant buffer matches the latest
	   one bound to this fragment program */
	if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
		update = TRUE;

	struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
	if (fp->last_vp_id != vp->id) {
		char* vp_sem_table = vp->generic_to_fp_input;
		unsigned char* fp_semantics = fp->slot_to_generic;
		unsigned char* cur_slots = fp->slot_to_fp_input;
		unsigned diff = 0;
		fp->last_vp_id = vp->id;
		for(unsigned i = 0; i < fp->num_slots; ++i) {
			unsigned char slot_mask = vp_sem_table[fp_semantics[i]];
			diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]);
		}

		if(diff)
		{
			for(unsigned i = 0; i < fp->num_slots; ++i) {
				/* if 0xff, then this will write to the dummy value at fp->last_layout_mask[0] */
				fp->slot_to_fp_input[i] = vp_sem_table[fp_semantics[i]] & 0xf;
				//printf("fp: GENERIC[%i] from fpreg %i\n", fp_semantics[i], fp->slot_to_fp_input[i]);
			}

			fp->progs_left_with_obsolete_slot_assignments = fp->progs;
			update = TRUE;
		}
	}
	/* Reassign texcoord inputs when point sprite coordinate replacement changes. */
	unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable;
	if(fp->last_sprite_coord_enable != sprite_coord_enable)
	{
		unsigned texcoord_mask = vp->texcoord_ouput_mask;
		fp->last_sprite_coord_enable = sprite_coord_enable;
		fp->point_sprite_control = 0;
		for(unsigned i = 0; i < fp->num_slots; ++i) {
			if((1 << fp->slot_to_generic[i]) & sprite_coord_enable)
			{
				unsigned fpin = fp->slot_to_fp_input[i];
				//printf("sprite: slot %i generic %i had texcoord %i\n", i, fp->slot_to_generic[i], fpin - NVFX_FP_OP_INPUT_SRC_TC0);
				if(fpin >= 0x0f)
				{
					unsigned tc = __builtin_ctz(~texcoord_mask);
					texcoord_mask |= (1 << tc);
					fp->slot_to_fp_input[i] = fpin = NVFX_FP_OP_INPUT_SRC_TC(tc);

					fp->progs_left_with_obsolete_slot_assignments = fp->progs;
					update = TRUE;
				}
				//printf("sprite: slot %i texcoord %i\n", i, fpin - NVFX_FP_OP_INPUT_SRC_TC0);
				fp->point_sprite_control |= (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0 + 8));
			}
			else
			{
				unsigned fpin = fp->slot_to_fp_input[i];
				if(!(vp->texcoord_ouput_mask & (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0))))
				{
					fp->slot_to_fp_input[i] = 0x0f;

					fp->progs_left_with_obsolete_slot_assignments = fp->progs;
					update = TRUE;
				}
			}
		}
	}
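	/* The GPU fetches the program from memory, so rotate through a ring of
	 * buffer copies to avoid rewriting one it may still be reading, then
	 * patch constants and input slots into the copy we are about to bind. */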
	if(update) {
		++fp->bo_prog_idx;
		if(fp->bo_prog_idx >= fp->progs_per_bo)
		{
			if(fp->fpbo && !nouveau_bo_busy(fp->fpbo->next->bo, NOUVEAU_BO_WR))
			{
				fp->fpbo = fp->fpbo->next;
			}
			else
			{
				struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program_bo) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
				fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
				memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
				if(fp->fpbo)
				{
					fpbo->next = fp->fpbo->next;
					fp->fpbo->next = fpbo;
				}
				else
					fpbo->next = fpbo;
				fp->fpbo = fpbo;
				fpbo->bo = 0;
				fp->progs += fp->progs_per_bo;
				fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo;
				nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
				nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);

				uint8_t* map = fpbo->bo->map;
				uint8_t* buf = (uint8_t*)fpbo->insn;
				for(unsigned i = 0; i < fp->progs_per_bo; ++i)
				{
					memcpy(buf, fp->insn, fp->insn_len * 4);
					nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
					map += fp->prog_size;
					buf += fp->prog_size;
				}
			}
			fp->bo_prog_idx = 0;
		}

		int offset = fp->bo_prog_idx * fp->prog_size;
		uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);

		if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
			struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
			uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
			uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
			int i;
			for (i = 0; i < fp->nr_consts; ++i) {
				unsigned off = fp->consts[i].offset;
				unsigned idx = fp->consts[i].index * 4;

				/* TODO: is checking a good idea? */
				if(memcmp(&buf[off], &map[idx], 4 * sizeof(uint32_t))) {
					memcpy(&buf[off], &map[idx], 4 * sizeof(uint32_t));
					nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
				}
			}
		}

		if(fp->progs_left_with_obsolete_slot_assignments) {
			unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8];
			for(unsigned i = 0; i < fp->num_slots; ++i) {
				unsigned value = fp->slot_to_fp_input[i];
				if(value != fpbo_slots[i]) {
					unsigned* p = (unsigned*)fp->slot_relocations[i].data;
					unsigned* pend = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size);
					for(; p != pend; ++p) {
						unsigned off = *p;
						unsigned dw = fp->insn[off];
						dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT);
						nvfx_fp_memcpy(&fpmap[*p], &dw, sizeof(dw));
					}
					fpbo_slots[i] = value;
				}
			}
			--fp->progs_left_with_obsolete_slot_assignments;
		}
	}
	if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) {
		int offset = fp->bo_prog_idx * fp->prog_size;
		MARK_RING(chan, 8, 1);
		OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1));
		OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
			  NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
			  NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
			  NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
		OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1));
		OUT_RING(chan, fp->fp_control);
		if(!nvfx->is_nv4x) {
			OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1));
			OUT_RING(chan, (1<<16)|0x4);
			OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1));
			OUT_RING(chan, fp->samplers);
		}
	}
	if(nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SPRITE))
	{
		WAIT_RING(chan, 2);
		OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1));
		OUT_RING(chan, fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization);
	}
}
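/* Re-emit relocations for the currently bound fragment program. */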
void
nvfx_fragprog_relocate(struct nvfx_context *nvfx)
{
	struct nouveau_channel* chan = nvfx->screen->base.channel;
	struct nvfx_fragment_program *fp = nvfx->fragprog;
	struct nouveau_bo* bo = fp->fpbo->bo;
	int offset = fp->bo_prog_idx * fp->prog_size;
	unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART?
	fp_flags |= NOUVEAU_BO_DUMMY;
	MARK_RING(chan, 2, 2);
	OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0);
	OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
		  NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
		  NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
}
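/* Free the program's buffer object ring, relocation lists and code. */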
void
nvfx_fragprog_destroy(struct nvfx_context *nvfx,
		      struct nvfx_fragment_program *fp)
{
	unsigned i;
	struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
	if(fpbo)
	{
		do
		{
			struct nvfx_fragment_program_bo* next = fpbo->next;
			nouveau_bo_unmap(fpbo->bo);
			nouveau_bo_ref(0, &fpbo->bo);
			os_free_aligned(fpbo);
			fpbo = next;
		}
		while(fpbo != fp->fpbo);
	}

	for(i = 0; i < 8; ++i)
		util_dynarray_fini(&fp->slot_relocations[i]);

	if (fp->insn_len)
		FREE(fp->insn);
}
1097 }