nvfx: refactor shader assembler
[mesa.git] / src / gallium / drivers / nvfx / nvfx_fragprog.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "util/u_inlines.h"
5 #include "util/u_debug.h"
6
7 #include "pipe/p_shader_tokens.h"
8 #include "tgsi/tgsi_parse.h"
9 #include "tgsi/tgsi_util.h"
10 #include "tgsi/tgsi_dump.h"
11
12 #include "nvfx_context.h"
13 #include "nvfx_shader.h"
14 #include "nvfx_resource.h"
15
16 #define MAX_CONSTS 128
17 #define MAX_IMM 32
18 struct nvfx_fpc {
19 struct nvfx_fragment_program *fp;
20
21 unsigned r_temps;
22 unsigned r_temps_discard;
23 struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
24 struct nvfx_reg *r_temp;
25
26 int num_regs;
27
28 unsigned inst_offset;
29 unsigned have_const;
30
31 struct {
32 int pipe;
33 float vals[4];
34 } consts[MAX_CONSTS];
35 int nr_consts;
36
37 struct nvfx_reg imm[MAX_IMM];
38 unsigned nr_imm;
39
40 unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */
41 };
42
43 static INLINE struct nvfx_reg
44 temp(struct nvfx_fpc *fpc)
45 {
46 int idx = ffs(~fpc->r_temps) - 1;
47
48 if (idx < 0) {
49 NOUVEAU_ERR("out of temps!!\n");
50 assert(0);
51 return nvfx_reg(NVFXSR_TEMP, 0);
52 }
53
54 fpc->r_temps |= (1 << idx);
55 fpc->r_temps_discard |= (1 << idx);
56 return nvfx_reg(NVFXSR_TEMP, idx);
57 }
58
59 static INLINE void
60 release_temps(struct nvfx_fpc *fpc)
61 {
62 fpc->r_temps &= ~fpc->r_temps_discard;
63 fpc->r_temps_discard = 0;
64 }
65
66 static INLINE struct nvfx_reg
67 constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
68 {
69 int idx;
70
71 if (fpc->nr_consts == MAX_CONSTS)
72 assert(0);
73 idx = fpc->nr_consts++;
74
75 fpc->consts[idx].pipe = pipe;
76 if (pipe == -1)
77 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
78 return nvfx_reg(NVFXSR_CONST, idx);
79 }
80
81 static void
82 grow_insns(struct nvfx_fpc *fpc, int size)
83 {
84 struct nvfx_fragment_program *fp = fpc->fp;
85
86 fp->insn_len += size;
87 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
88 }
89
90 static void
91 emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
92 {
93 struct nvfx_fragment_program *fp = fpc->fp;
94 uint32_t *hw = &fp->insn[fpc->inst_offset];
95 uint32_t sr = 0;
96
97 switch (src.reg.type) {
98 case NVFXSR_INPUT:
99 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
100 hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
101 break;
102 case NVFXSR_OUTPUT:
103 sr |= NVFX_FP_REG_SRC_HALF;
104 /* fall-through */
105 case NVFXSR_TEMP:
106 sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
107 sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
108 break;
109 case NVFXSR_RELOCATED:
110 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
111 //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index);
112 util_dynarray_append(&fpc->fp->slot_relocations[src.reg.index], unsigned, fpc->inst_offset);
113 break;
114 case NVFXSR_CONST:
115 if (!fpc->have_const) {
116 grow_insns(fpc, 4);
117 fpc->have_const = 1;
118 }
119
120 hw = &fp->insn[fpc->inst_offset];
121 if (fpc->consts[src.reg.index].pipe >= 0) {
122 struct nvfx_fragment_program_data *fpd;
123
124 fp->consts = realloc(fp->consts, ++fp->nr_consts *
125 sizeof(*fpd));
126 fpd = &fp->consts[fp->nr_consts - 1];
127 fpd->offset = fpc->inst_offset + 4;
128 fpd->index = fpc->consts[src.reg.index].pipe;
129 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
130 } else {
131 memcpy(&fp->insn[fpc->inst_offset + 4],
132 fpc->consts[src.reg.index].vals,
133 sizeof(uint32_t) * 4);
134 }
135
136 sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
137 break;
138 case NVFXSR_NONE:
139 sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
140 break;
141 default:
142 assert(0);
143 }
144
145 if (src.negate)
146 sr |= NVFX_FP_REG_NEGATE;
147
148 if (src.abs)
149 hw[1] |= (1 << (29 + pos));
150
151 sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
152 (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
153 (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
154 (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
155
156 hw[pos + 1] |= sr;
157 }
158
159 static void
160 emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
161 {
162 struct nvfx_fragment_program *fp = fpc->fp;
163 uint32_t *hw = &fp->insn[fpc->inst_offset];
164
165 switch (dst.type) {
166 case NVFXSR_TEMP:
167 if (fpc->num_regs < (dst.index + 1))
168 fpc->num_regs = dst.index + 1;
169 break;
170 case NVFXSR_OUTPUT:
171 if (dst.index == 1) {
172 fp->fp_control |= 0xe;
173 } else {
174 hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
175 }
176 break;
177 case NVFXSR_NONE:
178 hw[0] |= (1 << 30);
179 break;
180 default:
181 assert(0);
182 }
183
184 hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
185 }
186
187 static void
188 nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
189 {
190 struct nvfx_fragment_program *fp = fpc->fp;
191 uint32_t *hw;
192
193 fpc->inst_offset = fp->insn_len;
194 fpc->have_const = 0;
195 grow_insns(fpc, 4);
196 hw = &fp->insn[fpc->inst_offset];
197 memset(hw, 0, sizeof(uint32_t) * 4);
198
199 if (insn.op == NVFX_FP_OP_OPCODE_KIL)
200 fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
201 hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
202 hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
203 hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);
204
205 if (insn.sat)
206 hw[0] |= NVFX_FP_OP_OUT_SAT;
207
208 if (insn.cc_update)
209 hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
210 hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
211 hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
212 (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
213 (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
214 (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
215
216 if(insn.unit >= 0)
217 {
218 hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
219 fp->samplers |= (1 << insn.unit);
220 }
221
222 emit_dst(fpc, insn.dst);
223 emit_src(fpc, 0, insn.src[0]);
224 emit_src(fpc, 1, insn.src[1]);
225 emit_src(fpc, 2, insn.src[2]);
226 }
227
/*
 * arith(): build a struct nvfx_insn for opcode NVFX_FP_OP_OPCODE_<o>
 * with no texture unit (-1).
 *   s = saturate, d = destination, m = write mask, s0..s2 = sources.
 */
#define arith(s,o,d,m,s0,s1,s2)                          \
	nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, (d), (m), \
		  (s0), (s1), (s2))

/*
 * tex(): like arith() but bound to texture unit `u`. Only s0 is used;
 * the s1/s2 arguments are ignored and replaced by `none`, which must be
 * in scope at the point of use.
 */
#define tex(s,o,u,d,m,s0,s1,s2)                            \
	nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), (d), (m),  \
		  (s0), none, none)
235
236 static INLINE struct nvfx_src
237 tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
238 {
239 struct nvfx_src src;
240
241 switch (fsrc->Register.File) {
242 case TGSI_FILE_INPUT:
243 if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) {
244 assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
245 src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION);
246 } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) {
247 if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0)
248 src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0);
249 else if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 1)
250 src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1);
251 else
252 assert(0);
253 } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) {
254 assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
255 src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC);
256 } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) {
257 /* TODO: check this has the correct values */
258 /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */
259 assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
260 src.reg = nvfx_reg(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING);
261 } else {
262 assert(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC);
263 src.reg = nvfx_reg(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->fp->info.input_semantic_index[fsrc->Register.Index]]);
264 }
265 break;
266 case TGSI_FILE_CONSTANT:
267 src.reg = constant(fpc, fsrc->Register.Index, NULL);
268 break;
269 case TGSI_FILE_IMMEDIATE:
270 assert(fsrc->Register.Index < fpc->nr_imm);
271 src.reg = fpc->imm[fsrc->Register.Index];
272 break;
273 case TGSI_FILE_TEMPORARY:
274 src.reg = fpc->r_temp[fsrc->Register.Index];
275 break;
276 /* NV40 fragprog result regs are just temps, so this is simple */
277 case TGSI_FILE_OUTPUT:
278 src.reg = fpc->r_result[fsrc->Register.Index];
279 break;
280 default:
281 NOUVEAU_ERR("bad src file\n");
282 break;
283 }
284
285 src.abs = fsrc->Register.Absolute;
286 src.negate = fsrc->Register.Negate;
287 src.swz[0] = fsrc->Register.SwizzleX;
288 src.swz[1] = fsrc->Register.SwizzleY;
289 src.swz[2] = fsrc->Register.SwizzleZ;
290 src.swz[3] = fsrc->Register.SwizzleW;
291 return src;
292 }
293
294 static INLINE struct nvfx_reg
295 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
296 switch (fdst->Register.File) {
297 case TGSI_FILE_OUTPUT:
298 return fpc->r_result[fdst->Register.Index];
299 case TGSI_FILE_TEMPORARY:
300 return fpc->r_temp[fdst->Register.Index];
301 case TGSI_FILE_NULL:
302 return nvfx_reg(NVFXSR_NONE, 0);
303 default:
304 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
305 return nvfx_reg(NVFXSR_NONE, 0);
306 }
307 }
308
309 static INLINE int
310 tgsi_mask(uint tgsi)
311 {
312 int mask = 0;
313
314 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
315 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
316 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
317 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
318 return mask;
319 }
320
321 static boolean
322 nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
323 const struct tgsi_full_instruction *finst)
324 {
325 const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
326 struct nvfx_insn insn;
327 struct nvfx_src src[3], tmp;
328 struct nvfx_reg dst;
329 int mask, sat, unit = 0;
330 int ai = -1, ci = -1, ii = -1;
331 int i;
332
333 if (finst->Instruction.Opcode == TGSI_OPCODE_END)
334 return TRUE;
335
336 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
337 const struct tgsi_full_src_register *fsrc;
338
339 fsrc = &finst->Src[i];
340 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
341 src[i] = tgsi_src(fpc, fsrc);
342 }
343 }
344
345 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
346 const struct tgsi_full_src_register *fsrc;
347
348 fsrc = &finst->Src[i];
349
350 switch (fsrc->Register.File) {
351 case TGSI_FILE_INPUT:
352 if (ai == -1 || ai == fsrc->Register.Index) {
353 ai = fsrc->Register.Index;
354 src[i] = tgsi_src(fpc, fsrc);
355 } else {
356 src[i] = nvfx_src(temp(fpc));
357 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
358 }
359 break;
360 case TGSI_FILE_CONSTANT:
361 if ((ci == -1 && ii == -1) ||
362 ci == fsrc->Register.Index) {
363 ci = fsrc->Register.Index;
364 src[i] = tgsi_src(fpc, fsrc);
365 } else {
366 src[i] = nvfx_src(temp(fpc));
367 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
368 }
369 break;
370 case TGSI_FILE_IMMEDIATE:
371 if ((ci == -1 && ii == -1) ||
372 ii == fsrc->Register.Index) {
373 ii = fsrc->Register.Index;
374 src[i] = tgsi_src(fpc, fsrc);
375 } else {
376 src[i] = nvfx_src(temp(fpc));
377 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
378 }
379 break;
380 case TGSI_FILE_TEMPORARY:
381 /* handled above */
382 break;
383 case TGSI_FILE_SAMPLER:
384 unit = fsrc->Register.Index;
385 break;
386 case TGSI_FILE_OUTPUT:
387 break;
388 default:
389 NOUVEAU_ERR("bad src file\n");
390 return FALSE;
391 }
392 }
393
394 dst = tgsi_dst(fpc, &finst->Dst[0]);
395 mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
396 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
397
398 switch (finst->Instruction.Opcode) {
399 case TGSI_OPCODE_ABS:
400 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
401 break;
402 case TGSI_OPCODE_ADD:
403 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
404 break;
405 case TGSI_OPCODE_CMP:
406 insn = arith(0, MOV, none.reg, 0xf, src[0], none, none);
407 insn.cc_update = 1;
408 nvfx_fp_emit(fpc, insn);
409
410 insn = arith(sat, MOV, dst, mask, src[2], none, none);
411 insn.cc_test = NVFX_COND_GE;
412 nvfx_fp_emit(fpc, insn);
413
414 insn = arith(sat, MOV, dst, mask, src[1], none, none);
415 insn.cc_test = NVFX_COND_LT;
416 nvfx_fp_emit(fpc, insn);
417 break;
418 case TGSI_OPCODE_COS:
419 nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
420 break;
421 case TGSI_OPCODE_DDX:
422 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
423 tmp = nvfx_src(temp(fpc));
424 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
425 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
426 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
427 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
428 } else {
429 nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
430 }
431 break;
432 case TGSI_OPCODE_DDY:
433 if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
434 tmp = nvfx_src(temp(fpc));
435 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
436 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
437 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
438 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
439 } else {
440 nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
441 }
442 break;
443 case TGSI_OPCODE_DP3:
444 nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
445 break;
446 case TGSI_OPCODE_DP4:
447 nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
448 break;
449 case TGSI_OPCODE_DPH:
450 tmp = nvfx_src(temp(fpc));
451 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
452 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
453 break;
454 case TGSI_OPCODE_DST:
455 nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
456 break;
457 case TGSI_OPCODE_EX2:
458 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
459 break;
460 case TGSI_OPCODE_FLR:
461 nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
462 break;
463 case TGSI_OPCODE_FRC:
464 nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
465 break;
466 case TGSI_OPCODE_KILP:
467 nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
468 break;
469 case TGSI_OPCODE_KIL:
470 insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
471 insn.cc_update = 1;
472 nvfx_fp_emit(fpc, insn);
473
474 insn = arith(0, KIL, none.reg, 0, none, none, none);
475 insn.cc_test = NVFX_COND_LT;
476 nvfx_fp_emit(fpc, insn);
477 break;
478 case TGSI_OPCODE_LG2:
479 nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
480 break;
481 // case TGSI_OPCODE_LIT:
482 case TGSI_OPCODE_LRP:
483 if(!nvfx->is_nv4x)
484 nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
485 else {
486 tmp = nvfx_src(temp(fpc));
487 nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
488 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
489 }
490 break;
491 case TGSI_OPCODE_MAD:
492 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
493 break;
494 case TGSI_OPCODE_MAX:
495 nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
496 break;
497 case TGSI_OPCODE_MIN:
498 nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
499 break;
500 case TGSI_OPCODE_MOV:
501 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
502 break;
503 case TGSI_OPCODE_MUL:
504 nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
505 break;
506 case TGSI_OPCODE_POW:
507 if(!nvfx->is_nv4x)
508 nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
509 else {
510 tmp = nvfx_src(temp(fpc));
511 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
512 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
513 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
514 }
515 break;
516 case TGSI_OPCODE_RCP:
517 nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
518 break;
519 case TGSI_OPCODE_RET:
520 assert(0);
521 break;
522 case TGSI_OPCODE_RFL:
523 if(!nvfx->is_nv4x)
524 nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
525 else {
526 tmp = nvfx_src(temp(fpc));
527 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
528 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
529 insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
530 insn.scale = NVFX_FP_OP_DST_SCALE_2X;
531 nvfx_fp_emit(fpc, insn);
532 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
533 }
534 break;
535 case TGSI_OPCODE_RSQ:
536 if(!nvfx->is_nv4x)
537 nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
538 else {
539 tmp = nvfx_src(temp(fpc));
540 insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
541 insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
542 nvfx_fp_emit(fpc, insn);
543 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
544 }
545 break;
546 case TGSI_OPCODE_SCS:
547 /* avoid overwriting the source */
548 if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
549 {
550 if (mask & NVFX_FP_MASK_X)
551 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
552 if (mask & NVFX_FP_MASK_Y)
553 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
554 }
555 else
556 {
557 if (mask & NVFX_FP_MASK_Y)
558 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
559 if (mask & NVFX_FP_MASK_X)
560 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
561 }
562 break;
563 case TGSI_OPCODE_SEQ:
564 nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
565 break;
566 case TGSI_OPCODE_SFL:
567 nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
568 break;
569 case TGSI_OPCODE_SGE:
570 nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
571 break;
572 case TGSI_OPCODE_SGT:
573 nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
574 break;
575 case TGSI_OPCODE_SIN:
576 nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
577 break;
578 case TGSI_OPCODE_SLE:
579 nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
580 break;
581 case TGSI_OPCODE_SLT:
582 nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
583 break;
584 case TGSI_OPCODE_SNE:
585 nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
586 break;
587 case TGSI_OPCODE_STR:
588 nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
589 break;
590 case TGSI_OPCODE_SUB:
591 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
592 break;
593 case TGSI_OPCODE_TEX:
594 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
595 break;
596 case TGSI_OPCODE_TXB:
597 nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
598 break;
599 case TGSI_OPCODE_TXP:
600 nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
601 break;
602 case TGSI_OPCODE_XPD:
603 tmp = nvfx_src(temp(fpc));
604 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
605 nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
606 break;
607 default:
608 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
609 return FALSE;
610 }
611
612 release_temps(fpc);
613 return TRUE;
614 }
615
616 static boolean
617 nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
618 const struct tgsi_full_declaration *fdec)
619 {
620 unsigned idx = fdec->Range.First;
621 unsigned hw;
622
623 switch (fdec->Semantic.Name) {
624 case TGSI_SEMANTIC_POSITION:
625 hw = 1;
626 break;
627 case TGSI_SEMANTIC_COLOR:
628 hw = ~0;
629 switch (fdec->Semantic.Index) {
630 case 0: hw = 0; break;
631 case 1: hw = 2; break;
632 case 2: hw = 3; break;
633 case 3: hw = 4; break;
634 }
635 if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
636 NOUVEAU_ERR("bad rcol index\n");
637 return FALSE;
638 }
639 break;
640 default:
641 NOUVEAU_ERR("bad output semantic\n");
642 return FALSE;
643 }
644
645 fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
646 fpc->r_temps |= (1 << hw);
647 return TRUE;
648 }
649
650 static boolean
651 nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
652 {
653 struct tgsi_parse_context p;
654 int high_temp = -1, i;
655 struct util_semantic_set set;
656
657 fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->fp->pipe.tokens, TGSI_FILE_INPUT);
658 if(fpc->fp->num_slots > 8)
659 return FALSE;
660 util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8);
661 util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8);
662
663 memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));
664
665 tgsi_parse_init(&p, fpc->fp->pipe.tokens);
666 while (!tgsi_parse_end_of_tokens(&p)) {
667 const union tgsi_full_token *tok = &p.FullToken;
668
669 tgsi_parse_token(&p);
670 switch(tok->Token.Type) {
671 case TGSI_TOKEN_TYPE_DECLARATION:
672 {
673 const struct tgsi_full_declaration *fdec;
674 fdec = &p.FullToken.FullDeclaration;
675 switch (fdec->Declaration.File) {
676 case TGSI_FILE_OUTPUT:
677 if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
678 goto out_err;
679 break;
680 case TGSI_FILE_TEMPORARY:
681 if (fdec->Range.Last > high_temp) {
682 high_temp =
683 fdec->Range.Last;
684 }
685 break;
686 default:
687 break;
688 }
689 }
690 break;
691 case TGSI_TOKEN_TYPE_IMMEDIATE:
692 {
693 struct tgsi_full_immediate *imm;
694 float vals[4];
695
696 imm = &p.FullToken.FullImmediate;
697 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
698 assert(fpc->nr_imm < MAX_IMM);
699
700 vals[0] = imm->u[0].Float;
701 vals[1] = imm->u[1].Float;
702 vals[2] = imm->u[2].Float;
703 vals[3] = imm->u[3].Float;
704 fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
705 }
706 break;
707 default:
708 break;
709 }
710 }
711 tgsi_parse_free(&p);
712
713 if (++high_temp) {
714 fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
715 for (i = 0; i < high_temp; i++)
716 fpc->r_temp[i] = temp(fpc);
717 fpc->r_temps_discard = 0;
718 }
719
720 return TRUE;
721
722 out_err:
723 if (fpc->r_temp)
724 FREE(fpc->r_temp);
725 tgsi_parse_free(&p);
726 return FALSE;
727 }
728
729 DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
730
731 static void
732 nvfx_fragprog_translate(struct nvfx_context *nvfx,
733 struct nvfx_fragment_program *fp)
734 {
735 struct tgsi_parse_context parse;
736 struct nvfx_fpc *fpc = NULL;
737
738 fpc = CALLOC(1, sizeof(struct nvfx_fpc));
739 if (!fpc)
740 return;
741 fpc->fp = fp;
742 fpc->num_regs = 2;
743
744 if (!nvfx_fragprog_prepare(nvfx, fpc)) {
745 FREE(fpc);
746 return;
747 }
748
749 tgsi_parse_init(&parse, fp->pipe.tokens);
750
751 while (!tgsi_parse_end_of_tokens(&parse)) {
752 tgsi_parse_token(&parse);
753
754 switch (parse.FullToken.Token.Type) {
755 case TGSI_TOKEN_TYPE_INSTRUCTION:
756 {
757 const struct tgsi_full_instruction *finst;
758
759 finst = &parse.FullToken.FullInstruction;
760 if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
761 goto out_err;
762 }
763 break;
764 default:
765 break;
766 }
767 }
768
769 if(!nvfx->is_nv4x)
770 fp->fp_control |= (fpc->num_regs-1)/2;
771 else
772 fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
773
774 /* Terminate final instruction */
775 if(fp->insn)
776 fp->insn[fpc->inst_offset] |= 0x00000001;
777
778 /* Append NOP + END instruction, may or may not be necessary. */
779 fpc->inst_offset = fp->insn_len;
780 grow_insns(fpc, 4);
781 fp->insn[fpc->inst_offset + 0] = 0x00000001;
782 fp->insn[fpc->inst_offset + 1] = 0x00000000;
783 fp->insn[fpc->inst_offset + 2] = 0x00000000;
784 fp->insn[fpc->inst_offset + 3] = 0x00000000;
785
786 if(debug_get_option_nvfx_dump_fp())
787 {
788 debug_printf("\n");
789 tgsi_dump(fp->pipe.tokens, 0);
790
791 debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
792 for (unsigned i = 0; i < fp->insn_len; i += 4)
793 debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
794 debug_printf("\n");
795 }
796
797 fp->translated = TRUE;
798 out_err:
799 tgsi_parse_free(&parse);
800 if (fpc->r_temp)
801 FREE(fpc->r_temp);
802 FREE(fpc);
803 }
804
/* Exchange the upper and lower 16-bit halves of a 32-bit word. */
static inline uint32_t
nvfx_fp_swap16(uint32_t v)
{
	return (v >> 16) | (v << 16);
}

/*
 * Copy `len` bytes of fragment program data from src to dst.
 *
 * Without WORDS_BIGENDIAN this is a plain memcpy. With it, the data is
 * copied one 32-bit word at a time with the two 16-bit halves of each
 * word exchanged; len is expected to be a multiple of 4 in that case.
 *
 * The previous big-endian branch assigned the source *pointer* to the
 * word variable instead of loading through it. The words are now moved
 * with memcpy, which also avoids alignment and aliasing assumptions.
 */
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
#ifndef WORDS_BIGENDIAN
	memcpy(dst, src, len);
#else
	size_t i;
	for(i = 0; i < len; i += 4) {
		uint32_t v;
		memcpy(&v, (const char*)src + i, sizeof(v));
		v = nvfx_fp_swap16(v);
		memcpy((char*)dst + i, &v, sizeof(v));
	}
#endif
}
818
819 void
820 nvfx_fragprog_validate(struct nvfx_context *nvfx)
821 {
822 struct nouveau_channel* chan = nvfx->screen->base.channel;
823 struct nvfx_fragment_program *fp = nvfx->fragprog;
824 int update = 0;
825
826 if (!fp->translated)
827 {
828 const int min_size = 4096;
829
830 nvfx_fragprog_translate(nvfx, fp);
831 if (!fp->translated) {
832 static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0};
833 static int warned = 0;
834 if(!warned)
835 {
836 fprintf(stderr, "nvfx: failed to translate fragment program!\n");
837 warned = 1;
838 }
839
840 /* use dummy program: we cannot fail here */
841 fp->translated = TRUE;
842 fp->insn = malloc(sizeof(dummy));
843 memcpy(fp->insn, dummy, sizeof(dummy));
844 fp->insn_len = sizeof(dummy) / sizeof(dummy[0]);
845 }
846 update = TRUE;
847
848 fp->prog_size = (fp->insn_len * 4 + 63) & ~63;
849
850 if(fp->prog_size >= min_size)
851 fp->progs_per_bo = 1;
852 else
853 fp->progs_per_bo = min_size / fp->prog_size;
854 fp->bo_prog_idx = fp->progs_per_bo - 1;
855 }
856
857 /* we must update constants even on "just" fragprog changes, because
858 we don't check whether the current constant buffer matches the latest
859 one bound to this fragment program */
860 if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
861 update = TRUE;
862
863 struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
864 if (fp->last_vp_id != vp->id) {
865 char* vp_sem_table = vp->generic_to_fp_input;
866 unsigned char* fp_semantics = fp->slot_to_generic;
867 unsigned diff = 0;
868 fp->last_vp_id = nvfx->vertprog->id;
869 unsigned char* cur_slots = fp->slot_to_fp_input;
870 for(unsigned i = 0; i < fp->num_slots; ++i) {
871 unsigned char slot_mask = vp_sem_table[fp_semantics[i]];
872 diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]);
873 }
874
875 if(diff)
876 {
877 for(unsigned i = 0; i < fp->num_slots; ++i) {
878 /* if 0xff, then this will write to the dummy value at fp->last_layout_mask[0] */
879 fp->slot_to_fp_input[i] = vp_sem_table[fp_semantics[i]] & 0xf;
880 //printf("fp: GENERIC[%i] from fpreg %i\n", fp_semantics[i], fp->slot_to_fp_input[i]);
881 }
882
883 fp->progs_left_with_obsolete_slot_assignments = fp->progs;
884 update = TRUE;
885 }
886 }
887
888 // last_sprite_coord_enable
889 unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable;
890 if(fp->last_sprite_coord_enable != sprite_coord_enable)
891 {
892 unsigned texcoord_mask = vp->texcoord_ouput_mask;
893 fp->last_sprite_coord_enable = sprite_coord_enable;
894 fp->point_sprite_control = 0;
895 for(unsigned i = 0; i < fp->num_slots; ++i) {
896 if((1 << fp->slot_to_generic[i]) & sprite_coord_enable)
897 {
898 unsigned fpin = fp->slot_to_fp_input[i];
899 //printf("sprite: slot %i generic %i had texcoord %i\n", i, fp->slot_to_generic[i], fpin - NVFX_FP_OP_INPUT_SRC_TC0);
900 if(fpin >= 0x0f)
901 {
902 unsigned tc = __builtin_ctz(~texcoord_mask);
903 texcoord_mask |= (1 << tc);
904 fp->slot_to_fp_input[i] = fpin = NVFX_FP_OP_INPUT_SRC_TC(tc);
905
906 fp->progs_left_with_obsolete_slot_assignments = fp->progs;
907 update = TRUE;
908 }
909 //printf("sprite: slot %i texcoord %i\n", i, fpin - NVFX_FP_OP_INPUT_SRC_TC0);
910 fp->point_sprite_control |= (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0 + 8));
911 }
912 else
913 {
914 unsigned fpin = fp->slot_to_fp_input[i];
915 if(!(vp->texcoord_ouput_mask & (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0))))
916 {
917 fp->slot_to_fp_input[i] = 0x0f;
918
919 fp->progs_left_with_obsolete_slot_assignments = fp->progs;
920 update = TRUE;
921 }
922 }
923 }
924 }
925
926 if(update) {
927 ++fp->bo_prog_idx;
928 if(fp->bo_prog_idx >= fp->progs_per_bo)
929 {
930 if(fp->fpbo && !nouveau_bo_busy(fp->fpbo->next->bo, NOUVEAU_BO_WR))
931 {
932 fp->fpbo = fp->fpbo->next;
933 }
934 else
935 {
936 struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
937 fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
938 memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
939 if(fp->fpbo)
940 {
941 fpbo->next = fp->fpbo->next;
942 fp->fpbo->next = fpbo;
943 }
944 else
945 fpbo->next = fpbo;
946 fp->fpbo = fpbo;
947 fpbo->bo = 0;
948 fp->progs += fp->progs_per_bo;
949 fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo;
950 nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
951 nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);
952
953 uint8_t* map = fpbo->bo->map;
954 uint8_t* buf = (uint8_t*)fpbo->insn;
955 for(unsigned i = 0; i < fp->progs_per_bo; ++i)
956 {
957 memcpy(buf, fp->insn, fp->insn_len * 4);
958 nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
959 map += fp->prog_size;
960 buf += fp->prog_size;
961 }
962 }
963 fp->bo_prog_idx = 0;
964 }
965
966 int offset = fp->bo_prog_idx * fp->prog_size;
967 uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
968
969 if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
970 struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
971 uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
972 uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
973 uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
974 int i;
975 for (i = 0; i < fp->nr_consts; ++i) {
976 unsigned off = fp->consts[i].offset;
977 unsigned idx = fp->consts[i].index * 4;
978
979 /* TODO: is checking a good idea? */
980 if(memcmp(&buf[off], &map[idx], 4 * sizeof(uint32_t))) {
981 memcpy(&buf[off], &map[idx], 4 * sizeof(uint32_t));
982 nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
983 }
984 }
985 }
986
987 if(fp->progs_left_with_obsolete_slot_assignments) {
988 unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8];
989 for(unsigned i = 0; i < fp->num_slots; ++i) {
990 unsigned value = fp->slot_to_fp_input[i];;
991 if(value != fpbo_slots[i]) {
992 unsigned* p = (unsigned*)fp->slot_relocations[i].data;
993 unsigned* pend = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size);
994 for(; p != pend; ++p) {
995 unsigned off = *p;
996 unsigned dw = fp->insn[off];
997 dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT);
998 nvfx_fp_memcpy(&fpmap[*p], &dw, sizeof(dw));
999 }
1000 fpbo_slots[i] = value;
1001 }
1002 }
1003 --fp->progs_left_with_obsolete_slot_assignments;
1004 }
1005 }
1006
1007 if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) {
1008 int offset = fp->bo_prog_idx * fp->prog_size;
1009 MARK_RING(chan, 8, 1);
1010 OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1));
1011 OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
1012 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
1013 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
1014 NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
1015 OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1));
1016 OUT_RING(chan, fp->fp_control);
1017 if(!nvfx->is_nv4x) {
1018 OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1));
1019 OUT_RING(chan, (1<<16)|0x4);
1020 OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1));
1021 OUT_RING(chan, fp->samplers);
1022 }
1023 }
1024
1025 if(nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SPRITE))
1026 {
1027 WAIT_RING(chan, 2);
1028 OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1));
1029 OUT_RING(chan, fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization);
1030 }
1031 }
1032
1033 void
1034 nvfx_fragprog_relocate(struct nvfx_context *nvfx)
1035 {
1036 struct nouveau_channel* chan = nvfx->screen->base.channel;
1037 struct nvfx_fragment_program *fp = nvfx->fragprog;
1038 struct nouveau_bo* bo = fp->fpbo->bo;
1039 int offset = fp->bo_prog_idx * fp->prog_size;
1040 unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART?
1041 fp_flags |= NOUVEAU_BO_DUMMY;
1042 MARK_RING(chan, 2, 2);
1043 OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0);
1044 OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
1045 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
1046 NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
1047 }
1048
1049 void
1050 nvfx_fragprog_destroy(struct nvfx_context *nvfx,
1051 struct nvfx_fragment_program *fp)
1052 {
1053 unsigned i;
1054 struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
1055 if(fpbo)
1056 {
1057 do
1058 {
1059 struct nvfx_fragment_program_bo* next = fpbo->next;
1060 nouveau_bo_unmap(fpbo->bo);
1061 nouveau_bo_ref(0, &fpbo->bo);
1062 free(fpbo);
1063 fpbo = next;
1064 }
1065 while(fpbo != fp->fpbo);
1066 }
1067
1068 for(i = 0; i < 8; ++i)
1069 util_dynarray_fini(&fp->slot_relocations[i]);
1070
1071 if (fp->insn_len)
1072 FREE(fp->insn);
1073 }