nvfx: simplify and correct fragment program update logic
[mesa.git] / src/gallium/drivers/nvfx/nvfx_vertprog.c
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_linkage.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

/* TODO (at least...):
 * 1. Indexed consts + ARL
 * 3. NV_vp11, NV_vp2, NV_vp3 features
 *    - extra arith opcodes
 *    - branching
 *    - texture sampling
 *    - indexed attribs
 *    - indexed results
 * 4. bugs
 */

#include "nv30_vertprog.h"
#include "nv40_vertprog.h"

#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))

struct nvfx_loop_entry
{
    unsigned brk_target;
    unsigned cont_target;
};

struct nvfx_vpc {
    struct nvfx_context* nvfx;
    struct nvfx_vertex_program *vp;

    struct nvfx_vertex_program_exec *vpi;

    unsigned r_temps;
    unsigned r_temps_discard;
    struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
    struct nvfx_reg *r_address;
    struct nvfx_reg *r_temp;

    struct nvfx_reg *imm;
    unsigned nr_imm;

    unsigned hpos_idx;

    struct util_dynarray label_relocs;
    struct util_dynarray loop_stack;
};

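/* Allocate the lowest free hardware temporary. r_temps is a bitmask of
 * live temps; any temp also marked in r_temps_discard is released again
 * by release_temps() at the end of each TGSI instruction.
 */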
static struct nvfx_reg
temp(struct nvfx_vpc *vpc)
{
    int idx = ffs(~vpc->r_temps) - 1;

    if (idx < 0) {
        NOUVEAU_ERR("out of temps!!\n");
        assert(0);
        return nvfx_reg(NVFXSR_TEMP, 0);
    }

    vpc->r_temps |= (1 << idx);
    vpc->r_temps_discard |= (1 << idx);
    return nvfx_reg(NVFXSR_TEMP, idx);
}

static inline void
release_temps(struct nvfx_vpc *vpc)
{
    vpc->r_temps &= ~vpc->r_temps_discard;
    vpc->r_temps_discard = 0;
}

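/* Return a constant slot holding the given value. Slots that mirror an
 * entry of the user constant buffer (pipe >= 0) are reused when already
 * present; immediates (pipe == -1) always get a fresh slot.
 */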
static struct nvfx_reg
constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
{
    struct nvfx_vertex_program *vp = vpc->vp;
    struct nvfx_vertex_program_data *vpd;
    int idx;

    if (pipe >= 0) {
        for (idx = 0; idx < vp->nr_consts; idx++) {
            if (vp->consts[idx].index == pipe)
                return nvfx_reg(NVFXSR_CONST, idx);
        }
    }

    idx = vp->nr_consts++;
    vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
    vpd = &vp->consts[idx];

    vpd->index = pipe;
    vpd->value[0] = x;
    vpd->value[1] = y;
    vpd->value[2] = z;
    vpd->value[3] = w;
    return nvfx_reg(NVFXSR_CONST, idx);
}

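/* Build an nvfx_insn for an opcode in the given execution slot (VEC or
 * SCA). The slot is stashed in bit 7 of the opcode field and unpacked
 * again by nvfx_vp_emit() below.
 */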
#define arith(s,o,d,m,s0,s1,s2) \
    nvfx_insn(0, (NVFX_VP_INST_SLOT_##s << 7) | NVFX_VP_INST_##s##_OP_##o, -1, (d), (m), (s0), (s1), (s2))

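/* Pack one source operand into the instruction words. Constant sources
 * are not finalized here: a relocation is recorded in vp->const_relocs
 * and the actual slot index is patched in by nvfx_vertprog_validate()
 * once space on the constant heap has been allocated.
 */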
static void
emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
    struct nvfx_vertex_program *vp = vpc->vp;
    uint32_t sr = 0;
    struct nvfx_relocation reloc;

    switch (src.reg.type) {
    case NVFXSR_TEMP:
        sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
        sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
        break;
    case NVFXSR_INPUT:
        sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
               NVFX_VP(SRC_REG_TYPE_SHIFT));
        vp->ir |= (1 << src.reg.index);
        hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
        break;
    case NVFXSR_CONST:
        sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
               NVFX_VP(SRC_REG_TYPE_SHIFT));
        reloc.location = vp->nr_insns - 1;
        reloc.target = src.reg.index;
        util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
        break;
    case NVFXSR_NONE:
        sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
               NVFX_VP(SRC_REG_TYPE_SHIFT));
        break;
    default:
        assert(0);
    }

    if (src.negate)
        sr |= NVFX_VP(SRC_NEGATE);

    if (src.abs)
        hw[0] |= (1 << (21 + pos));

    sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |
           (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |
           (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |
           (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));

    switch (pos) {
    case 0:
        hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>
                  NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
        hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<
                  NVFX_VP(INST_SRC0L_SHIFT);
        break;
    case 1:
        hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);
        break;
    case 2:
        hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>
                  NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
        hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<
                  NVFX_VP(INST_SRC2L_SHIFT);
        break;
    default:
        assert(0);
    }
}

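/* Pack the destination operand. Writes to the pseudo clip-distance
 * outputs are lowered here: planes 0-2 land in the spare components of
 * FOGC and planes 3-5 in PSZ, with the matching bits recorded in vp->or
 * and vp->clip_ctrl.
 */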
static void
emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst)
{
    struct nvfx_vertex_program *vp = vpc->vp;

    switch (dst.type) {
    case NVFXSR_NONE:
        if(!nvfx->is_nv4x)
            hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
        else {
            hw[3] |= NV40_VP_INST_DEST_MASK;
            if (slot == 0)
                hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
            else
                hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
        }
        break;
    case NVFXSR_TEMP:
        if(!nvfx->is_nv4x)
            hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
        else {
            hw[3] |= NV40_VP_INST_DEST_MASK;
            if (slot == 0)
                hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
            else
                hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
        }
        break;
    case NVFXSR_OUTPUT:
        /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
        switch (dst.index) {
        case NVFX_VP_INST_DEST_CLIP(0):
            vp->or |= (1 << 6);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0;
            dst.index = NVFX_VP(INST_DEST_FOGC);
            break;
        case NVFX_VP_INST_DEST_CLIP(1):
            vp->or |= (1 << 7);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1;
            dst.index = NVFX_VP(INST_DEST_FOGC);
            break;
        case NVFX_VP_INST_DEST_CLIP(2):
            vp->or |= (1 << 8);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2;
            dst.index = NVFX_VP(INST_DEST_FOGC);
            break;
        case NVFX_VP_INST_DEST_CLIP(3):
            vp->or |= (1 << 9);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3;
            dst.index = NVFX_VP(INST_DEST_PSZ);
            break;
        case NVFX_VP_INST_DEST_CLIP(4):
            vp->or |= (1 << 10);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4;
            dst.index = NVFX_VP(INST_DEST_PSZ);
            break;
        case NVFX_VP_INST_DEST_CLIP(5):
            vp->or |= (1 << 11);
            vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5;
            dst.index = NVFX_VP(INST_DEST_PSZ);
            break;
        default:
            if(nvfx->is_nv4x) {
                /* we don't need vp->or on nv3x
                 * texcoords are handled by fragment program
                 */
                switch (dst.index) {
                case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break;
                case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break;
                case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break;
                case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break;
                case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
                case NV40_VP_INST_DEST_PSZ:  vp->or |= (1 << 5); break;
                }
            }
            break;
        }

        if(!nvfx->is_nv4x) {
            hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
            hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;

            /*XXX: no way this is entirely correct, someone needs to
             *     figure out what exactly it is.
             */
            hw[3] |= 0x800;
        } else {
            hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
            if (slot == 0) {
                hw[0] |= NV40_VP_INST_VEC_RESULT;
                hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
            } else {
                hw[3] |= NV40_VP_INST_SCA_RESULT;
                hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
            }
        }
        break;
    default:
        assert(0);
    }
}

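/* Append one instruction to the program. Each instruction is four 32-bit
 * words encoding both a vector (VEC) and a scalar (SCA) slot; only the
 * slot selected by insn.op is filled in here, and the other slot's
 * writes are masked off.
 */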
static void
nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
    struct nvfx_context* nvfx = vpc->nvfx;
    struct nvfx_vertex_program *vp = vpc->vp;
    unsigned slot = insn.op >> 7;
    unsigned op = insn.op & 0x7f;
    uint32_t *hw;

    vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
    vpc->vpi = &vp->insns[vp->nr_insns - 1];
    memset(vpc->vpi, 0, sizeof(*vpc->vpi));

    hw = vpc->vpi->data;

    hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
    hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
              (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
              (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
              (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
    if(insn.cc_update)
        hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);

    if(!nvfx->is_nv4x) {
        if(slot == 0)
            hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
        else
        {
            hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
            hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
        }
//      hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
//      hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));

        if (insn.dst.type == NVFXSR_OUTPUT) {
            if (slot)
                hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
            else
                hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
        } else {
            if (slot)
                hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
            else
                hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
        }
    } else {
        if (slot == 0) {
            hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
            hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
            hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
        } else {
            hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
            hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
            hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
        }
    }

    emit_dst(nvfx, vpc, hw, slot, insn.dst);
    emit_src(nvfx, vpc, hw, 0, insn.src[0]);
    emit_src(nvfx, vpc, hw, 1, insn.src[1]);
    emit_src(nvfx, vpc, hw, 2, insn.src[2]);
}

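/* Translate a TGSI source operand into an nvfx_src, carrying the
 * swizzle, negate and absolute-value modifiers along.
 */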
static inline struct nvfx_src
tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
    struct nvfx_src src;

    switch (fsrc->Register.File) {
    case TGSI_FILE_INPUT:
        src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);
        break;
    case TGSI_FILE_CONSTANT:
        src.reg = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
        break;
    case TGSI_FILE_IMMEDIATE:
        src.reg = vpc->imm[fsrc->Register.Index];
        break;
    case TGSI_FILE_TEMPORARY:
        src.reg = vpc->r_temp[fsrc->Register.Index];
        break;
    default:
        NOUVEAU_ERR("bad src file\n");
        src.reg.index = 0;
        src.reg.type = 0;
        break;
    }

    src.abs = fsrc->Register.Absolute;
    src.negate = fsrc->Register.Negate;
    src.swz[0] = fsrc->Register.SwizzleX;
    src.swz[1] = fsrc->Register.SwizzleY;
    src.swz[2] = fsrc->Register.SwizzleZ;
    src.swz[3] = fsrc->Register.SwizzleW;
    return src;
}

static inline struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
    struct nvfx_reg dst;

    switch (fdst->Register.File) {
    case TGSI_FILE_NULL:
        dst = nvfx_reg(NVFXSR_NONE, 0);
        break;
    case TGSI_FILE_OUTPUT:
        dst = vpc->r_result[fdst->Register.Index];
        break;
    case TGSI_FILE_TEMPORARY:
        dst = vpc->r_temp[fdst->Register.Index];
        break;
    case TGSI_FILE_ADDRESS:
        dst = vpc->r_address[fdst->Register.Index];
        break;
    default:
        NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);
        dst.index = 0;
        dst.type = 0;
        break;
    }

    return dst;
}

static inline int
tgsi_mask(uint tgsi)
{
    int mask = 0;

    if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;
    if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;
    if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;
    if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;
    return mask;
}

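/* Translate a single TGSI instruction. Only one distinct input register
 * and one distinct constant or immediate can be read directly per
 * hardware instruction, so additional operands from those files are
 * first copied into temporaries.
 */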
static boolean
nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
                                unsigned idx, const struct tgsi_full_instruction *finst)
{
    struct nvfx_src src[3], tmp;
    struct nvfx_reg dst;
    struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
    struct nvfx_insn insn;
    struct nvfx_relocation reloc;
    struct nvfx_loop_entry loop;
    int mask;
    int ai = -1, ci = -1, ii = -1;
    int i;

    if (finst->Instruction.Opcode == TGSI_OPCODE_END)
        return TRUE;

    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
        const struct tgsi_full_src_register *fsrc;

        fsrc = &finst->Src[i];
        if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
            src[i] = tgsi_src(vpc, fsrc);
        }
    }

    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
        const struct tgsi_full_src_register *fsrc;

        fsrc = &finst->Src[i];

        switch (fsrc->Register.File) {
        case TGSI_FILE_INPUT:
            if (ai == -1 || ai == fsrc->Register.Index) {
                ai = fsrc->Register.Index;
                src[i] = tgsi_src(vpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(vpc));
                nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_CONSTANT:
            if ((ci == -1 && ii == -1) ||
                ci == fsrc->Register.Index) {
                ci = fsrc->Register.Index;
                src[i] = tgsi_src(vpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(vpc));
                nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_IMMEDIATE:
            if ((ci == -1 && ii == -1) ||
                ii == fsrc->Register.Index) {
                ii = fsrc->Register.Index;
                src[i] = tgsi_src(vpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(vpc));
                nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_TEMPORARY:
            /* handled above */
            break;
        default:
            NOUVEAU_ERR("bad src file\n");
            return FALSE;
        }
    }

    dst = tgsi_dst(vpc, &finst->Dst[0]);
    mask = tgsi_mask(finst->Dst[0].Register.WriteMask);

    switch (finst->Instruction.Opcode) {
    case TGSI_OPCODE_ABS:
        nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, abs(src[0]), none, none));
        break;
    case TGSI_OPCODE_ADD:
        nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, src[1]));
        break;
    case TGSI_OPCODE_ARL:
        nvfx_vp_emit(vpc, arith(VEC, ARL, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_CMP:
        insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
        nvfx_vp_emit(vpc, insn);

        insn = arith(VEC, MOV, dst, mask, src[2], none, none);
        insn.cc_test = NVFX_COND_GE;
        nvfx_vp_emit(vpc, insn);

        insn = arith(VEC, MOV, dst, mask, src[1], none, none);
        insn.cc_test = NVFX_COND_LT;
        nvfx_vp_emit(vpc, insn);
        break;
    case TGSI_OPCODE_COS:
        nvfx_vp_emit(vpc, arith(SCA, COS, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_DP2:
        tmp = nvfx_src(temp(vpc));
        nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));
        nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
        break;
    case TGSI_OPCODE_DP3:
        nvfx_vp_emit(vpc, arith(VEC, DP3, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_DP4:
        nvfx_vp_emit(vpc, arith(VEC, DP4, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_DPH:
        nvfx_vp_emit(vpc, arith(VEC, DPH, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_DST:
        nvfx_vp_emit(vpc, arith(VEC, DST, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_EX2:
        nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_EXP:
        nvfx_vp_emit(vpc, arith(SCA, EXP, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_FLR:
        nvfx_vp_emit(vpc, arith(VEC, FLR, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_FRC:
        nvfx_vp_emit(vpc, arith(VEC, FRC, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_LG2:
        nvfx_vp_emit(vpc, arith(SCA, LG2, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_LIT:
        nvfx_vp_emit(vpc, arith(SCA, LIT, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_LOG:
        nvfx_vp_emit(vpc, arith(SCA, LOG, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_LRP:
        tmp = nvfx_src(temp(vpc));
        nvfx_vp_emit(vpc, arith(VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
        nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], tmp));
        break;
    case TGSI_OPCODE_MAD:
        nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], src[2]));
        break;
    case TGSI_OPCODE_MAX:
        nvfx_vp_emit(vpc, arith(VEC, MAX, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_MIN:
        nvfx_vp_emit(vpc, arith(VEC, MIN, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_MOV:
        nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_MUL:
        nvfx_vp_emit(vpc, arith(VEC, MUL, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_NOP:
        break;
    case TGSI_OPCODE_POW:
        tmp = nvfx_src(temp(vpc));
        nvfx_vp_emit(vpc, arith(SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));
        nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
        nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));
        break;
    case TGSI_OPCODE_RCP:
        nvfx_vp_emit(vpc, arith(SCA, RCP, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_RSQ:
        nvfx_vp_emit(vpc, arith(SCA, RSQ, dst, mask, none, none, abs(src[0])));
        break;
    case TGSI_OPCODE_SEQ:
        nvfx_vp_emit(vpc, arith(VEC, SEQ, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SFL:
        nvfx_vp_emit(vpc, arith(VEC, SFL, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SGE:
        nvfx_vp_emit(vpc, arith(VEC, SGE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SGT:
        nvfx_vp_emit(vpc, arith(VEC, SGT, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SIN:
        nvfx_vp_emit(vpc, arith(SCA, SIN, dst, mask, none, none, src[0]));
        break;
    case TGSI_OPCODE_SLE:
        nvfx_vp_emit(vpc, arith(VEC, SLE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SLT:
        nvfx_vp_emit(vpc, arith(VEC, SLT, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SNE:
        nvfx_vp_emit(vpc, arith(VEC, SNE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SSG:
        nvfx_vp_emit(vpc, arith(VEC, SSG, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_STR:
        nvfx_vp_emit(vpc, arith(VEC, STR, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SUB:
        nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, neg(src[1])));
        break;
    case TGSI_OPCODE_TRUNC:
        tmp = nvfx_src(temp(vpc));
        insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
        nvfx_vp_emit(vpc, insn);

        nvfx_vp_emit(vpc, arith(VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
        nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, tmp, none, none));

        insn = arith(VEC, MOV, dst, mask, neg(tmp), none, none);
        insn.cc_test = NVFX_COND_LT;
        nvfx_vp_emit(vpc, insn);
        break;
    case TGSI_OPCODE_XPD:
        tmp = nvfx_src(temp(vpc));
        nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
        nvfx_vp_emit(vpc, arith(VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
        break;

    case TGSI_OPCODE_IF:
        insn = arith(VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
        insn.cc_update = 1;
        nvfx_vp_emit(vpc, insn);

        reloc.location = vpc->vp->nr_insns;
        reloc.target = finst->Label.Label + 1;
        util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

        insn = arith(SCA, BRA, none.reg, 0, none, none, none);
        insn.cc_test = NVFX_COND_EQ;
        insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;
        nvfx_vp_emit(vpc, insn);
        break;

    case TGSI_OPCODE_ELSE:
    case TGSI_OPCODE_BRA:
    case TGSI_OPCODE_CAL:
        reloc.location = vpc->vp->nr_insns;
        reloc.target = finst->Label.Label;
        util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

        if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)
            insn = arith(SCA, CAL, none.reg, 0, none, none, none);
        else
            insn = arith(SCA, BRA, none.reg, 0, none, none, none);
        nvfx_vp_emit(vpc, insn);
        break;

    case TGSI_OPCODE_RET:
        tmp = none;
        tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
        nvfx_vp_emit(vpc, arith(SCA, RET, none.reg, 0, none, none, tmp));
        break;

    case TGSI_OPCODE_BGNSUB:
    case TGSI_OPCODE_ENDSUB:
    case TGSI_OPCODE_ENDIF:
        /* nothing to do here */
        break;

    case TGSI_OPCODE_BGNLOOP:
        loop.cont_target = idx;
        loop.brk_target = finst->Label.Label + 1;
        util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);
        break;

    case TGSI_OPCODE_ENDLOOP:
        loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);

        reloc.location = vpc->vp->nr_insns;
        reloc.target = loop.cont_target;
        util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

        nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
        break;

    case TGSI_OPCODE_CONT:
        loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

        reloc.location = vpc->vp->nr_insns;
        reloc.target = loop.cont_target;
        util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

        nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
        break;

    case TGSI_OPCODE_BRK:
        loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

        reloc.location = vpc->vp->nr_insns;
        reloc.target = loop.brk_target;
        util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

        nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
        break;

    default:
        NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
        return FALSE;
    }

    release_temps(vpc);
    return TRUE;
}

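/* Map a TGSI output declaration onto a hardware result register.
 * Generic varyings go to the texcoord slots negotiated with the
 * fragment program in nvfx_vertprog_prepare() below.
 */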
static boolean
nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
                                const struct tgsi_full_declaration *fdec)
{
    unsigned idx = fdec->Range.First;
    int hw;

    switch (fdec->Semantic.Name) {
    case TGSI_SEMANTIC_POSITION:
        hw = NVFX_VP(INST_DEST_POS);
        vpc->hpos_idx = idx;
        break;
    case TGSI_SEMANTIC_COLOR:
        if (fdec->Semantic.Index == 0) {
            hw = NVFX_VP(INST_DEST_COL0);
        } else
        if (fdec->Semantic.Index == 1) {
            hw = NVFX_VP(INST_DEST_COL1);
        } else {
            NOUVEAU_ERR("bad colour semantic index\n");
            return FALSE;
        }
        break;
    case TGSI_SEMANTIC_BCOLOR:
        if (fdec->Semantic.Index == 0) {
            hw = NVFX_VP(INST_DEST_BFC0);
        } else
        if (fdec->Semantic.Index == 1) {
            hw = NVFX_VP(INST_DEST_BFC1);
        } else {
            NOUVEAU_ERR("bad bcolour semantic index\n");
            return FALSE;
        }
        break;
    case TGSI_SEMANTIC_FOG:
        hw = NVFX_VP(INST_DEST_FOGC);
        break;
    case TGSI_SEMANTIC_PSIZE:
        hw = NVFX_VP(INST_DEST_PSZ);
        break;
    case TGSI_SEMANTIC_GENERIC:
        hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf)
             + NVFX_VP(INST_DEST_TC(0)) - NVFX_FP_OP_INPUT_SRC_TC(0);
        break;
    case TGSI_SEMANTIC_EDGEFLAG:
        /* not really an error, just a fallback */
        NOUVEAU_ERR("cannot handle edgeflag output\n");
        return FALSE;
    default:
        NOUVEAU_ERR("bad output semantic\n");
        return FALSE;
    }

    vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
    return TRUE;
}

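/* Pre-pass over the token stream: count immediates, find the highest
 * temporary and address register indices, assign hardware slots to
 * declared outputs, and decide which texcoords carry the generic
 * varyings (plus a spare one for point sprites).
 */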
static boolean
nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
{
    struct tgsi_parse_context p;
    int high_temp = -1, high_addr = -1, nr_imm = 0, i;
    struct util_semantic_set set;
    unsigned char sem_layout[8];
    unsigned num_outputs;

    num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT);

    if(num_outputs > 8) {
        NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs);
        return FALSE;
    }
    util_semantic_layout_from_set(sem_layout, &set, 8, 8);

    /* hope 0xf is (0, 0, 0, 1) initialized; otherwise, we are _probably_ not required to do this */
    memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input));
    for(int i = 0; i < 8; ++i) {
        if(sem_layout[i] == 0xff)
            continue;
        //printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i);
        vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | NVFX_FP_OP_INPUT_SRC_TC(i);
    }

    vpc->vp->sprite_fp_input = -1;
    for(int i = 0; i < 8; ++i)
    {
        if(sem_layout[i] == 0xff)
        {
            vpc->vp->sprite_fp_input = NVFX_FP_OP_INPUT_SRC_TC(i);
            break;
        }
    }

    tgsi_parse_init(&p, vpc->vp->pipe.tokens);
    while (!tgsi_parse_end_of_tokens(&p)) {
        const union tgsi_full_token *tok = &p.FullToken;

        tgsi_parse_token(&p);
        switch(tok->Token.Type) {
        case TGSI_TOKEN_TYPE_IMMEDIATE:
            nr_imm++;
            break;
        case TGSI_TOKEN_TYPE_DECLARATION:
        {
            const struct tgsi_full_declaration *fdec;

            fdec = &p.FullToken.FullDeclaration;
            switch (fdec->Declaration.File) {
            case TGSI_FILE_TEMPORARY:
                if (fdec->Range.Last > high_temp) {
                    high_temp =
                        fdec->Range.Last;
                }
                break;
#if 0 /* this would be nice.. except gallium doesn't track it */
            case TGSI_FILE_ADDRESS:
                if (fdec->Range.Last > high_addr) {
                    high_addr =
                        fdec->Range.Last;
                }
                break;
#endif
            case TGSI_FILE_OUTPUT:
                if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec))
                    return FALSE;
                break;
            default:
                break;
            }
        }
        break;
#if 1 /* yay, parse instructions looking for address regs instead */
        case TGSI_TOKEN_TYPE_INSTRUCTION:
        {
            const struct tgsi_full_instruction *finst;
            const struct tgsi_full_dst_register *fdst;

            finst = &p.FullToken.FullInstruction;
            fdst = &finst->Dst[0];

            if (fdst->Register.File == TGSI_FILE_ADDRESS) {
                if (fdst->Register.Index > high_addr)
                    high_addr = fdst->Register.Index;
            }

        }
        break;
#endif
        default:
            break;
        }
    }
    tgsi_parse_free(&p);

    if (nr_imm) {
        vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));
        assert(vpc->imm);
    }

    if (++high_temp) {
        vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
        for (i = 0; i < high_temp; i++)
            vpc->r_temp[i] = temp(vpc);
    }

    if (++high_addr) {
        vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));
        for (i = 0; i < high_addr; i++)
            vpc->r_address[i] = temp(vpc);
    }

    vpc->r_temps_discard = 0;
    return TRUE;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)

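/* Translate the whole TGSI program into hardware instructions, resolve
 * label relocations to instruction indices, write back a redirected
 * HPOS, append the user clip plane DP4s, and terminate the program.
 */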
static void
nvfx_vertprog_translate(struct nvfx_context *nvfx,
                        struct nvfx_vertex_program *vp)
{
    struct tgsi_parse_context parse;
    struct nvfx_vpc *vpc = NULL;
    struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
    struct util_dynarray insns;
    int i;

    vpc = CALLOC(1, sizeof(struct nvfx_vpc));
    if (!vpc)
        return;
    vpc->nvfx = nvfx;
    vpc->vp = vp;

    if (!nvfx_vertprog_prepare(nvfx, vpc)) {
        FREE(vpc);
        return;
    }

    /* Redirect post-transform vertex position to a temp if user clip
     * planes are enabled.  We need to append code to the vtxprog
     * to handle clip planes later.
     */
    if (vp->ucp.nr) {
        vpc->r_result[vpc->hpos_idx] = temp(vpc);
        vpc->r_temps_discard = 0;
    }

    tgsi_parse_init(&parse, vp->pipe.tokens);

    util_dynarray_init(&insns);
    while (!tgsi_parse_end_of_tokens(&parse)) {
        tgsi_parse_token(&parse);

        switch (parse.FullToken.Token.Type) {
        case TGSI_TOKEN_TYPE_IMMEDIATE:
        {
            const struct tgsi_full_immediate *imm;

            imm = &parse.FullToken.FullImmediate;
            assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
            assert(imm->Immediate.NrTokens == 4 + 1);
            vpc->imm[vpc->nr_imm++] =
                constant(vpc, -1,
                         imm->u[0].Float,
                         imm->u[1].Float,
                         imm->u[2].Float,
                         imm->u[3].Float);
        }
        break;
        case TGSI_TOKEN_TYPE_INSTRUCTION:
        {
            const struct tgsi_full_instruction *finst;
            unsigned idx = insns.size >> 2;
            util_dynarray_append(&insns, unsigned, vp->nr_insns);
            finst = &parse.FullToken.FullInstruction;
            if (!nvfx_vertprog_parse_instruction(nvfx, vpc, idx, finst))
                goto out_err;
        }
        break;
        default:
            break;
        }
    }

    util_dynarray_append(&insns, unsigned, vp->nr_insns);

    for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
    {
        struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);
        struct nvfx_relocation hw_reloc;

        hw_reloc.location = label_reloc->location;
        hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];

        //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);

        util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);
    }
    util_dynarray_fini(&insns);
    util_dynarray_trim(&vp->branch_relocs);

    /* XXX: what if we add a RET before?! make sure we jump here...*/

    /* Write out HPOS if it was redirected to a temp earlier */
    if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
        struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,
                                        NVFX_VP(INST_DEST_POS));
        struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);

        nvfx_vp_emit(vpc, arith(VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));
    }

    /* Insert code to handle user clip planes */
    for (i = 0; i < vp->ucp.nr; i++) {
        struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT,
                                        NVFX_VP_INST_DEST_CLIP(i));
        struct nvfx_src ceqn = nvfx_src(constant(vpc, -1,
                                                 nvfx->clip.ucp[i][0],
                                                 nvfx->clip.ucp[i][1],
                                                 nvfx->clip.ucp[i][2],
                                                 nvfx->clip.ucp[i][3]));
        struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
        unsigned mask;

        switch (i) {
        case 0: case 3: mask = NVFX_VP_MASK_Y; break;
        case 1: case 4: mask = NVFX_VP_MASK_Z; break;
        case 2: case 5: mask = NVFX_VP_MASK_W; break;
        default:
            NOUVEAU_ERR("invalid clip dist #%d\n", i);
            goto out_err;
        }

        nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
    }

    //vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;

    /* Append NOP + END instruction for branches to the end of the program */
    nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
    vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST | 0x1000;

    if(debug_get_option_nvfx_dump_vp())
    {
        debug_printf("\n");
        tgsi_dump(vp->pipe.tokens, 0);

        debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
        for (i = 0; i < vp->nr_insns; i++)
            debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
        debug_printf("\n");
    }

    vp->exec_start = -1;
    vp->translated = TRUE;
out_err:
    tgsi_parse_free(&parse);
    util_dynarray_fini(&vpc->label_relocs);
    util_dynarray_fini(&vpc->loop_stack);
    if (vpc->r_temp)
        FREE(vpc->r_temp);
    if (vpc->r_address)
        FREE(vpc->r_address);
    if (vpc->imm)
        FREE(vpc->imm);
    FREE(vpc);
}

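/* Validate-time entry point: translate the shader if needed, (re)allocate
 * space on the exec and const heaps (evicting other programs when full),
 * patch branch targets and constant slots if either segment moved, and
 * upload changed constants and code.
 */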
boolean
nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
    struct nvfx_screen *screen = nvfx->screen;
    struct nouveau_channel *chan = screen->base.channel;
    struct nouveau_grobj *eng3d = screen->eng3d;
    struct nvfx_vertex_program *vp;
    struct pipe_resource *constbuf;
    boolean upload_code = FALSE, upload_data = FALSE;
    int i;

    if (nvfx->render_mode == HW) {
        vp = nvfx->vertprog;
        constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];

        // TODO: ouch! can't we just use constant slots for these?!
        if ((nvfx->dirty & NVFX_NEW_UCP) ||
            memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
            nvfx_vertprog_destroy(nvfx, vp);
            memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp));
        }
    } else {
        vp = nvfx->swtnl.vertprog;
        constbuf = NULL;
    }

    /* Translate TGSI shader into hw bytecode */
    if (!vp->translated)
    {
        nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
        nvfx_vertprog_translate(nvfx, vp);
        if (!vp->translated) {
            nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
            return FALSE;
        }
    }

    /* Allocate hw vtxprog exec slots */
    if (!vp->exec) {
        struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
        uint vplen = vp->nr_insns;

        if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
            while (heap->next && heap->size < vplen) {
                struct nvfx_vertex_program *evict;

                evict = heap->next->priv;
                nouveau_resource_free(&evict->exec);
            }

            if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
            {
                debug_printf("Vertex shader too long: %u instructions\n", vplen);
                nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
                return FALSE;
            }
        }

        upload_code = TRUE;
    }

    /* Allocate hw vtxprog const slots */
    if (vp->nr_consts && !vp->data) {
        struct nouveau_resource *heap = nvfx->screen->vp_data_heap;

        if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
            while (heap->next && heap->size < vp->nr_consts) {
                struct nvfx_vertex_program *evict;

                evict = heap->next->priv;
                nouveau_resource_free(&evict->data);
            }

            if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
            {
                debug_printf("Vertex shader uses too many constants: %u constants\n", vp->nr_consts);
                nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
                return FALSE;
            }
        }

        /*XXX: handle this some day */
        assert(vp->data->start >= vp->data_start_min);

        upload_data = TRUE;
        if (vp->data_start != vp->data->start)
            upload_code = TRUE;
    }

    /* If exec or data segments moved we need to patch the program to
     * fixup offsets and register IDs.
     */
    if (vp->exec_start != vp->exec->start) {
        //printf("vp_relocs %u -> %u\n", vp->exec_start, vp->exec->start);
        for(unsigned i = 0; i < vp->branch_relocs.size; i += sizeof(struct nvfx_relocation))
        {
            struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->branch_relocs.data + i);
            uint32_t* hw = vp->insns[reloc->location].data;
            unsigned target = vp->exec->start + reloc->target;

            //debug_printf("vp_reloc hw %u -> hw %u\n", reloc->location, target);

            if(!nvfx->is_nv4x)
            {
                hw[2] &= ~NV30_VP_INST_IADDR_MASK;
                hw[2] |= (target & 0x1ff) << NV30_VP_INST_IADDR_SHIFT;
            }
            else
            {
                hw[3] &= ~NV40_VP_INST_IADDRL_MASK;
                hw[3] |= (target & 7) << NV40_VP_INST_IADDRL_SHIFT;

                hw[2] &= ~NV40_VP_INST_IADDRH_MASK;
                hw[2] |= ((target >> 3) & 0x3f) << NV40_VP_INST_IADDRH_SHIFT;
            }
        }

        vp->exec_start = vp->exec->start;
    }

    if (vp->nr_consts && vp->data_start != vp->data->start) {
        for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
        {
            struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
            struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location];

            vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
            vpi->data[1] |=
                (reloc->target + vp->data->start) <<
                NVFX_VP(INST_CONST_SRC_SHIFT);
        }

        vp->data_start = vp->data->start;
    }

    /* Update + Upload constant values */
    if (vp->nr_consts) {
        float *map = NULL;

        if (constbuf)
            map = (float*)nvfx_buffer(constbuf)->data;

        for (i = 0; i < vp->nr_consts; i++) {
            struct nvfx_vertex_program_data *vpd = &vp->consts[i];

            if (vpd->index >= 0) {
                if (!upload_data &&
                    !memcmp(vpd->value, &map[vpd->index * 4],
                            4 * sizeof(float)))
                    continue;
                memcpy(vpd->value, &map[vpd->index * 4],
                       4 * sizeof(float));
            }

            BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5);
            OUT_RING  (chan, i + vp->data->start);
            OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
        }
    }

    /* Upload vtxprog */
    if (upload_code) {
        BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1);
        OUT_RING  (chan, vp->exec->start);
        for (i = 0; i < vp->nr_insns; i++) {
            BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
            OUT_RINGp (chan, vp->insns[i].data, 4);
        }
    }

    if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
    {
        WAIT_RING(chan, 6);
        OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
        OUT_RING(chan, vp->exec->start);
        if(nvfx->is_nv4x) {
            OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1));
            OUT_RING(chan, vp->ir);
        }
        OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
        OUT_RING(chan, vp->clip_ctrl);
    }

    return TRUE;
}

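/* Free everything a translation produced and reset the program so the
 * next validate retranslates it from the TGSI tokens.
 */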
void
nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
{
    vp->translated = FALSE;

    if (vp->nr_insns) {
        FREE(vp->insns);
        vp->insns = NULL;
        vp->nr_insns = 0;
    }

    if (vp->nr_consts) {
        FREE(vp->consts);
        vp->consts = NULL;
        vp->nr_consts = 0;
    }

    nouveau_resource_free(&vp->exec);
    vp->exec_start = 0;
    nouveau_resource_free(&vp->data);
    vp->data_start = 0;
    vp->data_start_min = 0;

    vp->ir = vp->or = vp->clip_ctrl = 0;
    util_dynarray_fini(&vp->branch_relocs);
    util_dynarray_fini(&vp->const_relocs);
}