nvfx: support indirect addressing in vps
[mesa.git] src/gallium/drivers/nvfx/nvfx_vertprog.c
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_linkage.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"

#include "draw/draw_context.h"

#include "nvfx_context.h"
#include "nvfx_state.h"
#include "nvfx_resource.h"

/* TODO (at least...):
 * 1. Indexed consts + ARL
 * 2. NV_vp11, NV_vp2, NV_vp3 features
 *    - extra arith opcodes
 *    - branching
 *    - texture sampling
 *    - indexed attribs
 *    - indexed results
 * 3. bugs
 */

#include "nv30_vertprog.h"
#include "nv40_vertprog.h"

struct nvfx_loop_entry
{
	unsigned brk_target;
	unsigned cont_target;
};

struct nvfx_vpc {
	struct nvfx_context* nvfx;
	struct nvfx_vertex_program *vp;

	struct nvfx_vertex_program_exec *vpi;

	unsigned r_temps;
	unsigned r_temps_discard;
	struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
	struct nvfx_reg *r_address;
	struct nvfx_reg *r_temp;
	struct nvfx_reg *r_const;

	struct nvfx_reg *imm;
	unsigned nr_imm;

	unsigned hpos_idx;

	struct util_dynarray label_relocs;
	struct util_dynarray loop_stack;
};

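/* Temporaries are allocated out of the r_temps bitmask: temp() grabs the
 * lowest free bit, and anything allocated for the current TGSI instruction
 * is also marked in r_temps_discard so release_temps() can hand it back
 * once the instruction has been emitted.
 */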
static struct nvfx_reg
temp(struct nvfx_vpc *vpc)
{
	int idx = ffs(~vpc->r_temps) - 1;

	if (idx < 0) {
		NOUVEAU_ERR("out of temps!!\n");
		assert(0);
		return nvfx_reg(NVFXSR_TEMP, 0);
	}

	vpc->r_temps |= (1 << idx);
	vpc->r_temps_discard |= (1 << idx);
	return nvfx_reg(NVFXSR_TEMP, idx);
}

static inline void
release_temps(struct nvfx_vpc *vpc)
{
	vpc->r_temps &= ~vpc->r_temps_discard;
	vpc->r_temps_discard = 0;
}

static struct nvfx_reg
constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
{
	struct nvfx_vertex_program *vp = vpc->vp;
	struct nvfx_vertex_program_data *vpd;
	int idx;

	if (pipe >= 0) {
		for (idx = 0; idx < vp->nr_consts; idx++) {
			if (vp->consts[idx].index == pipe)
				return nvfx_reg(NVFXSR_CONST, idx);
		}
	}

	idx = vp->nr_consts++;
	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
	vpd = &vp->consts[idx];

	vpd->index = pipe;
	vpd->value[0] = x;
	vpd->value[1] = y;
	vpd->value[2] = z;
	vpd->value[3] = w;
	return nvfx_reg(NVFXSR_CONST, idx);
}

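/* arith() packs the execution slot and the opcode into insn.op; for example
 *
 *   arith(VEC, MOV, dst, NVFX_VP_MASK_ALL, src, none, none)
 *
 * expands to
 *
 *   nvfx_insn(0, (NVFX_VP_INST_SLOT_VEC << 7) | NVFX_VP_INST_VEC_OP_MOV,
 *             -1, dst, NVFX_VP_MASK_ALL, src, none, none)
 *
 * nvfx_vp_emit() later splits insn.op back into the slot (bits 7 and up)
 * and the opcode (low 7 bits).
 */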
#define arith(s,o,d,m,s0,s1,s2) \
	nvfx_insn(0, (NVFX_VP_INST_SLOT_##s << 7) | NVFX_VP_INST_##s##_OP_##o, -1, (d), (m), (s0), (s1), (s2))

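/* emit_src() builds the source descriptor in "sr" and then scatters it over
 * the instruction words: sources 0 and 2 straddle two words (high/low
 * halves), while source 1 fits entirely within hw[2]. Constant sources are
 * not encoded directly; a relocation is recorded instead, so the final
 * constant slot can be patched in once the program's data segment has been
 * placed.
 */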
static void
emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
	struct nvfx_vertex_program *vp = vpc->vp;
	uint32_t sr = 0;
	struct nvfx_relocation reloc;

	switch (src.reg.type) {
	case NVFXSR_TEMP:
		sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
		sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
		break;
	case NVFXSR_INPUT:
		sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		vp->ir |= (1 << src.reg.index);
		hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
		break;
	case NVFXSR_CONST:
		sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		reloc.location = vp->nr_insns - 1;
		reloc.target = src.reg.index;
		util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
		break;
	case NVFXSR_NONE:
		sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		break;
	default:
		assert(0);
	}

	if (src.negate)
		sr |= NVFX_VP(SRC_NEGATE);

	if (src.abs)
		hw[0] |= (1 << (21 + pos));

	sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |
	       (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |
	       (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |
	       (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));

	if(src.indirect) {
		if(src.reg.type == NVFXSR_CONST)
			hw[3] |= NVFX_VP(INST_INDEX_CONST);
		else if(src.reg.type == NVFXSR_INPUT)
			hw[0] |= NVFX_VP(INST_INDEX_INPUT);
		else
			assert(0);
		if(src.indirect_reg)
			hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);
		hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);
	}

	switch (pos) {
	case 0:
		hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>
			  NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
		hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<
			 NVFX_VP(INST_SRC0L_SHIFT);
		break;
	case 1:
		hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);
		break;
	case 2:
		hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>
			  NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
		hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<
			 NVFX_VP(INST_SRC2L_SHIFT);
		break;
	default:
		assert(0);
	}
}

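/* emit_dst() encodes the destination register. On nv4x the vector and
 * scalar slots have separate temp-destination fields, and writes to
 * hardware result registers are additionally tracked in vp->or.
 */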
static void
emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst)
{
	struct nvfx_vertex_program *vp = vpc->vp;

	switch (dst.type) {
	case NVFXSR_NONE:
		if(!nvfx->is_nv4x)
			hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			else
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		}
		break;
	case NVFXSR_TEMP:
		if(!nvfx->is_nv4x)
			hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
			else
				hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
		}
		break;
	case NVFXSR_OUTPUT:
		/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
		if(nvfx->is_nv4x) {
			switch (dst.index) {
			case NV30_VP_INST_DEST_CLP(0):
			case NV30_VP_INST_DEST_CLP(1):
			case NV30_VP_INST_DEST_CLP(2):
				dst.index = NVFX_VP(INST_DEST_FOGC);
				break;
			case NV30_VP_INST_DEST_CLP(3):
			case NV30_VP_INST_DEST_CLP(4):
			case NV30_VP_INST_DEST_CLP(5):
				dst.index = NVFX_VP(INST_DEST_PSZ);
				break;
			case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break;
			case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break;
			case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break;
			case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break;
			case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
			case NV40_VP_INST_DEST_PSZ:  vp->or |= (1 << 5); break;
			}
		}

		if(!nvfx->is_nv4x) {
			hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
			hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;

			/* XXX: no way this is entirely correct, someone needs to
			 * figure out what exactly it is.
			 */
			hw[3] |= 0x800;
		} else {
			hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
			if (slot == 0) {
				hw[0] |= NV40_VP_INST_VEC_RESULT;
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			} else {
				hw[3] |= NV40_VP_INST_SCA_RESULT;
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
			}
		}
		break;
	default:
		assert(0);
	}
}

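/* nvfx_vp_emit() appends one 4-dword instruction to the program. The slot
 * (vector or scalar unit) selects which opcode and writemask fields are
 * used; their layout differs between nv30 and nv4x. Condition-code state
 * (test, swizzle, update) is encoded the same way on both.
 */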
static void
nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
	struct nvfx_context* nvfx = vpc->nvfx;
	struct nvfx_vertex_program *vp = vpc->vp;
	unsigned slot = insn.op >> 7;
	unsigned op = insn.op & 0x7f;
	uint32_t *hw;

	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
	vpc->vpi = &vp->insns[vp->nr_insns - 1];
	memset(vpc->vpi, 0, sizeof(*vpc->vpi));

	hw = vpc->vpi->data;

	hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
	hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
		  (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
		  (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
		  (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
	if(insn.cc_update)
		hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);

	if(!nvfx->is_nv4x) {
		if(slot == 0)
			hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
		else
		{
			hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
			hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
		}
//		hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
//		hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));

		if (insn.dst.type == NVFXSR_OUTPUT) {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
		} else {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
		}
	} else {
		if (slot == 0) {
			hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
			hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
		} else {
			hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
		}
	}

	emit_dst(nvfx, vpc, hw, slot, insn.dst);
	emit_src(nvfx, vpc, hw, 0, insn.src[0]);
	emit_src(nvfx, vpc, hw, 1, insn.src[1]);
	emit_src(nvfx, vpc, hw, 2, insn.src[2]);

//	if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL)
//		hw[3] |= NV40_VP_INST_SCA_RESULT;
}

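/* Indirect addressing (e.g. CONST[ADDR[0].x + n]) is only supported for the
 * constant and input files: tgsi_src() records the address register index
 * and swizzle component so emit_src() can fold them into the instruction
 * words. Any other indirect access is rejected by setting reg.type to -1,
 * which nvfx_vertprog_parse_instruction() checks for.
 */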
static inline struct nvfx_src
tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
	struct nvfx_src src;

	switch (fsrc->Register.File) {
	case TGSI_FILE_INPUT:
		src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);
		break;
	case TGSI_FILE_CONSTANT:
		src.reg = vpc->r_const[fsrc->Register.Index];
		break;
	case TGSI_FILE_IMMEDIATE:
		src.reg = vpc->imm[fsrc->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		src.reg = vpc->r_temp[fsrc->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad src file\n");
		src.reg.index = 0;
		src.reg.type = -1;
		break;
	}

	src.abs = fsrc->Register.Absolute;
	src.negate = fsrc->Register.Negate;
	src.swz[0] = fsrc->Register.SwizzleX;
	src.swz[1] = fsrc->Register.SwizzleY;
	src.swz[2] = fsrc->Register.SwizzleZ;
	src.swz[3] = fsrc->Register.SwizzleW;
	src.indirect = 0;

	if(fsrc->Register.Indirect) {
		if(fsrc->Indirect.File == TGSI_FILE_ADDRESS &&
		   (fsrc->Register.File == TGSI_FILE_CONSTANT ||
		    fsrc->Register.File == TGSI_FILE_INPUT))
		{
			src.indirect = 1;
			src.indirect_reg = fsrc->Indirect.Index;
			src.indirect_swz = fsrc->Indirect.SwizzleX;
		}
		else
		{
			src.reg.index = 0;
			src.reg.type = -1;
		}
	}
	return src;
}

static inline struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
	struct nvfx_reg dst;

	switch (fdst->Register.File) {
	case TGSI_FILE_NULL:
		dst = nvfx_reg(NVFXSR_NONE, 0);
		break;
	case TGSI_FILE_OUTPUT:
		dst = vpc->r_result[fdst->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		dst = vpc->r_temp[fdst->Register.Index];
		break;
	case TGSI_FILE_ADDRESS:
		dst = vpc->r_address[fdst->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);
		dst.index = 0;
		dst.type = 0;
		break;
	}

	return dst;
}

static inline int
tgsi_mask(uint tgsi)
{
	int mask = 0;

	if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;
	if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;
	if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;
	if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;
	return mask;
}

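/* Only one distinct input register and one distinct constant/immediate can
 * be read per hardware instruction, so the ai/ci/ii bookkeeping below
 * inserts a MOV into a temporary for any operand that would need a second
 * one.
 */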
static boolean
nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
				unsigned idx, const struct tgsi_full_instruction *finst)
{
	struct nvfx_src src[3], tmp;
	struct nvfx_reg dst;
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
	struct nvfx_insn insn;
	struct nvfx_relocation reloc;
	struct nvfx_loop_entry loop;
	int mask;
	int ai = -1, ci = -1, ii = -1;
	int i;

	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
		return TRUE;

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];
		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
			src[i] = tgsi_src(vpc, fsrc);
		}
	}

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];

		switch (fsrc->Register.File) {
		case TGSI_FILE_INPUT:
			if (ai == -1 || ai == fsrc->Register.Index) {
				ai = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_CONSTANT:
			if ((ci == -1 && ii == -1) ||
			    ci == fsrc->Register.Index) {
				ci = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_IMMEDIATE:
			if ((ci == -1 && ii == -1) ||
			    ii == fsrc->Register.Index) {
				ii = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_TEMPORARY:
			/* handled above */
			break;
		default:
			NOUVEAU_ERR("bad src file\n");
			return FALSE;
		}
	}

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		if(src[i].reg.type < 0)
			return FALSE;
	}

	if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
	   finst->Instruction.Opcode != TGSI_OPCODE_ARL)
		return FALSE;

	dst = tgsi_dst(vpc, &finst->Dst[0]);
	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);

	switch (finst->Instruction.Opcode) {
	case TGSI_OPCODE_ABS:
		nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, abs(src[0]), none, none));
		break;
	case TGSI_OPCODE_ADD:
		nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, src[1]));
		break;
	case TGSI_OPCODE_ARL:
		nvfx_vp_emit(vpc, arith(VEC, ARL, dst, mask, src[0], none, none));
		break;
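	/* CMP is emulated with condition codes: the first MOV writes no
	 * register and only latches src[0] into the condition register; the
	 * two predicated MOVs then select src[2] where src[0] >= 0 and
	 * src[1] where src[0] < 0.
	 */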
	case TGSI_OPCODE_CMP:
		insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		insn = arith(VEC, MOV, dst, mask, src[2], none, none);
		insn.cc_test = NVFX_COND_GE;
		nvfx_vp_emit(vpc, insn);

		insn = arith(VEC, MOV, dst, mask, src[1], none, none);
		insn.cc_test = NVFX_COND_LT;
		nvfx_vp_emit(vpc, insn);
		break;
	case TGSI_OPCODE_COS:
		nvfx_vp_emit(vpc, arith(SCA, COS, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_DP2:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));
		nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
		break;
	case TGSI_OPCODE_DP3:
		nvfx_vp_emit(vpc, arith(VEC, DP3, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DP4:
		nvfx_vp_emit(vpc, arith(VEC, DP4, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DPH:
		nvfx_vp_emit(vpc, arith(VEC, DPH, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DST:
		nvfx_vp_emit(vpc, arith(VEC, DST, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_EX2:
		nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_EXP:
		nvfx_vp_emit(vpc, arith(SCA, EXP, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_FLR:
		nvfx_vp_emit(vpc, arith(VEC, FLR, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_FRC:
		nvfx_vp_emit(vpc, arith(VEC, FRC, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_LG2:
		nvfx_vp_emit(vpc, arith(SCA, LG2, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LIT:
		nvfx_vp_emit(vpc, arith(SCA, LIT, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LOG:
		nvfx_vp_emit(vpc, arith(SCA, LOG, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LRP:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
		nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], tmp));
		break;
	case TGSI_OPCODE_MAD:
		nvfx_vp_emit(vpc, arith(VEC, MAD, dst, mask, src[0], src[1], src[2]));
		break;
	case TGSI_OPCODE_MAX:
		nvfx_vp_emit(vpc, arith(VEC, MAX, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_MIN:
		nvfx_vp_emit(vpc, arith(VEC, MIN, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_MOV:
		nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_MUL:
		nvfx_vp_emit(vpc, arith(VEC, MUL, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_NOP:
		break;
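	/* POW is lowered to exp2(log2(x) * y): LG2 and EX2 run on the scalar
	 * unit and the multiply on the vector unit, all on the x component;
	 * the final EX2 broadcasts the result to the masked components.
	 */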
	case TGSI_OPCODE_POW:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));
		nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
		nvfx_vp_emit(vpc, arith(SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));
		break;
	case TGSI_OPCODE_RCP:
		nvfx_vp_emit(vpc, arith(SCA, RCP, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_RSQ:
		nvfx_vp_emit(vpc, arith(SCA, RSQ, dst, mask, none, none, abs(src[0])));
		break;
	case TGSI_OPCODE_SEQ:
		nvfx_vp_emit(vpc, arith(VEC, SEQ, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SFL:
		nvfx_vp_emit(vpc, arith(VEC, SFL, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SGE:
		nvfx_vp_emit(vpc, arith(VEC, SGE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SGT:
		nvfx_vp_emit(vpc, arith(VEC, SGT, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SIN:
		nvfx_vp_emit(vpc, arith(SCA, SIN, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_SLE:
		nvfx_vp_emit(vpc, arith(VEC, SLE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SLT:
		nvfx_vp_emit(vpc, arith(VEC, SLT, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SNE:
		nvfx_vp_emit(vpc, arith(VEC, SNE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SSG:
		nvfx_vp_emit(vpc, arith(VEC, SSG, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_STR:
		nvfx_vp_emit(vpc, arith(VEC, STR, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SUB:
		nvfx_vp_emit(vpc, arith(VEC, ADD, dst, mask, src[0], none, neg(src[1])));
		break;
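	/* TRUNC (round toward zero) floors the absolute value: the sign of
	 * src[0] is latched into the condition register first, and the
	 * floored magnitude is then re-negated for the components that were
	 * negative.
	 */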
	case TGSI_OPCODE_TRUNC:
		tmp = nvfx_src(temp(vpc));
		insn = arith(VEC, MOV, none.reg, mask, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		nvfx_vp_emit(vpc, arith(VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
		nvfx_vp_emit(vpc, arith(VEC, MOV, dst, mask, tmp, none, none));

		insn = arith(VEC, MOV, dst, mask, neg(tmp), none, none);
		insn.cc_test = NVFX_COND_LT;
		nvfx_vp_emit(vpc, insn);
		break;
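	/* XPD computes the cross product with one swizzled MUL and one
	 * swizzled MAD; w is dropped from the writemask.
	 */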
	case TGSI_OPCODE_XPD:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
		nvfx_vp_emit(vpc, arith(VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
		break;

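	/* IF latches src[0].x into the condition register, then emits a
	 * conditional BRA that is taken when the value is zero (i.e. the
	 * test failed). The branch target, the instruction following the
	 * matching ELSE/ENDIF, is resolved later through label_relocs.
	 */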
	case TGSI_OPCODE_IF:
		insn = arith(VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = finst->Label.Label + 1;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		insn = arith(SCA, BRA, none.reg, 0, none, none, none);
		insn.cc_test = NVFX_COND_EQ;
		insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;
		nvfx_vp_emit(vpc, insn);
		break;

	case TGSI_OPCODE_ELSE:
	case TGSI_OPCODE_BRA:
	case TGSI_OPCODE_CAL:
		reloc.location = vpc->vp->nr_insns;
		reloc.target = finst->Label.Label;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)
			insn = arith(SCA, CAL, none.reg, 0, none, none, none);
		else
			insn = arith(SCA, BRA, none.reg, 0, none, none, none);
		nvfx_vp_emit(vpc, insn);
		break;

	case TGSI_OPCODE_RET:
		tmp = none;
		tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
		nvfx_vp_emit(vpc, arith(SCA, RET, none.reg, 0, none, none, tmp));
		break;

	case TGSI_OPCODE_BGNSUB:
	case TGSI_OPCODE_ENDSUB:
	case TGSI_OPCODE_ENDIF:
		/* nothing to do here */
		break;

	case TGSI_OPCODE_BGNLOOP:
		loop.cont_target = idx;
		loop.brk_target = finst->Label.Label + 1;
		util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);
		break;

	case TGSI_OPCODE_ENDLOOP:
		loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.cont_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
		break;

	case TGSI_OPCODE_CONT:
		loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.cont_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
		break;

	case TGSI_OPCODE_BRK:
		loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.brk_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(SCA, BRA, none.reg, 0, none, none, none));
		break;

	default:
		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
		return FALSE;
	}

	release_temps(vpc);
	return TRUE;
}

static boolean
nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
				const struct tgsi_full_declaration *fdec)
{
	unsigned idx = fdec->Range.First;
	int hw;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw = NVFX_VP(INST_DEST_POS);
		vpc->hpos_idx = idx;
		break;
	case TGSI_SEMANTIC_COLOR:
		if (fdec->Semantic.Index == 0) {
			hw = NVFX_VP(INST_DEST_COL0);
		} else
		if (fdec->Semantic.Index == 1) {
			hw = NVFX_VP(INST_DEST_COL1);
		} else {
			NOUVEAU_ERR("bad colour semantic index\n");
			return FALSE;
		}
		break;
	case TGSI_SEMANTIC_BCOLOR:
		if (fdec->Semantic.Index == 0) {
			hw = NVFX_VP(INST_DEST_BFC0);
		} else
		if (fdec->Semantic.Index == 1) {
			hw = NVFX_VP(INST_DEST_BFC1);
		} else {
			NOUVEAU_ERR("bad bcolour semantic index\n");
			return FALSE;
		}
		break;
	case TGSI_SEMANTIC_FOG:
		hw = NVFX_VP(INST_DEST_FOGC);
		break;
	case TGSI_SEMANTIC_PSIZE:
		hw = NVFX_VP(INST_DEST_PSZ);
		break;
	case TGSI_SEMANTIC_GENERIC:
		hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf)
		     + NVFX_VP(INST_DEST_TC(0)) - NVFX_FP_OP_INPUT_SRC_TC(0);
		break;
	case TGSI_SEMANTIC_EDGEFLAG:
		/* not really an error, just a fallback */
		NOUVEAU_ERR("cannot handle edgeflag output\n");
		return FALSE;
	default:
		NOUVEAU_ERR("bad output semantic\n");
		return FALSE;
	}

	vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
	return TRUE;
}

static boolean
nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
{
	struct tgsi_parse_context p;
	int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i;
	struct util_semantic_set set;
	unsigned char sem_layout[8];
	unsigned num_outputs;

	num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT);

	if(num_outputs > 8) {
		NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs);
		return FALSE;
	}
	util_semantic_layout_from_set(sem_layout, &set, 8, 8);

	/* hope 0xf is initialized to (0, 0, 0, 1); otherwise, we are _probably_ not required to do this */
	memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input));
	for(int i = 0; i < 8; ++i) {
		if(sem_layout[i] == 0xff)
			continue;
		//printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i);
		vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | NVFX_FP_OP_INPUT_SRC_TC(i);
	}

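	/* Remember the first texcoord slot not claimed by a GENERIC output;
	 * presumably this is where point-sprite coordinates can be generated
	 * without clobbering a user varying. If none is free it stays -1.
	 */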
	vpc->vp->sprite_fp_input = -1;
	for(int i = 0; i < 8; ++i)
	{
		if(sem_layout[i] == 0xff)
		{
			vpc->vp->sprite_fp_input = NVFX_FP_OP_INPUT_SRC_TC(i);
			break;
		}
	}

	tgsi_parse_init(&p, vpc->vp->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch(tok->Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
			nr_imm++;
			break;
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;

			fdec = &p.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_TEMPORARY:
				if (fdec->Range.Last > high_temp) {
					high_temp = fdec->Range.Last;
				}
				break;
			case TGSI_FILE_ADDRESS:
				if (fdec->Range.Last > high_addr) {
					high_addr = fdec->Range.Last;
				}
				break;
			case TGSI_FILE_CONSTANT:
				if (fdec->Range.Last > high_const) {
					high_const = fdec->Range.Last;
				}
				break;
			case TGSI_FILE_OUTPUT:
				if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec))
					return FALSE;
				break;
			default:
				break;
			}
		}
			break;
		default:
			break;
		}
	}
	tgsi_parse_free(&p);

	if (nr_imm) {
		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));
		assert(vpc->imm);
	}

	if (++high_temp) {
		vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
		for (i = 0; i < high_temp; i++)
			vpc->r_temp[i] = temp(vpc);
	}

	if (++high_addr) {
		vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));
		for (i = 0; i < high_addr; i++)
			vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i);
	}

	if(++high_const) {
		vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg));
		for (i = 0; i < high_const; i++)
			vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0);
	}

	vpc->r_temps_discard = 0;
	return TRUE;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)

static void
nvfx_vertprog_translate(struct nvfx_context *nvfx,
			struct nvfx_vertex_program *vp)
{
	struct tgsi_parse_context parse;
	struct nvfx_vpc *vpc = NULL;
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
	struct util_dynarray insns;
	int i;

	vpc = CALLOC(1, sizeof(struct nvfx_vpc));
	if (!vpc)
		return;
	vpc->nvfx = nvfx;
	vpc->vp = vp;

	/* reserve space for ucps */
	if(nvfx->use_vp_clipping)
	{
		for(i = 0; i < 6; ++i)
			constant(vpc, -1, 0, 0, 0, 0);
	}

	if (!nvfx_vertprog_prepare(nvfx, vpc)) {
		FREE(vpc);
		return;
	}

	/* Redirect post-transform vertex position to a temp if user clip
	 * planes are enabled.  We need to append code to the vtxprog
	 * to handle clip planes later.
	 */
	/* TODO: maybe support patching this depending on whether there are ucps:
	 * not sure if it really matters much
	 */
	if (nvfx->use_vp_clipping) {
		vpc->r_result[vpc->hpos_idx] = temp(vpc);
		vpc->r_temps_discard = 0;
	}

	tgsi_parse_init(&parse, vp->pipe.tokens);

	util_dynarray_init(&insns);
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		switch (parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			const struct tgsi_full_immediate *imm;

			imm = &parse.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(imm->Immediate.NrTokens == 4 + 1);
			vpc->imm[vpc->nr_imm++] =
				constant(vpc, -1,
					 imm->u[0].Float,
					 imm->u[1].Float,
					 imm->u[2].Float,
					 imm->u[3].Float);
		}
			break;
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		{
			const struct tgsi_full_instruction *finst;
			unsigned idx = insns.size >> 2;
			util_dynarray_append(&insns, unsigned, vp->nr_insns);
			finst = &parse.FullToken.FullInstruction;
			if (!nvfx_vertprog_parse_instruction(nvfx, vpc, idx, finst))
				goto out_err;
		}
			break;
		default:
			break;
		}
	}

	util_dynarray_append(&insns, unsigned, vp->nr_insns);

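	/* Rewrite label relocations, which were recorded against TGSI
	 * instruction indices, into hw-to-hw relocations: the insns dynarray
	 * maps each TGSI instruction to the hw instruction where its
	 * generated code starts.
	 */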
	for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
	{
		struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);
		struct nvfx_relocation hw_reloc;

		hw_reloc.location = label_reloc->location;
		hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];

		//debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);

		util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);
	}
	util_dynarray_fini(&insns);
	util_dynarray_trim(&vp->branch_relocs);

	/* XXX: what if we add a RET before?! make sure we jump here... */

	/* Write out HPOS if it was redirected to a temp earlier */
	if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
		struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,
						NVFX_VP(INST_DEST_POS));
		struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);

		nvfx_vp_emit(vpc, arith(VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));
	}

	/* Insert code to handle user clip planes */
	if(nvfx->use_vp_clipping)
	{
		for (i = 0; i < 6; i++) {
			struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
			struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i));
			struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
			unsigned mask;

			if(nvfx->is_nv4x)
			{
				switch (i) {
				case 0: case 3: mask = NVFX_VP_MASK_Y; break;
				case 1: case 4: mask = NVFX_VP_MASK_Z; break;
				case 2: case 5: mask = NVFX_VP_MASK_W; break;
				default:
					NOUVEAU_ERR("invalid clip dist #%d\n", i);
					goto out_err;
				}
			}
			else
				mask = NVFX_VP_MASK_X;

			nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none));
		}
	}
	else
	{
		if(vp->nr_insns)
			vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;

		nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none));
		vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
	}

	if(debug_get_option_nvfx_dump_vp())
	{
		debug_printf("\n");
		tgsi_dump(vp->pipe.tokens, 0);

		debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
		for (i = 0; i < vp->nr_insns; i++)
			debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
		debug_printf("\n");
	}

	vp->clip_nr = -1;
	vp->exec_start = -1;
	vp->translated = TRUE;
out_err:
	tgsi_parse_free(&parse);
	util_dynarray_fini(&vpc->label_relocs);
	util_dynarray_fini(&vpc->loop_stack);
	if (vpc->r_temp)
		FREE(vpc->r_temp);
	if (vpc->r_address)
		FREE(vpc->r_address);
	if (vpc->r_const)
		FREE(vpc->r_const);
	if (vpc->imm)
		FREE(vpc->imm);
	FREE(vpc);
}

boolean
nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
	struct nvfx_screen *screen = nvfx->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *eng3d = screen->eng3d;
	struct nvfx_vertex_program *vp;
	struct pipe_resource *constbuf;
	boolean upload_code = FALSE, upload_data = FALSE;
	int i;

	if (nvfx->render_mode == HW) {
		vp = nvfx->vertprog;
		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
	} else {
		vp = nvfx->swtnl.vertprog;
		constbuf = NULL;
	}

	/* Translate TGSI shader into hw bytecode */
	if (!vp->translated)
	{
		nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
		nvfx_vertprog_translate(nvfx, vp);
		if (!vp->translated) {
			nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
			return FALSE;
		}
	}

	/* Allocate hw vtxprog exec slots */
	if (!vp->exec) {
		struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
		uint vplen = vp->nr_insns;

		if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
			while (heap->next && heap->size < vplen) {
				struct nvfx_vertex_program *evict;

				evict = heap->next->priv;
				nouveau_resource_free(&evict->exec);
			}

			if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
			{
				debug_printf("Vertex shader too long: %u instructions\n", vplen);
				nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
				return FALSE;
			}
		}

		upload_code = TRUE;
	}

	/* Allocate hw vtxprog const slots */
	if (vp->nr_consts && !vp->data) {
		struct nouveau_resource *heap = nvfx->screen->vp_data_heap;

		if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
			while (heap->next && heap->size < vp->nr_consts) {
				struct nvfx_vertex_program *evict;

				evict = heap->next->priv;
				nouveau_resource_free(&evict->data);
			}

			if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
			{
				debug_printf("Vertex shader uses too many constants: %u constants\n", vp->nr_consts);
				nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
				return FALSE;
			}
		}

		//printf("start at %u nc %u\n", vp->data->start, vp->nr_consts);

		/* XXX: handle this some day */
		assert(vp->data->start >= vp->data_start_min);

		upload_data = TRUE;
		if (vp->data_start != vp->data->start)
			upload_code = TRUE;
	}

	/* If exec or data segments moved we need to patch the program to
	 * fixup offsets and register IDs.
	 */
	if (vp->exec_start != vp->exec->start) {
		//printf("vp_relocs %u -> %u\n", vp->exec_start, vp->exec->start);
		for(unsigned i = 0; i < vp->branch_relocs.size; i += sizeof(struct nvfx_relocation))
		{
			struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->branch_relocs.data + i);
			uint32_t* hw = vp->insns[reloc->location].data;
			unsigned target = vp->exec->start + reloc->target;

			//debug_printf("vp_reloc hw %u -> hw %u\n", reloc->location, target);

			if(!nvfx->is_nv4x)
			{
				hw[2] &= ~NV30_VP_INST_IADDR_MASK;
				hw[2] |= (target & 0x1ff) << NV30_VP_INST_IADDR_SHIFT;
			}
			else
			{
				hw[3] &= ~NV40_VP_INST_IADDRL_MASK;
				hw[3] |= (target & 7) << NV40_VP_INST_IADDRL_SHIFT;

				hw[2] &= ~NV40_VP_INST_IADDRH_MASK;
				hw[2] |= ((target >> 3) & 0x3f) << NV40_VP_INST_IADDRH_SHIFT;
			}
		}

		vp->exec_start = vp->exec->start;
	}

	if (vp->data_start != vp->data->start) {
		for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
		{
			struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
			struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location];

			//printf("reloc %i to %i + %i\n", reloc->location, vp->data->start, reloc->target);

			vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
			vpi->data[1] |=
				(reloc->target + vp->data->start) <<
				NVFX_VP(INST_CONST_SRC_SHIFT);
		}

		vp->data_start = vp->data->start;
		upload_code = TRUE;
	}

	/* Update + Upload constant values */
	if (vp->nr_consts) {
		float *map = NULL;

		if (constbuf)
			map = (float*)nvfx_buffer(constbuf)->data;

		/*
		for (i = 0; i < 512; i++) {
			float v[4] = {0.1, 0.2, 0.3, 0.4};
			BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5);
			OUT_RING  (chan, i);
			OUT_RINGp (chan, (uint32_t *)v, 4);
			printf("frob %i\n", i);
		}
		*/

		for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
			struct nvfx_vertex_program_data *vpd = &vp->consts[i];

			if (vpd->index >= 0) {
				if (!upload_data &&
				    !memcmp(vpd->value, &map[vpd->index * 4],
					    4 * sizeof(float)))
					continue;
				memcpy(vpd->value, &map[vpd->index * 4],
				       4 * sizeof(float));
			}

			//printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);

			BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5);
			OUT_RING  (chan, i + vp->data->start);
			OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
		}
	}

	/* Upload vtxprog */
	if (upload_code) {
		BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1);
		OUT_RING  (chan, vp->exec->start);
		for (i = 0; i < vp->nr_insns; i++) {
			BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
			//printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
			OUT_RINGp (chan, vp->insns[i].data, 4);
		}
		vp->clip_nr = -1;
	}

	if(nvfx->dirty & (NVFX_NEW_VERTPROG))
	{
		WAIT_RING(chan, 6);
		OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
		OUT_RING(chan, vp->exec->start);
		if(nvfx->is_nv4x) {
			OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1));
			OUT_RING(chan, vp->ir);
		}
	}

	return TRUE;
}

void
nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
{
	if (vp->nr_insns)
		FREE(vp->insns);

	if (vp->nr_consts)
		FREE(vp->consts);

	nouveau_resource_free(&vp->exec);
	nouveau_resource_free(&vp->data);

	util_dynarray_fini(&vp->branch_relocs);
	util_dynarray_fini(&vp->const_relocs);
}

static void *
nvfx_vp_state_create(struct pipe_context *pipe,
		     const struct pipe_shader_state *cso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_vertex_program *vp;

	// TODO: use a 64-bit atomic here!
	static unsigned long long id = 0;

	vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
	vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);
	vp->id = ++id;

	return (void *)vp;
}

static void
nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	nvfx->vertprog = hwcso;
	nvfx->dirty |= NVFX_NEW_VERTPROG;
	nvfx->draw_dirty |= NVFX_NEW_VERTPROG;
}

static void
nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_vertex_program *vp = hwcso;

	draw_delete_vertex_shader(nvfx->draw, vp->draw);
	nvfx_vertprog_destroy(nvfx, vp);
	FREE((void*)vp->pipe.tokens);
	FREE(vp);
}

void
nvfx_init_vertprog_functions(struct nvfx_context *nvfx)
{
	nvfx->pipe.create_vs_state = nvfx_vp_state_create;
	nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
	nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;
}