nv30: fix breakage due to 10 texcoord support on nv40
[mesa.git] / src/gallium/drivers/nvfx/nvfx_vertprog.c

1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "util/u_linkage.h"
5 #include "util/u_debug.h"
6
7 #include "pipe/p_shader_tokens.h"
8 #include "tgsi/tgsi_parse.h"
9 #include "tgsi/tgsi_dump.h"
10 #include "tgsi/tgsi_util.h"
11 #include "tgsi/tgsi_ureg.h"
12
13 #include "draw/draw_context.h"
14
15 #include "nvfx_context.h"
16 #include "nvfx_state.h"
17 #include "nvfx_resource.h"
18
19 /* TODO (at least...):
20 * 1. Indexed consts + ARL
21 * 3. NV_vp11, NV_vp2, NV_vp3 features
22 * - extra arith opcodes
23 * - branching
24 * - texture sampling
25 * - indexed attribs
26 * - indexed results
27 * 4. bugs
28 */
29
30 #include "nv30_vertprog.h"
31 #include "nv40_vertprog.h"

struct nvfx_loop_entry
{
	unsigned brk_target;
	unsigned cont_target;
};

struct nvfx_vpc {
	struct nvfx_context* nvfx;
	struct pipe_shader_state pipe;
	struct nvfx_vertex_program *vp;
	const struct tgsi_shader_info* info;

	struct nvfx_vertex_program_exec *vpi;

	unsigned r_temps;
	unsigned r_temps_discard;
	struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
	struct nvfx_reg *r_address;
	struct nvfx_reg *r_temp;
	struct nvfx_reg *r_const;
	struct nvfx_reg r_0_1;

	struct nvfx_reg *imm;
	unsigned nr_imm;

	unsigned hpos_idx;

	struct util_dynarray label_relocs;
	struct util_dynarray loop_stack;
};

static struct nvfx_reg
temp(struct nvfx_vpc *vpc)
{
	int idx = ffs(~vpc->r_temps) - 1;

	if (idx < 0) {
		NOUVEAU_ERR("out of temps!!\n");
		assert(0);
		return nvfx_reg(NVFXSR_TEMP, 0);
	}

	vpc->r_temps |= (1 << idx);
	vpc->r_temps_discard |= (1 << idx);
	return nvfx_reg(NVFXSR_TEMP, idx);
}

static inline void
release_temps(struct nvfx_vpc *vpc)
{
	vpc->r_temps &= ~vpc->r_temps_discard;
	vpc->r_temps_discard = 0;
}
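/* Allocate (or reuse) a slot in the program's constant segment.  A
 * non-negative "pipe" index names a slot of the bound constant buffer and
 * is deduplicated; pipe == -1 allocates a fresh immediate holding the given
 * (x, y, z, w) value.
 */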
static struct nvfx_reg
constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
{
	struct nvfx_vertex_program *vp = vpc->vp;
	struct nvfx_vertex_program_data *vpd;
	int idx;

	if (pipe >= 0) {
		for (idx = 0; idx < vp->nr_consts; idx++) {
			if (vp->consts[idx].index == pipe)
				return nvfx_reg(NVFXSR_CONST, idx);
		}
	}

	idx = vp->nr_consts++;
	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
	vpd = &vp->consts[idx];

	vpd->index = pipe;
	vpd->value[0] = x;
	vpd->value[1] = y;
	vpd->value[2] = z;
	vpd->value[3] = w;
	return nvfx_reg(NVFXSR_CONST, idx);
}

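/* Build an nvfx_insn targeting one hardware slot: the slot (VEC or SCA) and
 * the slot-relative opcode are packed into a single word, decoded again in
 * nvfx_vp_emit() as "insn.op >> 7" and "insn.op & 0x7f".  Typical use is
 * e.g. arith(sat, VEC, MOV, dst, mask, src, none, none).
 */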
#define arith(s,t,o,d,m,s0,s1,s2) \
	nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2))

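/* Encode source operand "pos" (0..2) into the instruction words.  The
 * source selector is assembled in "sr" and spliced into hw[1]..hw[3] at a
 * position-dependent offset.  Constant sources are recorded in
 * vp->const_relocs instead of being encoded directly, since the final
 * constant slot is only known once the data segment has been placed.
 */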
static void
emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src)
{
	struct nvfx_vertex_program *vp = vpc->vp;
	uint32_t sr = 0;
	struct nvfx_relocation reloc;

	switch (src.reg.type) {
	case NVFXSR_TEMP:
		sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
		sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
		break;
	case NVFXSR_INPUT:
		sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		vp->ir |= (1 << src.reg.index);
		hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
		break;
	case NVFXSR_CONST:
		sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		reloc.location = vp->nr_insns - 1;
		reloc.target = src.reg.index;
		util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
		break;
	case NVFXSR_NONE:
		sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
		       NVFX_VP(SRC_REG_TYPE_SHIFT));
		break;
	default:
		assert(0);
	}

	if (src.negate)
		sr |= NVFX_VP(SRC_NEGATE);

	if (src.abs)
		hw[0] |= (1 << (21 + pos));

	sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |
	       (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |
	       (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |
	       (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));

	if(src.indirect) {
		if(src.reg.type == NVFXSR_CONST)
			hw[3] |= NVFX_VP(INST_INDEX_CONST);
		else if(src.reg.type == NVFXSR_INPUT)
			hw[0] |= NVFX_VP(INST_INDEX_INPUT);
		else
			assert(0);
		if(src.indirect_reg)
			hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);
		hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);
	}

	switch (pos) {
	case 0:
		hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>
			  NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
		hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<
			  NVFX_VP(INST_SRC0L_SHIFT);
		break;
	case 1:
		hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);
		break;
	case 2:
		hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>
			  NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
		hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<
			  NVFX_VP(INST_SRC2L_SHIFT);
		break;
	default:
		assert(0);
	}
}

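/* Encode the destination register.  nv30 and nv4x lay the fields out
 * differently, and on nv4x each slot carries its own result/temp-id bits.
 * The NV30_VP_INST_DEST_CLP(0..5) indices are remapped on nv4x onto FOGC
 * and PSZ, which seemingly alias the clip distances there; the caller
 * selects the component through the writemask.
 */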
static void
emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst)
{
	struct nvfx_vertex_program *vp = vpc->vp;

	switch (dst.type) {
	case NVFXSR_NONE:
		if(!nvfx->is_nv4x)
			hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			else
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
		}
		break;
	case NVFXSR_TEMP:
		if(!nvfx->is_nv4x)
			hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
		else {
			hw[3] |= NV40_VP_INST_DEST_MASK;
			if (slot == 0)
				hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
			else
				hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
		}
		break;
	case NVFXSR_OUTPUT:
		/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
		if(nvfx->is_nv4x) {
			switch (dst.index) {
			case NV30_VP_INST_DEST_CLP(0):
			case NV30_VP_INST_DEST_CLP(1):
			case NV30_VP_INST_DEST_CLP(2):
				dst.index = NVFX_VP(INST_DEST_FOGC);
				break;
			case NV30_VP_INST_DEST_CLP(3):
			case NV30_VP_INST_DEST_CLP(4):
			case NV30_VP_INST_DEST_CLP(5):
				dst.index = NVFX_VP(INST_DEST_PSZ);
				break;
			case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break;
			case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break;
			case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break;
			case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break;
			case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
			case NV40_VP_INST_DEST_PSZ: vp->or |= (1 << 5); break;
			}
		}

		if(!nvfx->is_nv4x) {
			hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
			hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;

			/*XXX: no way this is entirely correct, someone needs to
			 * figure out what exactly it is.
			 */
			hw[3] |= 0x800;
		} else {
			hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
			if (slot == 0) {
				hw[0] |= NV40_VP_INST_VEC_RESULT;
				hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			} else {
				hw[3] |= NV40_VP_INST_SCA_RESULT;
				hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
			}
		}
		break;
	default:
		assert(0);
	}
}

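/* Append one instruction (four 32-bit words) to the program.  The slot and
 * slot-relative opcode are unpacked from insn.op (see the arith() macro);
 * condition-code state goes into hw[0], while the opcode and writemask
 * placement depends on the slot and, on nv30, on whether the destination is
 * an output or a temporary.
 */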
static void
nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
	struct nvfx_context* nvfx = vpc->nvfx;
	struct nvfx_vertex_program *vp = vpc->vp;
	unsigned slot = insn.op >> 7;
	unsigned op = insn.op & 0x7f;
	uint32_t *hw;

	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
	vpc->vpi = &vp->insns[vp->nr_insns - 1];
	memset(vpc->vpi, 0, sizeof(*vpc->vpi));

	hw = vpc->vpi->data;

	hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
	hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
		  (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
		  (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
		  (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
	if(insn.cc_update)
		hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);

	if(insn.sat)
	{
		assert(nvfx->use_nv4x);
		if(nvfx->use_nv4x)
			hw[0] |= NV40_VP_INST_SATURATE;
	}

	if(!nvfx->is_nv4x) {
		if(slot == 0)
			hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
		else
		{
			hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
			hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
		}
//		hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
//		hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));

		if (insn.dst.type == NVFXSR_OUTPUT) {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
		} else {
			if (slot)
				hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
			else
				hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
		}
	} else {
		if (slot == 0) {
			hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
			hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
		} else {
			hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
			hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
		}
	}

	emit_dst(nvfx, vpc, hw, slot, insn.dst);
	emit_src(nvfx, vpc, hw, 0, insn.src[0]);
	emit_src(nvfx, vpc, hw, 1, insn.src[1]);
	emit_src(nvfx, vpc, hw, 2, insn.src[2]);

//	if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL)
//		hw[3] |= NV40_VP_INST_SCA_RESULT;
}

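/* Translate a TGSI source operand.  Only ADDR-relative indexing of
 * constants and inputs is supported: any other indirect source gets
 * reg.type = -1, which nvfx_vertprog_parse_instruction() rejects.
 */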
static inline struct nvfx_src
tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
	struct nvfx_src src;

	switch (fsrc->Register.File) {
	case TGSI_FILE_INPUT:
		src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);
		break;
	case TGSI_FILE_CONSTANT:
		src.reg = vpc->r_const[fsrc->Register.Index];
		break;
	case TGSI_FILE_IMMEDIATE:
		src.reg = vpc->imm[fsrc->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		src.reg = vpc->r_temp[fsrc->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad src file\n");
		src.reg.index = 0;
		src.reg.type = -1;
		break;
	}

	src.abs = fsrc->Register.Absolute;
	src.negate = fsrc->Register.Negate;
	src.swz[0] = fsrc->Register.SwizzleX;
	src.swz[1] = fsrc->Register.SwizzleY;
	src.swz[2] = fsrc->Register.SwizzleZ;
	src.swz[3] = fsrc->Register.SwizzleW;
	src.indirect = 0;

	if(fsrc->Register.Indirect) {
		if(fsrc->Indirect.File == TGSI_FILE_ADDRESS &&
		   (fsrc->Register.File == TGSI_FILE_CONSTANT || fsrc->Register.File == TGSI_FILE_INPUT))
		{
			src.indirect = 1;
			src.indirect_reg = fsrc->Indirect.Index;
			src.indirect_swz = fsrc->Indirect.SwizzleX;
		}
		else
		{
			src.reg.index = 0;
			src.reg.type = -1;
		}
	}
	return src;
}

static inline struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
	struct nvfx_reg dst;

	switch (fdst->Register.File) {
	case TGSI_FILE_NULL:
		dst = nvfx_reg(NVFXSR_NONE, 0);
		break;
	case TGSI_FILE_OUTPUT:
		dst = vpc->r_result[fdst->Register.Index];
		break;
	case TGSI_FILE_TEMPORARY:
		dst = vpc->r_temp[fdst->Register.Index];
		break;
	case TGSI_FILE_ADDRESS:
		dst = vpc->r_address[fdst->Register.Index];
		break;
	default:
		NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);
		dst.index = 0;
		dst.type = 0;
		break;
	}

	return dst;
}

static inline int
tgsi_mask(uint tgsi)
{
	int mask = 0;

	if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;
	if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;
	if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;
	if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;
	return mask;
}

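/* Translate one TGSI instruction.  The hardware can apparently read at most
 * one distinct input attribute and one distinct constant or immediate per
 * instruction, so the legalization pass below MOVs any extra ones into
 * temporaries first.
 */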
static boolean
nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
				unsigned idx, const struct tgsi_full_instruction *finst)
{
	struct nvfx_src src[3], tmp;
	struct nvfx_reg dst;
	struct nvfx_reg final_dst;
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
	struct nvfx_insn insn;
	struct nvfx_relocation reloc;
	struct nvfx_loop_entry loop;
	boolean sat = FALSE;
	int mask;
	int ai = -1, ci = -1, ii = -1;
	int i;
	unsigned sub_depth = 0;

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];
		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
			src[i] = tgsi_src(vpc, fsrc);
		}
	}

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];

		switch (fsrc->Register.File) {
		case TGSI_FILE_INPUT:
			if (ai == -1 || ai == fsrc->Register.Index) {
				ai = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_CONSTANT:
			if ((ci == -1 && ii == -1) ||
			    ci == fsrc->Register.Index) {
				ci = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_IMMEDIATE:
			if ((ci == -1 && ii == -1) ||
			    ii == fsrc->Register.Index) {
				ii = fsrc->Register.Index;
				src[i] = tgsi_src(vpc, fsrc);
			} else {
				src[i] = nvfx_src(temp(vpc));
				nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none));
			}
			break;
		case TGSI_FILE_TEMPORARY:
			/* handled above */
			break;
		default:
			NOUVEAU_ERR("bad src file\n");
			return FALSE;
		}
	}

	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		if(src[i].reg.type < 0)
			return FALSE;
	}

	if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
	   finst->Instruction.Opcode != TGSI_OPCODE_ARL)
		return FALSE;

	final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]);
	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
	if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
	{
		assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
		if(nvfx->use_nv4x)
			sat = TRUE;
		else if(dst.type != NVFXSR_TEMP)
			dst = temp(vpc);
	}
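
	/* nv3x has no per-instruction saturate bit: in that case the result is
	 * computed into dst (redirected to a temporary above if needed) and
	 * clamped into final_dst with MAX/MIN against the r_0_1 constant after
	 * the opcode switch below.
	 */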

	switch (finst->Instruction.Opcode) {
	case TGSI_OPCODE_ABS:
		nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none));
		break;
	case TGSI_OPCODE_ADD:
		nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1]));
		break;
	case TGSI_OPCODE_ARL:
		nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none));
		break;
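	/* CMP is lowered via the condition register: the first MOV updates CC
	 * from src[0], then two predicated MOVs select src[2] where src[0] >= 0
	 * and src[1] where src[0] < 0.
	 */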
	case TGSI_OPCODE_CMP:
		insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none);
		insn.cc_test = NVFX_COND_GE;
		nvfx_vp_emit(vpc, insn);

		insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none);
		insn.cc_test = NVFX_COND_LT;
		nvfx_vp_emit(vpc, insn);
		break;
	case TGSI_OPCODE_COS:
		nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_DP2:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));
		nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y)));
		break;
	case TGSI_OPCODE_DP3:
		nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DP4:
		nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DPH:
		nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_DST:
		nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_EX2:
		nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_EXP:
		nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_FLR:
		nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_FRC:
		nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_LG2:
		nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LIT:
		nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LOG:
		nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_LRP:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
		nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp));
		break;
	case TGSI_OPCODE_MAD:
		nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2]));
		break;
	case TGSI_OPCODE_MAX:
		nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_MIN:
		nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_MOV:
		nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_MUL:
		nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_NOP:
		break;
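	/* POW is lowered in the scalar slot as EX2(LG2(src0.x) * src1.x). */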
	case TGSI_OPCODE_POW:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));
		nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
		nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));
		break;
	case TGSI_OPCODE_RCP:
		nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_RSQ:
		nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0])));
		break;
	case TGSI_OPCODE_SEQ:
		nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SFL:
		nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SGE:
		nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SGT:
		nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SIN:
		nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0]));
		break;
	case TGSI_OPCODE_SLE:
		nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SLT:
		nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SNE:
		nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SSG:
		/* SSG is unary in TGSI */
		nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none));
		break;
	case TGSI_OPCODE_STR:
		nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none));
		break;
	case TGSI_OPCODE_SUB:
		nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1])));
		break;
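	/* TRUNC: update CC from src[0], take FLR of |src[0]|, then overwrite
	 * the result with its negation wherever src[0] < 0.
	 */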
	case TGSI_OPCODE_TRUNC:
		tmp = nvfx_src(temp(vpc));
		insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
		nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none));

		insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none);
		insn.cc_test = NVFX_COND_LT;
		nvfx_vp_emit(vpc, insn);
		break;
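	/* XPD as the usual two-instruction cross product:
	 * dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx.
	 */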
	case TGSI_OPCODE_XPD:
		tmp = nvfx_src(temp(vpc));
		nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
		nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
		break;

	case TGSI_OPCODE_IF:
		insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
		insn.cc_update = 1;
		nvfx_vp_emit(vpc, insn);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = finst->Label.Label + 1;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);
		insn.cc_test = NVFX_COND_EQ;
		insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;
		nvfx_vp_emit(vpc, insn);
		break;

	case TGSI_OPCODE_ELSE:
	case TGSI_OPCODE_BRA:
	case TGSI_OPCODE_CAL:
		reloc.location = vpc->vp->nr_insns;
		reloc.target = finst->Label.Label;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)
			insn = arith(0, SCA, CAL, none.reg, 0, none, none, none);
		else
			insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);
		nvfx_vp_emit(vpc, insn);
		break;

	case TGSI_OPCODE_RET:
		if(sub_depth || !nvfx->use_vp_clipping) {
			tmp = none;
			tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
			nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp));
		} else {
			reloc.location = vpc->vp->nr_insns;
			reloc.target = vpc->info->num_instructions;
			util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
			nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
		}
		break;

	case TGSI_OPCODE_BGNSUB:
		++sub_depth;
		break;
	case TGSI_OPCODE_ENDSUB:
		--sub_depth;
		break;
	case TGSI_OPCODE_ENDIF:
		/* nothing to do here */
		break;

	case TGSI_OPCODE_BGNLOOP:
		loop.cont_target = idx;
		loop.brk_target = finst->Label.Label + 1;
		util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);
		break;

	case TGSI_OPCODE_ENDLOOP:
		loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.cont_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
		break;

	case TGSI_OPCODE_CONT:
		loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.cont_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
		break;

	case TGSI_OPCODE_BRK:
		loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

		reloc.location = vpc->vp->nr_insns;
		reloc.target = loop.brk_target;
		util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

		nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
		break;

	case TGSI_OPCODE_END:
		assert(!sub_depth);
		if(nvfx->use_vp_clipping) {
			if(idx != (vpc->info->num_instructions - 1)) {
				reloc.location = vpc->vp->nr_insns;
				reloc.target = vpc->info->num_instructions;
				util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
				nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
			}
		} else {
			if(vpc->vp->nr_insns)
				vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
			nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none));
			vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
		}
		break;

	default:
		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
		return FALSE;
	}

	if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nvfx->use_nv4x)
	{
		if(!vpc->r_0_1.type)
			vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0);
		nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none));
		nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none));
	}

	release_temps(vpc);
	return TRUE;
}

static boolean
nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
				const struct tgsi_full_declaration *fdec)
{
	unsigned idx = fdec->Range.First;
	int hw;

	switch (fdec->Semantic.Name) {
	case TGSI_SEMANTIC_POSITION:
		hw = NVFX_VP(INST_DEST_POS);
		vpc->hpos_idx = idx;
		break;
	case TGSI_SEMANTIC_COLOR:
		if (fdec->Semantic.Index == 0) {
			hw = NVFX_VP(INST_DEST_COL0);
		} else if (fdec->Semantic.Index == 1) {
			hw = NVFX_VP(INST_DEST_COL1);
		} else {
			NOUVEAU_ERR("bad colour semantic index\n");
			return FALSE;
		}
		break;
	case TGSI_SEMANTIC_BCOLOR:
		if (fdec->Semantic.Index == 0) {
			hw = NVFX_VP(INST_DEST_BFC0);
		} else if (fdec->Semantic.Index == 1) {
			hw = NVFX_VP(INST_DEST_BFC1);
		} else {
			NOUVEAU_ERR("bad bcolour semantic index\n");
			return FALSE;
		}
		break;
	case TGSI_SEMANTIC_FOG:
		hw = NVFX_VP(INST_DEST_FOGC);
		break;
	case TGSI_SEMANTIC_PSIZE:
		hw = NVFX_VP(INST_DEST_PSZ);
		break;
	case TGSI_SEMANTIC_GENERIC:
		hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf) - NVFX_FP_OP_INPUT_SRC_TC(0);
		if(hw <= 8)
			hw = NVFX_VP(INST_DEST_TC(hw));
		else if(hw == 9) /* TODO: this is correct, but how does this overlapping work exactly? */
			hw = NV40_VP_INST_DEST_PSZ;
		else
			assert(0);
		break;
	case TGSI_SEMANTIC_EDGEFLAG:
		/* not really an error, just a fallback */
		NOUVEAU_ERR("cannot handle edgeflag output\n");
		return FALSE;
	default:
		NOUVEAU_ERR("bad output semantic\n");
		return FALSE;
	}

	vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
	return TRUE;
}

static boolean
nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
{
	struct tgsi_parse_context p;
	int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i;
	struct util_semantic_set set;
	unsigned char sem_layout[10];
	unsigned num_outputs;
	unsigned num_texcoords = nvfx->is_nv4x ? 10 : 8;

	num_outputs = util_semantic_set_from_program_file(&set, vpc->pipe.tokens, TGSI_FILE_OUTPUT);

	if(num_outputs > num_texcoords) {
		NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs);
		return FALSE;
	}
	util_semantic_layout_from_set(sem_layout, &set, num_texcoords, num_texcoords);

	/* hope 0xf is initialized to (0, 0, 0, 1); otherwise, we are _probably_ not required to do this */
	memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input));
	for(int i = 0; i < num_texcoords; ++i) {
		if(sem_layout[i] == 0xff)
			continue;
		//printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i);
		vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | NVFX_FP_OP_INPUT_SRC_TC(i);
	}

	vpc->vp->sprite_fp_input = -1;
	for(int i = 0; i < num_texcoords; ++i)
	{
		if(sem_layout[i] == 0xff)
		{
			vpc->vp->sprite_fp_input = NVFX_FP_OP_INPUT_SRC_TC(i);
			break;
		}
	}

	tgsi_parse_init(&p, vpc->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch(tok->Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
			nr_imm++;
			break;
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;

			fdec = &p.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_TEMPORARY:
				if (fdec->Range.Last > high_temp)
					high_temp = fdec->Range.Last;
				break;
			case TGSI_FILE_ADDRESS:
				if (fdec->Range.Last > high_addr)
					high_addr = fdec->Range.Last;
				break;
			case TGSI_FILE_CONSTANT:
				if (fdec->Range.Last > high_const)
					high_const = fdec->Range.Last;
				break;
			case TGSI_FILE_OUTPUT:
				if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec))
					return FALSE;
				break;
			default:
				break;
			}
		}
			break;
		default:
			break;
		}
	}
	tgsi_parse_free(&p);

	if (nr_imm) {
		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));
		assert(vpc->imm);
	}

	if (++high_temp) {
		vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
		for (i = 0; i < high_temp; i++)
			vpc->r_temp[i] = temp(vpc);
	}

	if (++high_addr) {
		vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));
		for (i = 0; i < high_addr; i++)
			vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i);
	}

	if(++high_const) {
		vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg));
		for (i = 0; i < high_const; i++)
			vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0);
	}

	vpc->r_temps_discard = 0;
	return TRUE;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)

static struct nvfx_vertex_program*
nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, const struct tgsi_shader_info* info)
{
	struct tgsi_parse_context parse;
	struct nvfx_vertex_program* vp = NULL;
	struct nvfx_vpc *vpc = NULL;
	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
	struct util_dynarray insns;
	int i;

	tgsi_parse_init(&parse, vps->tokens);

	vp = CALLOC_STRUCT(nvfx_vertex_program);
	if(!vp)
		goto out_err;

	vpc = CALLOC_STRUCT(nvfx_vpc);
	if (!vpc)
		goto out_err;

	vpc->nvfx = nvfx;
	vpc->vp = vp;
	vpc->pipe = *vps;
	vpc->info = info;

	{
		// TODO: use a 64-bit atomic here!
		static unsigned long long id = 0;
		vp->id = ++id;
	}

	/* reserve space for ucps */
	if(nvfx->use_vp_clipping)
	{
		for(i = 0; i < 6; ++i)
			constant(vpc, -1, 0, 0, 0, 0);
	}

	if (!nvfx_vertprog_prepare(nvfx, vpc))
		goto out_err;

	/* Redirect post-transform vertex position to a temp if user clip
	 * planes are enabled.  We need to append code to the vtxprog
	 * to handle clip planes later.
	 */
	/* TODO: maybe support patching this depending on whether there are ucps: not sure if it really matters much */
	if (nvfx->use_vp_clipping) {
		vpc->r_result[vpc->hpos_idx] = temp(vpc);
		vpc->r_temps_discard = 0;
	}

	util_dynarray_init(&insns);
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		switch (parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			const struct tgsi_full_immediate *imm;

			imm = &parse.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(imm->Immediate.NrTokens == 4 + 1);
			vpc->imm[vpc->nr_imm++] =
				constant(vpc, -1,
					 imm->u[0].Float,
					 imm->u[1].Float,
					 imm->u[2].Float,
					 imm->u[3].Float);
		}
			break;
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		{
			const struct tgsi_full_instruction *finst;
			unsigned idx = insns.size >> 2;
			util_dynarray_append(&insns, unsigned, vp->nr_insns);
			finst = &parse.FullToken.FullInstruction;
			if (!nvfx_vertprog_parse_instruction(nvfx, vpc, idx, finst))
				goto out_err;
		}
			break;
		default:
			break;
		}
	}

	util_dynarray_append(&insns, unsigned, vp->nr_insns);

	for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
	{
		struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);
		struct nvfx_relocation hw_reloc;

		hw_reloc.location = label_reloc->location;
		hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];

		//debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);

		util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);
	}
	util_dynarray_fini(&insns);
	util_dynarray_trim(&vp->branch_relocs);

	/* XXX: what if we add a RET before?! make sure we jump here...*/

	/* Write out HPOS if it was redirected to a temp earlier */
	if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
		struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,
						NVFX_VP(INST_DEST_POS));
		struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);

		nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));
	}

	/* Insert code to handle user clip planes */
	if(nvfx->use_vp_clipping)
	{
		for (i = 0; i < 6; i++) {
			struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
			struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i));
			struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);
			unsigned mask;

			if(nvfx->is_nv4x)
			{
				switch (i) {
				case 0: case 3: mask = NVFX_VP_MASK_Y; break;
				case 1: case 4: mask = NVFX_VP_MASK_Z; break;
				case 2: case 5: mask = NVFX_VP_MASK_W; break;
				default:
					NOUVEAU_ERR("invalid clip dist #%d\n", i);
					goto out_err;
				}
			}
			else
				mask = NVFX_VP_MASK_X;

			nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none));
		}
	}

	if(debug_get_option_nvfx_dump_vp())
	{
		debug_printf("\n");
		tgsi_dump(vpc->pipe.tokens, 0);

		debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
		for (i = 0; i < vp->nr_insns; i++)
			debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
		debug_printf("\n");
	}

	vp->clip_nr = -1;
	vp->exec_start = -1;

out:
	tgsi_parse_free(&parse);
	if(vpc) {
		util_dynarray_fini(&vpc->label_relocs);
		util_dynarray_fini(&vpc->loop_stack);
		FREE(vpc->r_temp);
		FREE(vpc->r_address);
		FREE(vpc->r_const);
		FREE(vpc->imm);
		FREE(vpc);
	}
	return vp;

out_err:
	FREE(vp);
	vp = NULL;
	goto out;
}

static struct nvfx_vertex_program*
nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vertex_program* pvp)
{
	struct nvfx_vertex_program* vp = NULL;
	struct pipe_shader_state vps;
	struct tgsi_shader_info info;
	struct ureg_program *ureg = NULL;
	unsigned num_outputs = MIN2(pvp->info.num_outputs, 16);

	ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
	if(ureg == NULL)
		return NULL;

	for (unsigned i = 0; i < num_outputs; i++)
		ureg_MOV(ureg, ureg_DECL_output(ureg, pvp->info.output_semantic_name[i], pvp->info.output_semantic_index[i]), ureg_DECL_vs_input(ureg, i));

	ureg_END( ureg );

	vps.tokens = ureg_get_tokens(ureg, 0);
	tgsi_scan_shader(vps.tokens, &info);
	vp = nvfx_vertprog_translate(nvfx, &vps, &info);
	ureg_free_tokens(vps.tokens);
	ureg_destroy(ureg);

	return vp;
}

boolean
nvfx_vertprog_validate(struct nvfx_context *nvfx)
{
	struct nvfx_screen *screen = nvfx->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
	struct nvfx_vertex_program* vp;
	struct pipe_resource *constbuf;
	boolean upload_code = FALSE, upload_data = FALSE;
	int i;

	if (nvfx->render_mode == HW) {
		nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
		vp = pvp->vp;

		if(!vp) {
			vp = nvfx_vertprog_translate(nvfx, &pvp->pipe, &pvp->info);
			if(!vp)
				vp = NVFX_VP_FAILED;
			pvp->vp = vp;
		}

		if(vp == NVFX_VP_FAILED) {
			nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
			return FALSE;
		}

		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
	} else {
		vp = pvp->draw_vp;
		if(!vp)
		{
			pvp->draw_vp = vp = nvfx_vertprog_translate_draw_vp(nvfx, pvp);
			if(!vp) {
				_debug_printf("Error: unable to create a swtnl passthrough vertex shader: aborting.\n");
				abort();
			}
		}
		constbuf = NULL;
	}

	nvfx->hw_vertprog = vp;

	/* Allocate hw vtxprog exec slots */
	if (!vp->exec) {
		struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
		uint vplen = vp->nr_insns;

		if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
			while (heap->next && heap->size < vplen) {
				struct nvfx_vertex_program *evict;

				evict = heap->next->priv;
				nouveau_resource_free(&evict->exec);
			}

			if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
			{
				debug_printf("Vertex shader too long: %u instructions\n", vplen);
				nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
				return FALSE;
			}
		}

		upload_code = TRUE;
	}

	/* Allocate hw vtxprog const slots */
	if (vp->nr_consts && !vp->data) {
		struct nouveau_resource *heap = nvfx->screen->vp_data_heap;

		if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
			while (heap->next && heap->size < vp->nr_consts) {
				struct nvfx_vertex_program *evict;

				evict = heap->next->priv;
				nouveau_resource_free(&evict->data);
			}

			if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
			{
				debug_printf("Vertex shader uses too many constants: %u constants\n", vp->nr_consts);
				nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
				return FALSE;
			}
		}

		//printf("start at %u nc %u\n", vp->data->start, vp->nr_consts);

		/*XXX: handle this some day */
		assert(vp->data->start >= vp->data_start_min);

		upload_data = TRUE;
		if (vp->data_start != vp->data->start)
			upload_code = TRUE;
	}

	/* If exec or data segments moved we need to patch the program to
	 * fixup offsets and register IDs.
	 */
	if (vp->exec_start != vp->exec->start) {
		//printf("vp_relocs %u -> %u\n", vp->exec_start, vp->exec->start);
		for(unsigned i = 0; i < vp->branch_relocs.size; i += sizeof(struct nvfx_relocation))
		{
			struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->branch_relocs.data + i);
			uint32_t* hw = vp->insns[reloc->location].data;
			unsigned target = vp->exec->start + reloc->target;

			//debug_printf("vp_reloc hw %u -> hw %u\n", reloc->location, target);

			if(!nvfx->is_nv4x)
			{
				hw[2] &= ~NV30_VP_INST_IADDR_MASK;
				hw[2] |= (target & 0x1ff) << NV30_VP_INST_IADDR_SHIFT;
			}
			else
			{
				hw[3] &= ~NV40_VP_INST_IADDRL_MASK;
				hw[3] |= (target & 7) << NV40_VP_INST_IADDRL_SHIFT;

				hw[2] &= ~NV40_VP_INST_IADDRH_MASK;
				hw[2] |= ((target >> 3) & 0x3f) << NV40_VP_INST_IADDRH_SHIFT;
			}
		}

		vp->exec_start = vp->exec->start;
	}

	if (vp->nr_consts && vp->data_start != vp->data->start) {
		for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation))
		{
			struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i);
			struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location];

			//printf("reloc %i to %i + %i\n", reloc->location, vp->data->start, reloc->target);

			vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
			vpi->data[1] |=
				(reloc->target + vp->data->start) <<
				NVFX_VP(INST_CONST_SRC_SHIFT);
		}

		vp->data_start = vp->data->start;
		upload_code = TRUE;
	}

	/* Update + upload constant values */
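	/* Constants mirroring a slot of the bound constant buffer are re-read
	 * from it and uploaded only if they changed (or if the data segment
	 * moved).  With vp clipping the first 6 slots are skipped: they hold
	 * the user clip plane equations, which are presumably uploaded when
	 * the clip state itself is validated.
	 */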
	if (vp->nr_consts) {
		float *map = NULL;

		if (constbuf)
			map = (float*)nvfx_buffer(constbuf)->data;

		/*
		 * WAIT_RING(chan, 512 * 6);
		for (i = 0; i < 512; i++) {
			float v[4] = {0.1, 0.2, 0.3, 0.4};
			OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
			OUT_RING(chan, i);
			OUT_RINGp(chan, (uint32_t *)v, 4);
			printf("frob %i\n", i);
		}
		*/

		WAIT_RING(chan, 6 * vp->nr_consts);
		for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
			struct nvfx_vertex_program_data *vpd = &vp->consts[i];

			if (vpd->index >= 0) {
				if (!upload_data &&
				    !memcmp(vpd->value, &map[vpd->index * 4],
					    4 * sizeof(float)))
					continue;
				memcpy(vpd->value, &map[vpd->index * 4],
				       4 * sizeof(float));
			}

			//printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);

			OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
			OUT_RING(chan, i + vp->data->start);
			OUT_RINGp(chan, (uint32_t *)vpd->value, 4);
		}
	}

	/* Upload vtxprog */
	if (upload_code) {
		WAIT_RING(chan, 2 + 5 * vp->nr_insns);
		OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
		OUT_RING(chan, vp->exec->start);
		for (i = 0; i < vp->nr_insns; i++) {
			OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
			//printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
			OUT_RINGp(chan, vp->insns[i].data, 4);
		}
		vp->clip_nr = -1;
	}

	if(nvfx->dirty & (NVFX_NEW_VERTPROG))
	{
		WAIT_RING(chan, 6);
		OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1));
		OUT_RING(chan, vp->exec->start);
		if(nvfx->is_nv4x) {
			OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1));
			OUT_RING(chan, vp->ir);
		}
	}

	return TRUE;
}

void
nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
{
	if (vp->nr_insns)
		FREE(vp->insns);

	if (vp->nr_consts)
		FREE(vp->consts);

	nouveau_resource_free(&vp->exec);
	nouveau_resource_free(&vp->data);

	util_dynarray_fini(&vp->branch_relocs);
	util_dynarray_fini(&vp->const_relocs);
	FREE(vp);
}

static void *
nvfx_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_pipe_vertex_program *pvp;

	pvp = CALLOC(1, sizeof(struct nvfx_pipe_vertex_program));
	pvp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
	tgsi_scan_shader(pvp->pipe.tokens, &pvp->info);
	pvp->draw_elements = MAX2(1, MIN2(pvp->info.num_outputs, 16));
	pvp->draw_no_elements = pvp->info.num_outputs == 0;

	return (void *)pvp;
}

static void
nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	nvfx->vertprog = hwcso;
	nvfx->dirty |= NVFX_NEW_VERTPROG;
	nvfx->draw_dirty |= NVFX_NEW_VERTPROG;
}

static void
nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_pipe_vertex_program *pvp = hwcso;

	if(pvp->draw_vs)
		draw_delete_vertex_shader(nvfx->draw, pvp->draw_vs);
	if(pvp->vp && pvp->vp != NVFX_VP_FAILED)
		nvfx_vertprog_destroy(nvfx, pvp->vp);
	if(pvp->draw_vp)
		nvfx_vertprog_destroy(nvfx, pvp->draw_vp);
	FREE((void*)pvp->pipe.tokens);
	FREE(pvp);
}

void
nvfx_init_vertprog_functions(struct nvfx_context *nvfx)
{
	nvfx->pipe.create_vs_state = nvfx_vp_state_create;
	nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
	nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;
}