Nouveau: copy nv30_vertprog.c to nv20.
[mesa.git] / src / gallium / drivers / nv20 / nv30_vertprog.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4
5 #include "pipe/p_shader_tokens.h"
6 #include "tgsi/tgsi_parse.h"
7 #include "tgsi/tgsi_dump.h"
8
9 #include "nv30_context.h"
10 #include "nv30_state.h"
11
12 /* TODO (at least...):
13 * 1. Indexed consts + ARL
14 * 2. Arb. swz/negation
15 * 3. NV_vp11, NV_vp2, NV_vp3 features
16 * - extra arith opcodes
17 * - branching
18 * - texture sampling
19 * - indexed attribs
20 * - indexed results
21 * 4. bugs
22 */
23
24 #define SWZ_X 0
25 #define SWZ_Y 1
26 #define SWZ_Z 2
27 #define SWZ_W 3
28 #define MASK_X 8
29 #define MASK_Y 4
30 #define MASK_Z 2
31 #define MASK_W 1
32 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
33 #define DEF_SCALE 0
34 #define DEF_CTEST 0
35 #include "nv30_shader.h"
36
37 #define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
38 #define neg(s) nv30_sr_neg((s))
39 #define abs(s) nv30_sr_abs((s))
40
41 struct nv30_vpc {
42 struct nv30_vertex_program *vp;
43
44 struct nv30_vertex_program_exec *vpi;
45
46 unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
47
48 int high_temp;
49 int temp_temp_count;
50
51 struct nv30_sreg *imm;
52 unsigned nr_imm;
53 };
54
55 static struct nv30_sreg
56 temp(struct nv30_vpc *vpc)
57 {
58 int idx;
59
60 idx = vpc->temp_temp_count++;
61 idx += vpc->high_temp + 1;
62 return nv30_sr(NV30SR_TEMP, idx);
63 }
64
65 static struct nv30_sreg
66 constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
67 {
68 struct nv30_vertex_program *vp = vpc->vp;
69 struct nv30_vertex_program_data *vpd;
70 int idx;
71
72 if (pipe >= 0) {
73 for (idx = 0; idx < vp->nr_consts; idx++) {
74 if (vp->consts[idx].index == pipe)
75 return nv30_sr(NV30SR_CONST, idx);
76 }
77 }
78
79 idx = vp->nr_consts++;
80 vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
81 vpd = &vp->consts[idx];
82
83 vpd->index = pipe;
84 vpd->value[0] = x;
85 vpd->value[1] = y;
86 vpd->value[2] = z;
87 vpd->value[3] = w;
88 return nv30_sr(NV30SR_CONST, idx);
89 }
90
/* Emit one instruction; `o` is the opcode name without the NV30_VP_INST_ prefix. */
#define arith(cc,s,o,d,m,s0,s1,s2)					\
	nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m),		\
		      (s0), (s1), (s2))
93
94 static void
95 emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
96 {
97 struct nv30_vertex_program *vp = vpc->vp;
98 uint32_t sr = 0;
99
100 switch (src.type) {
101 case NV30SR_TEMP:
102 sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
103 sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
104 break;
105 case NV30SR_INPUT:
106 sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
107 NV30_VP_SRC_REG_TYPE_SHIFT);
108 vp->ir |= (1 << src.index);
109 hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
110 break;
111 case NV30SR_CONST:
112 sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
113 NV30_VP_SRC_REG_TYPE_SHIFT);
114 assert(vpc->vpi->const_index == -1 ||
115 vpc->vpi->const_index == src.index);
116 vpc->vpi->const_index = src.index;
117 break;
118 case NV30SR_NONE:
119 sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
120 NV30_VP_SRC_REG_TYPE_SHIFT);
121 break;
122 default:
123 assert(0);
124 }
125
126 if (src.negate)
127 sr |= NV30_VP_SRC_NEGATE;
128
129 if (src.abs)
130 hw[0] |= (1 << (21 + pos));
131
132 sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) |
133 (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) |
134 (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) |
135 (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT));
136
137 /*
138 * |VVV|
139 * d�.�b
140 * \u/
141 *
142 */
143
144 switch (pos) {
145 case 0:
146 hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >>
147 NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT;
148 hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) <<
149 NV30_VP_INST_SRC0L_SHIFT;
150 break;
151 case 1:
152 hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT;
153 break;
154 case 2:
155 hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >>
156 NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT;
157 hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) <<
158 NV30_VP_INST_SRC2L_SHIFT;
159 break;
160 default:
161 assert(0);
162 }
163 }
164
165 static void
166 emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
167 {
168 struct nv30_vertex_program *vp = vpc->vp;
169
170 switch (dst.type) {
171 case NV30SR_TEMP:
172 hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
173 break;
174 case NV30SR_OUTPUT:
175 switch (dst.index) {
176 case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
177 case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
178 case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
179 case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
180 case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break;
181 case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
182 case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
183 case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
184 case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
185 case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
186 case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
187 case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
188 case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
189 case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
190 default:
191 break;
192 }
193
194 hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
195 hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
196
197 /*XXX: no way this is entirely correct, someone needs to
198 * figure out what exactly it is.
199 */
200 hw[3] |= 0x800;
201 break;
202 default:
203 assert(0);
204 }
205 }
206
207 static void
208 nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
209 struct nv30_sreg dst, int mask,
210 struct nv30_sreg s0, struct nv30_sreg s1,
211 struct nv30_sreg s2)
212 {
213 struct nv30_vertex_program *vp = vpc->vp;
214 uint32_t *hw;
215
216 vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
217 vpc->vpi = &vp->insns[vp->nr_insns - 1];
218 memset(vpc->vpi, 0, sizeof(*vpc->vpi));
219 vpc->vpi->const_index = -1;
220
221 hw = vpc->vpi->data;
222
223 hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
224 hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
225 (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
226 (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
227 (3 << NV30_VP_INST_COND_SWZ_W_SHIFT));
228
229 hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
230 // hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
231 // hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
232
233 if (dst.type == NV30SR_OUTPUT) {
234 if (slot)
235 hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
236 else
237 hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
238 } else {
239 if (slot)
240 hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
241 else
242 hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
243 }
244
245 emit_dst(vpc, hw, slot, dst);
246 emit_src(vpc, hw, 0, s0);
247 emit_src(vpc, hw, 1, s1);
248 emit_src(vpc, hw, 2, s2);
249 }
250
251 static INLINE struct nv30_sreg
252 tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
253 struct nv30_sreg src;
254
255 switch (fsrc->SrcRegister.File) {
256 case TGSI_FILE_INPUT:
257 src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
258 break;
259 case TGSI_FILE_CONSTANT:
260 src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
261 break;
262 case TGSI_FILE_IMMEDIATE:
263 src = vpc->imm[fsrc->SrcRegister.Index];
264 break;
265 case TGSI_FILE_TEMPORARY:
266 if (vpc->high_temp < fsrc->SrcRegister.Index)
267 vpc->high_temp = fsrc->SrcRegister.Index;
268 src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
269 break;
270 default:
271 NOUVEAU_ERR("bad src file\n");
272 break;
273 }
274
275 src.abs = fsrc->SrcRegisterExtMod.Absolute;
276 src.negate = fsrc->SrcRegister.Negate;
277 src.swz[0] = fsrc->SrcRegister.SwizzleX;
278 src.swz[1] = fsrc->SrcRegister.SwizzleY;
279 src.swz[2] = fsrc->SrcRegister.SwizzleZ;
280 src.swz[3] = fsrc->SrcRegister.SwizzleW;
281 return src;
282 }
283
284 static INLINE struct nv30_sreg
285 tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
286 struct nv30_sreg dst;
287
288 switch (fdst->DstRegister.File) {
289 case TGSI_FILE_OUTPUT:
290 dst = nv30_sr(NV30SR_OUTPUT,
291 vpc->output_map[fdst->DstRegister.Index]);
292
293 break;
294 case TGSI_FILE_TEMPORARY:
295 dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
296 if (vpc->high_temp < dst.index)
297 vpc->high_temp = dst.index;
298 break;
299 default:
300 NOUVEAU_ERR("bad dst file\n");
301 break;
302 }
303
304 return dst;
305 }
306
307 static INLINE int
308 tgsi_mask(uint tgsi)
309 {
310 int mask = 0;
311
312 if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
313 if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
314 if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
315 if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
316 return mask;
317 }
318
319 static boolean
320 nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
321 const struct tgsi_full_instruction *finst)
322 {
323 struct nv30_sreg src[3], dst, tmp;
324 struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
325 int mask;
326 int ai = -1, ci = -1;
327 int i;
328
329 if (finst->Instruction.Opcode == TGSI_OPCODE_END)
330 return TRUE;
331
332 vpc->temp_temp_count = 0;
333 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
334 const struct tgsi_full_src_register *fsrc;
335
336 fsrc = &finst->FullSrcRegisters[i];
337 if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
338 src[i] = tgsi_src(vpc, fsrc);
339 }
340 }
341
342 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
343 const struct tgsi_full_src_register *fsrc;
344
345 fsrc = &finst->FullSrcRegisters[i];
346 switch (fsrc->SrcRegister.File) {
347 case TGSI_FILE_INPUT:
348 if (ai == -1 || ai == fsrc->SrcRegister.Index) {
349 ai = fsrc->SrcRegister.Index;
350 src[i] = tgsi_src(vpc, fsrc);
351 } else {
352 src[i] = temp(vpc);
353 arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
354 tgsi_src(vpc, fsrc), none, none);
355 }
356 break;
357 /*XXX: index comparison is broken now that consts come from
358 * two different register files.
359 */
360 case TGSI_FILE_CONSTANT:
361 case TGSI_FILE_IMMEDIATE:
362 if (ci == -1 || ci == fsrc->SrcRegister.Index) {
363 ci = fsrc->SrcRegister.Index;
364 src[i] = tgsi_src(vpc, fsrc);
365 } else {
366 src[i] = temp(vpc);
367 arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
368 tgsi_src(vpc, fsrc), none, none);
369 }
370 break;
371 case TGSI_FILE_TEMPORARY:
372 /* handled above */
373 break;
374 default:
375 NOUVEAU_ERR("bad src file\n");
376 return FALSE;
377 }
378 }
379
380 dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
381 mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
382
383 switch (finst->Instruction.Opcode) {
384 case TGSI_OPCODE_ABS:
385 arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
386 break;
387 case TGSI_OPCODE_ADD:
388 arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
389 break;
390 case TGSI_OPCODE_ARL:
391 arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
392 break;
393 case TGSI_OPCODE_DP3:
394 arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
395 break;
396 case TGSI_OPCODE_DP4:
397 arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
398 break;
399 case TGSI_OPCODE_DPH:
400 arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
401 break;
402 case TGSI_OPCODE_DST:
403 arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
404 break;
405 case TGSI_OPCODE_EX2:
406 arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
407 break;
408 case TGSI_OPCODE_EXP:
409 arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
410 break;
411 case TGSI_OPCODE_FLR:
412 arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
413 break;
414 case TGSI_OPCODE_FRC:
415 arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
416 break;
417 case TGSI_OPCODE_LG2:
418 arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
419 break;
420 case TGSI_OPCODE_LIT:
421 arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
422 break;
423 case TGSI_OPCODE_LOG:
424 arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
425 break;
426 case TGSI_OPCODE_MAD:
427 arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
428 break;
429 case TGSI_OPCODE_MAX:
430 arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
431 break;
432 case TGSI_OPCODE_MIN:
433 arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
434 break;
435 case TGSI_OPCODE_MOV:
436 arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
437 break;
438 case TGSI_OPCODE_MUL:
439 arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
440 break;
441 case TGSI_OPCODE_POW:
442 tmp = temp(vpc);
443 arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
444 swz(src[0], X, X, X, X));
445 arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
446 swz(src[1], X, X, X, X), none);
447 arith(vpc, 1, OP_EX2, dst, mask, none, none,
448 swz(tmp, X, X, X, X));
449 break;
450 case TGSI_OPCODE_RCP:
451 arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
452 break;
453 case TGSI_OPCODE_RET:
454 break;
455 case TGSI_OPCODE_RSQ:
456 arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
457 break;
458 case TGSI_OPCODE_SGE:
459 arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
460 break;
461 case TGSI_OPCODE_SGT:
462 arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
463 break;
464 case TGSI_OPCODE_SLT:
465 arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
466 break;
467 case TGSI_OPCODE_SUB:
468 arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
469 break;
470 case TGSI_OPCODE_XPD:
471 tmp = temp(vpc);
472 arith(vpc, 0, OP_MUL, tmp, mask,
473 swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
474 arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
475 swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
476 neg(tmp));
477 break;
478 default:
479 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
480 return FALSE;
481 }
482
483 return TRUE;
484 }
485
486 static boolean
487 nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
488 const struct tgsi_full_declaration *fdec)
489 {
490 int hw;
491
492 switch (fdec->Semantic.SemanticName) {
493 case TGSI_SEMANTIC_POSITION:
494 hw = NV30_VP_INST_DEST_POS;
495 break;
496 case TGSI_SEMANTIC_COLOR:
497 if (fdec->Semantic.SemanticIndex == 0) {
498 hw = NV30_VP_INST_DEST_COL0;
499 } else
500 if (fdec->Semantic.SemanticIndex == 1) {
501 hw = NV30_VP_INST_DEST_COL1;
502 } else {
503 NOUVEAU_ERR("bad colour semantic index\n");
504 return FALSE;
505 }
506 break;
507 case TGSI_SEMANTIC_BCOLOR:
508 if (fdec->Semantic.SemanticIndex == 0) {
509 hw = NV30_VP_INST_DEST_BFC0;
510 } else
511 if (fdec->Semantic.SemanticIndex == 1) {
512 hw = NV30_VP_INST_DEST_BFC1;
513 } else {
514 NOUVEAU_ERR("bad bcolour semantic index\n");
515 return FALSE;
516 }
517 break;
518 case TGSI_SEMANTIC_FOG:
519 hw = NV30_VP_INST_DEST_FOGC;
520 break;
521 case TGSI_SEMANTIC_PSIZE:
522 hw = NV30_VP_INST_DEST_PSZ;
523 break;
524 case TGSI_SEMANTIC_GENERIC:
525 if (fdec->Semantic.SemanticIndex <= 7) {
526 hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
527 } else {
528 NOUVEAU_ERR("bad generic semantic index\n");
529 return FALSE;
530 }
531 break;
532 default:
533 NOUVEAU_ERR("bad output semantic\n");
534 return FALSE;
535 }
536
537 vpc->output_map[fdec->DeclarationRange.First] = hw;
538 return TRUE;
539 }
540
541 static boolean
542 nv30_vertprog_prepare(struct nv30_vpc *vpc)
543 {
544 struct tgsi_parse_context p;
545 int nr_imm = 0;
546
547 tgsi_parse_init(&p, vpc->vp->pipe.tokens);
548 while (!tgsi_parse_end_of_tokens(&p)) {
549 const union tgsi_full_token *tok = &p.FullToken;
550
551 tgsi_parse_token(&p);
552 switch(tok->Token.Type) {
553 case TGSI_TOKEN_TYPE_IMMEDIATE:
554 nr_imm++;
555 break;
556 default:
557 break;
558 }
559 }
560 tgsi_parse_free(&p);
561
562 if (nr_imm) {
563 vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
564 assert(vpc->imm);
565 }
566
567 return TRUE;
568 }
569
570 static void
571 nv30_vertprog_translate(struct nv30_context *nv30,
572 struct nv30_vertex_program *vp)
573 {
574 struct tgsi_parse_context parse;
575 struct nv30_vpc *vpc = NULL;
576
577 tgsi_dump(vp->pipe.tokens,0);
578
579 vpc = CALLOC(1, sizeof(struct nv30_vpc));
580 if (!vpc)
581 return;
582 vpc->vp = vp;
583 vpc->high_temp = -1;
584
585 if (!nv30_vertprog_prepare(vpc)) {
586 FREE(vpc);
587 return;
588 }
589
590 tgsi_parse_init(&parse, vp->pipe.tokens);
591
592 while (!tgsi_parse_end_of_tokens(&parse)) {
593 tgsi_parse_token(&parse);
594
595 switch (parse.FullToken.Token.Type) {
596 case TGSI_TOKEN_TYPE_DECLARATION:
597 {
598 const struct tgsi_full_declaration *fdec;
599 fdec = &parse.FullToken.FullDeclaration;
600 switch (fdec->Declaration.File) {
601 case TGSI_FILE_OUTPUT:
602 if (!nv30_vertprog_parse_decl_output(vpc, fdec))
603 goto out_err;
604 break;
605 default:
606 break;
607 }
608 }
609 break;
610 case TGSI_TOKEN_TYPE_IMMEDIATE:
611 {
612 const struct tgsi_full_immediate *imm;
613
614 imm = &parse.FullToken.FullImmediate;
615 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
616 // assert(imm->Immediate.Size == 4);
617 vpc->imm[vpc->nr_imm++] =
618 constant(vpc, -1,
619 imm->u.ImmediateFloat32[0].Float,
620 imm->u.ImmediateFloat32[1].Float,
621 imm->u.ImmediateFloat32[2].Float,
622 imm->u.ImmediateFloat32[3].Float);
623 }
624 break;
625 case TGSI_TOKEN_TYPE_INSTRUCTION:
626 {
627 const struct tgsi_full_instruction *finst;
628 finst = &parse.FullToken.FullInstruction;
629 if (!nv30_vertprog_parse_instruction(vpc, finst))
630 goto out_err;
631 }
632 break;
633 default:
634 break;
635 }
636 }
637
638 vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
639 vp->translated = TRUE;
640 out_err:
641 tgsi_parse_free(&parse);
642 FREE(vpc);
643 }
644
645 static boolean
646 nv30_vertprog_validate(struct nv30_context *nv30)
647 {
648 struct nouveau_winsys *nvws = nv30->nvws;
649 struct pipe_winsys *ws = nv30->pipe.winsys;
650 struct nouveau_grobj *rankine = nv30->screen->rankine;
651 struct nv30_vertex_program *vp;
652 struct pipe_buffer *constbuf;
653 boolean upload_code = FALSE, upload_data = FALSE;
654 int i;
655
656 vp = nv30->vertprog;
657 constbuf = nv30->constbuf[PIPE_SHADER_VERTEX];
658
659 /* Translate TGSI shader into hw bytecode */
660 if (!vp->translated) {
661 nv30_vertprog_translate(nv30, vp);
662 if (!vp->translated)
663 return FALSE;
664 }
665
666 /* Allocate hw vtxprog exec slots */
667 if (!vp->exec) {
668 struct nouveau_resource *heap = nv30->screen->vp_exec_heap;
669 struct nouveau_stateobj *so;
670 uint vplen = vp->nr_insns;
671
672 if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
673 while (heap->next && heap->size < vplen) {
674 struct nv30_vertex_program *evict;
675
676 evict = heap->next->priv;
677 nvws->res_free(&evict->exec);
678 }
679
680 if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
681 assert(0);
682 }
683
684 so = so_new(2, 0);
685 so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
686 so_data (so, vp->exec->start);
687 so_ref(so, &vp->so);
688
689 upload_code = TRUE;
690 }
691
692 /* Allocate hw vtxprog const slots */
693 if (vp->nr_consts && !vp->data) {
694 struct nouveau_resource *heap = nv30->screen->vp_data_heap;
695
696 if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
697 while (heap->next && heap->size < vp->nr_consts) {
698 struct nv30_vertex_program *evict;
699
700 evict = heap->next->priv;
701 nvws->res_free(&evict->data);
702 }
703
704 if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
705 assert(0);
706 }
707
708 /*XXX: handle this some day */
709 assert(vp->data->start >= vp->data_start_min);
710
711 upload_data = TRUE;
712 if (vp->data_start != vp->data->start)
713 upload_code = TRUE;
714 }
715
716 /* If exec or data segments moved we need to patch the program to
717 * fixup offsets and register IDs.
718 */
719 if (vp->exec_start != vp->exec->start) {
720 for (i = 0; i < vp->nr_insns; i++) {
721 struct nv30_vertex_program_exec *vpi = &vp->insns[i];
722
723 if (vpi->has_branch_offset) {
724 assert(0);
725 }
726 }
727
728 vp->exec_start = vp->exec->start;
729 }
730
731 if (vp->nr_consts && vp->data_start != vp->data->start) {
732 for (i = 0; i < vp->nr_insns; i++) {
733 struct nv30_vertex_program_exec *vpi = &vp->insns[i];
734
735 if (vpi->const_index >= 0) {
736 vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK;
737 vpi->data[1] |=
738 (vpi->const_index + vp->data->start) <<
739 NV30_VP_INST_CONST_SRC_SHIFT;
740
741 }
742 }
743
744 vp->data_start = vp->data->start;
745 }
746
747 /* Update + Upload constant values */
748 if (vp->nr_consts) {
749 float *map = NULL;
750
751 if (constbuf) {
752 map = ws->buffer_map(ws, constbuf,
753 PIPE_BUFFER_USAGE_CPU_READ);
754 }
755
756 for (i = 0; i < vp->nr_consts; i++) {
757 struct nv30_vertex_program_data *vpd = &vp->consts[i];
758
759 if (vpd->index >= 0) {
760 if (!upload_data &&
761 !memcmp(vpd->value, &map[vpd->index * 4],
762 4 * sizeof(float)))
763 continue;
764 memcpy(vpd->value, &map[vpd->index * 4],
765 4 * sizeof(float));
766 }
767
768 BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
769 OUT_RING (i + vp->data->start);
770 OUT_RINGp ((uint32_t *)vpd->value, 4);
771 }
772
773 if (constbuf) {
774 ws->buffer_unmap(ws, constbuf);
775 }
776 }
777
778 /* Upload vtxprog */
779 if (upload_code) {
780 #if 0
781 for (i = 0; i < vp->nr_insns; i++) {
782 NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n",
783 i, vp->insns[i].data[0], vp->insns[i].data[1],
784 vp->insns[i].data[2], vp->insns[i].data[3]);
785 }
786 #endif
787 BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
788 OUT_RING (vp->exec->start);
789 for (i = 0; i < vp->nr_insns; i++) {
790 BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
791 OUT_RINGp (vp->insns[i].data, 4);
792 }
793 }
794
795 if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) {
796 so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]);
797 return TRUE;
798 }
799
800 return FALSE;
801 }
802
803 void
804 nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp)
805 {
806 struct nouveau_winsys *nvws = nv30->screen->nvws;
807
808 vp->translated = FALSE;
809
810 if (vp->nr_insns) {
811 FREE(vp->insns);
812 vp->insns = NULL;
813 vp->nr_insns = 0;
814 }
815
816 if (vp->nr_consts) {
817 FREE(vp->consts);
818 vp->consts = NULL;
819 vp->nr_consts = 0;
820 }
821
822 nvws->res_free(&vp->exec);
823 vp->exec_start = 0;
824 nvws->res_free(&vp->data);
825 vp->data_start = 0;
826 vp->data_start_min = 0;
827
828 vp->ir = vp->or = 0;
829 so_ref(NULL, &vp->so);
830 }
831
832 struct nv30_state_entry nv30_state_vertprog = {
833 .validate = nv30_vertprog_validate,
834 .dirty = {
835 .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/,
836 .hw = NV30_STATE_VERTPROG,
837 }
838 };