r300: Detangle fragment program compiler from driver-specific structure
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
41 #include "tnl/tnl.h"
42
43 #include "compiler/radeon_nqssadce.h"
44 #include "r300_context.h"
45 #include "r300_state.h"
46
/*
 * Evaluates to non-zero when source registers a and b conflict: either their
 * RelAddr settings differ, or their indices differ while both map to the
 * constant class or both map to the input class.  The translator uses this to
 * decide when one of the sources has to be copied into a temporary first.
 *
 * Arguments are evaluated more than once, so pass side-effect-free lvalues.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
53
/*
 * Build a source operand from src[x] (which must already be a valid source)
 * with all four swizzle slots set to y, e.g. SWIZZLE_ZERO or SWIZZLE_ONE, and
 * with no negation.  The register index, class and RelAddr bit of src[x] are
 * kept.
 *
 * Relies on `vp` and `src` being in scope at the point of use.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
66
/*
 * Release all scratch temporaries handed out since the last call: u_temp_i
 * counts down from VSF_MAX_FRAGMENT_TEMPS - 1, so the distance from that
 * start value is the number of scratch registers in use.  If those plus the
 * program's own temporaries exceed VSF_MAX_FRAGMENT_TEMPS, warn once and flag
 * the program as erroneous.  u_temp_i is then reset to its start value.
 *
 * Relies on `vp` and `u_temp_i` being in scope at the point of use.
 */
#define FREE_TEMPS() \
	do { \
		int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if (VSF_MAX_FRAGMENT_TEMPS < (vp->num_temporaries + u_temp_used)) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
76
77 static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
78 {
79 int pi;
80 float *dst_o = dst;
81 struct gl_program_parameter_list *paramList;
82
83 if (vp->IsNVProgram) {
84 _mesa_load_tracked_matrices(ctx);
85
86 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
87 *dst++ = ctx->VertexProgram.Parameters[pi][0];
88 *dst++ = ctx->VertexProgram.Parameters[pi][1];
89 *dst++ = ctx->VertexProgram.Parameters[pi][2];
90 *dst++ = ctx->VertexProgram.Parameters[pi][3];
91 }
92 return dst - dst_o;
93 }
94
95 if (!vp->Base.Parameters)
96 return 0;
97
98 _mesa_load_state_parameters(ctx, vp->Base.Parameters);
99
100 if (vp->Base.Parameters->NumParameters * 4 >
101 VSF_MAX_FRAGMENT_LENGTH) {
102 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
103 _mesa_exit(-1);
104 }
105
106 paramList = vp->Base.Parameters;
107 for (pi = 0; pi < paramList->NumParameters; pi++) {
108 switch (paramList->Parameters[pi].Type) {
109 case PROGRAM_STATE_VAR:
110 case PROGRAM_NAMED_PARAM:
111 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
112 case PROGRAM_CONSTANT:
113 *dst++ = paramList->ParameterValues[pi][0];
114 *dst++ = paramList->ParameterValues[pi][1];
115 *dst++ = paramList->ParameterValues[pi][2];
116 *dst++ = paramList->ParameterValues[pi][3];
117 break;
118 default:
119 _mesa_problem(NULL, "Bad param type in %s",
120 __FUNCTION__);
121 }
122
123 }
124
125 return dst - dst_o;
126 }
127
128 static unsigned long t_dst_mask(GLuint mask)
129 {
130 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
131 return mask & VSF_FLAG_ALL;
132 }
133
134 static unsigned long t_dst_class(gl_register_file file)
135 {
136
137 switch (file) {
138 case PROGRAM_TEMPORARY:
139 return PVS_DST_REG_TEMPORARY;
140 case PROGRAM_OUTPUT:
141 return PVS_DST_REG_OUT;
142 case PROGRAM_ADDRESS:
143 return PVS_DST_REG_A0;
144 /*
145 case PROGRAM_INPUT:
146 case PROGRAM_LOCAL_PARAM:
147 case PROGRAM_ENV_PARAM:
148 case PROGRAM_NAMED_PARAM:
149 case PROGRAM_STATE_VAR:
150 case PROGRAM_WRITE_ONLY:
151 case PROGRAM_ADDRESS:
152 */
153 default:
154 fprintf(stderr, "problem in %s", __FUNCTION__);
155 _mesa_exit(-1);
156 return -1;
157 }
158 }
159
160 static unsigned long t_dst_index(struct r300_vertex_program *vp,
161 struct prog_dst_register *dst)
162 {
163 if (dst->File == PROGRAM_OUTPUT)
164 return vp->outputs[dst->Index];
165
166 return dst->Index;
167 }
168
169 static unsigned long t_src_class(gl_register_file file)
170 {
171 switch (file) {
172 case PROGRAM_TEMPORARY:
173 return PVS_SRC_REG_TEMPORARY;
174 case PROGRAM_INPUT:
175 return PVS_SRC_REG_INPUT;
176 case PROGRAM_LOCAL_PARAM:
177 case PROGRAM_ENV_PARAM:
178 case PROGRAM_NAMED_PARAM:
179 case PROGRAM_CONSTANT:
180 case PROGRAM_STATE_VAR:
181 return PVS_SRC_REG_CONSTANT;
182 /*
183 case PROGRAM_OUTPUT:
184 case PROGRAM_WRITE_ONLY:
185 case PROGRAM_ADDRESS:
186 */
187 default:
188 fprintf(stderr, "problem in %s", __FUNCTION__);
189 _mesa_exit(-1);
190 return -1;
191 }
192 }
193
194 static INLINE unsigned long t_swizzle(GLubyte swizzle)
195 {
196 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
197 return swizzle;
198 }
199
#if 0
/*
 * Debug helper (compiled out): print the input remapping table of vp to
 * stderr, prefixed with the caller's name.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
218
219 static unsigned long t_src_index(struct r300_vertex_program *vp,
220 struct prog_src_register *src)
221 {
222 if (src->File == PROGRAM_INPUT) {
223 assert(vp->inputs[src->Index] != -1);
224 return vp->inputs[src->Index];
225 } else {
226 if (src->Index < 0) {
227 fprintf(stderr,
228 "negative offsets for indirect addressing do not work.\n");
229 return 0;
230 }
231 return src->Index;
232 }
233 }
234
235 /* these two functions should probably be merged... */
236
237 static unsigned long t_src(struct r300_vertex_program *vp,
238 struct prog_src_register *src)
239 {
240 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
241 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
242 */
243 return PVS_SRC_OPERAND(t_src_index(vp, src),
244 t_swizzle(GET_SWZ(src->Swizzle, 0)),
245 t_swizzle(GET_SWZ(src->Swizzle, 1)),
246 t_swizzle(GET_SWZ(src->Swizzle, 2)),
247 t_swizzle(GET_SWZ(src->Swizzle, 3)),
248 t_src_class(src->File),
249 src->Negate) | (src->RelAddr << 4);
250 }
251
252 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
253 struct prog_src_register *src)
254 {
255 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
256 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
257 */
258 return PVS_SRC_OPERAND(t_src_index(vp, src),
259 t_swizzle(GET_SWZ(src->Swizzle, 0)),
260 t_swizzle(GET_SWZ(src->Swizzle, 0)),
261 t_swizzle(GET_SWZ(src->Swizzle, 0)),
262 t_swizzle(GET_SWZ(src->Swizzle, 0)),
263 t_src_class(src->File),
264 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
265 (src->RelAddr << 4);
266 }
267
268 static GLboolean valid_dst(struct r300_vertex_program *vp,
269 struct prog_dst_register *dst)
270 {
271 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
272 return GL_FALSE;
273 } else if (dst->File == PROGRAM_ADDRESS) {
274 assert(dst->Index == 0);
275 }
276
277 return GL_TRUE;
278 }
279
280 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
281 struct prog_instruction *vpi,
282 GLuint * inst,
283 struct prog_src_register src[3])
284 {
285 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
286
287 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
288 GL_FALSE,
289 GL_FALSE,
290 t_dst_index(vp, &vpi->DstReg),
291 t_dst_mask(vpi->DstReg.WriteMask),
292 t_dst_class(vpi->DstReg.File));
293 inst[1] = t_src(vp, &src[0]);
294 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
295 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
296 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
297 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
298 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
299 t_src_class(src[0].File),
300 (!src[0].
301 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
302 (src[0].RelAddr << 4);
303 inst[3] = 0;
304
305 return inst;
306 }
307
308 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
309 struct prog_instruction *vpi,
310 GLuint * inst,
311 struct prog_src_register src[3])
312 {
313 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
314 GL_FALSE,
315 GL_FALSE,
316 t_dst_index(vp, &vpi->DstReg),
317 t_dst_mask(vpi->DstReg.WriteMask),
318 t_dst_class(vpi->DstReg.File));
319 inst[1] = t_src(vp, &src[0]);
320 inst[2] = t_src(vp, &src[1]);
321 inst[3] = __CONST(1, SWIZZLE_ZERO);
322
323 return inst;
324 }
325
326 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
327 struct prog_instruction *vpi,
328 GLuint * inst,
329 struct prog_src_register src[3])
330 {
331 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
332 GL_FALSE,
333 GL_FALSE,
334 t_dst_index(vp, &vpi->DstReg),
335 t_dst_mask(vpi->DstReg.WriteMask),
336 t_dst_class(vpi->DstReg.File));
337 inst[1] = t_src(vp, &src[0]);
338 inst[2] = __CONST(0, SWIZZLE_ZERO);
339 inst[3] = __CONST(0, SWIZZLE_ZERO);
340
341 return inst;
342 }
343
344 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
345 struct prog_instruction *vpi,
346 GLuint * inst,
347 struct prog_src_register src[3])
348 {
349 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
350
351 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
352 GL_FALSE,
353 GL_FALSE,
354 t_dst_index(vp, &vpi->DstReg),
355 t_dst_mask(vpi->DstReg.WriteMask),
356 t_dst_class(vpi->DstReg.File));
357 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
358 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
359 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
360 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
361 SWIZZLE_ZERO,
362 t_src_class(src[0].File),
363 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
364 (src[0].RelAddr << 4);
365 inst[2] =
366 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
367 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
368 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
369 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
370 t_src_class(src[1].File),
371 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
372 (src[1].RelAddr << 4);
373 inst[3] = __CONST(1, SWIZZLE_ZERO);
374
375 return inst;
376 }
377
378 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
379 struct prog_instruction *vpi,
380 GLuint * inst,
381 struct prog_src_register src[3])
382 {
383 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
384 GL_FALSE,
385 GL_FALSE,
386 t_dst_index(vp, &vpi->DstReg),
387 t_dst_mask(vpi->DstReg.WriteMask),
388 t_dst_class(vpi->DstReg.File));
389 inst[1] = t_src(vp, &src[0]);
390 inst[2] = t_src(vp, &src[1]);
391 inst[3] = __CONST(1, SWIZZLE_ZERO);
392
393 return inst;
394 }
395
396 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
397 struct prog_instruction *vpi,
398 GLuint * inst,
399 struct prog_src_register src[3])
400 {
401 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
402 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
403 GL_FALSE,
404 GL_FALSE,
405 t_dst_index(vp, &vpi->DstReg),
406 t_dst_mask(vpi->DstReg.WriteMask),
407 t_dst_class(vpi->DstReg.File));
408 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
409 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
410 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
411 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
412 PVS_SRC_SELECT_FORCE_1,
413 t_src_class(src[0].File),
414 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
415 (src[0].RelAddr << 4);
416 inst[2] = t_src(vp, &src[1]);
417 inst[3] = __CONST(1, SWIZZLE_ZERO);
418
419 return inst;
420 }
421
422 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
423 struct prog_instruction *vpi,
424 GLuint * inst,
425 struct prog_src_register src[3])
426 {
427 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
428 GL_FALSE,
429 GL_FALSE,
430 t_dst_index(vp, &vpi->DstReg),
431 t_dst_mask(vpi->DstReg.WriteMask),
432 t_dst_class(vpi->DstReg.File));
433 inst[1] = t_src(vp, &src[0]);
434 inst[2] = t_src(vp, &src[1]);
435 inst[3] = __CONST(1, SWIZZLE_ZERO);
436
437 return inst;
438 }
439
440 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
441 struct prog_instruction *vpi,
442 GLuint * inst,
443 struct prog_src_register src[3])
444 {
445 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
446 GL_TRUE,
447 GL_FALSE,
448 t_dst_index(vp, &vpi->DstReg),
449 t_dst_mask(vpi->DstReg.WriteMask),
450 t_dst_class(vpi->DstReg.File));
451 inst[1] = t_src_scalar(vp, &src[0]);
452 inst[2] = __CONST(0, SWIZZLE_ZERO);
453 inst[3] = __CONST(0, SWIZZLE_ZERO);
454
455 return inst;
456 }
457
458 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
459 struct prog_instruction *vpi,
460 GLuint * inst,
461 struct prog_src_register src[3])
462 {
463 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
464 GL_TRUE,
465 GL_FALSE,
466 t_dst_index(vp, &vpi->DstReg),
467 t_dst_mask(vpi->DstReg.WriteMask),
468 t_dst_class(vpi->DstReg.File));
469 inst[1] = t_src_scalar(vp, &src[0]);
470 inst[2] = __CONST(0, SWIZZLE_ZERO);
471 inst[3] = __CONST(0, SWIZZLE_ZERO);
472
473 return inst;
474 }
475
476 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
477 struct prog_instruction *vpi,
478 GLuint * inst,
479 struct prog_src_register src[3],
480 int *u_temp_i)
481 {
482 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
483 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
484
485 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
486 GL_FALSE,
487 GL_FALSE,
488 *u_temp_i,
489 t_dst_mask(vpi->DstReg.WriteMask),
490 PVS_DST_REG_TEMPORARY);
491 inst[1] = t_src(vp, &src[0]);
492 inst[2] = __CONST(0, SWIZZLE_ZERO);
493 inst[3] = __CONST(0, SWIZZLE_ZERO);
494 inst += 4;
495
496 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
497 GL_FALSE,
498 GL_FALSE,
499 t_dst_index(vp, &vpi->DstReg),
500 t_dst_mask(vpi->DstReg.WriteMask),
501 t_dst_class(vpi->DstReg.File));
502 inst[1] = t_src(vp, &src[0]);
503 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
504 PVS_SRC_SELECT_X,
505 PVS_SRC_SELECT_Y,
506 PVS_SRC_SELECT_Z,
507 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
508 /* Not 100% sure about this */
509 (!src[0].
510 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
511 /*VSF_FLAG_ALL */ );
512 inst[3] = __CONST(0, SWIZZLE_ZERO);
513 (*u_temp_i)--;
514
515 return inst;
516 }
517
518 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
519 struct prog_instruction *vpi,
520 GLuint * inst,
521 struct prog_src_register src[3])
522 {
523 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
524 GL_FALSE,
525 GL_FALSE,
526 t_dst_index(vp, &vpi->DstReg),
527 t_dst_mask(vpi->DstReg.WriteMask),
528 t_dst_class(vpi->DstReg.File));
529 inst[1] = t_src(vp, &src[0]);
530 inst[2] = __CONST(0, SWIZZLE_ZERO);
531 inst[3] = __CONST(0, SWIZZLE_ZERO);
532
533 return inst;
534 }
535
536 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
537 struct prog_instruction *vpi,
538 GLuint * inst,
539 struct prog_src_register src[3])
540 {
541 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
542
543 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
544 GL_TRUE,
545 GL_FALSE,
546 t_dst_index(vp, &vpi->DstReg),
547 t_dst_mask(vpi->DstReg.WriteMask),
548 t_dst_class(vpi->DstReg.File));
549 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
550 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
551 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
552 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
553 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
554 t_src_class(src[0].File),
555 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
556 (src[0].RelAddr << 4);
557 inst[2] = __CONST(0, SWIZZLE_ZERO);
558 inst[3] = __CONST(0, SWIZZLE_ZERO);
559
560 return inst;
561 }
562
563 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
564 struct prog_instruction *vpi,
565 GLuint * inst,
566 struct prog_src_register src[3])
567 {
568 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
569
570 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
571 GL_TRUE,
572 GL_FALSE,
573 t_dst_index(vp, &vpi->DstReg),
574 t_dst_mask(vpi->DstReg.WriteMask),
575 t_dst_class(vpi->DstReg.File));
576 /* NOTE: Users swizzling might not work. */
577 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
578 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
579 PVS_SRC_SELECT_FORCE_0, // Z
580 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
581 t_src_class(src[0].File),
582 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
583 (src[0].RelAddr << 4);
584 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
585 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
586 PVS_SRC_SELECT_FORCE_0, // Z
587 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
588 t_src_class(src[0].File),
589 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
590 (src[0].RelAddr << 4);
591 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
592 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
593 PVS_SRC_SELECT_FORCE_0, // Z
594 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
595 t_src_class(src[0].File),
596 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
597 (src[0].RelAddr << 4);
598
599 return inst;
600 }
601
602 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
603 struct prog_instruction *vpi,
604 GLuint * inst,
605 struct prog_src_register src[3])
606 {
607 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
608 GL_TRUE,
609 GL_FALSE,
610 t_dst_index(vp, &vpi->DstReg),
611 t_dst_mask(vpi->DstReg.WriteMask),
612 t_dst_class(vpi->DstReg.File));
613 inst[1] = t_src_scalar(vp, &src[0]);
614 inst[2] = __CONST(0, SWIZZLE_ZERO);
615 inst[3] = __CONST(0, SWIZZLE_ZERO);
616
617 return inst;
618 }
619
620 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
621 struct prog_instruction *vpi,
622 GLuint * inst,
623 struct prog_src_register src[3])
624 {
625 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
626 GL_FALSE,
627 GL_TRUE,
628 t_dst_index(vp, &vpi->DstReg),
629 t_dst_mask(vpi->DstReg.WriteMask),
630 t_dst_class(vpi->DstReg.File));
631 inst[1] = t_src(vp, &src[0]);
632 inst[2] = t_src(vp, &src[1]);
633 inst[3] = t_src(vp, &src[2]);
634
635 return inst;
636 }
637
638 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
639 struct prog_instruction *vpi,
640 GLuint * inst,
641 struct prog_src_register src[3])
642 {
643 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
644 GL_FALSE,
645 GL_FALSE,
646 t_dst_index(vp, &vpi->DstReg),
647 t_dst_mask(vpi->DstReg.WriteMask),
648 t_dst_class(vpi->DstReg.File));
649 inst[1] = t_src(vp, &src[0]);
650 inst[2] = t_src(vp, &src[1]);
651 inst[3] = __CONST(1, SWIZZLE_ZERO);
652
653 return inst;
654 }
655
656 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
657 struct prog_instruction *vpi,
658 GLuint * inst,
659 struct prog_src_register src[3])
660 {
661 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
662 GL_FALSE,
663 GL_FALSE,
664 t_dst_index(vp, &vpi->DstReg),
665 t_dst_mask(vpi->DstReg.WriteMask),
666 t_dst_class(vpi->DstReg.File));
667 inst[1] = t_src(vp, &src[0]);
668 inst[2] = t_src(vp, &src[1]);
669 inst[3] = __CONST(1, SWIZZLE_ZERO);
670
671 return inst;
672 }
673
674 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
675 struct prog_instruction *vpi,
676 GLuint * inst,
677 struct prog_src_register src[3])
678 {
679 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
680
681 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
682 GL_FALSE,
683 GL_FALSE,
684 t_dst_index(vp, &vpi->DstReg),
685 t_dst_mask(vpi->DstReg.WriteMask),
686 t_dst_class(vpi->DstReg.File));
687 inst[1] = t_src(vp, &src[0]);
688 inst[2] = __CONST(0, SWIZZLE_ZERO);
689 inst[3] = __CONST(0, SWIZZLE_ZERO);
690
691 return inst;
692 }
693
694 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
695 struct prog_instruction *vpi,
696 GLuint * inst,
697 struct prog_src_register src[3])
698 {
699 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
700 GL_FALSE,
701 GL_FALSE,
702 t_dst_index(vp, &vpi->DstReg),
703 t_dst_mask(vpi->DstReg.WriteMask),
704 t_dst_class(vpi->DstReg.File));
705 inst[1] = t_src(vp, &src[0]);
706 inst[2] = t_src(vp, &src[1]);
707 inst[3] = __CONST(1, SWIZZLE_ZERO);
708
709 return inst;
710 }
711
712 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
713 struct prog_instruction *vpi,
714 GLuint * inst,
715 struct prog_src_register src[3])
716 {
717 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
718 GL_TRUE,
719 GL_FALSE,
720 t_dst_index(vp, &vpi->DstReg),
721 t_dst_mask(vpi->DstReg.WriteMask),
722 t_dst_class(vpi->DstReg.File));
723 inst[1] = t_src_scalar(vp, &src[0]);
724 inst[2] = __CONST(0, SWIZZLE_ZERO);
725 inst[3] = t_src_scalar(vp, &src[1]);
726
727 return inst;
728 }
729
730 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
731 struct prog_instruction *vpi,
732 GLuint * inst,
733 struct prog_src_register src[3])
734 {
735 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
736 GL_TRUE,
737 GL_FALSE,
738 t_dst_index(vp, &vpi->DstReg),
739 t_dst_mask(vpi->DstReg.WriteMask),
740 t_dst_class(vpi->DstReg.File));
741 inst[1] = t_src_scalar(vp, &src[0]);
742 inst[2] = __CONST(0, SWIZZLE_ZERO);
743 inst[3] = __CONST(0, SWIZZLE_ZERO);
744
745 return inst;
746 }
747
748 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
749 struct prog_instruction *vpi,
750 GLuint * inst,
751 struct prog_src_register src[3])
752 {
753 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
754 GL_TRUE,
755 GL_FALSE,
756 t_dst_index(vp, &vpi->DstReg),
757 t_dst_mask(vpi->DstReg.WriteMask),
758 t_dst_class(vpi->DstReg.File));
759 inst[1] = t_src_scalar(vp, &src[0]);
760 inst[2] = __CONST(0, SWIZZLE_ZERO);
761 inst[3] = __CONST(0, SWIZZLE_ZERO);
762
763 return inst;
764 }
765
766 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
767 struct prog_instruction *vpi,
768 GLuint * inst,
769 struct prog_src_register src[3])
770 {
771 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
772 GL_FALSE,
773 GL_FALSE,
774 t_dst_index(vp, &vpi->DstReg),
775 t_dst_mask(vpi->DstReg.WriteMask),
776 t_dst_class(vpi->DstReg.File));
777 inst[1] = t_src(vp, &src[0]);
778 inst[2] = t_src(vp, &src[1]);
779 inst[3] = __CONST(1, SWIZZLE_ZERO);
780
781 return inst;
782 }
783
784 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
785 struct prog_instruction *vpi,
786 GLuint * inst,
787 struct prog_src_register src[3])
788 {
789 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
790 GL_FALSE,
791 GL_FALSE,
792 t_dst_index(vp, &vpi->DstReg),
793 t_dst_mask(vpi->DstReg.WriteMask),
794 t_dst_class(vpi->DstReg.File));
795 inst[1] = t_src(vp, &src[0]);
796 inst[2] = t_src(vp, &src[1]);
797 inst[3] = __CONST(1, SWIZZLE_ZERO);
798
799 return inst;
800 }
801
802 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
803 struct prog_instruction *vpi,
804 GLuint * inst,
805 struct prog_src_register src[3])
806 {
807 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
808
809 #if 0
810 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
811 GL_FALSE,
812 GL_FALSE,
813 t_dst_index(vp, &vpi->DstReg),
814 t_dst_mask(vpi->DstReg.WriteMask),
815 t_dst_class(vpi->DstReg.File));
816 inst[1] = t_src(vp, &src[0]);
817 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
818 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
819 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
820 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
821 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
822 t_src_class(src[1].File),
823 (!src[1].
824 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
825 (src[1].RelAddr << 4);
826 inst[3] = 0;
827 #else
828 inst[0] =
829 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
830 GL_FALSE,
831 GL_FALSE,
832 t_dst_index(vp, &vpi->DstReg),
833 t_dst_mask(vpi->DstReg.WriteMask),
834 t_dst_class(vpi->DstReg.File));
835 inst[1] = t_src(vp, &src[0]);
836 inst[2] = __CONST(0, SWIZZLE_ONE);
837 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
838 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
839 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
840 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
841 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
842 t_src_class(src[1].File),
843 (!src[1].
844 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
845 (src[1].RelAddr << 4);
846 #endif
847
848 return inst;
849 }
850
851 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
852 struct prog_instruction *vpi,
853 GLuint * inst,
854 struct prog_src_register src[3])
855 {
856 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
857
858 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
859 GL_FALSE,
860 GL_FALSE,
861 t_dst_index(vp, &vpi->DstReg),
862 t_dst_mask(vpi->DstReg.WriteMask),
863 t_dst_class(vpi->DstReg.File));
864 inst[1] = t_src(vp, &src[0]);
865 inst[2] = __CONST(0, SWIZZLE_ZERO);
866 inst[3] = __CONST(0, SWIZZLE_ZERO);
867
868 return inst;
869 }
870
871 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
872 struct prog_instruction *vpi,
873 GLuint * inst,
874 struct prog_src_register src[3],
875 int *u_temp_i)
876 {
877 /* mul r0, r1.yzxw, r2.zxyw
878 mad r0, -r2.yzxw, r1.zxyw, r0
879 */
880
881 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
882 GL_FALSE,
883 GL_FALSE,
884 *u_temp_i,
885 t_dst_mask(vpi->DstReg.WriteMask),
886 PVS_DST_REG_TEMPORARY);
887 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
888 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
889 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
890 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
891 t_src_class(src[0].File),
892 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
893 (src[0].RelAddr << 4);
894 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
895 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
896 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
897 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
898 t_src_class(src[1].File),
899 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
900 (src[1].RelAddr << 4);
901 inst[3] = __CONST(1, SWIZZLE_ZERO);
902 inst += 4;
903
904 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
905 GL_FALSE,
906 GL_FALSE,
907 t_dst_index(vp, &vpi->DstReg),
908 t_dst_mask(vpi->DstReg.WriteMask),
909 t_dst_class(vpi->DstReg.File));
910 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
911 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
912 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
913 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
914 t_src_class(src[1].File),
915 (!src[1].
916 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
917 (src[1].RelAddr << 4);
918 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
919 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
920 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
921 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
922 t_src_class(src[0].File),
923 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
924 (src[0].RelAddr << 4);
925 inst[3] =
926 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
927 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
928 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
929
930 (*u_temp_i)--;
931
932 return inst;
933 }
934
935 static void t_inputs_outputs(struct r300_vertex_program *vp)
936 {
937 int i;
938 int cur_reg;
939 GLuint OutputsWritten, InputsRead;
940
941 OutputsWritten = vp->Base->Base.OutputsWritten;
942 InputsRead = vp->Base->Base.InputsRead;
943
944 cur_reg = -1;
945 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
946 if (InputsRead & (1 << i))
947 vp->inputs[i] = ++cur_reg;
948 else
949 vp->inputs[i] = -1;
950 }
951
952 cur_reg = 0;
953 for (i = 0; i < VERT_RESULT_MAX; i++)
954 vp->outputs[i] = -1;
955
956 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
957
958 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
959 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
960 }
961
962 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
963 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
964 }
965
966 /* If we're writing back facing colors we need to send
967 * four colors to make front/back face colors selection work.
968 * If the vertex program doesn't write all 4 colors, lets
969 * pretend it does by skipping output index reg so the colors
970 * get written into appropriate output vectors.
971 */
972 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
973 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
974 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
975 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
976 cur_reg++;
977 }
978
979 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
980 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
981 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
982 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
983 cur_reg++;
984 }
985
986 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
987 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
988 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
989 cur_reg++;
990 }
991
992 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
993 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
994 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
995 cur_reg++;
996 }
997
998 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
999 if (OutputsWritten & (1 << i)) {
1000 vp->outputs[i] = cur_reg++;
1001 }
1002 }
1003
1004 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
1005 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
1006 }
1007 }
1008
1009 void r300TranslateVertexShader(struct r300_vertex_program *vp)
1010 {
1011 struct prog_instruction *vpi = vp->Base->Base.Instructions;
1012 int i;
1013 GLuint *inst;
1014 unsigned long num_operands;
1015 /* Initial value should be last tmp reg that hw supports.
1016 Strangely enough r300 doesnt mind even though these would be out of range.
1017 Smart enough to realize that it doesnt need it? */
1018 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1019 struct prog_src_register src[3];
1020
1021 vp->pos_end = 0; /* Not supported yet */
1022 vp->hw_code.length = 0;
1023 vp->translated = GL_TRUE;
1024 vp->error = GL_FALSE;
1025
1026 t_inputs_outputs(vp);
1027
1028 for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
1029 vpi++, inst += 4) {
1030
1031 FREE_TEMPS();
1032
1033 if (!valid_dst(vp, &vpi->DstReg)) {
1034 /* redirect result to unused temp */
1035 vpi->DstReg.File = PROGRAM_TEMPORARY;
1036 vpi->DstReg.Index = u_temp_i;
1037 }
1038
1039 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1040
1041 /* copy the sources (src) from mesa into a local variable... is this needed? */
1042 for (i = 0; i < num_operands; i++) {
1043 src[i] = vpi->SrcReg[i];
1044 }
1045
1046 if (num_operands == 3) { /* TODO: scalars */
1047 if (CMP_SRCS(src[1], src[2])
1048 || CMP_SRCS(src[0], src[2])) {
1049 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1050 GL_FALSE,
1051 GL_FALSE,
1052 u_temp_i,
1053 VSF_FLAG_ALL,
1054 PVS_DST_REG_TEMPORARY);
1055 inst[1] =
1056 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1057 SWIZZLE_X,
1058 SWIZZLE_Y,
1059 SWIZZLE_Z,
1060 SWIZZLE_W,
1061 t_src_class(src[2].File),
1062 VSF_FLAG_NONE) | (src[2].
1063 RelAddr <<
1064 4);
1065 inst[2] = __CONST(2, SWIZZLE_ZERO);
1066 inst[3] = __CONST(2, SWIZZLE_ZERO);
1067 inst += 4;
1068
1069 src[2].File = PROGRAM_TEMPORARY;
1070 src[2].Index = u_temp_i;
1071 src[2].RelAddr = 0;
1072 u_temp_i--;
1073 }
1074 }
1075
1076 if (num_operands >= 2) {
1077 if (CMP_SRCS(src[1], src[0])) {
1078 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1079 GL_FALSE,
1080 GL_FALSE,
1081 u_temp_i,
1082 VSF_FLAG_ALL,
1083 PVS_DST_REG_TEMPORARY);
1084 inst[1] =
1085 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1086 SWIZZLE_X,
1087 SWIZZLE_Y,
1088 SWIZZLE_Z,
1089 SWIZZLE_W,
1090 t_src_class(src[0].File),
1091 VSF_FLAG_NONE) | (src[0].
1092 RelAddr <<
1093 4);
1094 inst[2] = __CONST(0, SWIZZLE_ZERO);
1095 inst[3] = __CONST(0, SWIZZLE_ZERO);
1096 inst += 4;
1097
1098 src[0].File = PROGRAM_TEMPORARY;
1099 src[0].Index = u_temp_i;
1100 src[0].RelAddr = 0;
1101 u_temp_i--;
1102 }
1103 }
1104
1105 switch (vpi->Opcode) {
1106 case OPCODE_ABS:
1107 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1108 break;
1109 case OPCODE_ADD:
1110 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1111 break;
1112 case OPCODE_ARL:
1113 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1114 break;
1115 case OPCODE_DP3:
1116 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1117 break;
1118 case OPCODE_DP4:
1119 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1120 break;
1121 case OPCODE_DPH:
1122 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1123 break;
1124 case OPCODE_DST:
1125 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1126 break;
1127 case OPCODE_EX2:
1128 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1129 break;
1130 case OPCODE_EXP:
1131 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1132 break;
1133 case OPCODE_FLR:
1134 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1135 &u_temp_i);
1136 break;
1137 case OPCODE_FRC:
1138 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1139 break;
1140 case OPCODE_LG2:
1141 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1142 break;
1143 case OPCODE_LIT:
1144 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1145 break;
1146 case OPCODE_LOG:
1147 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1148 break;
1149 case OPCODE_MAD:
1150 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1151 break;
1152 case OPCODE_MAX:
1153 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1154 break;
1155 case OPCODE_MIN:
1156 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1157 break;
1158 case OPCODE_MOV:
1159 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1160 break;
1161 case OPCODE_MUL:
1162 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1163 break;
1164 case OPCODE_POW:
1165 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1166 break;
1167 case OPCODE_RCP:
1168 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1169 break;
1170 case OPCODE_RSQ:
1171 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1172 break;
1173 case OPCODE_SGE:
1174 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1175 break;
1176 case OPCODE_SLT:
1177 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1178 break;
1179 case OPCODE_SUB:
1180 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1181 break;
1182 case OPCODE_SWZ:
1183 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1184 break;
1185 case OPCODE_XPD:
1186 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1187 &u_temp_i);
1188 break;
1189 default:
1190 vp->error = GL_TRUE;
1191 break;
1192 }
1193 }
1194
1195 vp->hw_code.length = (inst - vp->hw_code.body.d);
1196 if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
1197 vp->error = GL_TRUE;
1198 }
1199 }
1200
1201 static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
1202 {
1203 struct prog_instruction *vpi;
1204
1205 _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
1206
1207 vpi = &prog->Instructions[prog->NumInstructions - 3];
1208
1209 vpi->Opcode = OPCODE_MOV;
1210
1211 vpi->DstReg.File = PROGRAM_OUTPUT;
1212 vpi->DstReg.Index = VERT_RESULT_HPOS;
1213 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1214 vpi->DstReg.CondMask = COND_TR;
1215
1216 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1217 vpi->SrcReg[0].Index = temp_index;
1218 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1219
1220 ++vpi;
1221
1222 vpi->Opcode = OPCODE_MOV;
1223
1224 vpi->DstReg.File = PROGRAM_OUTPUT;
1225 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
1226 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1227 vpi->DstReg.CondMask = COND_TR;
1228
1229 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1230 vpi->SrcReg[0].Index = temp_index;
1231 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1232
1233 ++vpi;
1234
1235 vpi->Opcode = OPCODE_END;
1236 }
1237
1238 static void pos_as_texcoord(struct gl_program *prog, int tex_id)
1239 {
1240 struct prog_instruction *vpi;
1241 GLuint tempregi = prog->NumTemporaries;
1242
1243 prog->NumTemporaries++;
1244
1245 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1246 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1247 vpi->DstReg.File = PROGRAM_TEMPORARY;
1248 vpi->DstReg.Index = tempregi;
1249 }
1250 }
1251
1252 insert_wpos(prog, tempregi, tex_id);
1253
1254 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
1255 }
1256
1257 /**
1258 * The fogcoord attribute is special in that only the first component
1259 * is relevant, and the remaining components are always fixed (when read
1260 * from by the fragment program) to yield an X001 pattern.
1261 *
1262 * We need to enforce this either in the vertex program or in the fragment
1263 * program, and this code chooses not to enforce it in the vertex program.
1264 * This is slightly cheaper, as long as the fragment program does not use
1265 * weird swizzles.
1266 *
1267 * And it seems that usually, weird swizzles are not used, so...
1268 *
1269 * See also the counterpart rewriting for fragment programs.
1270 */
1271 static void fog_as_texcoord(struct gl_program *prog, int tex_id)
1272 {
1273 struct prog_instruction *vpi;
1274
1275 vpi = prog->Instructions;
1276 while (vpi->Opcode != OPCODE_END) {
1277 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
1278 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
1279 vpi->DstReg.WriteMask = WRITEMASK_X;
1280 }
1281
1282 ++vpi;
1283 }
1284
1285 prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
1286 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
1287 }
1288
1289 static int translateABS(struct gl_program *prog, int pos)
1290 {
1291 struct prog_instruction *inst;
1292
1293 inst = &prog->Instructions[pos];
1294
1295 inst->Opcode = OPCODE_MAX;
1296 inst->SrcReg[1] = inst->SrcReg[0];
1297 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1298
1299 return 0;
1300 }
1301
1302 static int translateDP3(struct gl_program *prog, int pos)
1303 {
1304 struct prog_instruction *inst;
1305
1306 inst = &prog->Instructions[pos];
1307
1308 inst->Opcode = OPCODE_DP4;
1309 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
1310
1311 return 0;
1312 }
1313
1314 static int translateDPH(struct gl_program *prog, int pos)
1315 {
1316 struct prog_instruction *inst;
1317
1318 inst = &prog->Instructions[pos];
1319
1320 inst->Opcode = OPCODE_DP4;
1321 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
1322
1323 return 0;
1324 }
1325
1326 static int translateFLR(struct gl_program *prog, int pos)
1327 {
1328 struct prog_instruction *inst;
1329 struct prog_dst_register dst;
1330 int tmp_idx;
1331
1332 tmp_idx = prog->NumTemporaries++;
1333
1334 _mesa_insert_instructions(prog, pos + 1, 1);
1335
1336 inst = &prog->Instructions[pos];
1337 dst = inst->DstReg;
1338
1339 inst->Opcode = OPCODE_FRC;
1340 inst->DstReg.File = PROGRAM_TEMPORARY;
1341 inst->DstReg.Index = tmp_idx;
1342 ++inst;
1343
1344 inst->Opcode = OPCODE_ADD;
1345 inst->DstReg = dst;
1346 inst->SrcReg[0] = (inst-1)->SrcReg[0];
1347 inst->SrcReg[1].File = PROGRAM_TEMPORARY;
1348 inst->SrcReg[1].Index = tmp_idx;
1349 inst->SrcReg[1].Negate = NEGATE_XYZW;
1350
1351 return 1;
1352 }
1353
1354 static int translateSUB(struct gl_program *prog, int pos)
1355 {
1356 struct prog_instruction *inst;
1357
1358 inst = &prog->Instructions[pos];
1359
1360 inst->Opcode = OPCODE_ADD;
1361 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1362
1363 return 0;
1364 }
1365
1366 static int translateSWZ(struct gl_program *prog, int pos)
1367 {
1368 prog->Instructions[pos].Opcode = OPCODE_MOV;
1369
1370 return 0;
1371 }
1372
1373 static int translateXPD(struct gl_program *prog, int pos)
1374 {
1375 struct prog_instruction *inst;
1376 int tmp_idx;
1377
1378 tmp_idx = prog->NumTemporaries++;
1379
1380 _mesa_insert_instructions(prog, pos + 1, 1);
1381
1382 inst = &prog->Instructions[pos];
1383
1384 *(inst+1) = *inst;
1385
1386 inst->Opcode = OPCODE_MUL;
1387 inst->DstReg.File = PROGRAM_TEMPORARY;
1388 inst->DstReg.Index = tmp_idx;
1389 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
1390 inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
1391 ++inst;
1392
1393 inst->Opcode = OPCODE_MAD;
1394 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
1395 inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
1396 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1397 inst->SrcReg[2].File = PROGRAM_TEMPORARY;
1398 inst->SrcReg[2].Index = tmp_idx;
1399
1400 return 1;
1401 }
1402
1403 static void translateInsts(struct gl_program *prog)
1404 {
1405 struct prog_instruction *inst;
1406 int i;
1407
1408 for (i = 0; i < prog->NumInstructions; ++i) {
1409 inst = &prog->Instructions[i];
1410
1411 switch (inst->Opcode) {
1412 case OPCODE_ABS:
1413 i += translateABS(prog, i);
1414 break;
1415 case OPCODE_DP3:
1416 i += translateDP3(prog, i);
1417 break;
1418 case OPCODE_DPH:
1419 i += translateDPH(prog, i);
1420 break;
1421 case OPCODE_FLR:
1422 i += translateFLR(prog, i);
1423 break;
1424 case OPCODE_SUB:
1425 i += translateSUB(prog, i);
1426 break;
1427 case OPCODE_SWZ:
1428 i += translateSWZ(prog, i);
1429 break;
1430 case OPCODE_XPD:
1431 i += translateXPD(prog, i);
1432 break;
1433 default:
1434 break;
1435 }
1436 }
1437 }
1438
1439 #define ADD_OUTPUT(fp_attr, vp_result) \
1440 do { \
1441 if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
1442 OutputsAdded |= 1 << (vp_result); \
1443 count++; \
1444 } \
1445 } while (0)
1446
1447 static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
1448 {
1449 r300ContextPtr r300 = R300_CONTEXT(ctx);
1450 GLuint OutputsAdded, FpReads;
1451 int i, count;
1452
1453 OutputsAdded = 0;
1454 count = 0;
1455 FpReads = r300->selected_fp->Base->InputsRead;
1456
1457 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
1458 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
1459
1460 for (i = 0; i < 7; ++i) {
1461 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
1462 }
1463
1464 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1465 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1466 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1467 */
1468 if (count > 0) {
1469 struct prog_instruction *inst;
1470
1471 _mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
1472 inst = &prog->Instructions[prog->NumInstructions - 1 - count];
1473
1474 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1475 if (OutputsAdded & (1 << i)) {
1476 inst->Opcode = OPCODE_MOV;
1477
1478 inst->DstReg.File = PROGRAM_OUTPUT;
1479 inst->DstReg.Index = i;
1480 inst->DstReg.WriteMask = WRITEMASK_XYZW;
1481 inst->DstReg.CondMask = COND_TR;
1482
1483 inst->SrcReg[0].File = PROGRAM_CONSTANT;
1484 inst->SrcReg[0].Index = 0;
1485 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1486
1487 ++inst;
1488 }
1489 }
1490
1491 prog->OutputsWritten |= OutputsAdded;
1492 }
1493 }
1494
1495 #undef ADD_OUTPUT
1496
1497 static void nqssadceInit(struct nqssadce_state* s)
1498 {
1499 r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
1500 GLuint fp_reads;
1501
1502 fp_reads = r300->selected_fp->Base->InputsRead;
1503 {
1504 if (fp_reads & FRAG_BIT_COL0) {
1505 s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
1506 s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
1507 }
1508
1509 if (fp_reads & FRAG_BIT_COL1) {
1510 s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
1511 s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
1512 }
1513 }
1514
1515 {
1516 int i;
1517 for (i = 0; i < 8; ++i) {
1518 if (fp_reads & FRAG_BIT_TEX(i)) {
1519 s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
1520 }
1521 }
1522 }
1523
1524 s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
1525 if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
1526 s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
1527 }
1528
1529 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
1530 {
1531 (void) opcode;
1532 (void) reg;
1533
1534 return GL_TRUE;
1535 }
1536
1537 static struct r300_vertex_program *build_program(GLcontext *ctx,
1538 struct r300_vertex_program_key *wanted_key,
1539 const struct gl_vertex_program *mesa_vp)
1540 {
1541 r300ContextPtr r300 = R300_CONTEXT(ctx);
1542 struct r300_vertex_program *vp;
1543 struct gl_program *prog;
1544
1545 vp = _mesa_calloc(sizeof(*vp));
1546 vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
1547 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1548
1549 prog = &vp->Base->Base;
1550
1551 if (RADEON_DEBUG & DEBUG_VERTS) {
1552 fprintf(stderr, "Initial vertex program:\n");
1553 _mesa_print_program(prog);
1554 fflush(stdout);
1555 }
1556
1557 if (vp->Base->IsPositionInvariant) {
1558 _mesa_insert_mvp_code(ctx, vp->Base);
1559 }
1560
1561 if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
1562 pos_as_texcoord(&vp->Base->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0);
1563 }
1564
1565 if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
1566 fog_as_texcoord(&vp->Base->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0);
1567 }
1568
1569 addArtificialOutputs(ctx, prog);
1570
1571 translateInsts(prog);
1572
1573 if (RADEON_DEBUG & DEBUG_VERTS) {
1574 fprintf(stderr, "Vertex program after native rewrite:\n");
1575 _mesa_print_program(prog);
1576 fflush(stdout);
1577 }
1578
1579 {
1580 struct radeon_nqssadce_descr nqssadce = {
1581 .Init = &nqssadceInit,
1582 .IsNativeSwizzle = &swizzleIsNative,
1583 .BuildSwizzle = NULL
1584 };
1585 radeonNqssaDce(ctx, prog, &nqssadce);
1586
1587 /* We need this step for reusing temporary registers */
1588 _mesa_optimize_program(ctx, prog);
1589
1590 if (RADEON_DEBUG & DEBUG_VERTS) {
1591 fprintf(stderr, "Vertex program after NQSSADCE:\n");
1592 _mesa_print_program(prog);
1593 fflush(stdout);
1594 }
1595 }
1596
1597 assert(prog->NumInstructions);
1598 {
1599 struct prog_instruction *inst;
1600 int max, i, tmp;
1601
1602 inst = prog->Instructions;
1603 max = -1;
1604 while (inst->Opcode != OPCODE_END) {
1605 tmp = _mesa_num_inst_src_regs(inst->Opcode);
1606 for (i = 0; i < tmp; ++i) {
1607 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
1608 if ((int) inst->SrcReg[i].Index > max) {
1609 max = inst->SrcReg[i].Index;
1610 }
1611 }
1612 }
1613
1614 if (_mesa_num_inst_dst_regs(inst->Opcode)) {
1615 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1616 if ((int) inst->DstReg.Index > max) {
1617 max = inst->DstReg.Index;
1618 }
1619 }
1620 }
1621 ++inst;
1622 }
1623
1624 /* We actually want highest index of used temporary register,
1625 * not the number of temporaries used.
1626 * These values aren't always the same.
1627 */
1628 vp->num_temporaries = max + 1;
1629 }
1630
1631 return vp;
1632 }
1633
1634 struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
1635 {
1636 r300ContextPtr r300 = R300_CONTEXT(ctx);
1637 struct r300_vertex_program_key wanted_key = { 0 };
1638 struct r300_vertex_program_cont *vpc;
1639 struct r300_vertex_program *vp;
1640
1641 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1642 wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
1643 wanted_key.FogAttr = r300->selected_fp->code.fog_attr;
1644 wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr;
1645
1646 for (vp = vpc->progs; vp; vp = vp->next) {
1647 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1648 == 0) {
1649 return r300->selected_vp = vp;
1650 }
1651 }
1652
1653 vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
1654 vp->next = vpc->progs;
1655 vpc->progs = vp;
1656
1657 return r300->selected_vp = vp;
1658 }
1659
/*
 * Raise the vpu.count field of the command header at ptr to new_count / 4
 * when it is currently smaller.  Asserts that the resulting count is below
 * 256.
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
		int _vecs = (new_count) / 4; \
		assert(_vecs < 256); \
		if (_hdr->vpu.count < _vecs) \
			_hdr->vpu.count = _vecs; \
	} while (0)
1666
1667 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
1668 {
1669 int i;
1670
1671 assert((code->length > 0) && (code->length % 4 == 0));
1672
1673 switch ((dest >> 8) & 0xf) {
1674 case 0:
1675 R300_STATECHANGE(r300, vpi);
1676 for (i = 0; i < code->length; i++)
1677 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1678 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
1679 break;
1680 case 2:
1681 R300_STATECHANGE(r300, vpp);
1682 for (i = 0; i < code->length; i++)
1683 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1684 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
1685 break;
1686 case 4:
1687 R300_STATECHANGE(r300, vps);
1688 for (i = 0; i < code->length; i++)
1689 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1690 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
1691 break;
1692 default:
1693 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
1694 _mesa_exit(-1);
1695 }
1696 }
1697
1698 void r300SetupVertexProgram(r300ContextPtr rmesa)
1699 {
1700 GLcontext *ctx = rmesa->radeon.glCtx;
1701 struct r300_vertex_program *prog = rmesa->selected_vp;
1702 int inst_count = 0;
1703 int param_count = 0;
1704
1705 /* Reset state, in case we don't use something */
1706 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
1707 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
1708 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
1709
1710 R300_STATECHANGE(rmesa, vpp);
1711 param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
1712 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
1713 param_count /= 4;
1714
1715 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
1716 inst_count = (prog->hw_code.length / 4) - 1;
1717
1718 r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
1719 _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
1720
1721 R300_STATECHANGE(rmesa, pvs);
1722 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
1723 (inst_count << R300_PVS_LAST_INST_SHIFT);
1724
1725 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
1726 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
1727 }