r300: use mesa provided function for adding MVP code
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_print.h"
39 #include "shader/prog_statevars.h"
40 #include "tnl/tnl.h"
41
42 #include "r300_context.h"
43 #include "r300_state.h"
44
/*
 * Evaluates to non-zero when the source registers a and b clash: either
 * their RelAddr flags differ, or they name different indices while both map
 * to the constant register class or both map to the input register class.
 *
 * Both arguments are expanded several times, so pass only side-effect-free
 * lvalues.  Every use of an argument is parenthesized so that any expression
 * yielding a prog_src_register can be passed safely.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
51
/*
 * Build a source operand that yields a constant (ZERO or ONE): the register
 * of the already valid source src[x] is reused and all four component
 * selects are forced to the swizzle y.
 *
 * Expands to an expression that relies on `vp' and `src' being in scope at
 * the point of use.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | \
	 (src[x].RelAddr << 4))
64
/*
 * Account for the scratch temporaries handed out since the last reset and
 * rewind the scratch cursor to the top register.
 *
 * Scratch registers are taken downwards from VSF_MAX_FRAGMENT_TEMPS - 1; if
 * the ones in use plus the program's own temporaries exceed the limit, a
 * warning is issued and vp->error is set.
 *
 * Relies on `vp' and `u_temp_i' being in scope at the point of use.
 */
#define FREE_TEMPS() \
	do { \
		const int scratch_in_use = \
			(VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if (vp->num_temporaries + scratch_in_use > \
		    VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, scratch_in_use); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
74
75 int r300VertexProgUpdateParams(GLcontext * ctx,
76 struct r300_vertex_program_cont *vp, float *dst)
77 {
78 int pi;
79 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
80 float *dst_o = dst;
81 struct gl_program_parameter_list *paramList;
82
83 if (mesa_vp->IsNVProgram) {
84 _mesa_load_tracked_matrices(ctx);
85
86 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
87 *dst++ = ctx->VertexProgram.Parameters[pi][0];
88 *dst++ = ctx->VertexProgram.Parameters[pi][1];
89 *dst++ = ctx->VertexProgram.Parameters[pi][2];
90 *dst++ = ctx->VertexProgram.Parameters[pi][3];
91 }
92 return dst - dst_o;
93 }
94
95 assert(mesa_vp->Base.Parameters);
96 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
97
98 if (mesa_vp->Base.Parameters->NumParameters * 4 >
99 VSF_MAX_FRAGMENT_LENGTH) {
100 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
101 _mesa_exit(-1);
102 }
103
104 paramList = mesa_vp->Base.Parameters;
105 for (pi = 0; pi < paramList->NumParameters; pi++) {
106 switch (paramList->Parameters[pi].Type) {
107 case PROGRAM_STATE_VAR:
108 case PROGRAM_NAMED_PARAM:
109 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
110 case PROGRAM_CONSTANT:
111 *dst++ = paramList->ParameterValues[pi][0];
112 *dst++ = paramList->ParameterValues[pi][1];
113 *dst++ = paramList->ParameterValues[pi][2];
114 *dst++ = paramList->ParameterValues[pi][3];
115 break;
116 default:
117 _mesa_problem(NULL, "Bad param type in %s",
118 __FUNCTION__);
119 }
120
121 }
122
123 return dst - dst_o;
124 }
125
126 static unsigned long t_dst_mask(GLuint mask)
127 {
128 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
129 return mask & VSF_FLAG_ALL;
130 }
131
132 static unsigned long t_dst_class(gl_register_file file)
133 {
134
135 switch (file) {
136 case PROGRAM_TEMPORARY:
137 return PVS_DST_REG_TEMPORARY;
138 case PROGRAM_OUTPUT:
139 return PVS_DST_REG_OUT;
140 case PROGRAM_ADDRESS:
141 return PVS_DST_REG_A0;
142 /*
143 case PROGRAM_INPUT:
144 case PROGRAM_LOCAL_PARAM:
145 case PROGRAM_ENV_PARAM:
146 case PROGRAM_NAMED_PARAM:
147 case PROGRAM_STATE_VAR:
148 case PROGRAM_WRITE_ONLY:
149 case PROGRAM_ADDRESS:
150 */
151 default:
152 fprintf(stderr, "problem in %s", __FUNCTION__);
153 _mesa_exit(-1);
154 return -1;
155 }
156 }
157
158 static unsigned long t_dst_index(struct r300_vertex_program *vp,
159 struct prog_dst_register *dst)
160 {
161 if (dst->File == PROGRAM_OUTPUT)
162 return vp->outputs[dst->Index];
163
164 return dst->Index;
165 }
166
167 static unsigned long t_src_class(gl_register_file file)
168 {
169 switch (file) {
170 case PROGRAM_TEMPORARY:
171 return PVS_SRC_REG_TEMPORARY;
172 case PROGRAM_INPUT:
173 return PVS_SRC_REG_INPUT;
174 case PROGRAM_LOCAL_PARAM:
175 case PROGRAM_ENV_PARAM:
176 case PROGRAM_NAMED_PARAM:
177 case PROGRAM_CONSTANT:
178 case PROGRAM_STATE_VAR:
179 return PVS_SRC_REG_CONSTANT;
180 /*
181 case PROGRAM_OUTPUT:
182 case PROGRAM_WRITE_ONLY:
183 case PROGRAM_ADDRESS:
184 */
185 default:
186 fprintf(stderr, "problem in %s", __FUNCTION__);
187 _mesa_exit(-1);
188 return -1;
189 }
190 }
191
192 static INLINE unsigned long t_swizzle(GLubyte swizzle)
193 {
194 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
195 return swizzle;
196 }
197
#if 0
/*
 * Debug helper (compiled out): print the vp->inputs[] mapping of every vertex
 * attribute to stderr, tagged with the caller's name.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
216
217 static unsigned long t_src_index(struct r300_vertex_program *vp,
218 struct prog_src_register *src)
219 {
220 if (src->File == PROGRAM_INPUT) {
221 assert(vp->inputs[src->Index] != -1);
222 return vp->inputs[src->Index];
223 } else {
224 if (src->Index < 0) {
225 fprintf(stderr,
226 "negative offsets for indirect addressing do not work.\n");
227 return 0;
228 }
229 return src->Index;
230 }
231 }
232
233 /* these two functions should probably be merged... */
234
235 static unsigned long t_src(struct r300_vertex_program *vp,
236 struct prog_src_register *src)
237 {
238 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
239 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
240 */
241 return PVS_SRC_OPERAND(t_src_index(vp, src),
242 t_swizzle(GET_SWZ(src->Swizzle, 0)),
243 t_swizzle(GET_SWZ(src->Swizzle, 1)),
244 t_swizzle(GET_SWZ(src->Swizzle, 2)),
245 t_swizzle(GET_SWZ(src->Swizzle, 3)),
246 t_src_class(src->File),
247 src->Negate) | (src->RelAddr << 4);
248 }
249
250 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
251 struct prog_src_register *src)
252 {
253 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
254 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
255 */
256 return PVS_SRC_OPERAND(t_src_index(vp, src),
257 t_swizzle(GET_SWZ(src->Swizzle, 0)),
258 t_swizzle(GET_SWZ(src->Swizzle, 0)),
259 t_swizzle(GET_SWZ(src->Swizzle, 0)),
260 t_swizzle(GET_SWZ(src->Swizzle, 0)),
261 t_src_class(src->File),
262 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
263 (src->RelAddr << 4);
264 }
265
266 static GLboolean valid_dst(struct r300_vertex_program *vp,
267 struct prog_dst_register *dst)
268 {
269 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
270 return GL_FALSE;
271 } else if (dst->File == PROGRAM_ADDRESS) {
272 assert(dst->Index == 0);
273 }
274
275 return GL_TRUE;
276 }
277
278 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
279 struct prog_instruction *vpi,
280 GLuint * inst,
281 struct prog_src_register src[3])
282 {
283 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
284
285 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
286 GL_FALSE,
287 GL_FALSE,
288 t_dst_index(vp, &vpi->DstReg),
289 t_dst_mask(vpi->DstReg.WriteMask),
290 t_dst_class(vpi->DstReg.File));
291 inst[1] = t_src(vp, &src[0]);
292 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
293 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
294 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
295 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
296 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
297 t_src_class(src[0].File),
298 (!src[0].
299 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
300 (src[0].RelAddr << 4);
301 inst[3] = 0;
302
303 return inst;
304 }
305
306 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
307 struct prog_instruction *vpi,
308 GLuint * inst,
309 struct prog_src_register src[3])
310 {
311 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
312 GL_FALSE,
313 GL_FALSE,
314 t_dst_index(vp, &vpi->DstReg),
315 t_dst_mask(vpi->DstReg.WriteMask),
316 t_dst_class(vpi->DstReg.File));
317 inst[1] = t_src(vp, &src[0]);
318 inst[2] = t_src(vp, &src[1]);
319 inst[3] = __CONST(1, SWIZZLE_ZERO);
320
321 return inst;
322 }
323
324 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
325 struct prog_instruction *vpi,
326 GLuint * inst,
327 struct prog_src_register src[3])
328 {
329 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
330 GL_FALSE,
331 GL_FALSE,
332 t_dst_index(vp, &vpi->DstReg),
333 t_dst_mask(vpi->DstReg.WriteMask),
334 t_dst_class(vpi->DstReg.File));
335 inst[1] = t_src(vp, &src[0]);
336 inst[2] = __CONST(0, SWIZZLE_ZERO);
337 inst[3] = __CONST(0, SWIZZLE_ZERO);
338
339 return inst;
340 }
341
342 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
343 struct prog_instruction *vpi,
344 GLuint * inst,
345 struct prog_src_register src[3])
346 {
347 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
348
349 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
350 GL_FALSE,
351 GL_FALSE,
352 t_dst_index(vp, &vpi->DstReg),
353 t_dst_mask(vpi->DstReg.WriteMask),
354 t_dst_class(vpi->DstReg.File));
355 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
356 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
357 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
358 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
359 SWIZZLE_ZERO,
360 t_src_class(src[0].File),
361 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
362 (src[0].RelAddr << 4);
363 inst[2] =
364 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
365 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
366 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
367 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
368 t_src_class(src[1].File),
369 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
370 (src[1].RelAddr << 4);
371 inst[3] = __CONST(1, SWIZZLE_ZERO);
372
373 return inst;
374 }
375
376 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
377 struct prog_instruction *vpi,
378 GLuint * inst,
379 struct prog_src_register src[3])
380 {
381 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
382 GL_FALSE,
383 GL_FALSE,
384 t_dst_index(vp, &vpi->DstReg),
385 t_dst_mask(vpi->DstReg.WriteMask),
386 t_dst_class(vpi->DstReg.File));
387 inst[1] = t_src(vp, &src[0]);
388 inst[2] = t_src(vp, &src[1]);
389 inst[3] = __CONST(1, SWIZZLE_ZERO);
390
391 return inst;
392 }
393
394 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
395 struct prog_instruction *vpi,
396 GLuint * inst,
397 struct prog_src_register src[3])
398 {
399 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
400 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
401 GL_FALSE,
402 GL_FALSE,
403 t_dst_index(vp, &vpi->DstReg),
404 t_dst_mask(vpi->DstReg.WriteMask),
405 t_dst_class(vpi->DstReg.File));
406 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
407 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
408 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
409 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
410 PVS_SRC_SELECT_FORCE_1,
411 t_src_class(src[0].File),
412 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
413 (src[0].RelAddr << 4);
414 inst[2] = t_src(vp, &src[1]);
415 inst[3] = __CONST(1, SWIZZLE_ZERO);
416
417 return inst;
418 }
419
420 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
421 struct prog_instruction *vpi,
422 GLuint * inst,
423 struct prog_src_register src[3])
424 {
425 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
426 GL_FALSE,
427 GL_FALSE,
428 t_dst_index(vp, &vpi->DstReg),
429 t_dst_mask(vpi->DstReg.WriteMask),
430 t_dst_class(vpi->DstReg.File));
431 inst[1] = t_src(vp, &src[0]);
432 inst[2] = t_src(vp, &src[1]);
433 inst[3] = __CONST(1, SWIZZLE_ZERO);
434
435 return inst;
436 }
437
438 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
439 struct prog_instruction *vpi,
440 GLuint * inst,
441 struct prog_src_register src[3])
442 {
443 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
444 GL_TRUE,
445 GL_FALSE,
446 t_dst_index(vp, &vpi->DstReg),
447 t_dst_mask(vpi->DstReg.WriteMask),
448 t_dst_class(vpi->DstReg.File));
449 inst[1] = t_src_scalar(vp, &src[0]);
450 inst[2] = __CONST(0, SWIZZLE_ZERO);
451 inst[3] = __CONST(0, SWIZZLE_ZERO);
452
453 return inst;
454 }
455
456 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
457 struct prog_instruction *vpi,
458 GLuint * inst,
459 struct prog_src_register src[3])
460 {
461 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
462 GL_TRUE,
463 GL_FALSE,
464 t_dst_index(vp, &vpi->DstReg),
465 t_dst_mask(vpi->DstReg.WriteMask),
466 t_dst_class(vpi->DstReg.File));
467 inst[1] = t_src_scalar(vp, &src[0]);
468 inst[2] = __CONST(0, SWIZZLE_ZERO);
469 inst[3] = __CONST(0, SWIZZLE_ZERO);
470
471 return inst;
472 }
473
474 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
475 struct prog_instruction *vpi,
476 GLuint * inst,
477 struct prog_src_register src[3],
478 int *u_temp_i)
479 {
480 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
481 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
482
483 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
484 GL_FALSE,
485 GL_FALSE,
486 *u_temp_i,
487 t_dst_mask(vpi->DstReg.WriteMask),
488 PVS_DST_REG_TEMPORARY);
489 inst[1] = t_src(vp, &src[0]);
490 inst[2] = __CONST(0, SWIZZLE_ZERO);
491 inst[3] = __CONST(0, SWIZZLE_ZERO);
492 inst += 4;
493
494 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
495 GL_FALSE,
496 GL_FALSE,
497 t_dst_index(vp, &vpi->DstReg),
498 t_dst_mask(vpi->DstReg.WriteMask),
499 t_dst_class(vpi->DstReg.File));
500 inst[1] = t_src(vp, &src[0]);
501 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
502 PVS_SRC_SELECT_X,
503 PVS_SRC_SELECT_Y,
504 PVS_SRC_SELECT_Z,
505 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
506 /* Not 100% sure about this */
507 (!src[0].
508 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
509 /*VSF_FLAG_ALL */ );
510 inst[3] = __CONST(0, SWIZZLE_ZERO);
511 (*u_temp_i)--;
512
513 return inst;
514 }
515
516 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
517 struct prog_instruction *vpi,
518 GLuint * inst,
519 struct prog_src_register src[3])
520 {
521 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
522 GL_FALSE,
523 GL_FALSE,
524 t_dst_index(vp, &vpi->DstReg),
525 t_dst_mask(vpi->DstReg.WriteMask),
526 t_dst_class(vpi->DstReg.File));
527 inst[1] = t_src(vp, &src[0]);
528 inst[2] = __CONST(0, SWIZZLE_ZERO);
529 inst[3] = __CONST(0, SWIZZLE_ZERO);
530
531 return inst;
532 }
533
534 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
535 struct prog_instruction *vpi,
536 GLuint * inst,
537 struct prog_src_register src[3])
538 {
539 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
540
541 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
542 GL_TRUE,
543 GL_FALSE,
544 t_dst_index(vp, &vpi->DstReg),
545 t_dst_mask(vpi->DstReg.WriteMask),
546 t_dst_class(vpi->DstReg.File));
547 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
548 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
549 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
550 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
551 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
552 t_src_class(src[0].File),
553 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
554 (src[0].RelAddr << 4);
555 inst[2] = __CONST(0, SWIZZLE_ZERO);
556 inst[3] = __CONST(0, SWIZZLE_ZERO);
557
558 return inst;
559 }
560
561 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
562 struct prog_instruction *vpi,
563 GLuint * inst,
564 struct prog_src_register src[3])
565 {
566 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
567
568 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
569 GL_TRUE,
570 GL_FALSE,
571 t_dst_index(vp, &vpi->DstReg),
572 t_dst_mask(vpi->DstReg.WriteMask),
573 t_dst_class(vpi->DstReg.File));
574 /* NOTE: Users swizzling might not work. */
575 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
576 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
577 PVS_SRC_SELECT_FORCE_0, // Z
578 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
579 t_src_class(src[0].File),
580 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
581 (src[0].RelAddr << 4);
582 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
583 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
584 PVS_SRC_SELECT_FORCE_0, // Z
585 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
586 t_src_class(src[0].File),
587 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
588 (src[0].RelAddr << 4);
589 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
590 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
591 PVS_SRC_SELECT_FORCE_0, // Z
592 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
593 t_src_class(src[0].File),
594 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
595 (src[0].RelAddr << 4);
596
597 return inst;
598 }
599
600 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
601 struct prog_instruction *vpi,
602 GLuint * inst,
603 struct prog_src_register src[3])
604 {
605 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
606 GL_TRUE,
607 GL_FALSE,
608 t_dst_index(vp, &vpi->DstReg),
609 t_dst_mask(vpi->DstReg.WriteMask),
610 t_dst_class(vpi->DstReg.File));
611 inst[1] = t_src_scalar(vp, &src[0]);
612 inst[2] = __CONST(0, SWIZZLE_ZERO);
613 inst[3] = __CONST(0, SWIZZLE_ZERO);
614
615 return inst;
616 }
617
618 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
619 struct prog_instruction *vpi,
620 GLuint * inst,
621 struct prog_src_register src[3])
622 {
623 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
624 GL_FALSE,
625 GL_TRUE,
626 t_dst_index(vp, &vpi->DstReg),
627 t_dst_mask(vpi->DstReg.WriteMask),
628 t_dst_class(vpi->DstReg.File));
629 inst[1] = t_src(vp, &src[0]);
630 inst[2] = t_src(vp, &src[1]);
631 inst[3] = t_src(vp, &src[2]);
632
633 return inst;
634 }
635
636 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
637 struct prog_instruction *vpi,
638 GLuint * inst,
639 struct prog_src_register src[3])
640 {
641 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
642 GL_FALSE,
643 GL_FALSE,
644 t_dst_index(vp, &vpi->DstReg),
645 t_dst_mask(vpi->DstReg.WriteMask),
646 t_dst_class(vpi->DstReg.File));
647 inst[1] = t_src(vp, &src[0]);
648 inst[2] = t_src(vp, &src[1]);
649 inst[3] = __CONST(1, SWIZZLE_ZERO);
650
651 return inst;
652 }
653
654 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
655 struct prog_instruction *vpi,
656 GLuint * inst,
657 struct prog_src_register src[3])
658 {
659 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
660 GL_FALSE,
661 GL_FALSE,
662 t_dst_index(vp, &vpi->DstReg),
663 t_dst_mask(vpi->DstReg.WriteMask),
664 t_dst_class(vpi->DstReg.File));
665 inst[1] = t_src(vp, &src[0]);
666 inst[2] = t_src(vp, &src[1]);
667 inst[3] = __CONST(1, SWIZZLE_ZERO);
668
669 return inst;
670 }
671
672 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
673 struct prog_instruction *vpi,
674 GLuint * inst,
675 struct prog_src_register src[3])
676 {
677 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
678
679 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
680 GL_FALSE,
681 GL_FALSE,
682 t_dst_index(vp, &vpi->DstReg),
683 t_dst_mask(vpi->DstReg.WriteMask),
684 t_dst_class(vpi->DstReg.File));
685 inst[1] = t_src(vp, &src[0]);
686 inst[2] = __CONST(0, SWIZZLE_ZERO);
687 inst[3] = __CONST(0, SWIZZLE_ZERO);
688
689 return inst;
690 }
691
692 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
693 struct prog_instruction *vpi,
694 GLuint * inst,
695 struct prog_src_register src[3])
696 {
697 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
698 GL_FALSE,
699 GL_FALSE,
700 t_dst_index(vp, &vpi->DstReg),
701 t_dst_mask(vpi->DstReg.WriteMask),
702 t_dst_class(vpi->DstReg.File));
703 inst[1] = t_src(vp, &src[0]);
704 inst[2] = t_src(vp, &src[1]);
705 inst[3] = __CONST(1, SWIZZLE_ZERO);
706
707 return inst;
708 }
709
710 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
711 struct prog_instruction *vpi,
712 GLuint * inst,
713 struct prog_src_register src[3])
714 {
715 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
716 GL_TRUE,
717 GL_FALSE,
718 t_dst_index(vp, &vpi->DstReg),
719 t_dst_mask(vpi->DstReg.WriteMask),
720 t_dst_class(vpi->DstReg.File));
721 inst[1] = t_src_scalar(vp, &src[0]);
722 inst[2] = __CONST(0, SWIZZLE_ZERO);
723 inst[3] = t_src_scalar(vp, &src[1]);
724
725 return inst;
726 }
727
728 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
729 struct prog_instruction *vpi,
730 GLuint * inst,
731 struct prog_src_register src[3])
732 {
733 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
734 GL_TRUE,
735 GL_FALSE,
736 t_dst_index(vp, &vpi->DstReg),
737 t_dst_mask(vpi->DstReg.WriteMask),
738 t_dst_class(vpi->DstReg.File));
739 inst[1] = t_src_scalar(vp, &src[0]);
740 inst[2] = __CONST(0, SWIZZLE_ZERO);
741 inst[3] = __CONST(0, SWIZZLE_ZERO);
742
743 return inst;
744 }
745
746 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
747 struct prog_instruction *vpi,
748 GLuint * inst,
749 struct prog_src_register src[3])
750 {
751 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
752 GL_TRUE,
753 GL_FALSE,
754 t_dst_index(vp, &vpi->DstReg),
755 t_dst_mask(vpi->DstReg.WriteMask),
756 t_dst_class(vpi->DstReg.File));
757 inst[1] = t_src_scalar(vp, &src[0]);
758 inst[2] = __CONST(0, SWIZZLE_ZERO);
759 inst[3] = __CONST(0, SWIZZLE_ZERO);
760
761 return inst;
762 }
763
764 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
765 struct prog_instruction *vpi,
766 GLuint * inst,
767 struct prog_src_register src[3])
768 {
769 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
770 GL_FALSE,
771 GL_FALSE,
772 t_dst_index(vp, &vpi->DstReg),
773 t_dst_mask(vpi->DstReg.WriteMask),
774 t_dst_class(vpi->DstReg.File));
775 inst[1] = t_src(vp, &src[0]);
776 inst[2] = t_src(vp, &src[1]);
777 inst[3] = __CONST(1, SWIZZLE_ZERO);
778
779 return inst;
780 }
781
782 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
783 struct prog_instruction *vpi,
784 GLuint * inst,
785 struct prog_src_register src[3])
786 {
787 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
788 GL_FALSE,
789 GL_FALSE,
790 t_dst_index(vp, &vpi->DstReg),
791 t_dst_mask(vpi->DstReg.WriteMask),
792 t_dst_class(vpi->DstReg.File));
793 inst[1] = t_src(vp, &src[0]);
794 inst[2] = t_src(vp, &src[1]);
795 inst[3] = __CONST(1, SWIZZLE_ZERO);
796
797 return inst;
798 }
799
800 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
801 struct prog_instruction *vpi,
802 GLuint * inst,
803 struct prog_src_register src[3])
804 {
805 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
806
807 #if 0
808 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
809 GL_FALSE,
810 GL_FALSE,
811 t_dst_index(vp, &vpi->DstReg),
812 t_dst_mask(vpi->DstReg.WriteMask),
813 t_dst_class(vpi->DstReg.File));
814 inst[1] = t_src(vp, &src[0]);
815 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
816 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
817 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
818 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
819 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
820 t_src_class(src[1].File),
821 (!src[1].
822 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
823 (src[1].RelAddr << 4);
824 inst[3] = 0;
825 #else
826 inst[0] =
827 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
828 GL_FALSE,
829 GL_FALSE,
830 t_dst_index(vp, &vpi->DstReg),
831 t_dst_mask(vpi->DstReg.WriteMask),
832 t_dst_class(vpi->DstReg.File));
833 inst[1] = t_src(vp, &src[0]);
834 inst[2] = __CONST(0, SWIZZLE_ONE);
835 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
838 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
839 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
840 t_src_class(src[1].File),
841 (!src[1].
842 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
843 (src[1].RelAddr << 4);
844 #endif
845
846 return inst;
847 }
848
849 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
850 struct prog_instruction *vpi,
851 GLuint * inst,
852 struct prog_src_register src[3])
853 {
854 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
855
856 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
857 GL_FALSE,
858 GL_FALSE,
859 t_dst_index(vp, &vpi->DstReg),
860 t_dst_mask(vpi->DstReg.WriteMask),
861 t_dst_class(vpi->DstReg.File));
862 inst[1] = t_src(vp, &src[0]);
863 inst[2] = __CONST(0, SWIZZLE_ZERO);
864 inst[3] = __CONST(0, SWIZZLE_ZERO);
865
866 return inst;
867 }
868
869 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
870 struct prog_instruction *vpi,
871 GLuint * inst,
872 struct prog_src_register src[3],
873 int *u_temp_i)
874 {
875 /* mul r0, r1.yzxw, r2.zxyw
876 mad r0, -r2.yzxw, r1.zxyw, r0
877 */
878
879 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
880 GL_FALSE,
881 GL_FALSE,
882 *u_temp_i,
883 t_dst_mask(vpi->DstReg.WriteMask),
884 PVS_DST_REG_TEMPORARY);
885 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
886 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
887 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
888 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
889 t_src_class(src[0].File),
890 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
891 (src[0].RelAddr << 4);
892 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
893 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
894 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
895 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
896 t_src_class(src[1].File),
897 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
898 (src[1].RelAddr << 4);
899 inst[3] = __CONST(1, SWIZZLE_ZERO);
900 inst += 4;
901
902 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
903 GL_FALSE,
904 GL_FALSE,
905 t_dst_index(vp, &vpi->DstReg),
906 t_dst_mask(vpi->DstReg.WriteMask),
907 t_dst_class(vpi->DstReg.File));
908 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
909 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
910 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
911 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
912 t_src_class(src[1].File),
913 (!src[1].
914 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
915 (src[1].RelAddr << 4);
916 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
917 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
918 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
919 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
920 t_src_class(src[0].File),
921 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
922 (src[0].RelAddr << 4);
923 inst[3] =
924 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
925 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
926 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
927
928 (*u_temp_i)--;
929
930 return inst;
931 }
932
933 static void t_inputs_outputs(struct r300_vertex_program *vp)
934 {
935 int i;
936 int cur_reg;
937
938 cur_reg = -1;
939 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
940 if (vp->key.InputsRead & (1 << i))
941 vp->inputs[i] = ++cur_reg;
942 else
943 vp->inputs[i] = -1;
944 }
945
946 cur_reg = 0;
947 for (i = 0; i < VERT_RESULT_MAX; i++)
948 vp->outputs[i] = -1;
949
950 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
951
952 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
953 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
954 }
955
956 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
957 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
958 }
959
960 /* If we're writing back facing colors we need to send
961 * four colors to make front/back face colors selection work.
962 * If the vertex program doesn't write all 4 colors, lets
963 * pretend it does by skipping output index reg so the colors
964 * get written into appropriate output vectors.
965 */
966 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
967 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
968 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
969 vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
970 cur_reg++;
971 }
972
973 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
974 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
975 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
976 vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
977 cur_reg++;
978 }
979
980 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
981 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
982 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
983 cur_reg++;
984 }
985
986 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
987 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
988 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
989 cur_reg++;
990 }
991
992 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
993 if (vp->key.OutputsWritten & (1 << i)) {
994 vp->outputs[i] = cur_reg++;
995 }
996 }
997
998 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
999 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
1000 }
1001 }
1002
1003 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
1004 struct prog_instruction *vpi)
1005 {
1006 int i;
1007 GLuint *inst;
1008 unsigned long num_operands;
1009 /* Initial value should be last tmp reg that hw supports.
1010 Strangely enough r300 doesnt mind even though these would be out of range.
1011 Smart enough to realize that it doesnt need it? */
1012 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1013 struct prog_src_register src[3];
1014
1015 vp->pos_end = 0; /* Not supported yet */
1016 vp->hw_code.length = 0;
1017 vp->translated = GL_TRUE;
1018 vp->error = GL_FALSE;
1019
1020 t_inputs_outputs(vp);
1021
1022 for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
1023 vpi++, inst += 4) {
1024
1025 FREE_TEMPS();
1026
1027 if (!valid_dst(vp, &vpi->DstReg)) {
1028 /* redirect result to unused temp */
1029 vpi->DstReg.File = PROGRAM_TEMPORARY;
1030 vpi->DstReg.Index = u_temp_i;
1031 }
1032
1033 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1034
1035 /* copy the sources (src) from mesa into a local variable... is this needed? */
1036 for (i = 0; i < num_operands; i++) {
1037 src[i] = vpi->SrcReg[i];
1038 }
1039
1040 if (num_operands == 3) { /* TODO: scalars */
1041 if (CMP_SRCS(src[1], src[2])
1042 || CMP_SRCS(src[0], src[2])) {
1043 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1044 GL_FALSE,
1045 GL_FALSE,
1046 u_temp_i,
1047 VSF_FLAG_ALL,
1048 PVS_DST_REG_TEMPORARY);
1049 inst[1] =
1050 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1051 SWIZZLE_X,
1052 SWIZZLE_Y,
1053 SWIZZLE_Z,
1054 SWIZZLE_W,
1055 t_src_class(src[2].File),
1056 VSF_FLAG_NONE) | (src[2].
1057 RelAddr <<
1058 4);
1059 inst[2] = __CONST(2, SWIZZLE_ZERO);
1060 inst[3] = __CONST(2, SWIZZLE_ZERO);
1061 inst += 4;
1062
1063 src[2].File = PROGRAM_TEMPORARY;
1064 src[2].Index = u_temp_i;
1065 src[2].RelAddr = 0;
1066 u_temp_i--;
1067 }
1068 }
1069
1070 if (num_operands >= 2) {
1071 if (CMP_SRCS(src[1], src[0])) {
1072 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1073 GL_FALSE,
1074 GL_FALSE,
1075 u_temp_i,
1076 VSF_FLAG_ALL,
1077 PVS_DST_REG_TEMPORARY);
1078 inst[1] =
1079 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1080 SWIZZLE_X,
1081 SWIZZLE_Y,
1082 SWIZZLE_Z,
1083 SWIZZLE_W,
1084 t_src_class(src[0].File),
1085 VSF_FLAG_NONE) | (src[0].
1086 RelAddr <<
1087 4);
1088 inst[2] = __CONST(0, SWIZZLE_ZERO);
1089 inst[3] = __CONST(0, SWIZZLE_ZERO);
1090 inst += 4;
1091
1092 src[0].File = PROGRAM_TEMPORARY;
1093 src[0].Index = u_temp_i;
1094 src[0].RelAddr = 0;
1095 u_temp_i--;
1096 }
1097 }
1098
1099 switch (vpi->Opcode) {
1100 case OPCODE_ABS:
1101 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1102 break;
1103 case OPCODE_ADD:
1104 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1105 break;
1106 case OPCODE_ARL:
1107 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1108 break;
1109 case OPCODE_DP3:
1110 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1111 break;
1112 case OPCODE_DP4:
1113 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1114 break;
1115 case OPCODE_DPH:
1116 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1117 break;
1118 case OPCODE_DST:
1119 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1120 break;
1121 case OPCODE_EX2:
1122 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1123 break;
1124 case OPCODE_EXP:
1125 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1126 break;
1127 case OPCODE_FLR:
1128 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1129 &u_temp_i);
1130 break;
1131 case OPCODE_FRC:
1132 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1133 break;
1134 case OPCODE_LG2:
1135 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1136 break;
1137 case OPCODE_LIT:
1138 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1139 break;
1140 case OPCODE_LOG:
1141 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1142 break;
1143 case OPCODE_MAD:
1144 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1145 break;
1146 case OPCODE_MAX:
1147 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1148 break;
1149 case OPCODE_MIN:
1150 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1151 break;
1152 case OPCODE_MOV:
1153 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1154 break;
1155 case OPCODE_MUL:
1156 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1157 break;
1158 case OPCODE_POW:
1159 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1160 break;
1161 case OPCODE_RCP:
1162 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1163 break;
1164 case OPCODE_RSQ:
1165 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1166 break;
1167 case OPCODE_SGE:
1168 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1169 break;
1170 case OPCODE_SLT:
1171 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1172 break;
1173 case OPCODE_SUB:
1174 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1175 break;
1176 case OPCODE_SWZ:
1177 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1178 break;
1179 case OPCODE_XPD:
1180 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1181 &u_temp_i);
1182 break;
1183 default:
1184 vp->error = GL_TRUE;
1185 break;
1186 }
1187 }
1188
1189 vp->hw_code.length = (inst - vp->hw_code.body.d);
1190 if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
1191 vp->error = GL_TRUE;
1192 }
1193 }
1194
1195 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1196 GLuint temp_index)
1197 {
1198 struct prog_instruction *vpi;
1199
1200 _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
1201
1202 vpi = &prog->Instructions[prog->NumInstructions - 3];
1203
1204 vpi->Opcode = OPCODE_MOV;
1205
1206 vpi->DstReg.File = PROGRAM_OUTPUT;
1207 vpi->DstReg.Index = VERT_RESULT_HPOS;
1208 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1209 vpi->DstReg.CondMask = COND_TR;
1210
1211 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1212 vpi->SrcReg[0].Index = temp_index;
1213 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1214
1215 ++vpi;
1216
1217 vpi->Opcode = OPCODE_MOV;
1218
1219 vpi->DstReg.File = PROGRAM_OUTPUT;
1220 vpi->DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1221 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1222 vpi->DstReg.CondMask = COND_TR;
1223
1224 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1225 vpi->SrcReg[0].Index = temp_index;
1226 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1227
1228 ++vpi;
1229
1230 vpi->Opcode = OPCODE_END;
1231 }
1232
1233 static void pos_as_texcoord(struct r300_vertex_program *vp,
1234 struct gl_program *prog)
1235 {
1236 struct prog_instruction *vpi;
1237 GLuint tempregi = prog->NumTemporaries;
1238
1239 prog->NumTemporaries++;
1240
1241 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1242 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1243 vpi->DstReg.File = PROGRAM_TEMPORARY;
1244 vpi->DstReg.Index = tempregi;
1245 }
1246 }
1247
1248 insert_wpos(vp, prog, tempregi);
1249 }
1250
1251 static struct r300_vertex_program *build_program(GLcontext *ctx,
1252 struct r300_vertex_program_key *wanted_key,
1253 struct gl_vertex_program *mesa_vp,
1254 GLint wpos_idx)
1255 {
1256 struct r300_vertex_program *vp;
1257
1258 vp = _mesa_calloc(sizeof(*vp));
1259 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1260 vp->wpos_idx = wpos_idx;
1261
1262 if (mesa_vp->IsPositionInvariant) {
1263 _mesa_insert_mvp_code(ctx, mesa_vp);
1264 }
1265
1266 if (wpos_idx > -1) {
1267 pos_as_texcoord(vp, &mesa_vp->Base);
1268 }
1269
1270 if (RADEON_DEBUG & DEBUG_VERTS) {
1271 fprintf(stderr, "Vertex program after native rewrite:\n");
1272 _mesa_print_program(&mesa_vp->Base);
1273 fflush(stdout);
1274 }
1275
1276 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1277 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1278 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1279 */
1280 {
1281 int i, count = 0;
1282 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1283 if (vp->key.OutputsAdded & (1 << i)) {
1284 ++count;
1285 }
1286 }
1287
1288 if (count > 0) {
1289 struct prog_instruction *inst;
1290
1291 _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
1292 inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
1293
1294 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1295 if (vp->key.OutputsAdded & (1 << i)) {
1296 inst->Opcode = OPCODE_MOV;
1297
1298 inst->DstReg.File = PROGRAM_OUTPUT;
1299 inst->DstReg.Index = i;
1300 inst->DstReg.WriteMask = WRITEMASK_XYZW;
1301 inst->DstReg.CondMask = COND_TR;
1302
1303 inst->SrcReg[0].File = PROGRAM_CONSTANT;
1304 inst->SrcReg[0].Index = 0;
1305 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1306
1307 ++inst;
1308 }
1309 }
1310 }
1311 }
1312
1313 assert(mesa_vp->Base.NumInstructions);
1314 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1315 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1316
1317 return vp;
1318 }
1319
1320 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1321 {
1322 if (key->OutputsWritten & (1 << vert))
1323 return;
1324
1325 key->OutputsWritten |= 1 << vert;
1326 key->OutputsAdded |= 1 << vert;
1327 }
1328
1329 void r300SelectVertexShader(r300ContextPtr r300)
1330 {
1331 GLcontext *ctx = ctx = r300->radeon.glCtx;
1332 GLuint InputsRead;
1333 struct r300_vertex_program_key wanted_key = { 0 };
1334 GLint i;
1335 struct r300_vertex_program_cont *vpc;
1336 struct r300_vertex_program *vp;
1337 GLint wpos_idx;
1338
1339 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1340 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1341 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1342 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1343
1344 wpos_idx = -1;
1345 if (InputsRead & FRAG_BIT_WPOS) {
1346 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1347 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1348 break;
1349
1350 if (i == ctx->Const.MaxTextureUnits) {
1351 fprintf(stderr, "\tno free texcoord found\n");
1352 _mesa_exit(-1);
1353 }
1354
1355 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1356 wpos_idx = i;
1357 }
1358
1359 if (vpc->mesa_program.IsPositionInvariant) {
1360 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1361 wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
1362 } else {
1363 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1364 }
1365
1366 if (InputsRead & FRAG_BIT_COL0) {
1367 add_outputs(&wanted_key, VERT_RESULT_COL0);
1368 }
1369
1370 if (InputsRead & FRAG_BIT_COL1) {
1371 add_outputs(&wanted_key, VERT_RESULT_COL1);
1372 }
1373
1374 if (InputsRead & FRAG_BIT_FOGC) {
1375 add_outputs(&wanted_key, VERT_RESULT_FOGC);
1376 }
1377
1378 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1379 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1380 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1381 }
1382 }
1383
1384 for (vp = vpc->progs; vp; vp = vp->next)
1385 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1386 == 0) {
1387 r300->selected_vp = vp;
1388 return;
1389 }
1390
1391 if (RADEON_DEBUG & DEBUG_VERTS) {
1392 fprintf(stderr, "Initial vertex program:\n");
1393 _mesa_print_program(&vpc->mesa_program.Base);
1394 fflush(stdout);
1395 }
1396
1397 vp = build_program(ctx, &wanted_key, &vpc->mesa_program, wpos_idx);
1398 vp->next = vpc->progs;
1399 vpc->progs = vp;
1400 r300->selected_vp = vp;
1401 }
1402
/*
 * Treat ptr as a drm_r300_cmd_header_t and raise its vpu.count field to
 * new_count / 4 if that is larger than the value already stored.  The
 * resulting count must stay below 256.
 */
#define bump_vpu_count(ptr, new_count) do {					\
		drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr);	\
		int _vec_count = (new_count) / 4;				\
		assert(_vec_count < 256);					\
		if (_vec_count > _hdr->vpu.count) {				\
			_hdr->vpu.count = _vec_count;				\
		}								\
	} while (0)
1409
1410 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
1411 {
1412 int i;
1413
1414 assert((code->length > 0) && (code->length % 4 == 0));
1415
1416 switch ((dest >> 8) & 0xf) {
1417 case 0:
1418 R300_STATECHANGE(r300, vpi);
1419 for (i = 0; i < code->length; i++)
1420 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1421 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
1422 break;
1423 case 2:
1424 R300_STATECHANGE(r300, vpp);
1425 for (i = 0; i < code->length; i++)
1426 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1427 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
1428 break;
1429 case 4:
1430 R300_STATECHANGE(r300, vps);
1431 for (i = 0; i < code->length; i++)
1432 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1433 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
1434 break;
1435 default:
1436 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
1437 _mesa_exit(-1);
1438 }
1439 }
1440
1441 void r300SetupVertexProgram(r300ContextPtr rmesa)
1442 {
1443 GLcontext *ctx = rmesa->radeon.glCtx;
1444 struct r300_vertex_program *prog = rmesa->selected_vp;
1445 int inst_count = 0;
1446 int param_count = 0;
1447
1448 /* Reset state, in case we don't use something */
1449 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
1450 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
1451 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
1452
1453 R300_STATECHANGE(rmesa, vpp);
1454 param_count = r300VertexProgUpdateParams(ctx,
1455 (struct r300_vertex_program_cont *)
1456 ctx->VertexProgram._Current,
1457 (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
1458 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
1459 param_count /= 4;
1460
1461 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
1462 inst_count = (prog->hw_code.length / 4) - 1;
1463
1464 r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
1465 _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
1466
1467 R300_STATECHANGE(rmesa, pvs);
1468 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
1469 (inst_count << R300_PVS_LAST_INST_SHIFT);
1470
1471 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
1472 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
1473 }