Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_statevars.h"
38 #include "tnl/tnl.h"
39
40 #include "r300_context.h"
41
/*
 * Evaluates to non-zero when source operands a and b differ in a way the
 * translator treats as a conflict: either their RelAddr bits differ, or
 * they have different indices while both map to the constant class or
 * both map to the input class.
 *
 * Arguments are parenthesized so any lvalue expression may be passed.
 * Note that each argument is evaluated more than once.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
48
/*
 * Build a source operand word that reads a constant: reuse the register
 * index, register class and RelAddr bit of src[x] (which must already be a
 * valid, set-up source) and force all four component selects to y
 * (SWIZZLE_ZERO or SWIZZLE_ONE) with no negation.
 *
 * Relies on `vp` and `src` being in scope where the macro is used.
 */
#define __CONST(x, y) \
	((src[x].RelAddr << 4) | \
	 PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE))
61
/*
 * Release all scratch temporaries by resetting the cursor `u_temp_i` to
 * the last temp slot. Before the reset, the number of scratch temps handed
 * out since the previous reset is added to the program's own temporaries;
 * if the sum exceeds VSF_MAX_FRAGMENT_TEMPS a one-time warning is printed
 * and the program is flagged as non-native.
 *
 * Relies on `vp` and `u_temp_i` being in scope where the macro is used.
 */
#define FREE_TEMPS() \
	do { \
		const int scratch_in_use = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if ((vp->num_temporaries + scratch_in_use) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, scratch_in_use); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
71
72 int r300VertexProgUpdateParams(GLcontext * ctx,
73 struct r300_vertex_program_cont *vp, float *dst)
74 {
75 int pi;
76 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
77 float *dst_o = dst;
78 struct gl_program_parameter_list *paramList;
79
80 if (mesa_vp->IsNVProgram) {
81 _mesa_load_tracked_matrices(ctx);
82
83 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
84 *dst++ = ctx->VertexProgram.Parameters[pi][0];
85 *dst++ = ctx->VertexProgram.Parameters[pi][1];
86 *dst++ = ctx->VertexProgram.Parameters[pi][2];
87 *dst++ = ctx->VertexProgram.Parameters[pi][3];
88 }
89 return dst - dst_o;
90 }
91
92 assert(mesa_vp->Base.Parameters);
93 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
94
95 if (mesa_vp->Base.Parameters->NumParameters * 4 >
96 VSF_MAX_FRAGMENT_LENGTH) {
97 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
98 _mesa_exit(-1);
99 }
100
101 paramList = mesa_vp->Base.Parameters;
102 for (pi = 0; pi < paramList->NumParameters; pi++) {
103 switch (paramList->Parameters[pi].Type) {
104 case PROGRAM_STATE_VAR:
105 case PROGRAM_NAMED_PARAM:
106 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
107 case PROGRAM_CONSTANT:
108 *dst++ = paramList->ParameterValues[pi][0];
109 *dst++ = paramList->ParameterValues[pi][1];
110 *dst++ = paramList->ParameterValues[pi][2];
111 *dst++ = paramList->ParameterValues[pi][3];
112 break;
113 default:
114 _mesa_problem(NULL, "Bad param type in %s",
115 __FUNCTION__);
116 }
117
118 }
119
120 return dst - dst_o;
121 }
122
123 static unsigned long t_dst_mask(GLuint mask)
124 {
125 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
126 return mask & VSF_FLAG_ALL;
127 }
128
129 static unsigned long t_dst_class(gl_register_file file)
130 {
131
132 switch (file) {
133 case PROGRAM_TEMPORARY:
134 return PVS_DST_REG_TEMPORARY;
135 case PROGRAM_OUTPUT:
136 return PVS_DST_REG_OUT;
137 case PROGRAM_ADDRESS:
138 return PVS_DST_REG_A0;
139 /*
140 case PROGRAM_INPUT:
141 case PROGRAM_LOCAL_PARAM:
142 case PROGRAM_ENV_PARAM:
143 case PROGRAM_NAMED_PARAM:
144 case PROGRAM_STATE_VAR:
145 case PROGRAM_WRITE_ONLY:
146 case PROGRAM_ADDRESS:
147 */
148 default:
149 fprintf(stderr, "problem in %s", __FUNCTION__);
150 _mesa_exit(-1);
151 return -1;
152 }
153 }
154
155 static unsigned long t_dst_index(struct r300_vertex_program *vp,
156 struct prog_dst_register *dst)
157 {
158 if (dst->File == PROGRAM_OUTPUT)
159 return vp->outputs[dst->Index];
160
161 return dst->Index;
162 }
163
164 static unsigned long t_src_class(gl_register_file file)
165 {
166 switch (file) {
167 case PROGRAM_TEMPORARY:
168 return PVS_SRC_REG_TEMPORARY;
169 case PROGRAM_INPUT:
170 return PVS_SRC_REG_INPUT;
171 case PROGRAM_LOCAL_PARAM:
172 case PROGRAM_ENV_PARAM:
173 case PROGRAM_NAMED_PARAM:
174 case PROGRAM_CONSTANT:
175 case PROGRAM_STATE_VAR:
176 return PVS_SRC_REG_CONSTANT;
177 /*
178 case PROGRAM_OUTPUT:
179 case PROGRAM_WRITE_ONLY:
180 case PROGRAM_ADDRESS:
181 */
182 default:
183 fprintf(stderr, "problem in %s", __FUNCTION__);
184 _mesa_exit(-1);
185 return -1;
186 }
187 }
188
189 static INLINE unsigned long t_swizzle(GLubyte swizzle)
190 {
191 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
192 return swizzle;
193 }
194
#if 0
/*
 * Debug helper (compiled out): print the whole vp->inputs[] table to
 * stderr, prefixed with the caller's name.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (attr = 0; attr < VERT_ATTRIB_MAX; ++attr) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
213
214 static unsigned long t_src_index(struct r300_vertex_program *vp,
215 struct prog_src_register *src)
216 {
217 int i;
218 int max_reg = -1;
219
220 if (src->File == PROGRAM_INPUT) {
221 if (vp->inputs[src->Index] != -1)
222 return vp->inputs[src->Index];
223
224 for (i = 0; i < VERT_ATTRIB_MAX; i++)
225 if (vp->inputs[i] > max_reg)
226 max_reg = vp->inputs[i];
227
228 vp->inputs[src->Index] = max_reg + 1;
229
230 //vp_dump_inputs(vp, __FUNCTION__);
231
232 return vp->inputs[src->Index];
233 } else {
234 if (src->Index < 0) {
235 fprintf(stderr,
236 "negative offsets for indirect addressing do not work.\n");
237 return 0;
238 }
239 return src->Index;
240 }
241 }
242
243 /* these two functions should probably be merged... */
244
245 static unsigned long t_src(struct r300_vertex_program *vp,
246 struct prog_src_register *src)
247 {
248 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
249 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
250 */
251 return PVS_SRC_OPERAND(t_src_index(vp, src),
252 t_swizzle(GET_SWZ(src->Swizzle, 0)),
253 t_swizzle(GET_SWZ(src->Swizzle, 1)),
254 t_swizzle(GET_SWZ(src->Swizzle, 2)),
255 t_swizzle(GET_SWZ(src->Swizzle, 3)),
256 t_src_class(src->File),
257 src->Negate) | (src->RelAddr << 4);
258 }
259
260 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
261 struct prog_src_register *src)
262 {
263 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
264 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
265 */
266 return PVS_SRC_OPERAND(t_src_index(vp, src),
267 t_swizzle(GET_SWZ(src->Swizzle, 0)),
268 t_swizzle(GET_SWZ(src->Swizzle, 0)),
269 t_swizzle(GET_SWZ(src->Swizzle, 0)),
270 t_swizzle(GET_SWZ(src->Swizzle, 0)),
271 t_src_class(src->File),
272 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
273 (src->RelAddr << 4);
274 }
275
276 static GLboolean valid_dst(struct r300_vertex_program *vp,
277 struct prog_dst_register *dst)
278 {
279 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
280 return GL_FALSE;
281 } else if (dst->File == PROGRAM_ADDRESS) {
282 assert(dst->Index == 0);
283 }
284
285 return GL_TRUE;
286 }
287
288 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
289 struct prog_instruction *vpi,
290 GLuint * inst,
291 struct prog_src_register src[3])
292 {
293 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
294
295 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
296 GL_FALSE,
297 GL_FALSE,
298 t_dst_index(vp, &vpi->DstReg),
299 t_dst_mask(vpi->DstReg.WriteMask),
300 t_dst_class(vpi->DstReg.File));
301 inst[1] = t_src(vp, &src[0]);
302 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
303 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
304 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
305 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
306 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
307 t_src_class(src[0].File),
308 (!src[0].
309 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
310 (src[0].RelAddr << 4);
311 inst[3] = 0;
312
313 return inst;
314 }
315
316 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
317 struct prog_instruction *vpi,
318 GLuint * inst,
319 struct prog_src_register src[3])
320 {
321 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
322 GL_FALSE,
323 GL_FALSE,
324 t_dst_index(vp, &vpi->DstReg),
325 t_dst_mask(vpi->DstReg.WriteMask),
326 t_dst_class(vpi->DstReg.File));
327 inst[1] = t_src(vp, &src[0]);
328 inst[2] = t_src(vp, &src[1]);
329 inst[3] = __CONST(1, SWIZZLE_ZERO);
330
331 return inst;
332 }
333
334 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
335 struct prog_instruction *vpi,
336 GLuint * inst,
337 struct prog_src_register src[3])
338 {
339 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
340 GL_FALSE,
341 GL_FALSE,
342 t_dst_index(vp, &vpi->DstReg),
343 t_dst_mask(vpi->DstReg.WriteMask),
344 t_dst_class(vpi->DstReg.File));
345 inst[1] = t_src(vp, &src[0]);
346 inst[2] = __CONST(0, SWIZZLE_ZERO);
347 inst[3] = __CONST(0, SWIZZLE_ZERO);
348
349 return inst;
350 }
351
352 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
353 struct prog_instruction *vpi,
354 GLuint * inst,
355 struct prog_src_register src[3])
356 {
357 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
358
359 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
360 GL_FALSE,
361 GL_FALSE,
362 t_dst_index(vp, &vpi->DstReg),
363 t_dst_mask(vpi->DstReg.WriteMask),
364 t_dst_class(vpi->DstReg.File));
365 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
366 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
367 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
368 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
369 SWIZZLE_ZERO,
370 t_src_class(src[0].File),
371 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
372 (src[0].RelAddr << 4);
373 inst[2] =
374 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
375 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
376 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
377 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
378 t_src_class(src[1].File),
379 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
380 (src[1].RelAddr << 4);
381 inst[3] = __CONST(1, SWIZZLE_ZERO);
382
383 return inst;
384 }
385
386 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
387 struct prog_instruction *vpi,
388 GLuint * inst,
389 struct prog_src_register src[3])
390 {
391 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
392 GL_FALSE,
393 GL_FALSE,
394 t_dst_index(vp, &vpi->DstReg),
395 t_dst_mask(vpi->DstReg.WriteMask),
396 t_dst_class(vpi->DstReg.File));
397 inst[1] = t_src(vp, &src[0]);
398 inst[2] = t_src(vp, &src[1]);
399 inst[3] = __CONST(1, SWIZZLE_ZERO);
400
401 return inst;
402 }
403
404 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
405 struct prog_instruction *vpi,
406 GLuint * inst,
407 struct prog_src_register src[3])
408 {
409 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
410 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
411 GL_FALSE,
412 GL_FALSE,
413 t_dst_index(vp, &vpi->DstReg),
414 t_dst_mask(vpi->DstReg.WriteMask),
415 t_dst_class(vpi->DstReg.File));
416 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
417 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
418 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
419 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
420 PVS_SRC_SELECT_FORCE_1,
421 t_src_class(src[0].File),
422 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
423 (src[0].RelAddr << 4);
424 inst[2] = t_src(vp, &src[1]);
425 inst[3] = __CONST(1, SWIZZLE_ZERO);
426
427 return inst;
428 }
429
430 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
431 struct prog_instruction *vpi,
432 GLuint * inst,
433 struct prog_src_register src[3])
434 {
435 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
436 GL_FALSE,
437 GL_FALSE,
438 t_dst_index(vp, &vpi->DstReg),
439 t_dst_mask(vpi->DstReg.WriteMask),
440 t_dst_class(vpi->DstReg.File));
441 inst[1] = t_src(vp, &src[0]);
442 inst[2] = t_src(vp, &src[1]);
443 inst[3] = __CONST(1, SWIZZLE_ZERO);
444
445 return inst;
446 }
447
448 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
449 struct prog_instruction *vpi,
450 GLuint * inst,
451 struct prog_src_register src[3])
452 {
453 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
454 GL_TRUE,
455 GL_FALSE,
456 t_dst_index(vp, &vpi->DstReg),
457 t_dst_mask(vpi->DstReg.WriteMask),
458 t_dst_class(vpi->DstReg.File));
459 inst[1] = t_src_scalar(vp, &src[0]);
460 inst[2] = __CONST(0, SWIZZLE_ZERO);
461 inst[3] = __CONST(0, SWIZZLE_ZERO);
462
463 return inst;
464 }
465
466 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
467 struct prog_instruction *vpi,
468 GLuint * inst,
469 struct prog_src_register src[3])
470 {
471 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
472 GL_TRUE,
473 GL_FALSE,
474 t_dst_index(vp, &vpi->DstReg),
475 t_dst_mask(vpi->DstReg.WriteMask),
476 t_dst_class(vpi->DstReg.File));
477 inst[1] = t_src_scalar(vp, &src[0]);
478 inst[2] = __CONST(0, SWIZZLE_ZERO);
479 inst[3] = __CONST(0, SWIZZLE_ZERO);
480
481 return inst;
482 }
483
484 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
485 struct prog_instruction *vpi,
486 GLuint * inst,
487 struct prog_src_register src[3],
488 int *u_temp_i)
489 {
490 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
491 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
492
493 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
494 GL_FALSE,
495 GL_FALSE,
496 *u_temp_i,
497 t_dst_mask(vpi->DstReg.WriteMask),
498 PVS_DST_REG_TEMPORARY);
499 inst[1] = t_src(vp, &src[0]);
500 inst[2] = __CONST(0, SWIZZLE_ZERO);
501 inst[3] = __CONST(0, SWIZZLE_ZERO);
502 inst += 4;
503
504 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
505 GL_FALSE,
506 GL_FALSE,
507 t_dst_index(vp, &vpi->DstReg),
508 t_dst_mask(vpi->DstReg.WriteMask),
509 t_dst_class(vpi->DstReg.File));
510 inst[1] = t_src(vp, &src[0]);
511 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
512 PVS_SRC_SELECT_X,
513 PVS_SRC_SELECT_Y,
514 PVS_SRC_SELECT_Z,
515 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
516 /* Not 100% sure about this */
517 (!src[0].
518 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
519 /*VSF_FLAG_ALL */ );
520 inst[3] = __CONST(0, SWIZZLE_ZERO);
521 (*u_temp_i)--;
522
523 return inst;
524 }
525
526 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
527 struct prog_instruction *vpi,
528 GLuint * inst,
529 struct prog_src_register src[3])
530 {
531 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
532 GL_FALSE,
533 GL_FALSE,
534 t_dst_index(vp, &vpi->DstReg),
535 t_dst_mask(vpi->DstReg.WriteMask),
536 t_dst_class(vpi->DstReg.File));
537 inst[1] = t_src(vp, &src[0]);
538 inst[2] = __CONST(0, SWIZZLE_ZERO);
539 inst[3] = __CONST(0, SWIZZLE_ZERO);
540
541 return inst;
542 }
543
544 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
545 struct prog_instruction *vpi,
546 GLuint * inst,
547 struct prog_src_register src[3])
548 {
549 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
550
551 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
552 GL_TRUE,
553 GL_FALSE,
554 t_dst_index(vp, &vpi->DstReg),
555 t_dst_mask(vpi->DstReg.WriteMask),
556 t_dst_class(vpi->DstReg.File));
557 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
558 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
559 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
560 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
561 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
562 t_src_class(src[0].File),
563 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
564 (src[0].RelAddr << 4);
565 inst[2] = __CONST(0, SWIZZLE_ZERO);
566 inst[3] = __CONST(0, SWIZZLE_ZERO);
567
568 return inst;
569 }
570
571 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
572 struct prog_instruction *vpi,
573 GLuint * inst,
574 struct prog_src_register src[3])
575 {
576 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
577
578 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
579 GL_TRUE,
580 GL_FALSE,
581 t_dst_index(vp, &vpi->DstReg),
582 t_dst_mask(vpi->DstReg.WriteMask),
583 t_dst_class(vpi->DstReg.File));
584 /* NOTE: Users swizzling might not work. */
585 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
586 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
587 PVS_SRC_SELECT_FORCE_0, // Z
588 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
589 t_src_class(src[0].File),
590 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
591 (src[0].RelAddr << 4);
592 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
593 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
594 PVS_SRC_SELECT_FORCE_0, // Z
595 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
596 t_src_class(src[0].File),
597 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
598 (src[0].RelAddr << 4);
599 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
600 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
601 PVS_SRC_SELECT_FORCE_0, // Z
602 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
603 t_src_class(src[0].File),
604 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
605 (src[0].RelAddr << 4);
606
607 return inst;
608 }
609
610 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
611 struct prog_instruction *vpi,
612 GLuint * inst,
613 struct prog_src_register src[3])
614 {
615 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
616 GL_TRUE,
617 GL_FALSE,
618 t_dst_index(vp, &vpi->DstReg),
619 t_dst_mask(vpi->DstReg.WriteMask),
620 t_dst_class(vpi->DstReg.File));
621 inst[1] = t_src_scalar(vp, &src[0]);
622 inst[2] = __CONST(0, SWIZZLE_ZERO);
623 inst[3] = __CONST(0, SWIZZLE_ZERO);
624
625 return inst;
626 }
627
628 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
629 struct prog_instruction *vpi,
630 GLuint * inst,
631 struct prog_src_register src[3])
632 {
633 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
634 GL_FALSE,
635 GL_TRUE,
636 t_dst_index(vp, &vpi->DstReg),
637 t_dst_mask(vpi->DstReg.WriteMask),
638 t_dst_class(vpi->DstReg.File));
639 inst[1] = t_src(vp, &src[0]);
640 inst[2] = t_src(vp, &src[1]);
641 inst[3] = t_src(vp, &src[2]);
642
643 return inst;
644 }
645
646 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
647 struct prog_instruction *vpi,
648 GLuint * inst,
649 struct prog_src_register src[3])
650 {
651 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
652 GL_FALSE,
653 GL_FALSE,
654 t_dst_index(vp, &vpi->DstReg),
655 t_dst_mask(vpi->DstReg.WriteMask),
656 t_dst_class(vpi->DstReg.File));
657 inst[1] = t_src(vp, &src[0]);
658 inst[2] = t_src(vp, &src[1]);
659 inst[3] = __CONST(1, SWIZZLE_ZERO);
660
661 return inst;
662 }
663
664 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
665 struct prog_instruction *vpi,
666 GLuint * inst,
667 struct prog_src_register src[3])
668 {
669 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
670 GL_FALSE,
671 GL_FALSE,
672 t_dst_index(vp, &vpi->DstReg),
673 t_dst_mask(vpi->DstReg.WriteMask),
674 t_dst_class(vpi->DstReg.File));
675 inst[1] = t_src(vp, &src[0]);
676 inst[2] = t_src(vp, &src[1]);
677 inst[3] = __CONST(1, SWIZZLE_ZERO);
678
679 return inst;
680 }
681
682 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
683 struct prog_instruction *vpi,
684 GLuint * inst,
685 struct prog_src_register src[3])
686 {
687 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
688
689 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
690 GL_FALSE,
691 GL_FALSE,
692 t_dst_index(vp, &vpi->DstReg),
693 t_dst_mask(vpi->DstReg.WriteMask),
694 t_dst_class(vpi->DstReg.File));
695 inst[1] = t_src(vp, &src[0]);
696 inst[2] = __CONST(0, SWIZZLE_ZERO);
697 inst[3] = __CONST(0, SWIZZLE_ZERO);
698
699 return inst;
700 }
701
702 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
703 struct prog_instruction *vpi,
704 GLuint * inst,
705 struct prog_src_register src[3])
706 {
707 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
708 GL_FALSE,
709 GL_FALSE,
710 t_dst_index(vp, &vpi->DstReg),
711 t_dst_mask(vpi->DstReg.WriteMask),
712 t_dst_class(vpi->DstReg.File));
713 inst[1] = t_src(vp, &src[0]);
714 inst[2] = t_src(vp, &src[1]);
715 inst[3] = __CONST(1, SWIZZLE_ZERO);
716
717 return inst;
718 }
719
720 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
721 struct prog_instruction *vpi,
722 GLuint * inst,
723 struct prog_src_register src[3])
724 {
725 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
726 GL_TRUE,
727 GL_FALSE,
728 t_dst_index(vp, &vpi->DstReg),
729 t_dst_mask(vpi->DstReg.WriteMask),
730 t_dst_class(vpi->DstReg.File));
731 inst[1] = t_src_scalar(vp, &src[0]);
732 inst[2] = __CONST(0, SWIZZLE_ZERO);
733 inst[3] = t_src_scalar(vp, &src[1]);
734
735 return inst;
736 }
737
738 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
739 struct prog_instruction *vpi,
740 GLuint * inst,
741 struct prog_src_register src[3])
742 {
743 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
744 GL_TRUE,
745 GL_FALSE,
746 t_dst_index(vp, &vpi->DstReg),
747 t_dst_mask(vpi->DstReg.WriteMask),
748 t_dst_class(vpi->DstReg.File));
749 inst[1] = t_src_scalar(vp, &src[0]);
750 inst[2] = __CONST(0, SWIZZLE_ZERO);
751 inst[3] = __CONST(0, SWIZZLE_ZERO);
752
753 return inst;
754 }
755
756 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
757 struct prog_instruction *vpi,
758 GLuint * inst,
759 struct prog_src_register src[3])
760 {
761 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
762 GL_TRUE,
763 GL_FALSE,
764 t_dst_index(vp, &vpi->DstReg),
765 t_dst_mask(vpi->DstReg.WriteMask),
766 t_dst_class(vpi->DstReg.File));
767 inst[1] = t_src_scalar(vp, &src[0]);
768 inst[2] = __CONST(0, SWIZZLE_ZERO);
769 inst[3] = __CONST(0, SWIZZLE_ZERO);
770
771 return inst;
772 }
773
774 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
775 struct prog_instruction *vpi,
776 GLuint * inst,
777 struct prog_src_register src[3])
778 {
779 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
780 GL_FALSE,
781 GL_FALSE,
782 t_dst_index(vp, &vpi->DstReg),
783 t_dst_mask(vpi->DstReg.WriteMask),
784 t_dst_class(vpi->DstReg.File));
785 inst[1] = t_src(vp, &src[0]);
786 inst[2] = t_src(vp, &src[1]);
787 inst[3] = __CONST(1, SWIZZLE_ZERO);
788
789 return inst;
790 }
791
792 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
793 struct prog_instruction *vpi,
794 GLuint * inst,
795 struct prog_src_register src[3])
796 {
797 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
798 GL_FALSE,
799 GL_FALSE,
800 t_dst_index(vp, &vpi->DstReg),
801 t_dst_mask(vpi->DstReg.WriteMask),
802 t_dst_class(vpi->DstReg.File));
803 inst[1] = t_src(vp, &src[0]);
804 inst[2] = t_src(vp, &src[1]);
805 inst[3] = __CONST(1, SWIZZLE_ZERO);
806
807 return inst;
808 }
809
810 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
811 struct prog_instruction *vpi,
812 GLuint * inst,
813 struct prog_src_register src[3])
814 {
815 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
816
817 #if 0
818 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
819 GL_FALSE,
820 GL_FALSE,
821 t_dst_index(vp, &vpi->DstReg),
822 t_dst_mask(vpi->DstReg.WriteMask),
823 t_dst_class(vpi->DstReg.File));
824 inst[1] = t_src(vp, &src[0]);
825 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
826 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
827 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
828 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
829 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
830 t_src_class(src[1].File),
831 (!src[1].
832 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
833 (src[1].RelAddr << 4);
834 inst[3] = 0;
835 #else
836 inst[0] =
837 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
838 GL_FALSE,
839 GL_FALSE,
840 t_dst_index(vp, &vpi->DstReg),
841 t_dst_mask(vpi->DstReg.WriteMask),
842 t_dst_class(vpi->DstReg.File));
843 inst[1] = t_src(vp, &src[0]);
844 inst[2] = __CONST(0, SWIZZLE_ONE);
845 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
846 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
847 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
848 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
849 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
850 t_src_class(src[1].File),
851 (!src[1].
852 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
853 (src[1].RelAddr << 4);
854 #endif
855
856 return inst;
857 }
858
859 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
860 struct prog_instruction *vpi,
861 GLuint * inst,
862 struct prog_src_register src[3])
863 {
864 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
865
866 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
867 GL_FALSE,
868 GL_FALSE,
869 t_dst_index(vp, &vpi->DstReg),
870 t_dst_mask(vpi->DstReg.WriteMask),
871 t_dst_class(vpi->DstReg.File));
872 inst[1] = t_src(vp, &src[0]);
873 inst[2] = __CONST(0, SWIZZLE_ZERO);
874 inst[3] = __CONST(0, SWIZZLE_ZERO);
875
876 return inst;
877 }
878
879 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
880 struct prog_instruction *vpi,
881 GLuint * inst,
882 struct prog_src_register src[3],
883 int *u_temp_i)
884 {
885 /* mul r0, r1.yzxw, r2.zxyw
886 mad r0, -r2.yzxw, r1.zxyw, r0
887 */
888
889 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
890 GL_FALSE,
891 GL_FALSE,
892 *u_temp_i,
893 t_dst_mask(vpi->DstReg.WriteMask),
894 PVS_DST_REG_TEMPORARY);
895 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
896 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
897 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
898 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
899 t_src_class(src[0].File),
900 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
901 (src[0].RelAddr << 4);
902 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
903 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
904 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
905 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
906 t_src_class(src[1].File),
907 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
908 (src[1].RelAddr << 4);
909 inst[3] = __CONST(1, SWIZZLE_ZERO);
910 inst += 4;
911
912 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
913 GL_FALSE,
914 GL_FALSE,
915 t_dst_index(vp, &vpi->DstReg),
916 t_dst_mask(vpi->DstReg.WriteMask),
917 t_dst_class(vpi->DstReg.File));
918 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
919 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
920 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
921 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
922 t_src_class(src[1].File),
923 (!src[1].
924 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
925 (src[1].RelAddr << 4);
926 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
927 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
928 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
929 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
930 t_src_class(src[0].File),
931 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
932 (src[0].RelAddr << 4);
933 inst[3] =
934 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
935 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
936 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
937
938 (*u_temp_i)--;
939
940 return inst;
941 }
942
943 static void t_inputs_outputs(struct r300_vertex_program *vp)
944 {
945 int i;
946 int cur_reg = 0;
947
948 for (i = 0; i < VERT_ATTRIB_MAX; i++)
949 vp->inputs[i] = -1;
950
951 for (i = 0; i < VERT_RESULT_MAX; i++)
952 vp->outputs[i] = -1;
953
954 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
955
956 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
957 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
958 }
959
960 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
961 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
962 }
963
964 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
965 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
966 }
967
968 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
969 vp->outputs[VERT_RESULT_COL1] =
970 vp->outputs[VERT_RESULT_COL0] + 1;
971 cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
972 }
973
974 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
975 vp->outputs[VERT_RESULT_BFC0] =
976 vp->outputs[VERT_RESULT_COL0] + 2;
977 cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
978 }
979
980 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
981 vp->outputs[VERT_RESULT_BFC1] =
982 vp->outputs[VERT_RESULT_COL0] + 3;
983 cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
984 }
985
986 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
987 if (vp->key.OutputsWritten & (1 << i)) {
988 vp->outputs[i] = cur_reg++;
989 }
990 }
991
992 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
993 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
994 }
995 }
996
997 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
998 struct prog_instruction *vpi)
999 {
1000 int i;
1001 GLuint *inst;
1002 unsigned long num_operands;
1003 /* Initial value should be last tmp reg that hw supports.
1004 Strangely enough r300 doesnt mind even though these would be out of range.
1005 Smart enough to realize that it doesnt need it? */
1006 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1007 struct prog_src_register src[3];
1008
1009 vp->pos_end = 0; /* Not supported yet */
1010 vp->program.length = 0;
1011 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
1012 vp->translated = GL_TRUE;
1013 vp->native = GL_TRUE;
1014
1015 t_inputs_outputs(vp);
1016
1017 for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
1018 vpi++, inst += 4) {
1019
1020 FREE_TEMPS();
1021
1022 if (!valid_dst(vp, &vpi->DstReg)) {
1023 /* redirect result to unused temp */
1024 vpi->DstReg.File = PROGRAM_TEMPORARY;
1025 vpi->DstReg.Index = u_temp_i;
1026 }
1027
1028 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1029
1030 /* copy the sources (src) from mesa into a local variable... is this needed? */
1031 for (i = 0; i < num_operands; i++) {
1032 src[i] = vpi->SrcReg[i];
1033 }
1034
1035 if (num_operands == 3) { /* TODO: scalars */
1036 if (CMP_SRCS(src[1], src[2])
1037 || CMP_SRCS(src[0], src[2])) {
1038 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1039 GL_FALSE,
1040 GL_FALSE,
1041 u_temp_i,
1042 VSF_FLAG_ALL,
1043 PVS_DST_REG_TEMPORARY);
1044 inst[1] =
1045 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1046 SWIZZLE_X,
1047 SWIZZLE_Y,
1048 SWIZZLE_Z,
1049 SWIZZLE_W,
1050 t_src_class(src[2].File),
1051 VSF_FLAG_NONE) | (src[2].
1052 RelAddr <<
1053 4);
1054 inst[2] = __CONST(2, SWIZZLE_ZERO);
1055 inst[3] = __CONST(2, SWIZZLE_ZERO);
1056 inst += 4;
1057
1058 src[2].File = PROGRAM_TEMPORARY;
1059 src[2].Index = u_temp_i;
1060 src[2].RelAddr = 0;
1061 u_temp_i--;
1062 }
1063 }
1064
1065 if (num_operands >= 2) {
1066 if (CMP_SRCS(src[1], src[0])) {
1067 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1068 GL_FALSE,
1069 GL_FALSE,
1070 u_temp_i,
1071 VSF_FLAG_ALL,
1072 PVS_DST_REG_TEMPORARY);
1073 inst[1] =
1074 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1075 SWIZZLE_X,
1076 SWIZZLE_Y,
1077 SWIZZLE_Z,
1078 SWIZZLE_W,
1079 t_src_class(src[0].File),
1080 VSF_FLAG_NONE) | (src[0].
1081 RelAddr <<
1082 4);
1083 inst[2] = __CONST(0, SWIZZLE_ZERO);
1084 inst[3] = __CONST(0, SWIZZLE_ZERO);
1085 inst += 4;
1086
1087 src[0].File = PROGRAM_TEMPORARY;
1088 src[0].Index = u_temp_i;
1089 src[0].RelAddr = 0;
1090 u_temp_i--;
1091 }
1092 }
1093
1094 switch (vpi->Opcode) {
1095 case OPCODE_ABS:
1096 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1097 break;
1098 case OPCODE_ADD:
1099 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1100 break;
1101 case OPCODE_ARL:
1102 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1103 break;
1104 case OPCODE_DP3:
1105 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1106 break;
1107 case OPCODE_DP4:
1108 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1109 break;
1110 case OPCODE_DPH:
1111 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1112 break;
1113 case OPCODE_DST:
1114 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1115 break;
1116 case OPCODE_EX2:
1117 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1118 break;
1119 case OPCODE_EXP:
1120 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1121 break;
1122 case OPCODE_FLR:
1123 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1124 &u_temp_i);
1125 break;
1126 case OPCODE_FRC:
1127 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1128 break;
1129 case OPCODE_LG2:
1130 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1131 break;
1132 case OPCODE_LIT:
1133 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1134 break;
1135 case OPCODE_LOG:
1136 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1137 break;
1138 case OPCODE_MAD:
1139 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1140 break;
1141 case OPCODE_MAX:
1142 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1143 break;
1144 case OPCODE_MIN:
1145 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1146 break;
1147 case OPCODE_MOV:
1148 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1149 break;
1150 case OPCODE_MUL:
1151 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1152 break;
1153 case OPCODE_POW:
1154 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1155 break;
1156 case OPCODE_RCP:
1157 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1158 break;
1159 case OPCODE_RSQ:
1160 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1161 break;
1162 case OPCODE_SGE:
1163 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1164 break;
1165 case OPCODE_SLT:
1166 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1167 break;
1168 case OPCODE_SUB:
1169 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1170 break;
1171 case OPCODE_SWZ:
1172 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1173 break;
1174 case OPCODE_XPD:
1175 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1176 &u_temp_i);
1177 break;
1178 default:
1179 assert(0);
1180 break;
1181 }
1182 }
1183
1184 /* Some outputs may be artificially added, to match the inputs
1185 of the fragment program. Blank the outputs here. */
1186 for (i = 0; i < VERT_RESULT_MAX; i++) {
1187 if (vp->key.OutputsAdded & (1 << i)) {
1188 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1189 GL_FALSE,
1190 GL_FALSE,
1191 vp->outputs[i],
1192 VSF_FLAG_ALL,
1193 PVS_DST_REG_OUT);
1194 inst[1] = __CONST(0, SWIZZLE_ZERO);
1195 inst[2] = __CONST(0, SWIZZLE_ZERO);
1196 inst[3] = __CONST(0, SWIZZLE_ZERO);
1197 inst += 4;
1198 }
1199 }
1200
1201 vp->program.length = (inst - vp->program.body.i);
1202 if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
1203 vp->program.length = 0;
1204 vp->native = GL_FALSE;
1205 }
1206 #if 0
1207 fprintf(stderr, "hw program:\n");
1208 for (i = 0; i < vp->program.length; i++)
1209 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1210 #endif
1211 }
1212
1213 /* DP4 version seems to trigger some hw peculiarity */
1214 //#define PREFER_DP4
1215
1216 static void position_invariant(struct gl_program *prog)
1217 {
1218 struct prog_instruction *vpi;
1219 struct gl_program_parameter_list *paramList;
1220 int i;
1221
1222 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1223
1224 /* tokens[4] = matrix modifier */
1225 #ifdef PREFER_DP4
1226 tokens[4] = 0; /* not transposed or inverted */
1227 #else
1228 tokens[4] = STATE_MATRIX_TRANSPOSE;
1229 #endif
1230 paramList = prog->Parameters;
1231
1232 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1233 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1234
1235 for (i = 0; i < 4; i++) {
1236 GLint idx;
1237 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1238 idx = _mesa_add_state_reference(paramList, tokens);
1239 #ifdef PREFER_DP4
1240 vpi[i].Opcode = OPCODE_DP4;
1241 vpi[i].StringPos = 0;
1242 vpi[i].Data = 0;
1243
1244 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1245 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1246 vpi[i].DstReg.WriteMask = 1 << i;
1247 vpi[i].DstReg.CondMask = COND_TR;
1248
1249 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1250 vpi[i].SrcReg[0].Index = idx;
1251 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1252
1253 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1254 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1255 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1256 #else
1257 if (i == 0)
1258 vpi[i].Opcode = OPCODE_MUL;
1259 else
1260 vpi[i].Opcode = OPCODE_MAD;
1261
1262 vpi[i].Data = 0;
1263
1264 if (i == 3)
1265 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1266 else
1267 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1268 vpi[i].DstReg.Index = 0;
1269 vpi[i].DstReg.WriteMask = 0xf;
1270 vpi[i].DstReg.CondMask = COND_TR;
1271
1272 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1273 vpi[i].SrcReg[0].Index = idx;
1274 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1275
1276 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1277 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1278 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1279
1280 if (i > 0) {
1281 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1282 vpi[i].SrcReg[2].Index = 0;
1283 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1284 }
1285 #endif
1286 }
1287
1288 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1289 prog->NumInstructions);
1290
1291 free(prog->Instructions);
1292
1293 prog->Instructions = vpi;
1294
1295 prog->NumInstructions += 4;
1296 vpi = &prog->Instructions[prog->NumInstructions - 1];
1297
1298 assert(vpi->Opcode == OPCODE_END);
1299 }
1300
1301 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1302 GLuint temp_index)
1303 {
1304 struct prog_instruction *vpi;
1305 struct prog_instruction *vpi_insert;
1306 int i = 0;
1307
1308 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1309 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1310 /* all but END */
1311 _mesa_copy_instructions(vpi, prog->Instructions,
1312 prog->NumInstructions - 1);
1313 /* END */
1314 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1315 &prog->Instructions[prog->NumInstructions - 1],
1316 1);
1317 vpi_insert = &vpi[prog->NumInstructions - 1];
1318
1319 vpi_insert[i].Opcode = OPCODE_MOV;
1320
1321 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1322 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1323 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1324 vpi_insert[i].DstReg.CondMask = COND_TR;
1325
1326 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1327 vpi_insert[i].SrcReg[0].Index = temp_index;
1328 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1329 i++;
1330
1331 vpi_insert[i].Opcode = OPCODE_MOV;
1332
1333 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1334 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1335 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1336 vpi_insert[i].DstReg.CondMask = COND_TR;
1337
1338 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1339 vpi_insert[i].SrcReg[0].Index = temp_index;
1340 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1341 i++;
1342
1343 free(prog->Instructions);
1344
1345 prog->Instructions = vpi;
1346
1347 prog->NumInstructions += i;
1348 vpi = &prog->Instructions[prog->NumInstructions - 1];
1349
1350 assert(vpi->Opcode == OPCODE_END);
1351 }
1352
1353 static void pos_as_texcoord(struct r300_vertex_program *vp,
1354 struct gl_program *prog)
1355 {
1356 struct prog_instruction *vpi;
1357 GLuint tempregi = prog->NumTemporaries;
1358 /* should do something else if no temps left... */
1359 prog->NumTemporaries++;
1360
1361 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1362 if (vpi->DstReg.File == PROGRAM_OUTPUT
1363 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1364 vpi->DstReg.File = PROGRAM_TEMPORARY;
1365 vpi->DstReg.Index = tempregi;
1366 }
1367 }
1368 insert_wpos(vp, prog, tempregi);
1369 }
1370
1371 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1372 *wanted_key, struct gl_vertex_program
1373 *mesa_vp, GLint wpos_idx)
1374 {
1375 struct r300_vertex_program *vp;
1376
1377 vp = _mesa_calloc(sizeof(*vp));
1378 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1379 vp->wpos_idx = wpos_idx;
1380
1381 if (mesa_vp->IsPositionInvariant) {
1382 position_invariant(&mesa_vp->Base);
1383 }
1384
1385 if (wpos_idx > -1) {
1386 pos_as_texcoord(vp, &mesa_vp->Base);
1387 }
1388
1389 assert(mesa_vp->Base.NumInstructions);
1390 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1391 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1392
1393 return vp;
1394 }
1395
1396 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1397 {
1398 if (key->OutputsWritten & (1 << vert))
1399 return;
1400
1401 key->OutputsWritten |= 1 << vert;
1402 key->OutputsAdded |= 1 << vert;
1403 }
1404
1405 void r300SelectVertexShader(r300ContextPtr r300)
1406 {
1407 GLcontext *ctx = ctx = r300->radeon.glCtx;
1408 GLuint InputsRead;
1409 struct r300_vertex_program_key wanted_key = { 0 };
1410 GLint i;
1411 struct r300_vertex_program_cont *vpc;
1412 struct r300_vertex_program *vp;
1413 GLint wpos_idx;
1414
1415 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1416 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1417 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1418 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1419
1420 wpos_idx = -1;
1421 if (InputsRead & FRAG_BIT_WPOS) {
1422 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1423 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1424 break;
1425
1426 if (i == ctx->Const.MaxTextureUnits) {
1427 fprintf(stderr, "\tno free texcoord found\n");
1428 _mesa_exit(-1);
1429 }
1430
1431 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1432 wpos_idx = i;
1433 }
1434
1435 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1436
1437 if (InputsRead & FRAG_BIT_COL0) {
1438 add_outputs(&wanted_key, VERT_RESULT_COL0);
1439 }
1440
1441 if (InputsRead & FRAG_BIT_COL1) {
1442 add_outputs(&wanted_key, VERT_RESULT_COL1);
1443 }
1444
1445 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1446 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1447 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1448 }
1449 }
1450
1451 if (vpc->mesa_program.IsPositionInvariant) {
1452 /* we wan't position don't we ? */
1453 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1454 }
1455
1456 for (vp = vpc->progs; vp; vp = vp->next)
1457 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1458 == 0) {
1459 r300->selected_vp = vp;
1460 return;
1461 }
1462 //_mesa_print_program(&vpc->mesa_program.Base);
1463
1464 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1465 vp->next = vpc->progs;
1466 vpc->progs = vp;
1467 r300->selected_vp = vp;
1468 }