Merge remote branch 'origin/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_statevars.h"
38 #include "tnl/tnl.h"
39
40 #include "r300_context.h"
41
/* TODO: Get rid of t_src_class call */
/*
 * Evaluates to non-zero when two source registers differ in their RelAddr
 * flag, or when they have different indices while both map to the constant
 * class or both map to the input class.  The translator copies one of the
 * operands into a temporary when this is true.
 * NOTE(review): presumably the hardware cannot fetch two different
 * constant/input registers in a single instruction - confirm.
 *
 * Arguments are parenthesized so that any lvalue expression can be passed,
 * and the macro no longer ends in a line continuation.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || \
	((a).Index != (b).Index && \
	 ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	   t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	  (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	   t_src_class((b).File) == PVS_SRC_REG_INPUT))))
48
/*
 * Build a source operand that reads a constant (e.g. SWIZZLE_ZERO or
 * SWIZZLE_ONE) in all four components.  The register index, register class
 * and RelAddr bit are taken from the already valid source src[x]; only the
 * swizzle is replaced by y.  Expects `vp` and `src` to be in scope at the
 * point of use.
 */
#define __CONST(x, y)						\
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]),		\
			 t_swizzle(y), t_swizzle(y),		\
			 t_swizzle(y), t_swizzle(y),		\
			 t_src_class(src[x].File),		\
			 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
61
/*
 * Release all scratch temporaries: scratch registers are handed out from
 * VSF_MAX_FRAGMENT_TEMPS - 1 downwards through `u_temp_i`.  If the scratch
 * registers in use plus the program's own temporaries exceed the limit, the
 * program is flagged as non-native.  Expects `vp` and `u_temp_i` in scope.
 */
#define FREE_TEMPS() \
	do { \
		int scratch_in_use = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if ((vp->num_temporaries + scratch_in_use) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, scratch_in_use); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
71
72 int r300VertexProgUpdateParams(GLcontext * ctx,
73 struct r300_vertex_program_cont *vp, float *dst)
74 {
75 int pi;
76 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
77 float *dst_o = dst;
78 struct gl_program_parameter_list *paramList;
79
80 if (mesa_vp->IsNVProgram) {
81 _mesa_load_tracked_matrices(ctx);
82
83 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
84 *dst++ = ctx->VertexProgram.Parameters[pi][0];
85 *dst++ = ctx->VertexProgram.Parameters[pi][1];
86 *dst++ = ctx->VertexProgram.Parameters[pi][2];
87 *dst++ = ctx->VertexProgram.Parameters[pi][3];
88 }
89 return dst - dst_o;
90 }
91
92 assert(mesa_vp->Base.Parameters);
93 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
94
95 if (mesa_vp->Base.Parameters->NumParameters * 4 >
96 VSF_MAX_FRAGMENT_LENGTH) {
97 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
98 _mesa_exit(-1);
99 }
100
101 paramList = mesa_vp->Base.Parameters;
102 for (pi = 0; pi < paramList->NumParameters; pi++) {
103 switch (paramList->Parameters[pi].Type) {
104 case PROGRAM_STATE_VAR:
105 case PROGRAM_NAMED_PARAM:
106 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
107 case PROGRAM_CONSTANT:
108 *dst++ = paramList->ParameterValues[pi][0];
109 *dst++ = paramList->ParameterValues[pi][1];
110 *dst++ = paramList->ParameterValues[pi][2];
111 *dst++ = paramList->ParameterValues[pi][3];
112 break;
113 default:
114 _mesa_problem(NULL, "Bad param type in %s",
115 __FUNCTION__);
116 }
117
118 }
119
120 return dst - dst_o;
121 }
122
123 static unsigned long t_dst_mask(GLuint mask)
124 {
125 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
126 return mask & VSF_FLAG_ALL;
127 }
128
129 static unsigned long t_dst_class(enum register_file file)
130 {
131
132 switch (file) {
133 case PROGRAM_TEMPORARY:
134 return PVS_DST_REG_TEMPORARY;
135 case PROGRAM_OUTPUT:
136 return PVS_DST_REG_OUT;
137 case PROGRAM_ADDRESS:
138 return PVS_DST_REG_A0;
139 /*
140 case PROGRAM_INPUT:
141 case PROGRAM_LOCAL_PARAM:
142 case PROGRAM_ENV_PARAM:
143 case PROGRAM_NAMED_PARAM:
144 case PROGRAM_STATE_VAR:
145 case PROGRAM_WRITE_ONLY:
146 case PROGRAM_ADDRESS:
147 */
148 default:
149 fprintf(stderr, "problem in %s", __FUNCTION__);
150 _mesa_exit(-1);
151 return -1;
152 }
153 }
154
155 static unsigned long t_dst_index(struct r300_vertex_program *vp,
156 struct prog_dst_register *dst)
157 {
158 if (dst->File == PROGRAM_OUTPUT)
159 return vp->outputs[dst->Index];
160
161 return dst->Index;
162 }
163
164 static unsigned long t_src_class(enum register_file file)
165 {
166 switch (file) {
167 case PROGRAM_TEMPORARY:
168 return PVS_SRC_REG_TEMPORARY;
169 case PROGRAM_INPUT:
170 return PVS_SRC_REG_INPUT;
171 case PROGRAM_LOCAL_PARAM:
172 case PROGRAM_ENV_PARAM:
173 case PROGRAM_NAMED_PARAM:
174 case PROGRAM_CONSTANT:
175 case PROGRAM_STATE_VAR:
176 return PVS_SRC_REG_CONSTANT;
177 /*
178 case PROGRAM_OUTPUT:
179 case PROGRAM_WRITE_ONLY:
180 case PROGRAM_ADDRESS:
181 */
182 default:
183 fprintf(stderr, "problem in %s", __FUNCTION__);
184 _mesa_exit(-1);
185 return -1;
186 }
187 }
188
189 static INLINE unsigned long t_swizzle(GLubyte swizzle)
190 {
191 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
192 return swizzle;
193 }
194
#if 0
/* Debug helper: print the whole vp->inputs[] mapping to stderr. */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
213
214 static unsigned long t_src_index(struct r300_vertex_program *vp,
215 struct prog_src_register *src)
216 {
217 int i;
218 int max_reg = -1;
219
220 if (src->File == PROGRAM_INPUT) {
221 if (vp->inputs[src->Index] != -1)
222 return vp->inputs[src->Index];
223
224 for (i = 0; i < VERT_ATTRIB_MAX; i++)
225 if (vp->inputs[i] > max_reg)
226 max_reg = vp->inputs[i];
227
228 vp->inputs[src->Index] = max_reg + 1;
229
230 //vp_dump_inputs(vp, __FUNCTION__);
231
232 return vp->inputs[src->Index];
233 } else {
234 if (src->Index < 0) {
235 fprintf(stderr,
236 "negative offsets for indirect addressing do not work.\n");
237 return 0;
238 }
239 return src->Index;
240 }
241 }
242
243 /* these two functions should probably be merged... */
244
245 static unsigned long t_src(struct r300_vertex_program *vp,
246 struct prog_src_register *src)
247 {
248 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
249 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
250 */
251 return PVS_SRC_OPERAND(t_src_index(vp, src),
252 t_swizzle(GET_SWZ(src->Swizzle, 0)),
253 t_swizzle(GET_SWZ(src->Swizzle, 1)),
254 t_swizzle(GET_SWZ(src->Swizzle, 2)),
255 t_swizzle(GET_SWZ(src->Swizzle, 3)),
256 t_src_class(src->File),
257 src->NegateBase) | (src->RelAddr << 4);
258 }
259
260 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
261 struct prog_src_register *src)
262 {
263 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
264 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
265 */
266 return PVS_SRC_OPERAND(t_src_index(vp, src),
267 t_swizzle(GET_SWZ(src->Swizzle, 0)),
268 t_swizzle(GET_SWZ(src->Swizzle, 0)),
269 t_swizzle(GET_SWZ(src->Swizzle, 0)),
270 t_swizzle(GET_SWZ(src->Swizzle, 0)),
271 t_src_class(src->File),
272 src->
273 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
274 (src->RelAddr << 4);
275 }
276
277 static GLboolean valid_dst(struct r300_vertex_program *vp,
278 struct prog_dst_register *dst)
279 {
280 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
281 return GL_FALSE;
282 } else if (dst->File == PROGRAM_ADDRESS) {
283 assert(dst->Index == 0);
284 }
285
286 return GL_TRUE;
287 }
288
289 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
290 struct prog_instruction *vpi,
291 GLuint * inst,
292 struct prog_src_register src[3])
293 {
294 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
295
296 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
297 GL_FALSE,
298 GL_FALSE,
299 t_dst_index(vp, &vpi->DstReg),
300 t_dst_mask(vpi->DstReg.WriteMask),
301 t_dst_class(vpi->DstReg.File));
302 inst[1] = t_src(vp, &src[0]);
303 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
304 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
305 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
306 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
307 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
308 t_src_class(src[0].File),
309 (!src[0].
310 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
311 (src[0].RelAddr << 4);
312 inst[3] = 0;
313
314 return inst;
315 }
316
317 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
318 struct prog_instruction *vpi,
319 GLuint * inst,
320 struct prog_src_register src[3])
321 {
322 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
323 GL_FALSE,
324 GL_FALSE,
325 t_dst_index(vp, &vpi->DstReg),
326 t_dst_mask(vpi->DstReg.WriteMask),
327 t_dst_class(vpi->DstReg.File));
328 inst[1] = t_src(vp, &src[0]);
329 inst[2] = t_src(vp, &src[1]);
330 inst[3] = __CONST(1, SWIZZLE_ZERO);
331
332 return inst;
333 }
334
335 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
336 struct prog_instruction *vpi,
337 GLuint * inst,
338 struct prog_src_register src[3])
339 {
340 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
341 GL_FALSE,
342 GL_FALSE,
343 t_dst_index(vp, &vpi->DstReg),
344 t_dst_mask(vpi->DstReg.WriteMask),
345 t_dst_class(vpi->DstReg.File));
346 inst[1] = t_src(vp, &src[0]);
347 inst[2] = __CONST(0, SWIZZLE_ZERO);
348 inst[3] = __CONST(0, SWIZZLE_ZERO);
349
350 return inst;
351 }
352
353 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
354 struct prog_instruction *vpi,
355 GLuint * inst,
356 struct prog_src_register src[3])
357 {
358 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
359
360 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
361 GL_FALSE,
362 GL_FALSE,
363 t_dst_index(vp, &vpi->DstReg),
364 t_dst_mask(vpi->DstReg.WriteMask),
365 t_dst_class(vpi->DstReg.File));
366 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
367 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
368 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
369 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
370 SWIZZLE_ZERO,
371 t_src_class(src[0].File),
372 src[0].
373 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
374 (src[0].RelAddr << 4);
375 inst[2] =
376 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
377 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
378 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
379 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
380 t_src_class(src[1].File),
381 src[1].
382 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
383 (src[1].RelAddr << 4);
384 inst[3] = __CONST(1, SWIZZLE_ZERO);
385
386 return inst;
387 }
388
389 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
390 struct prog_instruction *vpi,
391 GLuint * inst,
392 struct prog_src_register src[3])
393 {
394 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
395 GL_FALSE,
396 GL_FALSE,
397 t_dst_index(vp, &vpi->DstReg),
398 t_dst_mask(vpi->DstReg.WriteMask),
399 t_dst_class(vpi->DstReg.File));
400 inst[1] = t_src(vp, &src[0]);
401 inst[2] = t_src(vp, &src[1]);
402 inst[3] = __CONST(1, SWIZZLE_ZERO);
403
404 return inst;
405 }
406
407 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
408 struct prog_instruction *vpi,
409 GLuint * inst,
410 struct prog_src_register src[3])
411 {
412 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
413 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
414 GL_FALSE,
415 GL_FALSE,
416 t_dst_index(vp, &vpi->DstReg),
417 t_dst_mask(vpi->DstReg.WriteMask),
418 t_dst_class(vpi->DstReg.File));
419 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
420 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
421 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
422 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
423 PVS_SRC_SELECT_FORCE_1,
424 t_src_class(src[0].File),
425 src[0].
426 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
427 (src[0].RelAddr << 4);
428 inst[2] = t_src(vp, &src[1]);
429 inst[3] = __CONST(1, SWIZZLE_ZERO);
430
431 return inst;
432 }
433
434 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
435 struct prog_instruction *vpi,
436 GLuint * inst,
437 struct prog_src_register src[3])
438 {
439 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
440 GL_FALSE,
441 GL_FALSE,
442 t_dst_index(vp, &vpi->DstReg),
443 t_dst_mask(vpi->DstReg.WriteMask),
444 t_dst_class(vpi->DstReg.File));
445 inst[1] = t_src(vp, &src[0]);
446 inst[2] = t_src(vp, &src[1]);
447 inst[3] = __CONST(1, SWIZZLE_ZERO);
448
449 return inst;
450 }
451
452 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
453 struct prog_instruction *vpi,
454 GLuint * inst,
455 struct prog_src_register src[3])
456 {
457 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
458 GL_TRUE,
459 GL_FALSE,
460 t_dst_index(vp, &vpi->DstReg),
461 t_dst_mask(vpi->DstReg.WriteMask),
462 t_dst_class(vpi->DstReg.File));
463 inst[1] = t_src_scalar(vp, &src[0]);
464 inst[2] = __CONST(0, SWIZZLE_ZERO);
465 inst[3] = __CONST(0, SWIZZLE_ZERO);
466
467 return inst;
468 }
469
470 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
471 struct prog_instruction *vpi,
472 GLuint * inst,
473 struct prog_src_register src[3])
474 {
475 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
476 GL_TRUE,
477 GL_FALSE,
478 t_dst_index(vp, &vpi->DstReg),
479 t_dst_mask(vpi->DstReg.WriteMask),
480 t_dst_class(vpi->DstReg.File));
481 inst[1] = t_src_scalar(vp, &src[0]);
482 inst[2] = __CONST(0, SWIZZLE_ZERO);
483 inst[3] = __CONST(0, SWIZZLE_ZERO);
484
485 return inst;
486 }
487
488 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
489 struct prog_instruction *vpi,
490 GLuint * inst,
491 struct prog_src_register src[3],
492 int *u_temp_i)
493 {
494 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
495 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
496
497 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
498 GL_FALSE,
499 GL_FALSE,
500 *u_temp_i,
501 t_dst_mask(vpi->DstReg.WriteMask),
502 PVS_DST_REG_TEMPORARY);
503 inst[1] = t_src(vp, &src[0]);
504 inst[2] = __CONST(0, SWIZZLE_ZERO);
505 inst[3] = __CONST(0, SWIZZLE_ZERO);
506 inst += 4;
507
508 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
509 GL_FALSE,
510 GL_FALSE,
511 t_dst_index(vp, &vpi->DstReg),
512 t_dst_mask(vpi->DstReg.WriteMask),
513 t_dst_class(vpi->DstReg.File));
514 inst[1] = t_src(vp, &src[0]);
515 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
516 PVS_SRC_SELECT_X,
517 PVS_SRC_SELECT_Y,
518 PVS_SRC_SELECT_Z,
519 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
520 /* Not 100% sure about this */
521 (!src[0].
522 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
523 /*VSF_FLAG_ALL */ );
524 inst[3] = __CONST(0, SWIZZLE_ZERO);
525 (*u_temp_i)--;
526
527 return inst;
528 }
529
530 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
531 struct prog_instruction *vpi,
532 GLuint * inst,
533 struct prog_src_register src[3])
534 {
535 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
536 GL_FALSE,
537 GL_FALSE,
538 t_dst_index(vp, &vpi->DstReg),
539 t_dst_mask(vpi->DstReg.WriteMask),
540 t_dst_class(vpi->DstReg.File));
541 inst[1] = t_src(vp, &src[0]);
542 inst[2] = __CONST(0, SWIZZLE_ZERO);
543 inst[3] = __CONST(0, SWIZZLE_ZERO);
544
545 return inst;
546 }
547
548 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
549 struct prog_instruction *vpi,
550 GLuint * inst,
551 struct prog_src_register src[3])
552 {
553 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
554
555 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
556 GL_TRUE,
557 GL_FALSE,
558 t_dst_index(vp, &vpi->DstReg),
559 t_dst_mask(vpi->DstReg.WriteMask),
560 t_dst_class(vpi->DstReg.File));
561 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
562 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
563 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
564 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
565 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
566 t_src_class(src[0].File),
567 src[0].
568 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
569 (src[0].RelAddr << 4);
570 inst[2] = __CONST(0, SWIZZLE_ZERO);
571 inst[3] = __CONST(0, SWIZZLE_ZERO);
572
573 return inst;
574 }
575
576 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
577 struct prog_instruction *vpi,
578 GLuint * inst,
579 struct prog_src_register src[3])
580 {
581 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
582
583 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
584 GL_TRUE,
585 GL_FALSE,
586 t_dst_index(vp, &vpi->DstReg),
587 t_dst_mask(vpi->DstReg.WriteMask),
588 t_dst_class(vpi->DstReg.File));
589 /* NOTE: Users swizzling might not work. */
590 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
591 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
592 PVS_SRC_SELECT_FORCE_0, // Z
593 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
594 t_src_class(src[0].File),
595 src[0].
596 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
597 (src[0].RelAddr << 4);
598 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
599 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
600 PVS_SRC_SELECT_FORCE_0, // Z
601 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
602 t_src_class(src[0].File),
603 src[0].
604 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
605 (src[0].RelAddr << 4);
606 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
607 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
608 PVS_SRC_SELECT_FORCE_0, // Z
609 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
610 t_src_class(src[0].File),
611 src[0].
612 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
613 (src[0].RelAddr << 4);
614
615 return inst;
616 }
617
618 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
619 struct prog_instruction *vpi,
620 GLuint * inst,
621 struct prog_src_register src[3])
622 {
623 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
624 GL_TRUE,
625 GL_FALSE,
626 t_dst_index(vp, &vpi->DstReg),
627 t_dst_mask(vpi->DstReg.WriteMask),
628 t_dst_class(vpi->DstReg.File));
629 inst[1] = t_src_scalar(vp, &src[0]);
630 inst[2] = __CONST(0, SWIZZLE_ZERO);
631 inst[3] = __CONST(0, SWIZZLE_ZERO);
632
633 return inst;
634 }
635
636 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
637 struct prog_instruction *vpi,
638 GLuint * inst,
639 struct prog_src_register src[3])
640 {
641 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
642 GL_FALSE,
643 GL_TRUE,
644 t_dst_index(vp, &vpi->DstReg),
645 t_dst_mask(vpi->DstReg.WriteMask),
646 t_dst_class(vpi->DstReg.File));
647 inst[1] = t_src(vp, &src[0]);
648 inst[2] = t_src(vp, &src[1]);
649 inst[3] = t_src(vp, &src[2]);
650
651 return inst;
652 }
653
654 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
655 struct prog_instruction *vpi,
656 GLuint * inst,
657 struct prog_src_register src[3])
658 {
659 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
660 GL_FALSE,
661 GL_FALSE,
662 t_dst_index(vp, &vpi->DstReg),
663 t_dst_mask(vpi->DstReg.WriteMask),
664 t_dst_class(vpi->DstReg.File));
665 inst[1] = t_src(vp, &src[0]);
666 inst[2] = t_src(vp, &src[1]);
667 inst[3] = __CONST(1, SWIZZLE_ZERO);
668
669 return inst;
670 }
671
672 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
673 struct prog_instruction *vpi,
674 GLuint * inst,
675 struct prog_src_register src[3])
676 {
677 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
678 GL_FALSE,
679 GL_FALSE,
680 t_dst_index(vp, &vpi->DstReg),
681 t_dst_mask(vpi->DstReg.WriteMask),
682 t_dst_class(vpi->DstReg.File));
683 inst[1] = t_src(vp, &src[0]);
684 inst[2] = t_src(vp, &src[1]);
685 inst[3] = __CONST(1, SWIZZLE_ZERO);
686
687 return inst;
688 }
689
690 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
691 struct prog_instruction *vpi,
692 GLuint * inst,
693 struct prog_src_register src[3])
694 {
695 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
696
697 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
698 GL_FALSE,
699 GL_FALSE,
700 t_dst_index(vp, &vpi->DstReg),
701 t_dst_mask(vpi->DstReg.WriteMask),
702 t_dst_class(vpi->DstReg.File));
703 inst[1] = t_src(vp, &src[0]);
704 inst[2] = __CONST(0, SWIZZLE_ZERO);
705 inst[3] = __CONST(0, SWIZZLE_ZERO);
706
707 return inst;
708 }
709
710 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
711 struct prog_instruction *vpi,
712 GLuint * inst,
713 struct prog_src_register src[3])
714 {
715 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
716 GL_FALSE,
717 GL_FALSE,
718 t_dst_index(vp, &vpi->DstReg),
719 t_dst_mask(vpi->DstReg.WriteMask),
720 t_dst_class(vpi->DstReg.File));
721 inst[1] = t_src(vp, &src[0]);
722 inst[2] = t_src(vp, &src[1]);
723 inst[3] = __CONST(1, SWIZZLE_ZERO);
724
725 return inst;
726 }
727
728 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
729 struct prog_instruction *vpi,
730 GLuint * inst,
731 struct prog_src_register src[3])
732 {
733 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
734 GL_TRUE,
735 GL_FALSE,
736 t_dst_index(vp, &vpi->DstReg),
737 t_dst_mask(vpi->DstReg.WriteMask),
738 t_dst_class(vpi->DstReg.File));
739 inst[1] = t_src_scalar(vp, &src[0]);
740 inst[2] = __CONST(0, SWIZZLE_ZERO);
741 inst[3] = t_src_scalar(vp, &src[1]);
742
743 return inst;
744 }
745
746 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
747 struct prog_instruction *vpi,
748 GLuint * inst,
749 struct prog_src_register src[3])
750 {
751 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
752 GL_TRUE,
753 GL_FALSE,
754 t_dst_index(vp, &vpi->DstReg),
755 t_dst_mask(vpi->DstReg.WriteMask),
756 t_dst_class(vpi->DstReg.File));
757 inst[1] = t_src_scalar(vp, &src[0]);
758 inst[2] = __CONST(0, SWIZZLE_ZERO);
759 inst[3] = __CONST(0, SWIZZLE_ZERO);
760
761 return inst;
762 }
763
764 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
765 struct prog_instruction *vpi,
766 GLuint * inst,
767 struct prog_src_register src[3])
768 {
769 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
770 GL_TRUE,
771 GL_FALSE,
772 t_dst_index(vp, &vpi->DstReg),
773 t_dst_mask(vpi->DstReg.WriteMask),
774 t_dst_class(vpi->DstReg.File));
775 inst[1] = t_src_scalar(vp, &src[0]);
776 inst[2] = __CONST(0, SWIZZLE_ZERO);
777 inst[3] = __CONST(0, SWIZZLE_ZERO);
778
779 return inst;
780 }
781
782 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
783 struct prog_instruction *vpi,
784 GLuint * inst,
785 struct prog_src_register src[3])
786 {
787 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
788 GL_FALSE,
789 GL_FALSE,
790 t_dst_index(vp, &vpi->DstReg),
791 t_dst_mask(vpi->DstReg.WriteMask),
792 t_dst_class(vpi->DstReg.File));
793 inst[1] = t_src(vp, &src[0]);
794 inst[2] = t_src(vp, &src[1]);
795 inst[3] = __CONST(1, SWIZZLE_ZERO);
796
797 return inst;
798 }
799
800 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
801 struct prog_instruction *vpi,
802 GLuint * inst,
803 struct prog_src_register src[3])
804 {
805 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
806 GL_FALSE,
807 GL_FALSE,
808 t_dst_index(vp, &vpi->DstReg),
809 t_dst_mask(vpi->DstReg.WriteMask),
810 t_dst_class(vpi->DstReg.File));
811 inst[1] = t_src(vp, &src[0]);
812 inst[2] = t_src(vp, &src[1]);
813 inst[3] = __CONST(1, SWIZZLE_ZERO);
814
815 return inst;
816 }
817
818 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
819 struct prog_instruction *vpi,
820 GLuint * inst,
821 struct prog_src_register src[3])
822 {
823 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
824
825 #if 0
826 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
827 GL_FALSE,
828 GL_FALSE,
829 t_dst_index(vp, &vpi->DstReg),
830 t_dst_mask(vpi->DstReg.WriteMask),
831 t_dst_class(vpi->DstReg.File));
832 inst[1] = t_src(vp, &src[0]);
833 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
834 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
838 t_src_class(src[1].File),
839 (!src[1].
840 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
841 (src[1].RelAddr << 4);
842 inst[3] = 0;
843 #else
844 inst[0] =
845 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
846 GL_FALSE,
847 GL_FALSE,
848 t_dst_index(vp, &vpi->DstReg),
849 t_dst_mask(vpi->DstReg.WriteMask),
850 t_dst_class(vpi->DstReg.File));
851 inst[1] = t_src(vp, &src[0]);
852 inst[2] = __CONST(0, SWIZZLE_ONE);
853 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
854 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
855 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
856 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
857 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
858 t_src_class(src[1].File),
859 (!src[1].
860 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
861 (src[1].RelAddr << 4);
862 #endif
863
864 return inst;
865 }
866
867 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
868 struct prog_instruction *vpi,
869 GLuint * inst,
870 struct prog_src_register src[3])
871 {
872 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
873
874 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
875 GL_FALSE,
876 GL_FALSE,
877 t_dst_index(vp, &vpi->DstReg),
878 t_dst_mask(vpi->DstReg.WriteMask),
879 t_dst_class(vpi->DstReg.File));
880 inst[1] = t_src(vp, &src[0]);
881 inst[2] = __CONST(0, SWIZZLE_ZERO);
882 inst[3] = __CONST(0, SWIZZLE_ZERO);
883
884 return inst;
885 }
886
887 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
888 struct prog_instruction *vpi,
889 GLuint * inst,
890 struct prog_src_register src[3],
891 int *u_temp_i)
892 {
893 /* mul r0, r1.yzxw, r2.zxyw
894 mad r0, -r2.yzxw, r1.zxyw, r0
895 */
896
897 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
898 GL_FALSE,
899 GL_FALSE,
900 *u_temp_i,
901 t_dst_mask(vpi->DstReg.WriteMask),
902 PVS_DST_REG_TEMPORARY);
903 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
904 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
905 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
906 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
907 t_src_class(src[0].File),
908 src[0].
909 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
910 (src[0].RelAddr << 4);
911 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
912 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
913 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
914 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
915 t_src_class(src[1].File),
916 src[1].
917 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
918 (src[1].RelAddr << 4);
919 inst[3] = __CONST(1, SWIZZLE_ZERO);
920 inst += 4;
921
922 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
923 GL_FALSE,
924 GL_FALSE,
925 t_dst_index(vp, &vpi->DstReg),
926 t_dst_mask(vpi->DstReg.WriteMask),
927 t_dst_class(vpi->DstReg.File));
928 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
929 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
930 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
931 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
932 t_src_class(src[1].File),
933 (!src[1].
934 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
935 (src[1].RelAddr << 4);
936 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
937 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
938 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
939 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
940 t_src_class(src[0].File),
941 src[0].
942 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
943 (src[0].RelAddr << 4);
944 inst[3] =
945 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
946 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
947 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
948
949 (*u_temp_i)--;
950
951 return inst;
952 }
953
954 static void t_inputs_outputs(struct r300_vertex_program *vp)
955 {
956 int i;
957 int cur_reg = 0;
958
959 for (i = 0; i < VERT_ATTRIB_MAX; i++)
960 vp->inputs[i] = -1;
961
962 for (i = 0; i < VERT_RESULT_MAX; i++)
963 vp->outputs[i] = -1;
964
965 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
966
967 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
968 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
969 }
970
971 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
972 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
973 }
974
975 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
976 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
977 }
978
979 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
980 vp->outputs[VERT_RESULT_COL1] =
981 vp->outputs[VERT_RESULT_COL0] + 1;
982 cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
983 }
984
985 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
986 vp->outputs[VERT_RESULT_BFC0] =
987 vp->outputs[VERT_RESULT_COL0] + 2;
988 cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
989 }
990
991 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
992 vp->outputs[VERT_RESULT_BFC1] =
993 vp->outputs[VERT_RESULT_COL0] + 3;
994 cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
995 }
996 #if 0
997 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
998 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
999 }
1000 #endif
1001
1002 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
1003 if (vp->key.OutputsWritten & (1 << i)) {
1004 vp->outputs[i] = cur_reg++;
1005 }
1006 }
1007 }
1008
1009 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
1010 struct prog_instruction *vpi)
1011 {
1012 int i;
1013 GLuint *inst;
1014 unsigned long num_operands;
1015 /* Initial value should be last tmp reg that hw supports.
1016 Strangely enough r300 doesnt mind even though these would be out of range.
1017 Smart enough to realize that it doesnt need it? */
1018 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1019 struct prog_src_register src[3];
1020
1021 vp->pos_end = 0; /* Not supported yet */
1022 vp->program.length = 0;
1023 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
1024 vp->translated = GL_TRUE;
1025 vp->native = GL_TRUE;
1026
1027 t_inputs_outputs(vp);
1028
1029 for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
1030 vpi++, inst += 4) {
1031
1032 FREE_TEMPS();
1033
1034 if (!valid_dst(vp, &vpi->DstReg)) {
1035 /* redirect result to unused temp */
1036 vpi->DstReg.File = PROGRAM_TEMPORARY;
1037 vpi->DstReg.Index = u_temp_i;
1038 }
1039
1040 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1041
1042 /* copy the sources (src) from mesa into a local variable... is this needed? */
1043 for (i = 0; i < num_operands; i++) {
1044 src[i] = vpi->SrcReg[i];
1045 }
1046
1047 if (num_operands == 3) { /* TODO: scalars */
1048 if (CMP_SRCS(src[1], src[2])
1049 || CMP_SRCS(src[0], src[2])) {
1050 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1051 GL_FALSE,
1052 GL_FALSE,
1053 u_temp_i,
1054 VSF_FLAG_ALL,
1055 PVS_DST_REG_TEMPORARY);
1056 inst[1] =
1057 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1058 SWIZZLE_X,
1059 SWIZZLE_Y,
1060 SWIZZLE_Z,
1061 SWIZZLE_W,
1062 t_src_class(src[2].File),
1063 VSF_FLAG_NONE) | (src[2].
1064 RelAddr <<
1065 4);
1066 inst[2] = __CONST(2, SWIZZLE_ZERO);
1067 inst[3] = __CONST(2, SWIZZLE_ZERO);
1068 inst += 4;
1069
1070 src[2].File = PROGRAM_TEMPORARY;
1071 src[2].Index = u_temp_i;
1072 src[2].RelAddr = 0;
1073 u_temp_i--;
1074 }
1075 }
1076
1077 if (num_operands >= 2) {
1078 if (CMP_SRCS(src[1], src[0])) {
1079 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1080 GL_FALSE,
1081 GL_FALSE,
1082 u_temp_i,
1083 VSF_FLAG_ALL,
1084 PVS_DST_REG_TEMPORARY);
1085 inst[1] =
1086 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1087 SWIZZLE_X,
1088 SWIZZLE_Y,
1089 SWIZZLE_Z,
1090 SWIZZLE_W,
1091 t_src_class(src[0].File),
1092 VSF_FLAG_NONE) | (src[0].
1093 RelAddr <<
1094 4);
1095 inst[2] = __CONST(0, SWIZZLE_ZERO);
1096 inst[3] = __CONST(0, SWIZZLE_ZERO);
1097 inst += 4;
1098
1099 src[0].File = PROGRAM_TEMPORARY;
1100 src[0].Index = u_temp_i;
1101 src[0].RelAddr = 0;
1102 u_temp_i--;
1103 }
1104 }
1105
1106 switch (vpi->Opcode) {
1107 case OPCODE_ABS:
1108 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1109 break;
1110 case OPCODE_ADD:
1111 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1112 break;
1113 case OPCODE_ARL:
1114 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1115 break;
1116 case OPCODE_DP3:
1117 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1118 break;
1119 case OPCODE_DP4:
1120 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1121 break;
1122 case OPCODE_DPH:
1123 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1124 break;
1125 case OPCODE_DST:
1126 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1127 break;
1128 case OPCODE_EX2:
1129 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1130 break;
1131 case OPCODE_EXP:
1132 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1133 break;
1134 case OPCODE_FLR:
1135 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1136 &u_temp_i);
1137 break;
1138 case OPCODE_FRC:
1139 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1140 break;
1141 case OPCODE_LG2:
1142 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1143 break;
1144 case OPCODE_LIT:
1145 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1146 break;
1147 case OPCODE_LOG:
1148 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1149 break;
1150 case OPCODE_MAD:
1151 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1152 break;
1153 case OPCODE_MAX:
1154 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1155 break;
1156 case OPCODE_MIN:
1157 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1158 break;
1159 case OPCODE_MOV:
1160 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1161 break;
1162 case OPCODE_MUL:
1163 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1164 break;
1165 case OPCODE_POW:
1166 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1167 break;
1168 case OPCODE_RCP:
1169 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1170 break;
1171 case OPCODE_RSQ:
1172 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1173 break;
1174 case OPCODE_SGE:
1175 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1176 break;
1177 case OPCODE_SLT:
1178 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1179 break;
1180 case OPCODE_SUB:
1181 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1182 break;
1183 case OPCODE_SWZ:
1184 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1185 break;
1186 case OPCODE_XPD:
1187 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1188 &u_temp_i);
1189 break;
1190 default:
1191 assert(0);
1192 break;
1193 }
1194 }
1195
1196 /* Some outputs may be artificially added, to match the inputs
1197 of the fragment program. Blank the outputs here. */
1198 for (i = 0; i < VERT_RESULT_MAX; i++) {
1199 if (vp->key.OutputsAdded & (1 << i)) {
1200 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1201 GL_FALSE,
1202 GL_FALSE,
1203 vp->outputs[i],
1204 VSF_FLAG_ALL,
1205 PVS_DST_REG_OUT);
1206 inst[1] = __CONST(0, SWIZZLE_ZERO);
1207 inst[2] = __CONST(0, SWIZZLE_ZERO);
1208 inst[3] = __CONST(0, SWIZZLE_ZERO);
1209 inst += 4;
1210 }
1211 }
1212
1213 vp->program.length = (inst - vp->program.body.i);
1214 if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
1215 vp->program.length = 0;
1216 vp->native = GL_FALSE;
1217 }
1218 #if 0
1219 fprintf(stderr, "hw program:\n");
1220 for (i = 0; i < vp->program.length; i++)
1221 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1222 #endif
1223 }
1224
1225 /* DP4 version seems to trigger some hw peculiarity */
1226 //#define PREFER_DP4
1227
1228 static void position_invariant(struct gl_program *prog)
1229 {
1230 struct prog_instruction *vpi;
1231 struct gl_program_parameter_list *paramList;
1232 int i;
1233
1234 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1235
1236 /* tokens[4] = matrix modifier */
1237 #ifdef PREFER_DP4
1238 tokens[4] = 0; /* not transposed or inverted */
1239 #else
1240 tokens[4] = STATE_MATRIX_TRANSPOSE;
1241 #endif
1242 paramList = prog->Parameters;
1243
1244 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1245 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1246
1247 for (i = 0; i < 4; i++) {
1248 GLint idx;
1249 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1250 idx = _mesa_add_state_reference(paramList, tokens);
1251 #ifdef PREFER_DP4
1252 vpi[i].Opcode = OPCODE_DP4;
1253 vpi[i].StringPos = 0;
1254 vpi[i].Data = 0;
1255
1256 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1257 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1258 vpi[i].DstReg.WriteMask = 1 << i;
1259 vpi[i].DstReg.CondMask = COND_TR;
1260
1261 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1262 vpi[i].SrcReg[0].Index = idx;
1263 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1264
1265 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1266 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1267 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1268 #else
1269 if (i == 0)
1270 vpi[i].Opcode = OPCODE_MUL;
1271 else
1272 vpi[i].Opcode = OPCODE_MAD;
1273
1274 vpi[i].StringPos = 0;
1275 vpi[i].Data = 0;
1276
1277 if (i == 3)
1278 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1279 else
1280 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1281 vpi[i].DstReg.Index = 0;
1282 vpi[i].DstReg.WriteMask = 0xf;
1283 vpi[i].DstReg.CondMask = COND_TR;
1284
1285 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1286 vpi[i].SrcReg[0].Index = idx;
1287 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1288
1289 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1290 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1291 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1292
1293 if (i > 0) {
1294 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1295 vpi[i].SrcReg[2].Index = 0;
1296 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1297 }
1298 #endif
1299 }
1300
1301 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1302 prog->NumInstructions);
1303
1304 free(prog->Instructions);
1305
1306 prog->Instructions = vpi;
1307
1308 prog->NumInstructions += 4;
1309 vpi = &prog->Instructions[prog->NumInstructions - 1];
1310
1311 assert(vpi->Opcode == OPCODE_END);
1312 }
1313
1314 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1315 GLuint temp_index)
1316 {
1317 struct prog_instruction *vpi;
1318 struct prog_instruction *vpi_insert;
1319 int i = 0;
1320
1321 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1322 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1323 /* all but END */
1324 _mesa_copy_instructions(vpi, prog->Instructions,
1325 prog->NumInstructions - 1);
1326 /* END */
1327 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1328 &prog->Instructions[prog->NumInstructions - 1],
1329 1);
1330 vpi_insert = &vpi[prog->NumInstructions - 1];
1331
1332 vpi_insert[i].Opcode = OPCODE_MOV;
1333
1334 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1335 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1336 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1337 vpi_insert[i].DstReg.CondMask = COND_TR;
1338
1339 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1340 vpi_insert[i].SrcReg[0].Index = temp_index;
1341 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1342 i++;
1343
1344 vpi_insert[i].Opcode = OPCODE_MOV;
1345
1346 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1347 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1348 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1349 vpi_insert[i].DstReg.CondMask = COND_TR;
1350
1351 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1352 vpi_insert[i].SrcReg[0].Index = temp_index;
1353 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1354 i++;
1355
1356 free(prog->Instructions);
1357
1358 prog->Instructions = vpi;
1359
1360 prog->NumInstructions += i;
1361 vpi = &prog->Instructions[prog->NumInstructions - 1];
1362
1363 assert(vpi->Opcode == OPCODE_END);
1364 }
1365
1366 static void pos_as_texcoord(struct r300_vertex_program *vp,
1367 struct gl_program *prog)
1368 {
1369 struct prog_instruction *vpi;
1370 GLuint tempregi = prog->NumTemporaries;
1371 /* should do something else if no temps left... */
1372 prog->NumTemporaries++;
1373
1374 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1375 if (vpi->DstReg.File == PROGRAM_OUTPUT
1376 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1377 vpi->DstReg.File = PROGRAM_TEMPORARY;
1378 vpi->DstReg.Index = tempregi;
1379 }
1380 }
1381 insert_wpos(vp, prog, tempregi);
1382 }
1383
1384 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1385 *wanted_key, struct gl_vertex_program
1386 *mesa_vp, GLint wpos_idx)
1387 {
1388 struct r300_vertex_program *vp;
1389
1390 vp = _mesa_calloc(sizeof(*vp));
1391 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1392 vp->wpos_idx = wpos_idx;
1393
1394 if (mesa_vp->IsPositionInvariant) {
1395 position_invariant(&mesa_vp->Base);
1396 }
1397
1398 if (wpos_idx > -1) {
1399 pos_as_texcoord(vp, &mesa_vp->Base);
1400 }
1401
1402 assert(mesa_vp->Base.NumInstructions);
1403 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1404 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1405
1406 return vp;
1407 }
1408
1409 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1410 {
1411 if (key->OutputsWritten & (1 << vert))
1412 return;
1413
1414 key->OutputsWritten |= 1 << vert;
1415 key->OutputsAdded |= 1 << vert;
1416 }
1417
1418 void r300SelectVertexShader(r300ContextPtr r300)
1419 {
1420 GLcontext *ctx = ctx = r300->radeon.glCtx;
1421 GLuint InputsRead;
1422 struct r300_vertex_program_key wanted_key = { 0 };
1423 GLint i;
1424 struct r300_vertex_program_cont *vpc;
1425 struct r300_vertex_program *vp;
1426 GLint wpos_idx;
1427
1428 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1429 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1430 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1431 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1432
1433 wpos_idx = -1;
1434 if (InputsRead & FRAG_BIT_WPOS) {
1435 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1436 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1437 break;
1438
1439 if (i == ctx->Const.MaxTextureUnits) {
1440 fprintf(stderr, "\tno free texcoord found\n");
1441 _mesa_exit(-1);
1442 }
1443
1444 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1445 wpos_idx = i;
1446 }
1447
1448 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1449
1450 if (InputsRead & FRAG_BIT_COL0) {
1451 add_outputs(&wanted_key, VERT_RESULT_COL0);
1452 }
1453
1454 if (InputsRead & FRAG_BIT_COL1) {
1455 add_outputs(&wanted_key, VERT_RESULT_COL1);
1456 }
1457
1458 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1459 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1460 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1461 }
1462 }
1463
1464 if (vpc->mesa_program.IsPositionInvariant) {
1465 /* we wan't position don't we ? */
1466 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1467 }
1468
1469 for (vp = vpc->progs; vp; vp = vp->next)
1470 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1471 == 0) {
1472 r300->selected_vp = vp;
1473 return;
1474 }
1475 //_mesa_print_program(&vpc->mesa_program.Base);
1476
1477 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1478 vp->next = vpc->progs;
1479 vpc->progs = vp;
1480 r300->selected_vp = vp;
1481 }