r300: rewrite and hopefully simplify RS setup
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_statevars.h"
38 #include "tnl/tnl.h"
39
40 #include "r300_context.h"
41
/*
 * CMP_SRCS(a, b): non-zero when the two source registers a and b differ in
 * their RelAddr bit, or when they have different indices and are either
 * both of class PVS_SRC_REG_CONSTANT or both of class PVS_SRC_REG_INPUT.
 * The translator uses a non-zero result as the signal to first copy one of
 * the sources into a temporary.
 *
 * Both arguments are evaluated several times, so pass side-effect-free
 * lvalues only.  Every use of a parameter is parenthesized so that
 * expressions such as *ptr can be passed safely.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
48
/*
 * __CONST(x, y): build a source operand from the already-valid source
 * src[x], keeping its register index, register class and RelAddr bit but
 * selecting swizzle y (e.g. SWIZZLE_ZERO or SWIZZLE_ONE) for all four
 * components and applying no negation.  The result reads as a constant
 * ZERO or ONE operand.
 *
 * Expands references to `vp` and `src`, which must be in scope at the
 * point of use.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | \
	 (src[x].RelAddr << 4))
61
/*
 * FREE_TEMPS(): account for the scratch temporaries handed out so far.
 * u_temp_i counts down from VSF_MAX_FRAGMENT_TEMPS - 1, so the distance
 * from that start value is the number of scratch registers in use.  When
 * that number plus vp->num_temporaries exceeds VSF_MAX_FRAGMENT_TEMPS a
 * warning is issued once and the program is flagged as not native.
 * Finally u_temp_i is reset to its start value.
 *
 * Expands references to `vp` and `u_temp_i`, which must be in scope.
 */
#define FREE_TEMPS() \
	do { \
		int free_temps_in_use = \
			(VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if (vp->num_temporaries + free_temps_in_use > \
		    VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", \
				  vp->num_temporaries, free_temps_in_use); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
71
72 int r300VertexProgUpdateParams(GLcontext * ctx,
73 struct r300_vertex_program_cont *vp, float *dst)
74 {
75 int pi;
76 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
77 float *dst_o = dst;
78 struct gl_program_parameter_list *paramList;
79
80 if (mesa_vp->IsNVProgram) {
81 _mesa_load_tracked_matrices(ctx);
82
83 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
84 *dst++ = ctx->VertexProgram.Parameters[pi][0];
85 *dst++ = ctx->VertexProgram.Parameters[pi][1];
86 *dst++ = ctx->VertexProgram.Parameters[pi][2];
87 *dst++ = ctx->VertexProgram.Parameters[pi][3];
88 }
89 return dst - dst_o;
90 }
91
92 assert(mesa_vp->Base.Parameters);
93 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
94
95 if (mesa_vp->Base.Parameters->NumParameters * 4 >
96 VSF_MAX_FRAGMENT_LENGTH) {
97 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
98 _mesa_exit(-1);
99 }
100
101 paramList = mesa_vp->Base.Parameters;
102 for (pi = 0; pi < paramList->NumParameters; pi++) {
103 switch (paramList->Parameters[pi].Type) {
104 case PROGRAM_STATE_VAR:
105 case PROGRAM_NAMED_PARAM:
106 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
107 case PROGRAM_CONSTANT:
108 *dst++ = paramList->ParameterValues[pi][0];
109 *dst++ = paramList->ParameterValues[pi][1];
110 *dst++ = paramList->ParameterValues[pi][2];
111 *dst++ = paramList->ParameterValues[pi][3];
112 break;
113 default:
114 _mesa_problem(NULL, "Bad param type in %s",
115 __FUNCTION__);
116 }
117
118 }
119
120 return dst - dst_o;
121 }
122
123 static unsigned long t_dst_mask(GLuint mask)
124 {
125 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
126 return mask & VSF_FLAG_ALL;
127 }
128
129 static unsigned long t_dst_class(enum register_file file)
130 {
131
132 switch (file) {
133 case PROGRAM_TEMPORARY:
134 return PVS_DST_REG_TEMPORARY;
135 case PROGRAM_OUTPUT:
136 return PVS_DST_REG_OUT;
137 case PROGRAM_ADDRESS:
138 return PVS_DST_REG_A0;
139 /*
140 case PROGRAM_INPUT:
141 case PROGRAM_LOCAL_PARAM:
142 case PROGRAM_ENV_PARAM:
143 case PROGRAM_NAMED_PARAM:
144 case PROGRAM_STATE_VAR:
145 case PROGRAM_WRITE_ONLY:
146 case PROGRAM_ADDRESS:
147 */
148 default:
149 fprintf(stderr, "problem in %s", __FUNCTION__);
150 _mesa_exit(-1);
151 return -1;
152 }
153 }
154
155 static unsigned long t_dst_index(struct r300_vertex_program *vp,
156 struct prog_dst_register *dst)
157 {
158 if (dst->File == PROGRAM_OUTPUT)
159 return vp->outputs[dst->Index];
160
161 return dst->Index;
162 }
163
164 static unsigned long t_src_class(enum register_file file)
165 {
166 switch (file) {
167 case PROGRAM_TEMPORARY:
168 return PVS_SRC_REG_TEMPORARY;
169 case PROGRAM_INPUT:
170 return PVS_SRC_REG_INPUT;
171 case PROGRAM_LOCAL_PARAM:
172 case PROGRAM_ENV_PARAM:
173 case PROGRAM_NAMED_PARAM:
174 case PROGRAM_CONSTANT:
175 case PROGRAM_STATE_VAR:
176 return PVS_SRC_REG_CONSTANT;
177 /*
178 case PROGRAM_OUTPUT:
179 case PROGRAM_WRITE_ONLY:
180 case PROGRAM_ADDRESS:
181 */
182 default:
183 fprintf(stderr, "problem in %s", __FUNCTION__);
184 _mesa_exit(-1);
185 return -1;
186 }
187 }
188
189 static INLINE unsigned long t_swizzle(GLubyte swizzle)
190 {
191 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
192 return swizzle;
193 }
194
#if 0
/*
 * Debug helper (compiled out): print the vp->inputs[] map to stderr,
 * prefixed with the name of the calling function.  A NULL vp is reported
 * and otherwise ignored.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	attr = 0;
	while (attr < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
		attr++;
	}
	fprintf(stderr, ">\n");
}
#endif
213
214 static unsigned long t_src_index(struct r300_vertex_program *vp,
215 struct prog_src_register *src)
216 {
217 int i;
218 int max_reg = -1;
219
220 if (src->File == PROGRAM_INPUT) {
221 if (vp->inputs[src->Index] != -1)
222 return vp->inputs[src->Index];
223
224 for (i = 0; i < VERT_ATTRIB_MAX; i++)
225 if (vp->inputs[i] > max_reg)
226 max_reg = vp->inputs[i];
227
228 vp->inputs[src->Index] = max_reg + 1;
229
230 //vp_dump_inputs(vp, __FUNCTION__);
231
232 return vp->inputs[src->Index];
233 } else {
234 if (src->Index < 0) {
235 fprintf(stderr,
236 "negative offsets for indirect addressing do not work.\n");
237 return 0;
238 }
239 return src->Index;
240 }
241 }
242
243 /* these two functions should probably be merged... */
244
245 static unsigned long t_src(struct r300_vertex_program *vp,
246 struct prog_src_register *src)
247 {
248 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
249 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
250 */
251 return PVS_SRC_OPERAND(t_src_index(vp, src),
252 t_swizzle(GET_SWZ(src->Swizzle, 0)),
253 t_swizzle(GET_SWZ(src->Swizzle, 1)),
254 t_swizzle(GET_SWZ(src->Swizzle, 2)),
255 t_swizzle(GET_SWZ(src->Swizzle, 3)),
256 t_src_class(src->File),
257 src->NegateBase) | (src->RelAddr << 4);
258 }
259
260 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
261 struct prog_src_register *src)
262 {
263 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
264 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
265 */
266 return PVS_SRC_OPERAND(t_src_index(vp, src),
267 t_swizzle(GET_SWZ(src->Swizzle, 0)),
268 t_swizzle(GET_SWZ(src->Swizzle, 0)),
269 t_swizzle(GET_SWZ(src->Swizzle, 0)),
270 t_swizzle(GET_SWZ(src->Swizzle, 0)),
271 t_src_class(src->File),
272 src->
273 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
274 (src->RelAddr << 4);
275 }
276
277 static GLboolean valid_dst(struct r300_vertex_program *vp,
278 struct prog_dst_register *dst)
279 {
280 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
281 return GL_FALSE;
282 } else if (dst->File == PROGRAM_ADDRESS) {
283 assert(dst->Index == 0);
284 }
285
286 return GL_TRUE;
287 }
288
289 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
290 struct prog_instruction *vpi,
291 GLuint * inst,
292 struct prog_src_register src[3])
293 {
294 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
295
296 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
297 GL_FALSE,
298 GL_FALSE,
299 t_dst_index(vp, &vpi->DstReg),
300 t_dst_mask(vpi->DstReg.WriteMask),
301 t_dst_class(vpi->DstReg.File));
302 inst[1] = t_src(vp, &src[0]);
303 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
304 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
305 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
306 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
307 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
308 t_src_class(src[0].File),
309 (!src[0].
310 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
311 (src[0].RelAddr << 4);
312 inst[3] = 0;
313
314 return inst;
315 }
316
317 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
318 struct prog_instruction *vpi,
319 GLuint * inst,
320 struct prog_src_register src[3])
321 {
322 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
323 GL_FALSE,
324 GL_FALSE,
325 t_dst_index(vp, &vpi->DstReg),
326 t_dst_mask(vpi->DstReg.WriteMask),
327 t_dst_class(vpi->DstReg.File));
328 inst[1] = t_src(vp, &src[0]);
329 inst[2] = t_src(vp, &src[1]);
330 inst[3] = __CONST(1, SWIZZLE_ZERO);
331
332 return inst;
333 }
334
335 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
336 struct prog_instruction *vpi,
337 GLuint * inst,
338 struct prog_src_register src[3])
339 {
340 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
341 GL_FALSE,
342 GL_FALSE,
343 t_dst_index(vp, &vpi->DstReg),
344 t_dst_mask(vpi->DstReg.WriteMask),
345 t_dst_class(vpi->DstReg.File));
346 inst[1] = t_src(vp, &src[0]);
347 inst[2] = __CONST(0, SWIZZLE_ZERO);
348 inst[3] = __CONST(0, SWIZZLE_ZERO);
349
350 return inst;
351 }
352
353 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
354 struct prog_instruction *vpi,
355 GLuint * inst,
356 struct prog_src_register src[3])
357 {
358 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
359
360 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
361 GL_FALSE,
362 GL_FALSE,
363 t_dst_index(vp, &vpi->DstReg),
364 t_dst_mask(vpi->DstReg.WriteMask),
365 t_dst_class(vpi->DstReg.File));
366 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
367 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
368 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
369 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
370 SWIZZLE_ZERO,
371 t_src_class(src[0].File),
372 src[0].
373 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
374 (src[0].RelAddr << 4);
375 inst[2] =
376 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
377 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
378 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
379 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
380 t_src_class(src[1].File),
381 src[1].
382 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
383 (src[1].RelAddr << 4);
384 inst[3] = __CONST(1, SWIZZLE_ZERO);
385
386 return inst;
387 }
388
389 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
390 struct prog_instruction *vpi,
391 GLuint * inst,
392 struct prog_src_register src[3])
393 {
394 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
395 GL_FALSE,
396 GL_FALSE,
397 t_dst_index(vp, &vpi->DstReg),
398 t_dst_mask(vpi->DstReg.WriteMask),
399 t_dst_class(vpi->DstReg.File));
400 inst[1] = t_src(vp, &src[0]);
401 inst[2] = t_src(vp, &src[1]);
402 inst[3] = __CONST(1, SWIZZLE_ZERO);
403
404 return inst;
405 }
406
407 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
408 struct prog_instruction *vpi,
409 GLuint * inst,
410 struct prog_src_register src[3])
411 {
412 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
413 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
414 GL_FALSE,
415 GL_FALSE,
416 t_dst_index(vp, &vpi->DstReg),
417 t_dst_mask(vpi->DstReg.WriteMask),
418 t_dst_class(vpi->DstReg.File));
419 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
420 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
421 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
422 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
423 PVS_SRC_SELECT_FORCE_1,
424 t_src_class(src[0].File),
425 src[0].
426 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
427 (src[0].RelAddr << 4);
428 inst[2] = t_src(vp, &src[1]);
429 inst[3] = __CONST(1, SWIZZLE_ZERO);
430
431 return inst;
432 }
433
434 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
435 struct prog_instruction *vpi,
436 GLuint * inst,
437 struct prog_src_register src[3])
438 {
439 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
440 GL_FALSE,
441 GL_FALSE,
442 t_dst_index(vp, &vpi->DstReg),
443 t_dst_mask(vpi->DstReg.WriteMask),
444 t_dst_class(vpi->DstReg.File));
445 inst[1] = t_src(vp, &src[0]);
446 inst[2] = t_src(vp, &src[1]);
447 inst[3] = __CONST(1, SWIZZLE_ZERO);
448
449 return inst;
450 }
451
452 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
453 struct prog_instruction *vpi,
454 GLuint * inst,
455 struct prog_src_register src[3])
456 {
457 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
458 GL_TRUE,
459 GL_FALSE,
460 t_dst_index(vp, &vpi->DstReg),
461 t_dst_mask(vpi->DstReg.WriteMask),
462 t_dst_class(vpi->DstReg.File));
463 inst[1] = t_src_scalar(vp, &src[0]);
464 inst[2] = __CONST(0, SWIZZLE_ZERO);
465 inst[3] = __CONST(0, SWIZZLE_ZERO);
466
467 return inst;
468 }
469
470 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
471 struct prog_instruction *vpi,
472 GLuint * inst,
473 struct prog_src_register src[3])
474 {
475 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
476 GL_TRUE,
477 GL_FALSE,
478 t_dst_index(vp, &vpi->DstReg),
479 t_dst_mask(vpi->DstReg.WriteMask),
480 t_dst_class(vpi->DstReg.File));
481 inst[1] = t_src_scalar(vp, &src[0]);
482 inst[2] = __CONST(0, SWIZZLE_ZERO);
483 inst[3] = __CONST(0, SWIZZLE_ZERO);
484
485 return inst;
486 }
487
488 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
489 struct prog_instruction *vpi,
490 GLuint * inst,
491 struct prog_src_register src[3],
492 int *u_temp_i)
493 {
494 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
495 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
496
497 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
498 GL_FALSE,
499 GL_FALSE,
500 *u_temp_i,
501 t_dst_mask(vpi->DstReg.WriteMask),
502 PVS_DST_REG_TEMPORARY);
503 inst[1] = t_src(vp, &src[0]);
504 inst[2] = __CONST(0, SWIZZLE_ZERO);
505 inst[3] = __CONST(0, SWIZZLE_ZERO);
506 inst += 4;
507
508 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
509 GL_FALSE,
510 GL_FALSE,
511 t_dst_index(vp, &vpi->DstReg),
512 t_dst_mask(vpi->DstReg.WriteMask),
513 t_dst_class(vpi->DstReg.File));
514 inst[1] = t_src(vp, &src[0]);
515 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
516 PVS_SRC_SELECT_X,
517 PVS_SRC_SELECT_Y,
518 PVS_SRC_SELECT_Z,
519 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
520 /* Not 100% sure about this */
521 (!src[0].
522 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
523 /*VSF_FLAG_ALL */ );
524 inst[3] = __CONST(0, SWIZZLE_ZERO);
525 (*u_temp_i)--;
526
527 return inst;
528 }
529
530 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
531 struct prog_instruction *vpi,
532 GLuint * inst,
533 struct prog_src_register src[3])
534 {
535 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
536 GL_FALSE,
537 GL_FALSE,
538 t_dst_index(vp, &vpi->DstReg),
539 t_dst_mask(vpi->DstReg.WriteMask),
540 t_dst_class(vpi->DstReg.File));
541 inst[1] = t_src(vp, &src[0]);
542 inst[2] = __CONST(0, SWIZZLE_ZERO);
543 inst[3] = __CONST(0, SWIZZLE_ZERO);
544
545 return inst;
546 }
547
548 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
549 struct prog_instruction *vpi,
550 GLuint * inst,
551 struct prog_src_register src[3])
552 {
553 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
554
555 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
556 GL_TRUE,
557 GL_FALSE,
558 t_dst_index(vp, &vpi->DstReg),
559 t_dst_mask(vpi->DstReg.WriteMask),
560 t_dst_class(vpi->DstReg.File));
561 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
562 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
563 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
564 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
565 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
566 t_src_class(src[0].File),
567 src[0].
568 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
569 (src[0].RelAddr << 4);
570 inst[2] = __CONST(0, SWIZZLE_ZERO);
571 inst[3] = __CONST(0, SWIZZLE_ZERO);
572
573 return inst;
574 }
575
576 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
577 struct prog_instruction *vpi,
578 GLuint * inst,
579 struct prog_src_register src[3])
580 {
581 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
582
583 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
584 GL_TRUE,
585 GL_FALSE,
586 t_dst_index(vp, &vpi->DstReg),
587 t_dst_mask(vpi->DstReg.WriteMask),
588 t_dst_class(vpi->DstReg.File));
589 /* NOTE: Users swizzling might not work. */
590 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
591 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
592 PVS_SRC_SELECT_FORCE_0, // Z
593 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
594 t_src_class(src[0].File),
595 src[0].
596 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
597 (src[0].RelAddr << 4);
598 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
599 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
600 PVS_SRC_SELECT_FORCE_0, // Z
601 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
602 t_src_class(src[0].File),
603 src[0].
604 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
605 (src[0].RelAddr << 4);
606 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
607 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
608 PVS_SRC_SELECT_FORCE_0, // Z
609 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
610 t_src_class(src[0].File),
611 src[0].
612 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
613 (src[0].RelAddr << 4);
614
615 return inst;
616 }
617
618 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
619 struct prog_instruction *vpi,
620 GLuint * inst,
621 struct prog_src_register src[3])
622 {
623 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
624 GL_TRUE,
625 GL_FALSE,
626 t_dst_index(vp, &vpi->DstReg),
627 t_dst_mask(vpi->DstReg.WriteMask),
628 t_dst_class(vpi->DstReg.File));
629 inst[1] = t_src_scalar(vp, &src[0]);
630 inst[2] = __CONST(0, SWIZZLE_ZERO);
631 inst[3] = __CONST(0, SWIZZLE_ZERO);
632
633 return inst;
634 }
635
636 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
637 struct prog_instruction *vpi,
638 GLuint * inst,
639 struct prog_src_register src[3])
640 {
641 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
642 GL_FALSE,
643 GL_TRUE,
644 t_dst_index(vp, &vpi->DstReg),
645 t_dst_mask(vpi->DstReg.WriteMask),
646 t_dst_class(vpi->DstReg.File));
647 inst[1] = t_src(vp, &src[0]);
648 inst[2] = t_src(vp, &src[1]);
649 inst[3] = t_src(vp, &src[2]);
650
651 return inst;
652 }
653
654 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
655 struct prog_instruction *vpi,
656 GLuint * inst,
657 struct prog_src_register src[3])
658 {
659 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
660 GL_FALSE,
661 GL_FALSE,
662 t_dst_index(vp, &vpi->DstReg),
663 t_dst_mask(vpi->DstReg.WriteMask),
664 t_dst_class(vpi->DstReg.File));
665 inst[1] = t_src(vp, &src[0]);
666 inst[2] = t_src(vp, &src[1]);
667 inst[3] = __CONST(1, SWIZZLE_ZERO);
668
669 return inst;
670 }
671
672 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
673 struct prog_instruction *vpi,
674 GLuint * inst,
675 struct prog_src_register src[3])
676 {
677 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
678 GL_FALSE,
679 GL_FALSE,
680 t_dst_index(vp, &vpi->DstReg),
681 t_dst_mask(vpi->DstReg.WriteMask),
682 t_dst_class(vpi->DstReg.File));
683 inst[1] = t_src(vp, &src[0]);
684 inst[2] = t_src(vp, &src[1]);
685 inst[3] = __CONST(1, SWIZZLE_ZERO);
686
687 return inst;
688 }
689
690 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
691 struct prog_instruction *vpi,
692 GLuint * inst,
693 struct prog_src_register src[3])
694 {
695 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
696
697 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
698 GL_FALSE,
699 GL_FALSE,
700 t_dst_index(vp, &vpi->DstReg),
701 t_dst_mask(vpi->DstReg.WriteMask),
702 t_dst_class(vpi->DstReg.File));
703 inst[1] = t_src(vp, &src[0]);
704 inst[2] = __CONST(0, SWIZZLE_ZERO);
705 inst[3] = __CONST(0, SWIZZLE_ZERO);
706
707 return inst;
708 }
709
710 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
711 struct prog_instruction *vpi,
712 GLuint * inst,
713 struct prog_src_register src[3])
714 {
715 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
716 GL_FALSE,
717 GL_FALSE,
718 t_dst_index(vp, &vpi->DstReg),
719 t_dst_mask(vpi->DstReg.WriteMask),
720 t_dst_class(vpi->DstReg.File));
721 inst[1] = t_src(vp, &src[0]);
722 inst[2] = t_src(vp, &src[1]);
723 inst[3] = __CONST(1, SWIZZLE_ZERO);
724
725 return inst;
726 }
727
728 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
729 struct prog_instruction *vpi,
730 GLuint * inst,
731 struct prog_src_register src[3])
732 {
733 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
734 GL_TRUE,
735 GL_FALSE,
736 t_dst_index(vp, &vpi->DstReg),
737 t_dst_mask(vpi->DstReg.WriteMask),
738 t_dst_class(vpi->DstReg.File));
739 inst[1] = t_src_scalar(vp, &src[0]);
740 inst[2] = __CONST(0, SWIZZLE_ZERO);
741 inst[3] = t_src_scalar(vp, &src[1]);
742
743 return inst;
744 }
745
746 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
747 struct prog_instruction *vpi,
748 GLuint * inst,
749 struct prog_src_register src[3])
750 {
751 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
752 GL_TRUE,
753 GL_FALSE,
754 t_dst_index(vp, &vpi->DstReg),
755 t_dst_mask(vpi->DstReg.WriteMask),
756 t_dst_class(vpi->DstReg.File));
757 inst[1] = t_src_scalar(vp, &src[0]);
758 inst[2] = __CONST(0, SWIZZLE_ZERO);
759 inst[3] = __CONST(0, SWIZZLE_ZERO);
760
761 return inst;
762 }
763
764 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
765 struct prog_instruction *vpi,
766 GLuint * inst,
767 struct prog_src_register src[3])
768 {
769 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
770 GL_TRUE,
771 GL_FALSE,
772 t_dst_index(vp, &vpi->DstReg),
773 t_dst_mask(vpi->DstReg.WriteMask),
774 t_dst_class(vpi->DstReg.File));
775 inst[1] = t_src_scalar(vp, &src[0]);
776 inst[2] = __CONST(0, SWIZZLE_ZERO);
777 inst[3] = __CONST(0, SWIZZLE_ZERO);
778
779 return inst;
780 }
781
782 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
783 struct prog_instruction *vpi,
784 GLuint * inst,
785 struct prog_src_register src[3])
786 {
787 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
788 GL_FALSE,
789 GL_FALSE,
790 t_dst_index(vp, &vpi->DstReg),
791 t_dst_mask(vpi->DstReg.WriteMask),
792 t_dst_class(vpi->DstReg.File));
793 inst[1] = t_src(vp, &src[0]);
794 inst[2] = t_src(vp, &src[1]);
795 inst[3] = __CONST(1, SWIZZLE_ZERO);
796
797 return inst;
798 }
799
800 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
801 struct prog_instruction *vpi,
802 GLuint * inst,
803 struct prog_src_register src[3])
804 {
805 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
806 GL_FALSE,
807 GL_FALSE,
808 t_dst_index(vp, &vpi->DstReg),
809 t_dst_mask(vpi->DstReg.WriteMask),
810 t_dst_class(vpi->DstReg.File));
811 inst[1] = t_src(vp, &src[0]);
812 inst[2] = t_src(vp, &src[1]);
813 inst[3] = __CONST(1, SWIZZLE_ZERO);
814
815 return inst;
816 }
817
818 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
819 struct prog_instruction *vpi,
820 GLuint * inst,
821 struct prog_src_register src[3])
822 {
823 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
824
825 #if 0
826 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
827 GL_FALSE,
828 GL_FALSE,
829 t_dst_index(vp, &vpi->DstReg),
830 t_dst_mask(vpi->DstReg.WriteMask),
831 t_dst_class(vpi->DstReg.File));
832 inst[1] = t_src(vp, &src[0]);
833 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
834 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
838 t_src_class(src[1].File),
839 (!src[1].
840 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
841 (src[1].RelAddr << 4);
842 inst[3] = 0;
843 #else
844 inst[0] =
845 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
846 GL_FALSE,
847 GL_FALSE,
848 t_dst_index(vp, &vpi->DstReg),
849 t_dst_mask(vpi->DstReg.WriteMask),
850 t_dst_class(vpi->DstReg.File));
851 inst[1] = t_src(vp, &src[0]);
852 inst[2] = __CONST(0, SWIZZLE_ONE);
853 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
854 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
855 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
856 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
857 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
858 t_src_class(src[1].File),
859 (!src[1].
860 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
861 (src[1].RelAddr << 4);
862 #endif
863
864 return inst;
865 }
866
867 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
868 struct prog_instruction *vpi,
869 GLuint * inst,
870 struct prog_src_register src[3])
871 {
872 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
873
874 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
875 GL_FALSE,
876 GL_FALSE,
877 t_dst_index(vp, &vpi->DstReg),
878 t_dst_mask(vpi->DstReg.WriteMask),
879 t_dst_class(vpi->DstReg.File));
880 inst[1] = t_src(vp, &src[0]);
881 inst[2] = __CONST(0, SWIZZLE_ZERO);
882 inst[3] = __CONST(0, SWIZZLE_ZERO);
883
884 return inst;
885 }
886
887 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
888 struct prog_instruction *vpi,
889 GLuint * inst,
890 struct prog_src_register src[3],
891 int *u_temp_i)
892 {
893 /* mul r0, r1.yzxw, r2.zxyw
894 mad r0, -r2.yzxw, r1.zxyw, r0
895 */
896
897 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
898 GL_FALSE,
899 GL_FALSE,
900 *u_temp_i,
901 t_dst_mask(vpi->DstReg.WriteMask),
902 PVS_DST_REG_TEMPORARY);
903 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
904 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
905 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
906 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
907 t_src_class(src[0].File),
908 src[0].
909 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
910 (src[0].RelAddr << 4);
911 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
912 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
913 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
914 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
915 t_src_class(src[1].File),
916 src[1].
917 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
918 (src[1].RelAddr << 4);
919 inst[3] = __CONST(1, SWIZZLE_ZERO);
920 inst += 4;
921
922 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
923 GL_FALSE,
924 GL_FALSE,
925 t_dst_index(vp, &vpi->DstReg),
926 t_dst_mask(vpi->DstReg.WriteMask),
927 t_dst_class(vpi->DstReg.File));
928 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
929 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
930 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
931 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
932 t_src_class(src[1].File),
933 (!src[1].
934 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
935 (src[1].RelAddr << 4);
936 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
937 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
938 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
939 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
940 t_src_class(src[0].File),
941 src[0].
942 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
943 (src[0].RelAddr << 4);
944 inst[3] =
945 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
946 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
947 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
948
949 (*u_temp_i)--;
950
951 return inst;
952 }
953
954 static void t_inputs_outputs(struct r300_vertex_program *vp)
955 {
956 int i;
957 int cur_reg = 0;
958
959 for (i = 0; i < VERT_ATTRIB_MAX; i++)
960 vp->inputs[i] = -1;
961
962 for (i = 0; i < VERT_RESULT_MAX; i++)
963 vp->outputs[i] = -1;
964
965 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
966
967 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
968 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
969 }
970
971 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
972 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
973 }
974
975 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
976 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
977 }
978
979 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
980 vp->outputs[VERT_RESULT_COL1] =
981 vp->outputs[VERT_RESULT_COL0] + 1;
982 cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
983 }
984
985 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
986 vp->outputs[VERT_RESULT_BFC0] =
987 vp->outputs[VERT_RESULT_COL0] + 2;
988 cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
989 }
990
991 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
992 vp->outputs[VERT_RESULT_BFC1] =
993 vp->outputs[VERT_RESULT_COL0] + 3;
994 cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
995 }
996
997 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
998 if (vp->key.OutputsWritten & (1 << i)) {
999 vp->outputs[i] = cur_reg++;
1000 }
1001 }
1002
1003 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
1004 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
1005 }
1006 }
1007
1008 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
1009 struct prog_instruction *vpi)
1010 {
1011 int i;
1012 GLuint *inst;
1013 unsigned long num_operands;
1014 /* Initial value should be last tmp reg that hw supports.
1015 Strangely enough r300 doesnt mind even though these would be out of range.
1016 Smart enough to realize that it doesnt need it? */
1017 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1018 struct prog_src_register src[3];
1019
1020 vp->pos_end = 0; /* Not supported yet */
1021 vp->program.length = 0;
1022 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
1023 vp->translated = GL_TRUE;
1024 vp->native = GL_TRUE;
1025
1026 t_inputs_outputs(vp);
1027
1028 for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
1029 vpi++, inst += 4) {
1030
1031 FREE_TEMPS();
1032
1033 if (!valid_dst(vp, &vpi->DstReg)) {
1034 /* redirect result to unused temp */
1035 vpi->DstReg.File = PROGRAM_TEMPORARY;
1036 vpi->DstReg.Index = u_temp_i;
1037 }
1038
1039 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1040
1041 /* copy the sources (src) from mesa into a local variable... is this needed? */
1042 for (i = 0; i < num_operands; i++) {
1043 src[i] = vpi->SrcReg[i];
1044 }
1045
1046 if (num_operands == 3) { /* TODO: scalars */
1047 if (CMP_SRCS(src[1], src[2])
1048 || CMP_SRCS(src[0], src[2])) {
1049 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1050 GL_FALSE,
1051 GL_FALSE,
1052 u_temp_i,
1053 VSF_FLAG_ALL,
1054 PVS_DST_REG_TEMPORARY);
1055 inst[1] =
1056 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1057 SWIZZLE_X,
1058 SWIZZLE_Y,
1059 SWIZZLE_Z,
1060 SWIZZLE_W,
1061 t_src_class(src[2].File),
1062 VSF_FLAG_NONE) | (src[2].
1063 RelAddr <<
1064 4);
1065 inst[2] = __CONST(2, SWIZZLE_ZERO);
1066 inst[3] = __CONST(2, SWIZZLE_ZERO);
1067 inst += 4;
1068
1069 src[2].File = PROGRAM_TEMPORARY;
1070 src[2].Index = u_temp_i;
1071 src[2].RelAddr = 0;
1072 u_temp_i--;
1073 }
1074 }
1075
1076 if (num_operands >= 2) {
1077 if (CMP_SRCS(src[1], src[0])) {
1078 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1079 GL_FALSE,
1080 GL_FALSE,
1081 u_temp_i,
1082 VSF_FLAG_ALL,
1083 PVS_DST_REG_TEMPORARY);
1084 inst[1] =
1085 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1086 SWIZZLE_X,
1087 SWIZZLE_Y,
1088 SWIZZLE_Z,
1089 SWIZZLE_W,
1090 t_src_class(src[0].File),
1091 VSF_FLAG_NONE) | (src[0].
1092 RelAddr <<
1093 4);
1094 inst[2] = __CONST(0, SWIZZLE_ZERO);
1095 inst[3] = __CONST(0, SWIZZLE_ZERO);
1096 inst += 4;
1097
1098 src[0].File = PROGRAM_TEMPORARY;
1099 src[0].Index = u_temp_i;
1100 src[0].RelAddr = 0;
1101 u_temp_i--;
1102 }
1103 }
1104
1105 switch (vpi->Opcode) {
1106 case OPCODE_ABS:
1107 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1108 break;
1109 case OPCODE_ADD:
1110 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1111 break;
1112 case OPCODE_ARL:
1113 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1114 break;
1115 case OPCODE_DP3:
1116 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1117 break;
1118 case OPCODE_DP4:
1119 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1120 break;
1121 case OPCODE_DPH:
1122 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1123 break;
1124 case OPCODE_DST:
1125 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1126 break;
1127 case OPCODE_EX2:
1128 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1129 break;
1130 case OPCODE_EXP:
1131 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1132 break;
1133 case OPCODE_FLR:
1134 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1135 &u_temp_i);
1136 break;
1137 case OPCODE_FRC:
1138 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1139 break;
1140 case OPCODE_LG2:
1141 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1142 break;
1143 case OPCODE_LIT:
1144 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1145 break;
1146 case OPCODE_LOG:
1147 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1148 break;
1149 case OPCODE_MAD:
1150 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1151 break;
1152 case OPCODE_MAX:
1153 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1154 break;
1155 case OPCODE_MIN:
1156 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1157 break;
1158 case OPCODE_MOV:
1159 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1160 break;
1161 case OPCODE_MUL:
1162 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1163 break;
1164 case OPCODE_POW:
1165 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1166 break;
1167 case OPCODE_RCP:
1168 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1169 break;
1170 case OPCODE_RSQ:
1171 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1172 break;
1173 case OPCODE_SGE:
1174 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1175 break;
1176 case OPCODE_SLT:
1177 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1178 break;
1179 case OPCODE_SUB:
1180 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1181 break;
1182 case OPCODE_SWZ:
1183 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1184 break;
1185 case OPCODE_XPD:
1186 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1187 &u_temp_i);
1188 break;
1189 default:
1190 assert(0);
1191 break;
1192 }
1193 }
1194
1195 /* Some outputs may be artificially added, to match the inputs
1196 of the fragment program. Blank the outputs here. */
1197 for (i = 0; i < VERT_RESULT_MAX; i++) {
1198 if (vp->key.OutputsAdded & (1 << i)) {
1199 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1200 GL_FALSE,
1201 GL_FALSE,
1202 vp->outputs[i],
1203 VSF_FLAG_ALL,
1204 PVS_DST_REG_OUT);
1205 inst[1] = __CONST(0, SWIZZLE_ZERO);
1206 inst[2] = __CONST(0, SWIZZLE_ZERO);
1207 inst[3] = __CONST(0, SWIZZLE_ZERO);
1208 inst += 4;
1209 }
1210 }
1211
1212 vp->program.length = (inst - vp->program.body.i);
1213 if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
1214 vp->program.length = 0;
1215 vp->native = GL_FALSE;
1216 }
1217 #if 0
1218 fprintf(stderr, "hw program:\n");
1219 for (i = 0; i < vp->program.length; i++)
1220 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1221 #endif
1222 }
1223
1224 /* DP4 version seems to trigger some hw peculiarity */
1225 //#define PREFER_DP4
1226
1227 static void position_invariant(struct gl_program *prog)
1228 {
1229 struct prog_instruction *vpi;
1230 struct gl_program_parameter_list *paramList;
1231 int i;
1232
1233 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1234
1235 /* tokens[4] = matrix modifier */
1236 #ifdef PREFER_DP4
1237 tokens[4] = 0; /* not transposed or inverted */
1238 #else
1239 tokens[4] = STATE_MATRIX_TRANSPOSE;
1240 #endif
1241 paramList = prog->Parameters;
1242
1243 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1244 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1245
1246 for (i = 0; i < 4; i++) {
1247 GLint idx;
1248 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1249 idx = _mesa_add_state_reference(paramList, tokens);
1250 #ifdef PREFER_DP4
1251 vpi[i].Opcode = OPCODE_DP4;
1252 vpi[i].StringPos = 0;
1253 vpi[i].Data = 0;
1254
1255 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1256 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1257 vpi[i].DstReg.WriteMask = 1 << i;
1258 vpi[i].DstReg.CondMask = COND_TR;
1259
1260 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1261 vpi[i].SrcReg[0].Index = idx;
1262 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1263
1264 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1265 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1266 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1267 #else
1268 if (i == 0)
1269 vpi[i].Opcode = OPCODE_MUL;
1270 else
1271 vpi[i].Opcode = OPCODE_MAD;
1272
1273 vpi[i].StringPos = 0;
1274 vpi[i].Data = 0;
1275
1276 if (i == 3)
1277 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1278 else
1279 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1280 vpi[i].DstReg.Index = 0;
1281 vpi[i].DstReg.WriteMask = 0xf;
1282 vpi[i].DstReg.CondMask = COND_TR;
1283
1284 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1285 vpi[i].SrcReg[0].Index = idx;
1286 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1287
1288 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1289 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1290 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1291
1292 if (i > 0) {
1293 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1294 vpi[i].SrcReg[2].Index = 0;
1295 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1296 }
1297 #endif
1298 }
1299
1300 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1301 prog->NumInstructions);
1302
1303 free(prog->Instructions);
1304
1305 prog->Instructions = vpi;
1306
1307 prog->NumInstructions += 4;
1308 vpi = &prog->Instructions[prog->NumInstructions - 1];
1309
1310 assert(vpi->Opcode == OPCODE_END);
1311 }
1312
1313 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1314 GLuint temp_index)
1315 {
1316 struct prog_instruction *vpi;
1317 struct prog_instruction *vpi_insert;
1318 int i = 0;
1319
1320 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1321 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1322 /* all but END */
1323 _mesa_copy_instructions(vpi, prog->Instructions,
1324 prog->NumInstructions - 1);
1325 /* END */
1326 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1327 &prog->Instructions[prog->NumInstructions - 1],
1328 1);
1329 vpi_insert = &vpi[prog->NumInstructions - 1];
1330
1331 vpi_insert[i].Opcode = OPCODE_MOV;
1332
1333 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1334 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1335 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1336 vpi_insert[i].DstReg.CondMask = COND_TR;
1337
1338 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1339 vpi_insert[i].SrcReg[0].Index = temp_index;
1340 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1341 i++;
1342
1343 vpi_insert[i].Opcode = OPCODE_MOV;
1344
1345 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1346 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1347 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1348 vpi_insert[i].DstReg.CondMask = COND_TR;
1349
1350 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1351 vpi_insert[i].SrcReg[0].Index = temp_index;
1352 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1353 i++;
1354
1355 free(prog->Instructions);
1356
1357 prog->Instructions = vpi;
1358
1359 prog->NumInstructions += i;
1360 vpi = &prog->Instructions[prog->NumInstructions - 1];
1361
1362 assert(vpi->Opcode == OPCODE_END);
1363 }
1364
1365 static void pos_as_texcoord(struct r300_vertex_program *vp,
1366 struct gl_program *prog)
1367 {
1368 struct prog_instruction *vpi;
1369 GLuint tempregi = prog->NumTemporaries;
1370 /* should do something else if no temps left... */
1371 prog->NumTemporaries++;
1372
1373 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1374 if (vpi->DstReg.File == PROGRAM_OUTPUT
1375 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1376 vpi->DstReg.File = PROGRAM_TEMPORARY;
1377 vpi->DstReg.Index = tempregi;
1378 }
1379 }
1380 insert_wpos(vp, prog, tempregi);
1381 }
1382
1383 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1384 *wanted_key, struct gl_vertex_program
1385 *mesa_vp, GLint wpos_idx)
1386 {
1387 struct r300_vertex_program *vp;
1388
1389 vp = _mesa_calloc(sizeof(*vp));
1390 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1391 vp->wpos_idx = wpos_idx;
1392
1393 if (mesa_vp->IsPositionInvariant) {
1394 position_invariant(&mesa_vp->Base);
1395 }
1396
1397 if (wpos_idx > -1) {
1398 pos_as_texcoord(vp, &mesa_vp->Base);
1399 }
1400
1401 assert(mesa_vp->Base.NumInstructions);
1402 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1403 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1404
1405 return vp;
1406 }
1407
1408 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1409 {
1410 if (key->OutputsWritten & (1 << vert))
1411 return;
1412
1413 key->OutputsWritten |= 1 << vert;
1414 key->OutputsAdded |= 1 << vert;
1415 }
1416
1417 void r300SelectVertexShader(r300ContextPtr r300)
1418 {
1419 GLcontext *ctx = ctx = r300->radeon.glCtx;
1420 GLuint InputsRead;
1421 struct r300_vertex_program_key wanted_key = { 0 };
1422 GLint i;
1423 struct r300_vertex_program_cont *vpc;
1424 struct r300_vertex_program *vp;
1425 GLint wpos_idx;
1426
1427 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1428 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1429 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1430 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1431
1432 wpos_idx = -1;
1433 if (InputsRead & FRAG_BIT_WPOS) {
1434 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1435 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1436 break;
1437
1438 if (i == ctx->Const.MaxTextureUnits) {
1439 fprintf(stderr, "\tno free texcoord found\n");
1440 _mesa_exit(-1);
1441 }
1442
1443 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1444 wpos_idx = i;
1445 }
1446
1447 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1448
1449 if (InputsRead & FRAG_BIT_COL0) {
1450 add_outputs(&wanted_key, VERT_RESULT_COL0);
1451 }
1452
1453 if (InputsRead & FRAG_BIT_COL1) {
1454 add_outputs(&wanted_key, VERT_RESULT_COL1);
1455 }
1456
1457 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1458 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1459 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1460 }
1461 }
1462
1463 if (vpc->mesa_program.IsPositionInvariant) {
1464 /* we wan't position don't we ? */
1465 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1466 }
1467
1468 for (vp = vpc->progs; vp; vp = vp->next)
1469 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1470 == 0) {
1471 r300->selected_vp = vp;
1472 return;
1473 }
1474 //_mesa_print_program(&vpc->mesa_program.Base);
1475
1476 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1477 vp->next = vpc->progs;
1478 vpc->progs = vp;
1479 r300->selected_vp = vp;
1480 }