Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_print.h"
38 #include "shader/prog_statevars.h"
39 #include "tnl/tnl.h"
40
41 #include "r300_context.h"
42 #include "r300_state.h"
43
/*
 * CMP_SRCS(a, b): non-zero when
 *   - the RelAddr flags of the two source registers differ, or
 *   - their indices differ and both registers map to the constant class,
 *     or both map to the input class.
 *
 * r300TranslateVertexShader reacts to a non-zero result by first copying
 * one of the two operands into a scratch temporary.
 *
 * Both arguments are fully parenthesized, so any lvalue expression
 * (e.g. *ptr) may be passed.  Each argument is still evaluated more than
 * once; do not pass expressions with side effects.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
50
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x - index into the caller's local `src` array; the register index, the
 *     register class and the RelAddr bit are taken from src[x].
 * y - swizzle value replicated into all four component selects (callers in
 *     this file pass SWIZZLE_ZERO or SWIZZLE_ONE).
 *
 * The negate field is always VSF_FLAG_NONE.
 *
 * NOTE: the expansion refers to `vp` and `src` by name, so both must be in
 * scope wherever the macro is used.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
63
/*
 * Release all scratch temporaries by resetting the caller's `u_temp_i`
 * cursor to VSF_MAX_FRAGMENT_TEMPS - 1.
 *
 * Before the reset, the number of scratch temps handed out since the last
 * reset is derived from how far `u_temp_i` has moved down.  If that count
 * plus vp->num_temporaries exceeds VSF_MAX_FRAGMENT_TEMPS, a WARN_ONCE is
 * issued and vp->error is set to GL_TRUE.
 *
 * NOTE: the expansion refers to `vp` and `u_temp_i` by name, so both must
 * be in scope wherever the macro is used.
 */
#define FREE_TEMPS() \
	do { \
		int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
	} while (0)
73
74 int r300VertexProgUpdateParams(GLcontext * ctx,
75 struct r300_vertex_program_cont *vp, float *dst)
76 {
77 int pi;
78 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
79 float *dst_o = dst;
80 struct gl_program_parameter_list *paramList;
81
82 if (mesa_vp->IsNVProgram) {
83 _mesa_load_tracked_matrices(ctx);
84
85 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
86 *dst++ = ctx->VertexProgram.Parameters[pi][0];
87 *dst++ = ctx->VertexProgram.Parameters[pi][1];
88 *dst++ = ctx->VertexProgram.Parameters[pi][2];
89 *dst++ = ctx->VertexProgram.Parameters[pi][3];
90 }
91 return dst - dst_o;
92 }
93
94 assert(mesa_vp->Base.Parameters);
95 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
96
97 if (mesa_vp->Base.Parameters->NumParameters * 4 >
98 VSF_MAX_FRAGMENT_LENGTH) {
99 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
100 _mesa_exit(-1);
101 }
102
103 paramList = mesa_vp->Base.Parameters;
104 for (pi = 0; pi < paramList->NumParameters; pi++) {
105 switch (paramList->Parameters[pi].Type) {
106 case PROGRAM_STATE_VAR:
107 case PROGRAM_NAMED_PARAM:
108 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
109 case PROGRAM_CONSTANT:
110 *dst++ = paramList->ParameterValues[pi][0];
111 *dst++ = paramList->ParameterValues[pi][1];
112 *dst++ = paramList->ParameterValues[pi][2];
113 *dst++ = paramList->ParameterValues[pi][3];
114 break;
115 default:
116 _mesa_problem(NULL, "Bad param type in %s",
117 __FUNCTION__);
118 }
119
120 }
121
122 return dst - dst_o;
123 }
124
125 static unsigned long t_dst_mask(GLuint mask)
126 {
127 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
128 return mask & VSF_FLAG_ALL;
129 }
130
131 static unsigned long t_dst_class(gl_register_file file)
132 {
133
134 switch (file) {
135 case PROGRAM_TEMPORARY:
136 return PVS_DST_REG_TEMPORARY;
137 case PROGRAM_OUTPUT:
138 return PVS_DST_REG_OUT;
139 case PROGRAM_ADDRESS:
140 return PVS_DST_REG_A0;
141 /*
142 case PROGRAM_INPUT:
143 case PROGRAM_LOCAL_PARAM:
144 case PROGRAM_ENV_PARAM:
145 case PROGRAM_NAMED_PARAM:
146 case PROGRAM_STATE_VAR:
147 case PROGRAM_WRITE_ONLY:
148 case PROGRAM_ADDRESS:
149 */
150 default:
151 fprintf(stderr, "problem in %s", __FUNCTION__);
152 _mesa_exit(-1);
153 return -1;
154 }
155 }
156
157 static unsigned long t_dst_index(struct r300_vertex_program *vp,
158 struct prog_dst_register *dst)
159 {
160 if (dst->File == PROGRAM_OUTPUT)
161 return vp->outputs[dst->Index];
162
163 return dst->Index;
164 }
165
166 static unsigned long t_src_class(gl_register_file file)
167 {
168 switch (file) {
169 case PROGRAM_TEMPORARY:
170 return PVS_SRC_REG_TEMPORARY;
171 case PROGRAM_INPUT:
172 return PVS_SRC_REG_INPUT;
173 case PROGRAM_LOCAL_PARAM:
174 case PROGRAM_ENV_PARAM:
175 case PROGRAM_NAMED_PARAM:
176 case PROGRAM_CONSTANT:
177 case PROGRAM_STATE_VAR:
178 return PVS_SRC_REG_CONSTANT;
179 /*
180 case PROGRAM_OUTPUT:
181 case PROGRAM_WRITE_ONLY:
182 case PROGRAM_ADDRESS:
183 */
184 default:
185 fprintf(stderr, "problem in %s", __FUNCTION__);
186 _mesa_exit(-1);
187 return -1;
188 }
189 }
190
191 static INLINE unsigned long t_swizzle(GLubyte swizzle)
192 {
193 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
194 return swizzle;
195 }
196
#if 0
/*
 * Debug helper (compiled out): print every entry of vp->inputs[] to
 * stderr on one line, prefixed with the caller's name.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr = 0;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	while (attr < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
		attr++;
	}
	fprintf(stderr, ">\n");
}
#endif
215
216 static unsigned long t_src_index(struct r300_vertex_program *vp,
217 struct prog_src_register *src)
218 {
219 if (src->File == PROGRAM_INPUT) {
220 assert(vp->inputs[src->Index] != -1);
221 return vp->inputs[src->Index];
222 } else {
223 if (src->Index < 0) {
224 fprintf(stderr,
225 "negative offsets for indirect addressing do not work.\n");
226 return 0;
227 }
228 return src->Index;
229 }
230 }
231
232 /* these two functions should probably be merged... */
233
234 static unsigned long t_src(struct r300_vertex_program *vp,
235 struct prog_src_register *src)
236 {
237 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
238 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
239 */
240 return PVS_SRC_OPERAND(t_src_index(vp, src),
241 t_swizzle(GET_SWZ(src->Swizzle, 0)),
242 t_swizzle(GET_SWZ(src->Swizzle, 1)),
243 t_swizzle(GET_SWZ(src->Swizzle, 2)),
244 t_swizzle(GET_SWZ(src->Swizzle, 3)),
245 t_src_class(src->File),
246 src->Negate) | (src->RelAddr << 4);
247 }
248
249 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
250 struct prog_src_register *src)
251 {
252 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
253 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
254 */
255 return PVS_SRC_OPERAND(t_src_index(vp, src),
256 t_swizzle(GET_SWZ(src->Swizzle, 0)),
257 t_swizzle(GET_SWZ(src->Swizzle, 0)),
258 t_swizzle(GET_SWZ(src->Swizzle, 0)),
259 t_swizzle(GET_SWZ(src->Swizzle, 0)),
260 t_src_class(src->File),
261 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
262 (src->RelAddr << 4);
263 }
264
265 static GLboolean valid_dst(struct r300_vertex_program *vp,
266 struct prog_dst_register *dst)
267 {
268 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
269 return GL_FALSE;
270 } else if (dst->File == PROGRAM_ADDRESS) {
271 assert(dst->Index == 0);
272 }
273
274 return GL_TRUE;
275 }
276
277 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
278 struct prog_instruction *vpi,
279 GLuint * inst,
280 struct prog_src_register src[3])
281 {
282 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
283
284 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
285 GL_FALSE,
286 GL_FALSE,
287 t_dst_index(vp, &vpi->DstReg),
288 t_dst_mask(vpi->DstReg.WriteMask),
289 t_dst_class(vpi->DstReg.File));
290 inst[1] = t_src(vp, &src[0]);
291 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
292 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
293 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
294 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
295 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
296 t_src_class(src[0].File),
297 (!src[0].
298 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
299 (src[0].RelAddr << 4);
300 inst[3] = 0;
301
302 return inst;
303 }
304
305 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
306 struct prog_instruction *vpi,
307 GLuint * inst,
308 struct prog_src_register src[3])
309 {
310 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
311 GL_FALSE,
312 GL_FALSE,
313 t_dst_index(vp, &vpi->DstReg),
314 t_dst_mask(vpi->DstReg.WriteMask),
315 t_dst_class(vpi->DstReg.File));
316 inst[1] = t_src(vp, &src[0]);
317 inst[2] = t_src(vp, &src[1]);
318 inst[3] = __CONST(1, SWIZZLE_ZERO);
319
320 return inst;
321 }
322
323 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
324 struct prog_instruction *vpi,
325 GLuint * inst,
326 struct prog_src_register src[3])
327 {
328 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
329 GL_FALSE,
330 GL_FALSE,
331 t_dst_index(vp, &vpi->DstReg),
332 t_dst_mask(vpi->DstReg.WriteMask),
333 t_dst_class(vpi->DstReg.File));
334 inst[1] = t_src(vp, &src[0]);
335 inst[2] = __CONST(0, SWIZZLE_ZERO);
336 inst[3] = __CONST(0, SWIZZLE_ZERO);
337
338 return inst;
339 }
340
341 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
342 struct prog_instruction *vpi,
343 GLuint * inst,
344 struct prog_src_register src[3])
345 {
346 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
347
348 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
349 GL_FALSE,
350 GL_FALSE,
351 t_dst_index(vp, &vpi->DstReg),
352 t_dst_mask(vpi->DstReg.WriteMask),
353 t_dst_class(vpi->DstReg.File));
354 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
355 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
356 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
357 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
358 SWIZZLE_ZERO,
359 t_src_class(src[0].File),
360 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
361 (src[0].RelAddr << 4);
362 inst[2] =
363 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
364 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
365 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
366 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
367 t_src_class(src[1].File),
368 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
369 (src[1].RelAddr << 4);
370 inst[3] = __CONST(1, SWIZZLE_ZERO);
371
372 return inst;
373 }
374
375 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
376 struct prog_instruction *vpi,
377 GLuint * inst,
378 struct prog_src_register src[3])
379 {
380 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
381 GL_FALSE,
382 GL_FALSE,
383 t_dst_index(vp, &vpi->DstReg),
384 t_dst_mask(vpi->DstReg.WriteMask),
385 t_dst_class(vpi->DstReg.File));
386 inst[1] = t_src(vp, &src[0]);
387 inst[2] = t_src(vp, &src[1]);
388 inst[3] = __CONST(1, SWIZZLE_ZERO);
389
390 return inst;
391 }
392
393 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
394 struct prog_instruction *vpi,
395 GLuint * inst,
396 struct prog_src_register src[3])
397 {
398 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
399 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
400 GL_FALSE,
401 GL_FALSE,
402 t_dst_index(vp, &vpi->DstReg),
403 t_dst_mask(vpi->DstReg.WriteMask),
404 t_dst_class(vpi->DstReg.File));
405 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
406 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
407 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
408 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
409 PVS_SRC_SELECT_FORCE_1,
410 t_src_class(src[0].File),
411 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
412 (src[0].RelAddr << 4);
413 inst[2] = t_src(vp, &src[1]);
414 inst[3] = __CONST(1, SWIZZLE_ZERO);
415
416 return inst;
417 }
418
419 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
420 struct prog_instruction *vpi,
421 GLuint * inst,
422 struct prog_src_register src[3])
423 {
424 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
425 GL_FALSE,
426 GL_FALSE,
427 t_dst_index(vp, &vpi->DstReg),
428 t_dst_mask(vpi->DstReg.WriteMask),
429 t_dst_class(vpi->DstReg.File));
430 inst[1] = t_src(vp, &src[0]);
431 inst[2] = t_src(vp, &src[1]);
432 inst[3] = __CONST(1, SWIZZLE_ZERO);
433
434 return inst;
435 }
436
437 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
438 struct prog_instruction *vpi,
439 GLuint * inst,
440 struct prog_src_register src[3])
441 {
442 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
443 GL_TRUE,
444 GL_FALSE,
445 t_dst_index(vp, &vpi->DstReg),
446 t_dst_mask(vpi->DstReg.WriteMask),
447 t_dst_class(vpi->DstReg.File));
448 inst[1] = t_src_scalar(vp, &src[0]);
449 inst[2] = __CONST(0, SWIZZLE_ZERO);
450 inst[3] = __CONST(0, SWIZZLE_ZERO);
451
452 return inst;
453 }
454
455 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
456 struct prog_instruction *vpi,
457 GLuint * inst,
458 struct prog_src_register src[3])
459 {
460 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
461 GL_TRUE,
462 GL_FALSE,
463 t_dst_index(vp, &vpi->DstReg),
464 t_dst_mask(vpi->DstReg.WriteMask),
465 t_dst_class(vpi->DstReg.File));
466 inst[1] = t_src_scalar(vp, &src[0]);
467 inst[2] = __CONST(0, SWIZZLE_ZERO);
468 inst[3] = __CONST(0, SWIZZLE_ZERO);
469
470 return inst;
471 }
472
473 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
474 struct prog_instruction *vpi,
475 GLuint * inst,
476 struct prog_src_register src[3],
477 int *u_temp_i)
478 {
479 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
480 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
481
482 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
483 GL_FALSE,
484 GL_FALSE,
485 *u_temp_i,
486 t_dst_mask(vpi->DstReg.WriteMask),
487 PVS_DST_REG_TEMPORARY);
488 inst[1] = t_src(vp, &src[0]);
489 inst[2] = __CONST(0, SWIZZLE_ZERO);
490 inst[3] = __CONST(0, SWIZZLE_ZERO);
491 inst += 4;
492
493 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
494 GL_FALSE,
495 GL_FALSE,
496 t_dst_index(vp, &vpi->DstReg),
497 t_dst_mask(vpi->DstReg.WriteMask),
498 t_dst_class(vpi->DstReg.File));
499 inst[1] = t_src(vp, &src[0]);
500 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
501 PVS_SRC_SELECT_X,
502 PVS_SRC_SELECT_Y,
503 PVS_SRC_SELECT_Z,
504 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
505 /* Not 100% sure about this */
506 (!src[0].
507 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
508 /*VSF_FLAG_ALL */ );
509 inst[3] = __CONST(0, SWIZZLE_ZERO);
510 (*u_temp_i)--;
511
512 return inst;
513 }
514
515 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
516 struct prog_instruction *vpi,
517 GLuint * inst,
518 struct prog_src_register src[3])
519 {
520 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
521 GL_FALSE,
522 GL_FALSE,
523 t_dst_index(vp, &vpi->DstReg),
524 t_dst_mask(vpi->DstReg.WriteMask),
525 t_dst_class(vpi->DstReg.File));
526 inst[1] = t_src(vp, &src[0]);
527 inst[2] = __CONST(0, SWIZZLE_ZERO);
528 inst[3] = __CONST(0, SWIZZLE_ZERO);
529
530 return inst;
531 }
532
533 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
534 struct prog_instruction *vpi,
535 GLuint * inst,
536 struct prog_src_register src[3])
537 {
538 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
539
540 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
541 GL_TRUE,
542 GL_FALSE,
543 t_dst_index(vp, &vpi->DstReg),
544 t_dst_mask(vpi->DstReg.WriteMask),
545 t_dst_class(vpi->DstReg.File));
546 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
547 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
548 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
549 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
550 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
551 t_src_class(src[0].File),
552 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
553 (src[0].RelAddr << 4);
554 inst[2] = __CONST(0, SWIZZLE_ZERO);
555 inst[3] = __CONST(0, SWIZZLE_ZERO);
556
557 return inst;
558 }
559
560 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
561 struct prog_instruction *vpi,
562 GLuint * inst,
563 struct prog_src_register src[3])
564 {
565 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
566
567 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
568 GL_TRUE,
569 GL_FALSE,
570 t_dst_index(vp, &vpi->DstReg),
571 t_dst_mask(vpi->DstReg.WriteMask),
572 t_dst_class(vpi->DstReg.File));
573 /* NOTE: Users swizzling might not work. */
574 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
575 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
576 PVS_SRC_SELECT_FORCE_0, // Z
577 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
578 t_src_class(src[0].File),
579 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
580 (src[0].RelAddr << 4);
581 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
582 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
583 PVS_SRC_SELECT_FORCE_0, // Z
584 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
585 t_src_class(src[0].File),
586 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
587 (src[0].RelAddr << 4);
588 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
589 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
590 PVS_SRC_SELECT_FORCE_0, // Z
591 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
592 t_src_class(src[0].File),
593 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
594 (src[0].RelAddr << 4);
595
596 return inst;
597 }
598
599 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
600 struct prog_instruction *vpi,
601 GLuint * inst,
602 struct prog_src_register src[3])
603 {
604 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
605 GL_TRUE,
606 GL_FALSE,
607 t_dst_index(vp, &vpi->DstReg),
608 t_dst_mask(vpi->DstReg.WriteMask),
609 t_dst_class(vpi->DstReg.File));
610 inst[1] = t_src_scalar(vp, &src[0]);
611 inst[2] = __CONST(0, SWIZZLE_ZERO);
612 inst[3] = __CONST(0, SWIZZLE_ZERO);
613
614 return inst;
615 }
616
617 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
618 struct prog_instruction *vpi,
619 GLuint * inst,
620 struct prog_src_register src[3])
621 {
622 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
623 GL_FALSE,
624 GL_TRUE,
625 t_dst_index(vp, &vpi->DstReg),
626 t_dst_mask(vpi->DstReg.WriteMask),
627 t_dst_class(vpi->DstReg.File));
628 inst[1] = t_src(vp, &src[0]);
629 inst[2] = t_src(vp, &src[1]);
630 inst[3] = t_src(vp, &src[2]);
631
632 return inst;
633 }
634
635 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
636 struct prog_instruction *vpi,
637 GLuint * inst,
638 struct prog_src_register src[3])
639 {
640 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
641 GL_FALSE,
642 GL_FALSE,
643 t_dst_index(vp, &vpi->DstReg),
644 t_dst_mask(vpi->DstReg.WriteMask),
645 t_dst_class(vpi->DstReg.File));
646 inst[1] = t_src(vp, &src[0]);
647 inst[2] = t_src(vp, &src[1]);
648 inst[3] = __CONST(1, SWIZZLE_ZERO);
649
650 return inst;
651 }
652
653 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
654 struct prog_instruction *vpi,
655 GLuint * inst,
656 struct prog_src_register src[3])
657 {
658 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
659 GL_FALSE,
660 GL_FALSE,
661 t_dst_index(vp, &vpi->DstReg),
662 t_dst_mask(vpi->DstReg.WriteMask),
663 t_dst_class(vpi->DstReg.File));
664 inst[1] = t_src(vp, &src[0]);
665 inst[2] = t_src(vp, &src[1]);
666 inst[3] = __CONST(1, SWIZZLE_ZERO);
667
668 return inst;
669 }
670
671 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
672 struct prog_instruction *vpi,
673 GLuint * inst,
674 struct prog_src_register src[3])
675 {
676 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
677
678 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
679 GL_FALSE,
680 GL_FALSE,
681 t_dst_index(vp, &vpi->DstReg),
682 t_dst_mask(vpi->DstReg.WriteMask),
683 t_dst_class(vpi->DstReg.File));
684 inst[1] = t_src(vp, &src[0]);
685 inst[2] = __CONST(0, SWIZZLE_ZERO);
686 inst[3] = __CONST(0, SWIZZLE_ZERO);
687
688 return inst;
689 }
690
691 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
692 struct prog_instruction *vpi,
693 GLuint * inst,
694 struct prog_src_register src[3])
695 {
696 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
697 GL_FALSE,
698 GL_FALSE,
699 t_dst_index(vp, &vpi->DstReg),
700 t_dst_mask(vpi->DstReg.WriteMask),
701 t_dst_class(vpi->DstReg.File));
702 inst[1] = t_src(vp, &src[0]);
703 inst[2] = t_src(vp, &src[1]);
704 inst[3] = __CONST(1, SWIZZLE_ZERO);
705
706 return inst;
707 }
708
709 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
710 struct prog_instruction *vpi,
711 GLuint * inst,
712 struct prog_src_register src[3])
713 {
714 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
715 GL_TRUE,
716 GL_FALSE,
717 t_dst_index(vp, &vpi->DstReg),
718 t_dst_mask(vpi->DstReg.WriteMask),
719 t_dst_class(vpi->DstReg.File));
720 inst[1] = t_src_scalar(vp, &src[0]);
721 inst[2] = __CONST(0, SWIZZLE_ZERO);
722 inst[3] = t_src_scalar(vp, &src[1]);
723
724 return inst;
725 }
726
727 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
728 struct prog_instruction *vpi,
729 GLuint * inst,
730 struct prog_src_register src[3])
731 {
732 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
733 GL_TRUE,
734 GL_FALSE,
735 t_dst_index(vp, &vpi->DstReg),
736 t_dst_mask(vpi->DstReg.WriteMask),
737 t_dst_class(vpi->DstReg.File));
738 inst[1] = t_src_scalar(vp, &src[0]);
739 inst[2] = __CONST(0, SWIZZLE_ZERO);
740 inst[3] = __CONST(0, SWIZZLE_ZERO);
741
742 return inst;
743 }
744
745 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
746 struct prog_instruction *vpi,
747 GLuint * inst,
748 struct prog_src_register src[3])
749 {
750 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
751 GL_TRUE,
752 GL_FALSE,
753 t_dst_index(vp, &vpi->DstReg),
754 t_dst_mask(vpi->DstReg.WriteMask),
755 t_dst_class(vpi->DstReg.File));
756 inst[1] = t_src_scalar(vp, &src[0]);
757 inst[2] = __CONST(0, SWIZZLE_ZERO);
758 inst[3] = __CONST(0, SWIZZLE_ZERO);
759
760 return inst;
761 }
762
763 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
764 struct prog_instruction *vpi,
765 GLuint * inst,
766 struct prog_src_register src[3])
767 {
768 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
769 GL_FALSE,
770 GL_FALSE,
771 t_dst_index(vp, &vpi->DstReg),
772 t_dst_mask(vpi->DstReg.WriteMask),
773 t_dst_class(vpi->DstReg.File));
774 inst[1] = t_src(vp, &src[0]);
775 inst[2] = t_src(vp, &src[1]);
776 inst[3] = __CONST(1, SWIZZLE_ZERO);
777
778 return inst;
779 }
780
781 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
782 struct prog_instruction *vpi,
783 GLuint * inst,
784 struct prog_src_register src[3])
785 {
786 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
787 GL_FALSE,
788 GL_FALSE,
789 t_dst_index(vp, &vpi->DstReg),
790 t_dst_mask(vpi->DstReg.WriteMask),
791 t_dst_class(vpi->DstReg.File));
792 inst[1] = t_src(vp, &src[0]);
793 inst[2] = t_src(vp, &src[1]);
794 inst[3] = __CONST(1, SWIZZLE_ZERO);
795
796 return inst;
797 }
798
799 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
800 struct prog_instruction *vpi,
801 GLuint * inst,
802 struct prog_src_register src[3])
803 {
804 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
805
806 #if 0
807 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
808 GL_FALSE,
809 GL_FALSE,
810 t_dst_index(vp, &vpi->DstReg),
811 t_dst_mask(vpi->DstReg.WriteMask),
812 t_dst_class(vpi->DstReg.File));
813 inst[1] = t_src(vp, &src[0]);
814 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
815 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
816 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
817 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
818 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
819 t_src_class(src[1].File),
820 (!src[1].
821 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
822 (src[1].RelAddr << 4);
823 inst[3] = 0;
824 #else
825 inst[0] =
826 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
827 GL_FALSE,
828 GL_FALSE,
829 t_dst_index(vp, &vpi->DstReg),
830 t_dst_mask(vpi->DstReg.WriteMask),
831 t_dst_class(vpi->DstReg.File));
832 inst[1] = t_src(vp, &src[0]);
833 inst[2] = __CONST(0, SWIZZLE_ONE);
834 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
838 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
839 t_src_class(src[1].File),
840 (!src[1].
841 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
842 (src[1].RelAddr << 4);
843 #endif
844
845 return inst;
846 }
847
848 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
849 struct prog_instruction *vpi,
850 GLuint * inst,
851 struct prog_src_register src[3])
852 {
853 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
854
855 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
856 GL_FALSE,
857 GL_FALSE,
858 t_dst_index(vp, &vpi->DstReg),
859 t_dst_mask(vpi->DstReg.WriteMask),
860 t_dst_class(vpi->DstReg.File));
861 inst[1] = t_src(vp, &src[0]);
862 inst[2] = __CONST(0, SWIZZLE_ZERO);
863 inst[3] = __CONST(0, SWIZZLE_ZERO);
864
865 return inst;
866 }
867
868 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
869 struct prog_instruction *vpi,
870 GLuint * inst,
871 struct prog_src_register src[3],
872 int *u_temp_i)
873 {
874 /* mul r0, r1.yzxw, r2.zxyw
875 mad r0, -r2.yzxw, r1.zxyw, r0
876 */
877
878 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
879 GL_FALSE,
880 GL_FALSE,
881 *u_temp_i,
882 t_dst_mask(vpi->DstReg.WriteMask),
883 PVS_DST_REG_TEMPORARY);
884 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
885 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
886 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
887 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
888 t_src_class(src[0].File),
889 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
890 (src[0].RelAddr << 4);
891 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
892 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
893 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
894 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
895 t_src_class(src[1].File),
896 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
897 (src[1].RelAddr << 4);
898 inst[3] = __CONST(1, SWIZZLE_ZERO);
899 inst += 4;
900
901 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
902 GL_FALSE,
903 GL_FALSE,
904 t_dst_index(vp, &vpi->DstReg),
905 t_dst_mask(vpi->DstReg.WriteMask),
906 t_dst_class(vpi->DstReg.File));
907 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
908 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
909 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
910 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
911 t_src_class(src[1].File),
912 (!src[1].
913 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
914 (src[1].RelAddr << 4);
915 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
916 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
917 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
918 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
919 t_src_class(src[0].File),
920 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
921 (src[0].RelAddr << 4);
922 inst[3] =
923 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
924 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
925 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
926
927 (*u_temp_i)--;
928
929 return inst;
930 }
931
932 static void t_inputs_outputs(struct r300_vertex_program *vp)
933 {
934 int i;
935 int cur_reg;
936
937 cur_reg = -1;
938 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
939 if (vp->key.InputsRead & (1 << i))
940 vp->inputs[i] = ++cur_reg;
941 else
942 vp->inputs[i] = -1;
943 }
944
945 cur_reg = 0;
946 for (i = 0; i < VERT_RESULT_MAX; i++)
947 vp->outputs[i] = -1;
948
949 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
950
951 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
952 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
953 }
954
955 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
956 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
957 }
958
959 /* If we're writing back facing colors we need to send
960 * four colors to make front/back face colors selection work.
961 * If the vertex program doesn't write all 4 colors, lets
962 * pretend it does by skipping output index reg so the colors
963 * get written into appropriate output vectors.
964 */
965 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
966 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
967 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
968 vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
969 cur_reg++;
970 }
971
972 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
973 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
974 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
975 vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
976 cur_reg++;
977 }
978
979 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
980 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
981 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
982 cur_reg++;
983 }
984
985 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
986 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
987 } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
988 cur_reg++;
989 }
990
991 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
992 if (vp->key.OutputsWritten & (1 << i)) {
993 vp->outputs[i] = cur_reg++;
994 }
995 }
996
997 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
998 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
999 }
1000 }
1001
1002 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
1003 struct prog_instruction *vpi)
1004 {
1005 int i;
1006 GLuint *inst;
1007 unsigned long num_operands;
1008 /* Initial value should be last tmp reg that hw supports.
1009 Strangely enough r300 doesnt mind even though these would be out of range.
1010 Smart enough to realize that it doesnt need it? */
1011 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1012 struct prog_src_register src[3];
1013
1014 vp->pos_end = 0; /* Not supported yet */
1015 vp->hw_code.length = 0;
1016 vp->translated = GL_TRUE;
1017 vp->error = GL_FALSE;
1018
1019 t_inputs_outputs(vp);
1020
1021 for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
1022 vpi++, inst += 4) {
1023
1024 FREE_TEMPS();
1025
1026 if (!valid_dst(vp, &vpi->DstReg)) {
1027 /* redirect result to unused temp */
1028 vpi->DstReg.File = PROGRAM_TEMPORARY;
1029 vpi->DstReg.Index = u_temp_i;
1030 }
1031
1032 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1033
1034 /* copy the sources (src) from mesa into a local variable... is this needed? */
1035 for (i = 0; i < num_operands; i++) {
1036 src[i] = vpi->SrcReg[i];
1037 }
1038
1039 if (num_operands == 3) { /* TODO: scalars */
1040 if (CMP_SRCS(src[1], src[2])
1041 || CMP_SRCS(src[0], src[2])) {
1042 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1043 GL_FALSE,
1044 GL_FALSE,
1045 u_temp_i,
1046 VSF_FLAG_ALL,
1047 PVS_DST_REG_TEMPORARY);
1048 inst[1] =
1049 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1050 SWIZZLE_X,
1051 SWIZZLE_Y,
1052 SWIZZLE_Z,
1053 SWIZZLE_W,
1054 t_src_class(src[2].File),
1055 VSF_FLAG_NONE) | (src[2].
1056 RelAddr <<
1057 4);
1058 inst[2] = __CONST(2, SWIZZLE_ZERO);
1059 inst[3] = __CONST(2, SWIZZLE_ZERO);
1060 inst += 4;
1061
1062 src[2].File = PROGRAM_TEMPORARY;
1063 src[2].Index = u_temp_i;
1064 src[2].RelAddr = 0;
1065 u_temp_i--;
1066 }
1067 }
1068
1069 if (num_operands >= 2) {
1070 if (CMP_SRCS(src[1], src[0])) {
1071 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1072 GL_FALSE,
1073 GL_FALSE,
1074 u_temp_i,
1075 VSF_FLAG_ALL,
1076 PVS_DST_REG_TEMPORARY);
1077 inst[1] =
1078 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1079 SWIZZLE_X,
1080 SWIZZLE_Y,
1081 SWIZZLE_Z,
1082 SWIZZLE_W,
1083 t_src_class(src[0].File),
1084 VSF_FLAG_NONE) | (src[0].
1085 RelAddr <<
1086 4);
1087 inst[2] = __CONST(0, SWIZZLE_ZERO);
1088 inst[3] = __CONST(0, SWIZZLE_ZERO);
1089 inst += 4;
1090
1091 src[0].File = PROGRAM_TEMPORARY;
1092 src[0].Index = u_temp_i;
1093 src[0].RelAddr = 0;
1094 u_temp_i--;
1095 }
1096 }
1097
1098 switch (vpi->Opcode) {
1099 case OPCODE_ABS:
1100 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1101 break;
1102 case OPCODE_ADD:
1103 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1104 break;
1105 case OPCODE_ARL:
1106 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1107 break;
1108 case OPCODE_DP3:
1109 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1110 break;
1111 case OPCODE_DP4:
1112 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1113 break;
1114 case OPCODE_DPH:
1115 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1116 break;
1117 case OPCODE_DST:
1118 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1119 break;
1120 case OPCODE_EX2:
1121 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1122 break;
1123 case OPCODE_EXP:
1124 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1125 break;
1126 case OPCODE_FLR:
1127 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1128 &u_temp_i);
1129 break;
1130 case OPCODE_FRC:
1131 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1132 break;
1133 case OPCODE_LG2:
1134 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1135 break;
1136 case OPCODE_LIT:
1137 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1138 break;
1139 case OPCODE_LOG:
1140 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1141 break;
1142 case OPCODE_MAD:
1143 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1144 break;
1145 case OPCODE_MAX:
1146 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1147 break;
1148 case OPCODE_MIN:
1149 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1150 break;
1151 case OPCODE_MOV:
1152 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1153 break;
1154 case OPCODE_MUL:
1155 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1156 break;
1157 case OPCODE_POW:
1158 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1159 break;
1160 case OPCODE_RCP:
1161 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1162 break;
1163 case OPCODE_RSQ:
1164 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1165 break;
1166 case OPCODE_SGE:
1167 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1168 break;
1169 case OPCODE_SLT:
1170 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1171 break;
1172 case OPCODE_SUB:
1173 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1174 break;
1175 case OPCODE_SWZ:
1176 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1177 break;
1178 case OPCODE_XPD:
1179 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1180 &u_temp_i);
1181 break;
1182 default:
1183 vp->error = GL_TRUE;
1184 break;
1185 }
1186 }
1187
1188 vp->hw_code.length = (inst - vp->hw_code.body.d);
1189 if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
1190 vp->error = GL_TRUE;
1191 }
1192 }
1193
1194 /* DP4 version seems to trigger some hw peculiarity */
1195 //#define PREFER_DP4
1196
1197 static void position_invariant(struct gl_program *prog)
1198 {
1199 struct prog_instruction *vpi;
1200 struct gl_program_parameter_list *paramList;
1201 int i;
1202
1203 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1204
1205 /* tokens[4] = matrix modifier */
1206 #ifdef PREFER_DP4
1207 tokens[4] = 0; /* not transposed or inverted */
1208 #else
1209 tokens[4] = STATE_MATRIX_TRANSPOSE;
1210 #endif
1211 paramList = prog->Parameters;
1212
1213 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1214 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1215
1216 for (i = 0; i < 4; i++) {
1217 GLint idx;
1218 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1219 idx = _mesa_add_state_reference(paramList, tokens);
1220 #ifdef PREFER_DP4
1221 vpi[i].Opcode = OPCODE_DP4;
1222 vpi[i].StringPos = 0;
1223 vpi[i].Data = 0;
1224
1225 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1226 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1227 vpi[i].DstReg.WriteMask = 1 << i;
1228 vpi[i].DstReg.CondMask = COND_TR;
1229
1230 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1231 vpi[i].SrcReg[0].Index = idx;
1232 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1233
1234 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1235 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1236 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1237 #else
1238 if (i == 0)
1239 vpi[i].Opcode = OPCODE_MUL;
1240 else
1241 vpi[i].Opcode = OPCODE_MAD;
1242
1243 vpi[i].Data = 0;
1244
1245 if (i == 3)
1246 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1247 else
1248 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1249 vpi[i].DstReg.Index = 0;
1250 vpi[i].DstReg.WriteMask = 0xf;
1251 vpi[i].DstReg.CondMask = COND_TR;
1252
1253 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1254 vpi[i].SrcReg[0].Index = idx;
1255 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1256
1257 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1258 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1259 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1260
1261 if (i > 0) {
1262 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1263 vpi[i].SrcReg[2].Index = 0;
1264 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1265 }
1266 #endif
1267 }
1268
1269 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1270 prog->NumInstructions);
1271
1272 free(prog->Instructions);
1273
1274 prog->Instructions = vpi;
1275
1276 prog->NumInstructions += 4;
1277 vpi = &prog->Instructions[prog->NumInstructions - 1];
1278
1279 assert(vpi->Opcode == OPCODE_END);
1280 }
1281
1282 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1283 GLuint temp_index)
1284 {
1285 struct prog_instruction *vpi;
1286 struct prog_instruction *vpi_insert;
1287 int i = 0;
1288
1289 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1290 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1291 /* all but END */
1292 _mesa_copy_instructions(vpi, prog->Instructions,
1293 prog->NumInstructions - 1);
1294 /* END */
1295 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1296 &prog->Instructions[prog->NumInstructions - 1],
1297 1);
1298 vpi_insert = &vpi[prog->NumInstructions - 1];
1299
1300 vpi_insert[i].Opcode = OPCODE_MOV;
1301
1302 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1303 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1304 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1305 vpi_insert[i].DstReg.CondMask = COND_TR;
1306
1307 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1308 vpi_insert[i].SrcReg[0].Index = temp_index;
1309 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1310 i++;
1311
1312 vpi_insert[i].Opcode = OPCODE_MOV;
1313
1314 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1315 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1316 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1317 vpi_insert[i].DstReg.CondMask = COND_TR;
1318
1319 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1320 vpi_insert[i].SrcReg[0].Index = temp_index;
1321 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1322 i++;
1323
1324 free(prog->Instructions);
1325
1326 prog->Instructions = vpi;
1327
1328 prog->NumInstructions += i;
1329 vpi = &prog->Instructions[prog->NumInstructions - 1];
1330
1331 assert(vpi->Opcode == OPCODE_END);
1332 }
1333
1334 static void pos_as_texcoord(struct r300_vertex_program *vp,
1335 struct gl_program *prog)
1336 {
1337 struct prog_instruction *vpi;
1338 GLuint tempregi = prog->NumTemporaries;
1339 /* should do something else if no temps left... */
1340 prog->NumTemporaries++;
1341
1342 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1343 if (vpi->DstReg.File == PROGRAM_OUTPUT
1344 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1345 vpi->DstReg.File = PROGRAM_TEMPORARY;
1346 vpi->DstReg.Index = tempregi;
1347 }
1348 }
1349 insert_wpos(vp, prog, tempregi);
1350 }
1351
1352 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1353 *wanted_key, struct gl_vertex_program
1354 *mesa_vp, GLint wpos_idx)
1355 {
1356 struct r300_vertex_program *vp;
1357
1358 vp = _mesa_calloc(sizeof(*vp));
1359 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1360 vp->wpos_idx = wpos_idx;
1361
1362 if (mesa_vp->IsPositionInvariant) {
1363 position_invariant(&mesa_vp->Base);
1364 }
1365
1366 if (wpos_idx > -1) {
1367 pos_as_texcoord(vp, &mesa_vp->Base);
1368 }
1369
1370 if (RADEON_DEBUG & DEBUG_VERTS) {
1371 fprintf(stderr, "Vertex program after native rewrite:\n");
1372 _mesa_print_program(&mesa_vp->Base);
1373 fflush(stdout);
1374 }
1375
1376 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1377 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1378 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1379 */
1380 {
1381 int i, count = 0;
1382 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1383 if (vp->key.OutputsAdded & (1 << i)) {
1384 ++count;
1385 }
1386 }
1387
1388 if (count > 0) {
1389 struct prog_instruction *inst;
1390
1391 _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
1392 inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
1393
1394 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1395 if (vp->key.OutputsAdded & (1 << i)) {
1396 inst->Opcode = OPCODE_MOV;
1397
1398 inst->DstReg.File = PROGRAM_OUTPUT;
1399 inst->DstReg.Index = i;
1400 inst->DstReg.WriteMask = WRITEMASK_XYZW;
1401 inst->DstReg.CondMask = COND_TR;
1402
1403 inst->SrcReg[0].File = PROGRAM_CONSTANT;
1404 inst->SrcReg[0].Index = 0;
1405 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1406
1407 ++inst;
1408 }
1409 }
1410 }
1411 }
1412
1413 assert(mesa_vp->Base.NumInstructions);
1414 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1415 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1416
1417 return vp;
1418 }
1419
1420 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1421 {
1422 if (key->OutputsWritten & (1 << vert))
1423 return;
1424
1425 key->OutputsWritten |= 1 << vert;
1426 key->OutputsAdded |= 1 << vert;
1427 }
1428
1429 void r300SelectVertexShader(r300ContextPtr r300)
1430 {
1431 GLcontext *ctx = ctx = r300->radeon.glCtx;
1432 GLuint InputsRead;
1433 struct r300_vertex_program_key wanted_key = { 0 };
1434 GLint i;
1435 struct r300_vertex_program_cont *vpc;
1436 struct r300_vertex_program *vp;
1437 GLint wpos_idx;
1438
1439 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1440 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1441 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1442 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1443
1444 wpos_idx = -1;
1445 if (InputsRead & FRAG_BIT_WPOS) {
1446 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1447 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1448 break;
1449
1450 if (i == ctx->Const.MaxTextureUnits) {
1451 fprintf(stderr, "\tno free texcoord found\n");
1452 _mesa_exit(-1);
1453 }
1454
1455 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1456 wpos_idx = i;
1457 }
1458
1459 if (vpc->mesa_program.IsPositionInvariant) {
1460 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1461 wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
1462 } else {
1463 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1464 }
1465
1466 if (InputsRead & FRAG_BIT_COL0) {
1467 add_outputs(&wanted_key, VERT_RESULT_COL0);
1468 }
1469
1470 if (InputsRead & FRAG_BIT_COL1) {
1471 add_outputs(&wanted_key, VERT_RESULT_COL1);
1472 }
1473
1474 if (InputsRead & FRAG_BIT_FOGC) {
1475 add_outputs(&wanted_key, VERT_RESULT_FOGC);
1476 }
1477
1478 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1479 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1480 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1481 }
1482 }
1483
1484 for (vp = vpc->progs; vp; vp = vp->next)
1485 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1486 == 0) {
1487 r300->selected_vp = vp;
1488 return;
1489 }
1490
1491 if (RADEON_DEBUG & DEBUG_VERTS) {
1492 fprintf(stderr, "Initial vertex program:\n");
1493 _mesa_print_program(&vpc->mesa_program.Base);
1494 fflush(stdout);
1495 }
1496
1497 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1498 vp->next = vpc->progs;
1499 vpc->progs = vp;
1500 r300->selected_vp = vp;
1501 }
1502
/*
 * Raise the vpu.count field of the command header at `ptr` to
 * `new_count` / 4 if that is larger than the value currently stored.
 * The count never decreases here; it must stay below 256 (asserted).
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *bump_hdr_ = \
			(drm_r300_cmd_header_t *)(ptr); \
		int bump_quads_ = (new_count) / 4; \
		assert(bump_quads_ < 256); \
		if (bump_hdr_->vpu.count < bump_quads_) { \
			bump_hdr_->vpu.count = bump_quads_; \
		} \
	} while (0)
1509
1510 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
1511 {
1512 int i;
1513
1514 assert((code->length > 0) && (code->length % 4 == 0));
1515
1516 switch ((dest >> 8) & 0xf) {
1517 case 0:
1518 R300_STATECHANGE(r300, vpi);
1519 for (i = 0; i < code->length; i++)
1520 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1521 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
1522 break;
1523 case 2:
1524 R300_STATECHANGE(r300, vpp);
1525 for (i = 0; i < code->length; i++)
1526 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1527 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
1528 break;
1529 case 4:
1530 R300_STATECHANGE(r300, vps);
1531 for (i = 0; i < code->length; i++)
1532 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1533 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
1534 break;
1535 default:
1536 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
1537 _mesa_exit(-1);
1538 }
1539 }
1540
1541 void r300SetupVertexProgram(r300ContextPtr rmesa)
1542 {
1543 GLcontext *ctx = rmesa->radeon.glCtx;
1544 struct r300_vertex_program *prog = rmesa->selected_vp;
1545 int inst_count = 0;
1546 int param_count = 0;
1547
1548 /* Reset state, in case we don't use something */
1549 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
1550 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
1551 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
1552
1553 R300_STATECHANGE(rmesa, vpp);
1554 param_count = r300VertexProgUpdateParams(ctx,
1555 (struct r300_vertex_program_cont *)
1556 ctx->VertexProgram._Current,
1557 (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
1558 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
1559 param_count /= 4;
1560
1561 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
1562 inst_count = (prog->hw_code.length / 4) - 1;
1563
1564 r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
1565 _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
1566
1567 R300_STATECHANGE(rmesa, pvs);
1568 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
1569 (inst_count << R300_PVS_LAST_INST_SHIFT);
1570
1571 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
1572 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
1573 }