35e5ec0f8e748788459410fdd9569a7afcace3b4
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_statevars.h"
38 #include "tnl/tnl.h"
39
40 #include "r300_context.h"
41 #include "r300_state.h"
42
/*
 * Non-zero when source operands a and b conflict: either their RelAddr
 * settings differ, or they name different indices while both map to the
 * constant class or both map to the input class.
 *
 * Each argument is expanded several times, so only pass expressions
 * without side effects.  Arguments are parenthesized so that any lvalue
 * expression (not just a plain identifier or array element) expands safely.
 */
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
	((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	  t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	 (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	  t_src_class((b).File) == PVS_SRC_REG_INPUT))))
49
/*
 * Build a source operand that yields a constant (e.g. ZERO or ONE) by
 * reusing the register of the already valid source src[x] and replacing all
 * four component selects with swizzle y.  Expects `vp' and `src' to be in
 * scope wherever the macro is expanded.
 */
#define __CONST(x, y)						\
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]),		\
			 t_swizzle(y), t_swizzle(y),		\
			 t_swizzle(y), t_swizzle(y),		\
			 t_src_class(src[x].File),		\
			 VSF_FLAG_NONE) |			\
	 (src[x].RelAddr << 4))
62
/*
 * Flags vp->error (with a one-time warning) when the program's own
 * temporaries plus the scratch temporaries handed out since the last reset
 * exceed VSF_MAX_FRAGMENT_TEMPS, then rewinds the scratch allocator
 * (u_temp_i) to the highest temporary index.  Expects `vp' and `u_temp_i'
 * to be in scope wherever the macro is expanded.
 */
#define FREE_TEMPS()								\
	do {									\
		int scratch_in_use = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;	\
		if ((vp->num_temporaries + scratch_in_use) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, scratch_in_use); \
			vp->error = GL_TRUE;					\
		}								\
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;				\
	} while (0)
72
73 int r300VertexProgUpdateParams(GLcontext * ctx,
74 struct r300_vertex_program_cont *vp, float *dst)
75 {
76 int pi;
77 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
78 float *dst_o = dst;
79 struct gl_program_parameter_list *paramList;
80
81 if (mesa_vp->IsNVProgram) {
82 _mesa_load_tracked_matrices(ctx);
83
84 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
85 *dst++ = ctx->VertexProgram.Parameters[pi][0];
86 *dst++ = ctx->VertexProgram.Parameters[pi][1];
87 *dst++ = ctx->VertexProgram.Parameters[pi][2];
88 *dst++ = ctx->VertexProgram.Parameters[pi][3];
89 }
90 return dst - dst_o;
91 }
92
93 assert(mesa_vp->Base.Parameters);
94 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
95
96 if (mesa_vp->Base.Parameters->NumParameters * 4 >
97 VSF_MAX_FRAGMENT_LENGTH) {
98 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
99 _mesa_exit(-1);
100 }
101
102 paramList = mesa_vp->Base.Parameters;
103 for (pi = 0; pi < paramList->NumParameters; pi++) {
104 switch (paramList->Parameters[pi].Type) {
105 case PROGRAM_STATE_VAR:
106 case PROGRAM_NAMED_PARAM:
107 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
108 case PROGRAM_CONSTANT:
109 *dst++ = paramList->ParameterValues[pi][0];
110 *dst++ = paramList->ParameterValues[pi][1];
111 *dst++ = paramList->ParameterValues[pi][2];
112 *dst++ = paramList->ParameterValues[pi][3];
113 break;
114 default:
115 _mesa_problem(NULL, "Bad param type in %s",
116 __FUNCTION__);
117 }
118
119 }
120
121 return dst - dst_o;
122 }
123
124 static unsigned long t_dst_mask(GLuint mask)
125 {
126 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
127 return mask & VSF_FLAG_ALL;
128 }
129
130 static unsigned long t_dst_class(gl_register_file file)
131 {
132
133 switch (file) {
134 case PROGRAM_TEMPORARY:
135 return PVS_DST_REG_TEMPORARY;
136 case PROGRAM_OUTPUT:
137 return PVS_DST_REG_OUT;
138 case PROGRAM_ADDRESS:
139 return PVS_DST_REG_A0;
140 /*
141 case PROGRAM_INPUT:
142 case PROGRAM_LOCAL_PARAM:
143 case PROGRAM_ENV_PARAM:
144 case PROGRAM_NAMED_PARAM:
145 case PROGRAM_STATE_VAR:
146 case PROGRAM_WRITE_ONLY:
147 case PROGRAM_ADDRESS:
148 */
149 default:
150 fprintf(stderr, "problem in %s", __FUNCTION__);
151 _mesa_exit(-1);
152 return -1;
153 }
154 }
155
156 static unsigned long t_dst_index(struct r300_vertex_program *vp,
157 struct prog_dst_register *dst)
158 {
159 if (dst->File == PROGRAM_OUTPUT)
160 return vp->outputs[dst->Index];
161
162 return dst->Index;
163 }
164
165 static unsigned long t_src_class(gl_register_file file)
166 {
167 switch (file) {
168 case PROGRAM_TEMPORARY:
169 return PVS_SRC_REG_TEMPORARY;
170 case PROGRAM_INPUT:
171 return PVS_SRC_REG_INPUT;
172 case PROGRAM_LOCAL_PARAM:
173 case PROGRAM_ENV_PARAM:
174 case PROGRAM_NAMED_PARAM:
175 case PROGRAM_CONSTANT:
176 case PROGRAM_STATE_VAR:
177 return PVS_SRC_REG_CONSTANT;
178 /*
179 case PROGRAM_OUTPUT:
180 case PROGRAM_WRITE_ONLY:
181 case PROGRAM_ADDRESS:
182 */
183 default:
184 fprintf(stderr, "problem in %s", __FUNCTION__);
185 _mesa_exit(-1);
186 return -1;
187 }
188 }
189
190 static INLINE unsigned long t_swizzle(GLubyte swizzle)
191 {
192 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
193 return swizzle;
194 }
195
#if 0
/*
 * Debug helper (compiled out): print every entry of vp->inputs to stderr,
 * prefixed with the caller's name.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int slot = 0;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	while (slot < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[slot]);
		slot++;
	}
	fprintf(stderr, ">\n");
}
#endif
214
215 static unsigned long t_src_index(struct r300_vertex_program *vp,
216 struct prog_src_register *src)
217 {
218 if (src->File == PROGRAM_INPUT) {
219 assert(vp->inputs[src->Index] != -1);
220 return vp->inputs[src->Index];
221 } else {
222 if (src->Index < 0) {
223 fprintf(stderr,
224 "negative offsets for indirect addressing do not work.\n");
225 return 0;
226 }
227 return src->Index;
228 }
229 }
230
231 /* these two functions should probably be merged... */
232
233 static unsigned long t_src(struct r300_vertex_program *vp,
234 struct prog_src_register *src)
235 {
236 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
237 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
238 */
239 return PVS_SRC_OPERAND(t_src_index(vp, src),
240 t_swizzle(GET_SWZ(src->Swizzle, 0)),
241 t_swizzle(GET_SWZ(src->Swizzle, 1)),
242 t_swizzle(GET_SWZ(src->Swizzle, 2)),
243 t_swizzle(GET_SWZ(src->Swizzle, 3)),
244 t_src_class(src->File),
245 src->Negate) | (src->RelAddr << 4);
246 }
247
248 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
249 struct prog_src_register *src)
250 {
251 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
252 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
253 */
254 return PVS_SRC_OPERAND(t_src_index(vp, src),
255 t_swizzle(GET_SWZ(src->Swizzle, 0)),
256 t_swizzle(GET_SWZ(src->Swizzle, 0)),
257 t_swizzle(GET_SWZ(src->Swizzle, 0)),
258 t_swizzle(GET_SWZ(src->Swizzle, 0)),
259 t_src_class(src->File),
260 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
261 (src->RelAddr << 4);
262 }
263
264 static GLboolean valid_dst(struct r300_vertex_program *vp,
265 struct prog_dst_register *dst)
266 {
267 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
268 return GL_FALSE;
269 } else if (dst->File == PROGRAM_ADDRESS) {
270 assert(dst->Index == 0);
271 }
272
273 return GL_TRUE;
274 }
275
276 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
277 struct prog_instruction *vpi,
278 GLuint * inst,
279 struct prog_src_register src[3])
280 {
281 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
282
283 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
284 GL_FALSE,
285 GL_FALSE,
286 t_dst_index(vp, &vpi->DstReg),
287 t_dst_mask(vpi->DstReg.WriteMask),
288 t_dst_class(vpi->DstReg.File));
289 inst[1] = t_src(vp, &src[0]);
290 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
291 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
292 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
293 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
294 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
295 t_src_class(src[0].File),
296 (!src[0].
297 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
298 (src[0].RelAddr << 4);
299 inst[3] = 0;
300
301 return inst;
302 }
303
304 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
305 struct prog_instruction *vpi,
306 GLuint * inst,
307 struct prog_src_register src[3])
308 {
309 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
310 GL_FALSE,
311 GL_FALSE,
312 t_dst_index(vp, &vpi->DstReg),
313 t_dst_mask(vpi->DstReg.WriteMask),
314 t_dst_class(vpi->DstReg.File));
315 inst[1] = t_src(vp, &src[0]);
316 inst[2] = t_src(vp, &src[1]);
317 inst[3] = __CONST(1, SWIZZLE_ZERO);
318
319 return inst;
320 }
321
322 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
323 struct prog_instruction *vpi,
324 GLuint * inst,
325 struct prog_src_register src[3])
326 {
327 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
328 GL_FALSE,
329 GL_FALSE,
330 t_dst_index(vp, &vpi->DstReg),
331 t_dst_mask(vpi->DstReg.WriteMask),
332 t_dst_class(vpi->DstReg.File));
333 inst[1] = t_src(vp, &src[0]);
334 inst[2] = __CONST(0, SWIZZLE_ZERO);
335 inst[3] = __CONST(0, SWIZZLE_ZERO);
336
337 return inst;
338 }
339
340 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
341 struct prog_instruction *vpi,
342 GLuint * inst,
343 struct prog_src_register src[3])
344 {
345 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
346
347 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
348 GL_FALSE,
349 GL_FALSE,
350 t_dst_index(vp, &vpi->DstReg),
351 t_dst_mask(vpi->DstReg.WriteMask),
352 t_dst_class(vpi->DstReg.File));
353 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
354 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
355 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
356 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
357 SWIZZLE_ZERO,
358 t_src_class(src[0].File),
359 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
360 (src[0].RelAddr << 4);
361 inst[2] =
362 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
363 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
364 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
365 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
366 t_src_class(src[1].File),
367 src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
368 (src[1].RelAddr << 4);
369 inst[3] = __CONST(1, SWIZZLE_ZERO);
370
371 return inst;
372 }
373
374 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
375 struct prog_instruction *vpi,
376 GLuint * inst,
377 struct prog_src_register src[3])
378 {
379 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
380 GL_FALSE,
381 GL_FALSE,
382 t_dst_index(vp, &vpi->DstReg),
383 t_dst_mask(vpi->DstReg.WriteMask),
384 t_dst_class(vpi->DstReg.File));
385 inst[1] = t_src(vp, &src[0]);
386 inst[2] = t_src(vp, &src[1]);
387 inst[3] = __CONST(1, SWIZZLE_ZERO);
388
389 return inst;
390 }
391
392 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
393 struct prog_instruction *vpi,
394 GLuint * inst,
395 struct prog_src_register src[3])
396 {
397 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
398 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
399 GL_FALSE,
400 GL_FALSE,
401 t_dst_index(vp, &vpi->DstReg),
402 t_dst_mask(vpi->DstReg.WriteMask),
403 t_dst_class(vpi->DstReg.File));
404 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
405 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
406 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
407 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
408 PVS_SRC_SELECT_FORCE_1,
409 t_src_class(src[0].File),
410 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
411 (src[0].RelAddr << 4);
412 inst[2] = t_src(vp, &src[1]);
413 inst[3] = __CONST(1, SWIZZLE_ZERO);
414
415 return inst;
416 }
417
418 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
419 struct prog_instruction *vpi,
420 GLuint * inst,
421 struct prog_src_register src[3])
422 {
423 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
424 GL_FALSE,
425 GL_FALSE,
426 t_dst_index(vp, &vpi->DstReg),
427 t_dst_mask(vpi->DstReg.WriteMask),
428 t_dst_class(vpi->DstReg.File));
429 inst[1] = t_src(vp, &src[0]);
430 inst[2] = t_src(vp, &src[1]);
431 inst[3] = __CONST(1, SWIZZLE_ZERO);
432
433 return inst;
434 }
435
436 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
437 struct prog_instruction *vpi,
438 GLuint * inst,
439 struct prog_src_register src[3])
440 {
441 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
442 GL_TRUE,
443 GL_FALSE,
444 t_dst_index(vp, &vpi->DstReg),
445 t_dst_mask(vpi->DstReg.WriteMask),
446 t_dst_class(vpi->DstReg.File));
447 inst[1] = t_src_scalar(vp, &src[0]);
448 inst[2] = __CONST(0, SWIZZLE_ZERO);
449 inst[3] = __CONST(0, SWIZZLE_ZERO);
450
451 return inst;
452 }
453
454 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
455 struct prog_instruction *vpi,
456 GLuint * inst,
457 struct prog_src_register src[3])
458 {
459 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
460 GL_TRUE,
461 GL_FALSE,
462 t_dst_index(vp, &vpi->DstReg),
463 t_dst_mask(vpi->DstReg.WriteMask),
464 t_dst_class(vpi->DstReg.File));
465 inst[1] = t_src_scalar(vp, &src[0]);
466 inst[2] = __CONST(0, SWIZZLE_ZERO);
467 inst[3] = __CONST(0, SWIZZLE_ZERO);
468
469 return inst;
470 }
471
472 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
473 struct prog_instruction *vpi,
474 GLuint * inst,
475 struct prog_src_register src[3],
476 int *u_temp_i)
477 {
478 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
479 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
480
481 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
482 GL_FALSE,
483 GL_FALSE,
484 *u_temp_i,
485 t_dst_mask(vpi->DstReg.WriteMask),
486 PVS_DST_REG_TEMPORARY);
487 inst[1] = t_src(vp, &src[0]);
488 inst[2] = __CONST(0, SWIZZLE_ZERO);
489 inst[3] = __CONST(0, SWIZZLE_ZERO);
490 inst += 4;
491
492 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
493 GL_FALSE,
494 GL_FALSE,
495 t_dst_index(vp, &vpi->DstReg),
496 t_dst_mask(vpi->DstReg.WriteMask),
497 t_dst_class(vpi->DstReg.File));
498 inst[1] = t_src(vp, &src[0]);
499 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
500 PVS_SRC_SELECT_X,
501 PVS_SRC_SELECT_Y,
502 PVS_SRC_SELECT_Z,
503 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
504 /* Not 100% sure about this */
505 (!src[0].
506 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
507 /*VSF_FLAG_ALL */ );
508 inst[3] = __CONST(0, SWIZZLE_ZERO);
509 (*u_temp_i)--;
510
511 return inst;
512 }
513
514 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
515 struct prog_instruction *vpi,
516 GLuint * inst,
517 struct prog_src_register src[3])
518 {
519 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
520 GL_FALSE,
521 GL_FALSE,
522 t_dst_index(vp, &vpi->DstReg),
523 t_dst_mask(vpi->DstReg.WriteMask),
524 t_dst_class(vpi->DstReg.File));
525 inst[1] = t_src(vp, &src[0]);
526 inst[2] = __CONST(0, SWIZZLE_ZERO);
527 inst[3] = __CONST(0, SWIZZLE_ZERO);
528
529 return inst;
530 }
531
532 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
533 struct prog_instruction *vpi,
534 GLuint * inst,
535 struct prog_src_register src[3])
536 {
537 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
538
539 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
540 GL_TRUE,
541 GL_FALSE,
542 t_dst_index(vp, &vpi->DstReg),
543 t_dst_mask(vpi->DstReg.WriteMask),
544 t_dst_class(vpi->DstReg.File));
545 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
546 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
547 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
548 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
549 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
550 t_src_class(src[0].File),
551 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
552 (src[0].RelAddr << 4);
553 inst[2] = __CONST(0, SWIZZLE_ZERO);
554 inst[3] = __CONST(0, SWIZZLE_ZERO);
555
556 return inst;
557 }
558
559 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
560 struct prog_instruction *vpi,
561 GLuint * inst,
562 struct prog_src_register src[3])
563 {
564 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
565
566 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
567 GL_TRUE,
568 GL_FALSE,
569 t_dst_index(vp, &vpi->DstReg),
570 t_dst_mask(vpi->DstReg.WriteMask),
571 t_dst_class(vpi->DstReg.File));
572 /* NOTE: Users swizzling might not work. */
573 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
574 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
575 PVS_SRC_SELECT_FORCE_0, // Z
576 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
577 t_src_class(src[0].File),
578 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
579 (src[0].RelAddr << 4);
580 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
581 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
582 PVS_SRC_SELECT_FORCE_0, // Z
583 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
584 t_src_class(src[0].File),
585 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
586 (src[0].RelAddr << 4);
587 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
588 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
589 PVS_SRC_SELECT_FORCE_0, // Z
590 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
591 t_src_class(src[0].File),
592 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
593 (src[0].RelAddr << 4);
594
595 return inst;
596 }
597
598 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
599 struct prog_instruction *vpi,
600 GLuint * inst,
601 struct prog_src_register src[3])
602 {
603 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
604 GL_TRUE,
605 GL_FALSE,
606 t_dst_index(vp, &vpi->DstReg),
607 t_dst_mask(vpi->DstReg.WriteMask),
608 t_dst_class(vpi->DstReg.File));
609 inst[1] = t_src_scalar(vp, &src[0]);
610 inst[2] = __CONST(0, SWIZZLE_ZERO);
611 inst[3] = __CONST(0, SWIZZLE_ZERO);
612
613 return inst;
614 }
615
616 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
617 struct prog_instruction *vpi,
618 GLuint * inst,
619 struct prog_src_register src[3])
620 {
621 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
622 GL_FALSE,
623 GL_TRUE,
624 t_dst_index(vp, &vpi->DstReg),
625 t_dst_mask(vpi->DstReg.WriteMask),
626 t_dst_class(vpi->DstReg.File));
627 inst[1] = t_src(vp, &src[0]);
628 inst[2] = t_src(vp, &src[1]);
629 inst[3] = t_src(vp, &src[2]);
630
631 return inst;
632 }
633
634 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
635 struct prog_instruction *vpi,
636 GLuint * inst,
637 struct prog_src_register src[3])
638 {
639 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
640 GL_FALSE,
641 GL_FALSE,
642 t_dst_index(vp, &vpi->DstReg),
643 t_dst_mask(vpi->DstReg.WriteMask),
644 t_dst_class(vpi->DstReg.File));
645 inst[1] = t_src(vp, &src[0]);
646 inst[2] = t_src(vp, &src[1]);
647 inst[3] = __CONST(1, SWIZZLE_ZERO);
648
649 return inst;
650 }
651
652 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
653 struct prog_instruction *vpi,
654 GLuint * inst,
655 struct prog_src_register src[3])
656 {
657 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
658 GL_FALSE,
659 GL_FALSE,
660 t_dst_index(vp, &vpi->DstReg),
661 t_dst_mask(vpi->DstReg.WriteMask),
662 t_dst_class(vpi->DstReg.File));
663 inst[1] = t_src(vp, &src[0]);
664 inst[2] = t_src(vp, &src[1]);
665 inst[3] = __CONST(1, SWIZZLE_ZERO);
666
667 return inst;
668 }
669
670 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
671 struct prog_instruction *vpi,
672 GLuint * inst,
673 struct prog_src_register src[3])
674 {
675 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
676
677 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
678 GL_FALSE,
679 GL_FALSE,
680 t_dst_index(vp, &vpi->DstReg),
681 t_dst_mask(vpi->DstReg.WriteMask),
682 t_dst_class(vpi->DstReg.File));
683 inst[1] = t_src(vp, &src[0]);
684 inst[2] = __CONST(0, SWIZZLE_ZERO);
685 inst[3] = __CONST(0, SWIZZLE_ZERO);
686
687 return inst;
688 }
689
690 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
691 struct prog_instruction *vpi,
692 GLuint * inst,
693 struct prog_src_register src[3])
694 {
695 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
696 GL_FALSE,
697 GL_FALSE,
698 t_dst_index(vp, &vpi->DstReg),
699 t_dst_mask(vpi->DstReg.WriteMask),
700 t_dst_class(vpi->DstReg.File));
701 inst[1] = t_src(vp, &src[0]);
702 inst[2] = t_src(vp, &src[1]);
703 inst[3] = __CONST(1, SWIZZLE_ZERO);
704
705 return inst;
706 }
707
708 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
709 struct prog_instruction *vpi,
710 GLuint * inst,
711 struct prog_src_register src[3])
712 {
713 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
714 GL_TRUE,
715 GL_FALSE,
716 t_dst_index(vp, &vpi->DstReg),
717 t_dst_mask(vpi->DstReg.WriteMask),
718 t_dst_class(vpi->DstReg.File));
719 inst[1] = t_src_scalar(vp, &src[0]);
720 inst[2] = __CONST(0, SWIZZLE_ZERO);
721 inst[3] = t_src_scalar(vp, &src[1]);
722
723 return inst;
724 }
725
726 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
727 struct prog_instruction *vpi,
728 GLuint * inst,
729 struct prog_src_register src[3])
730 {
731 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
732 GL_TRUE,
733 GL_FALSE,
734 t_dst_index(vp, &vpi->DstReg),
735 t_dst_mask(vpi->DstReg.WriteMask),
736 t_dst_class(vpi->DstReg.File));
737 inst[1] = t_src_scalar(vp, &src[0]);
738 inst[2] = __CONST(0, SWIZZLE_ZERO);
739 inst[3] = __CONST(0, SWIZZLE_ZERO);
740
741 return inst;
742 }
743
744 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
745 struct prog_instruction *vpi,
746 GLuint * inst,
747 struct prog_src_register src[3])
748 {
749 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
750 GL_TRUE,
751 GL_FALSE,
752 t_dst_index(vp, &vpi->DstReg),
753 t_dst_mask(vpi->DstReg.WriteMask),
754 t_dst_class(vpi->DstReg.File));
755 inst[1] = t_src_scalar(vp, &src[0]);
756 inst[2] = __CONST(0, SWIZZLE_ZERO);
757 inst[3] = __CONST(0, SWIZZLE_ZERO);
758
759 return inst;
760 }
761
762 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
763 struct prog_instruction *vpi,
764 GLuint * inst,
765 struct prog_src_register src[3])
766 {
767 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
768 GL_FALSE,
769 GL_FALSE,
770 t_dst_index(vp, &vpi->DstReg),
771 t_dst_mask(vpi->DstReg.WriteMask),
772 t_dst_class(vpi->DstReg.File));
773 inst[1] = t_src(vp, &src[0]);
774 inst[2] = t_src(vp, &src[1]);
775 inst[3] = __CONST(1, SWIZZLE_ZERO);
776
777 return inst;
778 }
779
780 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
781 struct prog_instruction *vpi,
782 GLuint * inst,
783 struct prog_src_register src[3])
784 {
785 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
786 GL_FALSE,
787 GL_FALSE,
788 t_dst_index(vp, &vpi->DstReg),
789 t_dst_mask(vpi->DstReg.WriteMask),
790 t_dst_class(vpi->DstReg.File));
791 inst[1] = t_src(vp, &src[0]);
792 inst[2] = t_src(vp, &src[1]);
793 inst[3] = __CONST(1, SWIZZLE_ZERO);
794
795 return inst;
796 }
797
798 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
799 struct prog_instruction *vpi,
800 GLuint * inst,
801 struct prog_src_register src[3])
802 {
803 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
804
805 #if 0
806 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
807 GL_FALSE,
808 GL_FALSE,
809 t_dst_index(vp, &vpi->DstReg),
810 t_dst_mask(vpi->DstReg.WriteMask),
811 t_dst_class(vpi->DstReg.File));
812 inst[1] = t_src(vp, &src[0]);
813 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
814 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
815 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
816 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
817 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
818 t_src_class(src[1].File),
819 (!src[1].
820 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
821 (src[1].RelAddr << 4);
822 inst[3] = 0;
823 #else
824 inst[0] =
825 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
826 GL_FALSE,
827 GL_FALSE,
828 t_dst_index(vp, &vpi->DstReg),
829 t_dst_mask(vpi->DstReg.WriteMask),
830 t_dst_class(vpi->DstReg.File));
831 inst[1] = t_src(vp, &src[0]);
832 inst[2] = __CONST(0, SWIZZLE_ONE);
833 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
834 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
838 t_src_class(src[1].File),
839 (!src[1].
840 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
841 (src[1].RelAddr << 4);
842 #endif
843
844 return inst;
845 }
846
847 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
848 struct prog_instruction *vpi,
849 GLuint * inst,
850 struct prog_src_register src[3])
851 {
852 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
853
854 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
855 GL_FALSE,
856 GL_FALSE,
857 t_dst_index(vp, &vpi->DstReg),
858 t_dst_mask(vpi->DstReg.WriteMask),
859 t_dst_class(vpi->DstReg.File));
860 inst[1] = t_src(vp, &src[0]);
861 inst[2] = __CONST(0, SWIZZLE_ZERO);
862 inst[3] = __CONST(0, SWIZZLE_ZERO);
863
864 return inst;
865 }
866
867 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
868 struct prog_instruction *vpi,
869 GLuint * inst,
870 struct prog_src_register src[3],
871 int *u_temp_i)
872 {
873 /* mul r0, r1.yzxw, r2.zxyw
874 mad r0, -r2.yzxw, r1.zxyw, r0
875 */
876
877 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
878 GL_FALSE,
879 GL_FALSE,
880 *u_temp_i,
881 t_dst_mask(vpi->DstReg.WriteMask),
882 PVS_DST_REG_TEMPORARY);
883 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
884 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
885 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
886 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
887 t_src_class(src[0].File),
888 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
889 (src[0].RelAddr << 4);
890 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
891 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
892 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
893 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
894 t_src_class(src[1].File),
895 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
896 (src[1].RelAddr << 4);
897 inst[3] = __CONST(1, SWIZZLE_ZERO);
898 inst += 4;
899
900 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
901 GL_FALSE,
902 GL_FALSE,
903 t_dst_index(vp, &vpi->DstReg),
904 t_dst_mask(vpi->DstReg.WriteMask),
905 t_dst_class(vpi->DstReg.File));
906 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
907 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
908 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
909 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
910 t_src_class(src[1].File),
911 (!src[1].
912 Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
913 (src[1].RelAddr << 4);
914 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
915 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
916 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
917 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
918 t_src_class(src[0].File),
919 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
920 (src[0].RelAddr << 4);
921 inst[3] =
922 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
923 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
924 PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
925
926 (*u_temp_i)--;
927
928 return inst;
929 }
930
931 static void t_inputs_outputs(struct r300_vertex_program *vp)
932 {
933 int i;
934 int cur_reg;
935
936 cur_reg = -1;
937 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
938 if (vp->key.InputsRead & (1 << i))
939 vp->inputs[i] = ++cur_reg;
940 else
941 vp->inputs[i] = -1;
942 }
943
944 cur_reg = 0;
945 for (i = 0; i < VERT_RESULT_MAX; i++)
946 vp->outputs[i] = -1;
947
948 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
949
950 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
951 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
952 }
953
954 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
955 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
956 }
957
958 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
959 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
960 }
961
962 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
963 vp->outputs[VERT_RESULT_COL1] =
964 vp->outputs[VERT_RESULT_COL0] + 1;
965 cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
966 }
967
968 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
969 vp->outputs[VERT_RESULT_BFC0] =
970 vp->outputs[VERT_RESULT_COL0] + 2;
971 cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
972 }
973
974 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
975 vp->outputs[VERT_RESULT_BFC1] =
976 vp->outputs[VERT_RESULT_COL0] + 3;
977 cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
978 }
979
980 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
981 if (vp->key.OutputsWritten & (1 << i)) {
982 vp->outputs[i] = cur_reg++;
983 }
984 }
985
986 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
987 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
988 }
989 }
990
991 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
992 struct prog_instruction *vpi)
993 {
994 int i;
995 GLuint *inst;
996 unsigned long num_operands;
997 /* Initial value should be last tmp reg that hw supports.
998 Strangely enough r300 doesnt mind even though these would be out of range.
999 Smart enough to realize that it doesnt need it? */
1000 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1001 struct prog_src_register src[3];
1002
1003 vp->pos_end = 0; /* Not supported yet */
1004 vp->hw_code.length = 0;
1005 vp->translated = GL_TRUE;
1006 vp->error = GL_FALSE;
1007
1008 t_inputs_outputs(vp);
1009
1010 for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
1011 vpi++, inst += 4) {
1012
1013 FREE_TEMPS();
1014
1015 if (!valid_dst(vp, &vpi->DstReg)) {
1016 /* redirect result to unused temp */
1017 vpi->DstReg.File = PROGRAM_TEMPORARY;
1018 vpi->DstReg.Index = u_temp_i;
1019 }
1020
1021 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1022
1023 /* copy the sources (src) from mesa into a local variable... is this needed? */
1024 for (i = 0; i < num_operands; i++) {
1025 src[i] = vpi->SrcReg[i];
1026 }
1027
1028 if (num_operands == 3) { /* TODO: scalars */
1029 if (CMP_SRCS(src[1], src[2])
1030 || CMP_SRCS(src[0], src[2])) {
1031 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1032 GL_FALSE,
1033 GL_FALSE,
1034 u_temp_i,
1035 VSF_FLAG_ALL,
1036 PVS_DST_REG_TEMPORARY);
1037 inst[1] =
1038 PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
1039 SWIZZLE_X,
1040 SWIZZLE_Y,
1041 SWIZZLE_Z,
1042 SWIZZLE_W,
1043 t_src_class(src[2].File),
1044 VSF_FLAG_NONE) | (src[2].
1045 RelAddr <<
1046 4);
1047 inst[2] = __CONST(2, SWIZZLE_ZERO);
1048 inst[3] = __CONST(2, SWIZZLE_ZERO);
1049 inst += 4;
1050
1051 src[2].File = PROGRAM_TEMPORARY;
1052 src[2].Index = u_temp_i;
1053 src[2].RelAddr = 0;
1054 u_temp_i--;
1055 }
1056 }
1057
1058 if (num_operands >= 2) {
1059 if (CMP_SRCS(src[1], src[0])) {
1060 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1061 GL_FALSE,
1062 GL_FALSE,
1063 u_temp_i,
1064 VSF_FLAG_ALL,
1065 PVS_DST_REG_TEMPORARY);
1066 inst[1] =
1067 PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
1068 SWIZZLE_X,
1069 SWIZZLE_Y,
1070 SWIZZLE_Z,
1071 SWIZZLE_W,
1072 t_src_class(src[0].File),
1073 VSF_FLAG_NONE) | (src[0].
1074 RelAddr <<
1075 4);
1076 inst[2] = __CONST(0, SWIZZLE_ZERO);
1077 inst[3] = __CONST(0, SWIZZLE_ZERO);
1078 inst += 4;
1079
1080 src[0].File = PROGRAM_TEMPORARY;
1081 src[0].Index = u_temp_i;
1082 src[0].RelAddr = 0;
1083 u_temp_i--;
1084 }
1085 }
1086
1087 switch (vpi->Opcode) {
1088 case OPCODE_ABS:
1089 inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
1090 break;
1091 case OPCODE_ADD:
1092 inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
1093 break;
1094 case OPCODE_ARL:
1095 inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
1096 break;
1097 case OPCODE_DP3:
1098 inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
1099 break;
1100 case OPCODE_DP4:
1101 inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
1102 break;
1103 case OPCODE_DPH:
1104 inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
1105 break;
1106 case OPCODE_DST:
1107 inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
1108 break;
1109 case OPCODE_EX2:
1110 inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
1111 break;
1112 case OPCODE_EXP:
1113 inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
1114 break;
1115 case OPCODE_FLR:
1116 inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
1117 &u_temp_i);
1118 break;
1119 case OPCODE_FRC:
1120 inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
1121 break;
1122 case OPCODE_LG2:
1123 inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
1124 break;
1125 case OPCODE_LIT:
1126 inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
1127 break;
1128 case OPCODE_LOG:
1129 inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
1130 break;
1131 case OPCODE_MAD:
1132 inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
1133 break;
1134 case OPCODE_MAX:
1135 inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
1136 break;
1137 case OPCODE_MIN:
1138 inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
1139 break;
1140 case OPCODE_MOV:
1141 inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
1142 break;
1143 case OPCODE_MUL:
1144 inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
1145 break;
1146 case OPCODE_POW:
1147 inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
1148 break;
1149 case OPCODE_RCP:
1150 inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
1151 break;
1152 case OPCODE_RSQ:
1153 inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
1154 break;
1155 case OPCODE_SGE:
1156 inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
1157 break;
1158 case OPCODE_SLT:
1159 inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
1160 break;
1161 case OPCODE_SUB:
1162 inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
1163 break;
1164 case OPCODE_SWZ:
1165 inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
1166 break;
1167 case OPCODE_XPD:
1168 inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
1169 &u_temp_i);
1170 break;
1171 default:
1172 vp->error = GL_TRUE;
1173 break;
1174 }
1175 }
1176
1177 /* Some outputs may be artificially added, to match the inputs
1178 of the fragment program. Blank the outputs here. */
1179 for (i = 0; i < VERT_RESULT_MAX; i++) {
1180 if (vp->key.OutputsAdded & (1 << i)) {
1181 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
1182 GL_FALSE,
1183 GL_FALSE,
1184 vp->outputs[i],
1185 VSF_FLAG_ALL,
1186 PVS_DST_REG_OUT);
1187 inst[1] = __CONST(0, SWIZZLE_ZERO);
1188 inst[2] = __CONST(0, SWIZZLE_ZERO);
1189 inst[3] = __CONST(0, SWIZZLE_ZERO);
1190 inst += 4;
1191 }
1192 }
1193
1194 vp->hw_code.length = (inst - vp->hw_code.body.d);
1195 if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
1196 vp->error = GL_TRUE;
1197 }
1198 }
1199
1200 /* DP4 version seems to trigger some hw peculiarity */
1201 //#define PREFER_DP4
1202
1203 static void position_invariant(struct gl_program *prog)
1204 {
1205 struct prog_instruction *vpi;
1206 struct gl_program_parameter_list *paramList;
1207 int i;
1208
1209 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1210
1211 /* tokens[4] = matrix modifier */
1212 #ifdef PREFER_DP4
1213 tokens[4] = 0; /* not transposed or inverted */
1214 #else
1215 tokens[4] = STATE_MATRIX_TRANSPOSE;
1216 #endif
1217 paramList = prog->Parameters;
1218
1219 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1220 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1221
1222 for (i = 0; i < 4; i++) {
1223 GLint idx;
1224 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1225 idx = _mesa_add_state_reference(paramList, tokens);
1226 #ifdef PREFER_DP4
1227 vpi[i].Opcode = OPCODE_DP4;
1228 vpi[i].StringPos = 0;
1229 vpi[i].Data = 0;
1230
1231 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1232 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1233 vpi[i].DstReg.WriteMask = 1 << i;
1234 vpi[i].DstReg.CondMask = COND_TR;
1235
1236 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1237 vpi[i].SrcReg[0].Index = idx;
1238 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1239
1240 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1241 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1242 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1243 #else
1244 if (i == 0)
1245 vpi[i].Opcode = OPCODE_MUL;
1246 else
1247 vpi[i].Opcode = OPCODE_MAD;
1248
1249 vpi[i].Data = 0;
1250
1251 if (i == 3)
1252 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1253 else
1254 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1255 vpi[i].DstReg.Index = 0;
1256 vpi[i].DstReg.WriteMask = 0xf;
1257 vpi[i].DstReg.CondMask = COND_TR;
1258
1259 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1260 vpi[i].SrcReg[0].Index = idx;
1261 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1262
1263 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1264 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1265 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1266
1267 if (i > 0) {
1268 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1269 vpi[i].SrcReg[2].Index = 0;
1270 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1271 }
1272 #endif
1273 }
1274
1275 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1276 prog->NumInstructions);
1277
1278 free(prog->Instructions);
1279
1280 prog->Instructions = vpi;
1281
1282 prog->NumInstructions += 4;
1283 vpi = &prog->Instructions[prog->NumInstructions - 1];
1284
1285 assert(vpi->Opcode == OPCODE_END);
1286 }
1287
1288 static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
1289 GLuint temp_index)
1290 {
1291 struct prog_instruction *vpi;
1292 struct prog_instruction *vpi_insert;
1293 int i = 0;
1294
1295 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1296 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1297 /* all but END */
1298 _mesa_copy_instructions(vpi, prog->Instructions,
1299 prog->NumInstructions - 1);
1300 /* END */
1301 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1302 &prog->Instructions[prog->NumInstructions - 1],
1303 1);
1304 vpi_insert = &vpi[prog->NumInstructions - 1];
1305
1306 vpi_insert[i].Opcode = OPCODE_MOV;
1307
1308 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1309 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1310 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1311 vpi_insert[i].DstReg.CondMask = COND_TR;
1312
1313 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1314 vpi_insert[i].SrcReg[0].Index = temp_index;
1315 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1316 i++;
1317
1318 vpi_insert[i].Opcode = OPCODE_MOV;
1319
1320 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1321 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1322 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1323 vpi_insert[i].DstReg.CondMask = COND_TR;
1324
1325 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1326 vpi_insert[i].SrcReg[0].Index = temp_index;
1327 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1328 i++;
1329
1330 free(prog->Instructions);
1331
1332 prog->Instructions = vpi;
1333
1334 prog->NumInstructions += i;
1335 vpi = &prog->Instructions[prog->NumInstructions - 1];
1336
1337 assert(vpi->Opcode == OPCODE_END);
1338 }
1339
1340 static void pos_as_texcoord(struct r300_vertex_program *vp,
1341 struct gl_program *prog)
1342 {
1343 struct prog_instruction *vpi;
1344 GLuint tempregi = prog->NumTemporaries;
1345 /* should do something else if no temps left... */
1346 prog->NumTemporaries++;
1347
1348 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1349 if (vpi->DstReg.File == PROGRAM_OUTPUT
1350 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1351 vpi->DstReg.File = PROGRAM_TEMPORARY;
1352 vpi->DstReg.Index = tempregi;
1353 }
1354 }
1355 insert_wpos(vp, prog, tempregi);
1356 }
1357
1358 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1359 *wanted_key, struct gl_vertex_program
1360 *mesa_vp, GLint wpos_idx)
1361 {
1362 struct r300_vertex_program *vp;
1363
1364 vp = _mesa_calloc(sizeof(*vp));
1365 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1366 vp->wpos_idx = wpos_idx;
1367
1368 if (mesa_vp->IsPositionInvariant) {
1369 position_invariant(&mesa_vp->Base);
1370 }
1371
1372 if (wpos_idx > -1) {
1373 pos_as_texcoord(vp, &mesa_vp->Base);
1374 }
1375
1376 assert(mesa_vp->Base.NumInstructions);
1377 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1378 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1379
1380 return vp;
1381 }
1382
1383 static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
1384 {
1385 if (key->OutputsWritten & (1 << vert))
1386 return;
1387
1388 key->OutputsWritten |= 1 << vert;
1389 key->OutputsAdded |= 1 << vert;
1390 }
1391
1392 void r300SelectVertexShader(r300ContextPtr r300)
1393 {
1394 GLcontext *ctx = ctx = r300->radeon.glCtx;
1395 GLuint InputsRead;
1396 struct r300_vertex_program_key wanted_key = { 0 };
1397 GLint i;
1398 struct r300_vertex_program_cont *vpc;
1399 struct r300_vertex_program *vp;
1400 GLint wpos_idx;
1401
1402 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1403 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1404 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1405 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1406
1407 wpos_idx = -1;
1408 if (InputsRead & FRAG_BIT_WPOS) {
1409 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1410 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1411 break;
1412
1413 if (i == ctx->Const.MaxTextureUnits) {
1414 fprintf(stderr, "\tno free texcoord found\n");
1415 _mesa_exit(-1);
1416 }
1417
1418 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1419 wpos_idx = i;
1420 }
1421
1422 add_outputs(&wanted_key, VERT_RESULT_HPOS);
1423
1424 if (InputsRead & FRAG_BIT_COL0) {
1425 add_outputs(&wanted_key, VERT_RESULT_COL0);
1426 }
1427
1428 if (InputsRead & FRAG_BIT_COL1) {
1429 add_outputs(&wanted_key, VERT_RESULT_COL1);
1430 }
1431
1432 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1433 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1434 add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
1435 }
1436 }
1437
1438 if (vpc->mesa_program.IsPositionInvariant) {
1439 /* we wan't position don't we ? */
1440 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1441 }
1442
1443 for (vp = vpc->progs; vp; vp = vp->next)
1444 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1445 == 0) {
1446 r300->selected_vp = vp;
1447 return;
1448 }
1449 //_mesa_print_program(&vpc->mesa_program.Base);
1450
1451 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1452 vp->next = vpc->progs;
1453 vpc->progs = vp;
1454 r300->selected_vp = vp;
1455 }
1456
/* Treat ptr as a drm_r300_cmd_header_t and raise its vpu.count to
 * new_count / 4 if that is larger than the value currently stored.
 * The quotient must be below 256. */
#define bump_vpu_count(ptr, new_count) do {				\
		drm_r300_cmd_header_t *_hdr =				\
		    (drm_r300_cmd_header_t *)(ptr);			\
		int _quads = (new_count) / 4;				\
		assert(_quads < 256);					\
		if (_hdr->vpu.count < _quads)				\
			_hdr->vpu.count = _quads;			\
	} while (0)
1463
1464 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
1465 {
1466 int i;
1467
1468 assert((code->length > 0) && (code->length % 4 == 0));
1469
1470 switch ((dest >> 8) & 0xf) {
1471 case 0:
1472 R300_STATECHANGE(r300, vpi);
1473 for (i = 0; i < code->length; i++)
1474 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1475 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
1476 break;
1477 case 2:
1478 R300_STATECHANGE(r300, vpp);
1479 for (i = 0; i < code->length; i++)
1480 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1481 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
1482 break;
1483 case 4:
1484 R300_STATECHANGE(r300, vps);
1485 for (i = 0; i < code->length; i++)
1486 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
1487 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
1488 break;
1489 default:
1490 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
1491 _mesa_exit(-1);
1492 }
1493 }
1494
1495 void r300SetupSwtclVertexProgram(r300ContextPtr rmesa)
1496 {
1497 struct r300_vertex_shader_hw_code *hw_code;
1498 GLuint o_reg = 0;
1499 GLuint i_reg = 0;
1500 int i;
1501 int inst_count = 0;
1502 int param_count = 0;
1503 int program_end = 0;
1504
1505 /* Reset state, in case we don't use something */
1506 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
1507 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
1508 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
1509
1510 hw_code = _mesa_malloc(sizeof(struct r300_vertex_shader_hw_code));
1511
1512 for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) {
1513 if (rmesa->swtcl.sw_tcl_inputs[i] != -1) {
1514 hw_code->body.d[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT);
1515 hw_code->body.d[program_end + 1] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_X,
1516 PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
1517 hw_code->body.d[program_end + 2] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1,
1518 PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
1519 hw_code->body.d[program_end + 3] = PVS_SRC_OPERAND(rmesa->swtcl.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1,
1520 PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
1521 program_end += 4;
1522 i_reg++;
1523 }
1524 }
1525
1526 hw_code->length = program_end;
1527
1528 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, hw_code);
1529 inst_count = (hw_code->length / 4) - 1;
1530
1531 r300VapCntl(rmesa, i_reg, o_reg, 0);
1532
1533 R300_STATECHANGE(rmesa, pvs);
1534 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
1535 (inst_count << R300_PVS_LAST_INST_SHIFT);
1536
1537 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
1538 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
1539
1540 _mesa_free(hw_code);
1541 }
1542
1543 void r300SetupVertexProgram(r300ContextPtr rmesa)
1544 {
1545 GLcontext *ctx = rmesa->radeon.glCtx;
1546 struct r300_vertex_program *prog = rmesa->selected_vp;
1547 int inst_count = 0;
1548 int param_count = 0;
1549
1550 /* Reset state, in case we don't use something */
1551 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
1552 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
1553 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
1554
1555 R300_STATECHANGE(rmesa, vpp);
1556 param_count = r300VertexProgUpdateParams(ctx,
1557 (struct r300_vertex_program_cont *)
1558 ctx->VertexProgram._Current,
1559 (float *)&rmesa->hw.vpp.
1560 cmd[R300_VPP_PARAM_0]);
1561 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
1562 param_count /= 4;
1563
1564 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
1565 inst_count = (prog->hw_code.length / 4) - 1;
1566
1567 r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
1568 _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
1569
1570 R300_STATECHANGE(rmesa, pvs);
1571 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
1572 (inst_count << R300_PVS_LAST_INST_SHIFT);
1573
1574 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
1575 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
1576 }