Merge branch 'master' of git+ssh://michal@git.freedesktop.org/git/mesa/mesa into...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \author Aapo Tahkola <aet@rasterburn.org>
32 *
33 * \author Oliver McFadden <z3ro.geek@gmail.com>
34 *
35 * For a description of the vertex program instruction set see r300_reg.h.
36 */
37
38 #include "glheader.h"
39 #include "macros.h"
40 #include "enums.h"
41 #include "program.h"
42 #include "shader/prog_instruction.h"
43 #include "shader/prog_parameter.h"
44 #include "shader/prog_statevars.h"
45 #include "tnl/tnl.h"
46
47 #include "r300_context.h"
48
/*
 * Compile-time guard.  The translation helpers in this file hand Mesa's
 * SWIZZLE_* and WRITEMASK_* values to the hardware encoding macros without
 * any conversion, so each Mesa constant has to equal its VSF_* counterpart.
 */
#if (SWIZZLE_X != VSF_IN_COMPONENT_X) || \
    (SWIZZLE_Y != VSF_IN_COMPONENT_Y) || \
    (SWIZZLE_Z != VSF_IN_COMPONENT_Z) || \
    (SWIZZLE_W != VSF_IN_COMPONENT_W) || \
    (SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO) || \
    (SWIZZLE_ONE != VSF_IN_COMPONENT_ONE) || \
    (WRITEMASK_X != VSF_FLAG_X) || \
    (WRITEMASK_Y != VSF_FLAG_Y) || \
    (WRITEMASK_Z != VSF_FLAG_Z) || \
    (WRITEMASK_W != VSF_FLAG_W)
#error Cannot change these!
#endif
61
/* TODO: Get rid of t_src_class call */
/*
 * CMP_SRCS(a, b) takes two source-register structs (not pointers) and is
 * nonzero when
 *   - their RelAddr fields differ, or
 *   - their Index fields differ and both registers are of the PARAM class,
 *     or both are of the ATTR class.
 *
 * Both arguments are evaluated several times, so they must be free of side
 * effects.  Arguments are parenthesized so that expressions such as *p can
 * be passed safely.
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	    t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	   (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	    t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/*
 * Constant source operands.
 *
 * R300_VP_SPLAT_SRC(n, comp) builds a source word that addresses the same
 * register as src[n] (same index, class and RelAddr bit) but selects the
 * constant component `comp` for all four channels, with no negation.
 *
 * ZERO_SRC_n selects SWIZZLE_ZERO and ONE_SRC_n selects SWIZZLE_ONE.
 * All of these expect `vp` and the `src` array to be in scope at the
 * point of use.
 */
#define R300_VP_SPLAT_SRC(n, comp) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 comp, comp, \
			 comp, comp, \
			 t_src_class(src[n].File), VSF_FLAG_NONE) | \
	 (src[n].RelAddr << 4))

#define ZERO_SRC_0 R300_VP_SPLAT_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 R300_VP_SPLAT_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 R300_VP_SPLAT_SRC(2, SWIZZLE_ZERO)

#define ONE_SRC_0 R300_VP_SPLAT_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 R300_VP_SPLAT_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 R300_VP_SPLAT_SRC(2, SWIZZLE_ONE)
98
99 /* DP4 version seems to trigger some hw peculiarity */
100 //#define PREFER_DP4
101
/*
 * FREE_TEMPS() releases the scratch temporaries handed out so far.
 *
 * Scratch registers are taken from the top of the temporary range by
 * decrementing u_temp_i.  If u_temp_i has dropped below
 * vp->num_temporaries, a warning is issued through WARN_ONCE and the
 * program is flagged as non-native.  In every case u_temp_i is reset to
 * the last temporary, VSF_MAX_FRAGMENT_TEMPS - 1.
 *
 * Expects `vp` and an int lvalue `u_temp_i` in the calling scope.
 */
#define FREE_TEMPS()							\
	do {								\
		if (u_temp_i < vp->num_temporaries) {			\
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", \
				  vp->num_temporaries, u_temp_i);	\
			vp->native = GL_FALSE;				\
		}							\
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;			\
	} while (0)
110
111 int r300VertexProgUpdateParams(GLcontext * ctx,
112 struct r300_vertex_program_cont *vp,
113 float *dst)
114 {
115 int pi;
116 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
117 float *dst_o = dst;
118 struct gl_program_parameter_list *paramList;
119
120 if (mesa_vp->IsNVProgram) {
121 _mesa_load_tracked_matrices(ctx);
122
123 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
124 *dst++ = ctx->VertexProgram.Parameters[pi][0];
125 *dst++ = ctx->VertexProgram.Parameters[pi][1];
126 *dst++ = ctx->VertexProgram.Parameters[pi][2];
127 *dst++ = ctx->VertexProgram.Parameters[pi][3];
128 }
129 return dst - dst_o;
130 }
131
132 assert(mesa_vp->Base.Parameters);
133 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
134
135 if (mesa_vp->Base.Parameters->NumParameters * 4 >
136 VSF_MAX_FRAGMENT_LENGTH) {
137 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
138 _mesa_exit(-1);
139 }
140
141 paramList = mesa_vp->Base.Parameters;
142 for (pi = 0; pi < paramList->NumParameters; pi++) {
143 switch (paramList->Parameters[pi].Type) {
144
145 case PROGRAM_STATE_VAR:
146 case PROGRAM_NAMED_PARAM:
147 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
148 case PROGRAM_CONSTANT:
149 *dst++ = paramList->ParameterValues[pi][0];
150 *dst++ = paramList->ParameterValues[pi][1];
151 *dst++ = paramList->ParameterValues[pi][2];
152 *dst++ = paramList->ParameterValues[pi][3];
153 break;
154
155 default:
156 _mesa_problem(NULL, "Bad param type in %s",
157 __FUNCTION__);
158 }
159
160 }
161
162 return dst - dst_o;
163 }
164
165 static unsigned long t_dst_mask(GLuint mask)
166 {
167 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
168 return mask & VSF_FLAG_ALL;
169 }
170
171 static unsigned long t_dst_class(enum register_file file)
172 {
173
174 switch (file) {
175 case PROGRAM_TEMPORARY:
176 return VSF_OUT_CLASS_TMP;
177 case PROGRAM_OUTPUT:
178 return VSF_OUT_CLASS_RESULT;
179 case PROGRAM_ADDRESS:
180 return VSF_OUT_CLASS_ADDR;
181 /*
182 case PROGRAM_INPUT:
183 case PROGRAM_LOCAL_PARAM:
184 case PROGRAM_ENV_PARAM:
185 case PROGRAM_NAMED_PARAM:
186 case PROGRAM_STATE_VAR:
187 case PROGRAM_WRITE_ONLY:
188 case PROGRAM_ADDRESS:
189 */
190 default:
191 fprintf(stderr, "problem in %s", __FUNCTION__);
192 _mesa_exit(-1);
193 return -1;
194 }
195 }
196
197 static unsigned long t_dst_index(struct r300_vertex_program *vp,
198 struct prog_dst_register *dst)
199 {
200 if (dst->File == PROGRAM_OUTPUT)
201 return vp->outputs[dst->Index];
202
203 return dst->Index;
204 }
205
206 static unsigned long t_src_class(enum register_file file)
207 {
208
209 switch (file) {
210 case PROGRAM_TEMPORARY:
211 return VSF_IN_CLASS_TMP;
212
213 case PROGRAM_INPUT:
214 return VSF_IN_CLASS_ATTR;
215
216 case PROGRAM_LOCAL_PARAM:
217 case PROGRAM_ENV_PARAM:
218 case PROGRAM_NAMED_PARAM:
219 case PROGRAM_STATE_VAR:
220 return VSF_IN_CLASS_PARAM;
221 /*
222 case PROGRAM_OUTPUT:
223 case PROGRAM_WRITE_ONLY:
224 case PROGRAM_ADDRESS:
225 */
226 default:
227 fprintf(stderr, "problem in %s", __FUNCTION__);
228 _mesa_exit(-1);
229 return -1;
230 }
231 }
232
233 static inline unsigned long t_swizzle(GLubyte swizzle)
234 {
235 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
236 return swizzle;
237 }
238
#if 0
/**
 * Debug helper (compiled out): print every entry of vp->inputs[] to
 * stderr, prefixed with the name of the calling function.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n",
			__FUNCTION__, caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	attr = 0;
	while (attr < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
		attr++;
	}
	fprintf(stderr, ">\n");
}
#endif
257
258 static unsigned long t_src_index(struct r300_vertex_program *vp,
259 struct prog_src_register *src)
260 {
261 int i;
262 int max_reg = -1;
263
264 if (src->File == PROGRAM_INPUT) {
265 if (vp->inputs[src->Index] != -1)
266 return vp->inputs[src->Index];
267
268 for (i = 0; i < VERT_ATTRIB_MAX; i++)
269 if (vp->inputs[i] > max_reg)
270 max_reg = vp->inputs[i];
271
272 vp->inputs[src->Index] = max_reg + 1;
273
274 //vp_dump_inputs(vp, __FUNCTION__);
275
276 return vp->inputs[src->Index];
277 } else {
278 if (src->Index < 0) {
279 fprintf(stderr,
280 "negative offsets for indirect addressing do not work.\n");
281 return 0;
282 }
283 return src->Index;
284 }
285 }
286
287 /* these two functions should probably be merged... */
288
289 static unsigned long t_src(struct r300_vertex_program *vp,
290 struct prog_src_register *src)
291 {
292 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
293 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
294 */
295 return MAKE_VSF_SOURCE(t_src_index(vp, src),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 1)),
298 t_swizzle(GET_SWZ(src->Swizzle, 2)),
299 t_swizzle(GET_SWZ(src->Swizzle, 3)),
300 t_src_class(src->File),
301 src->NegateBase) | (src->RelAddr << 4);
302 }
303
304 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
305 struct prog_src_register *src)
306 {
307 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
308 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
309 */
310 return MAKE_VSF_SOURCE(t_src_index(vp, src),
311 t_swizzle(GET_SWZ(src->Swizzle, 0)),
312 t_swizzle(GET_SWZ(src->Swizzle, 0)),
313 t_swizzle(GET_SWZ(src->Swizzle, 0)),
314 t_swizzle(GET_SWZ(src->Swizzle, 0)),
315 t_src_class(src->File),
316 src->
317 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
318 (src->RelAddr << 4);
319 }
320
321 static GLboolean valid_dst(struct r300_vertex_program *vp,
322 struct prog_dst_register *dst)
323 {
324 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
325 return GL_FALSE;
326 } else if (dst->File == PROGRAM_ADDRESS) {
327 assert(dst->Index == 0);
328 }
329
330 return GL_TRUE;
331 }
332
333 /*
334 * Instruction Inputs Output Description
335 * ----------- ------ ------ --------------------------------
336 * ABS v v absolute value
337 * ADD v,v v add
338 * ARL s a address register load
339 * DP3 v,v ssss 3-component dot product
340 * DP4 v,v ssss 4-component dot product
341 * DPH v,v ssss homogeneous dot product
342 * DST v,v v distance vector
343 * EX2 s ssss exponential base 2
344 * EXP s v exponential base 2 (approximate)
345 * FLR v v floor
346 * FRC v v fraction
347 * LG2 s ssss logarithm base 2
348 * LIT v v compute light coefficients
349 * LOG s v logarithm base 2 (approximate)
350 * MAD v,v,v v multiply and add
351 * MAX v,v v maximum
352 * MIN v,v v minimum
353 * MOV v v move
354 * MUL v,v v multiply
355 * POW s,s ssss exponentiate
356 * RCP s ssss reciprocal
357 * RSQ s ssss reciprocal square root
358 * SGE v,v v set on greater than or equal
359 * SLT v,v v set on less than
360 * SUB v,v v subtract
361 * SWZ v v extended swizzle
362 * XPD v,v v cross product
363 *
364 * Table X.5: Summary of vertex program instructions. "v" indicates a
365 * floating-point vector input or output, "s" indicates a floating-point
366 * scalar input, "ssss" indicates a scalar output replicated across a
367 * 4-component result vector, and "a" indicates a single address register
368 * component.
369 */
370
371 static GLuint *t_opcode_abs(struct r300_vertex_program *vp,
372 struct prog_instruction *vpi, GLuint * inst,
373 struct prog_src_register src[3])
374 {
375 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
376
377 inst[0] =
378 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
379 t_dst_mask(vpi->DstReg.WriteMask),
380 t_dst_class(vpi->DstReg.File));
381
382 inst[1] = t_src(vp, &src[0]);
383 inst[2] =
384 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
385 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
386 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
387 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
388 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
389 t_src_class(src[0].File),
390 (!src[0].
391 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
392 (src[0].RelAddr << 4);
393 inst[3] = 0;
394
395 return inst;
396 }
397
398 static GLuint *t_opcode_add(struct r300_vertex_program *vp,
399 struct prog_instruction *vpi, GLuint * inst,
400 struct prog_src_register src[3])
401 {
402 unsigned long hw_op;
403
404 #if 1
405 hw_op = (src[0].File == PROGRAM_TEMPORARY
406 && src[1].File ==
407 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
408 R300_VPI_OUT_OP_MAD;
409
410 inst[0] =
411 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
412 t_dst_mask(vpi->DstReg.WriteMask),
413 t_dst_class(vpi->DstReg.File));
414 inst[1] = ONE_SRC_0;
415 inst[2] = t_src(vp, &src[0]);
416 inst[3] = t_src(vp, &src[1]);
417 #else
418 inst[0] =
419 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
420 t_dst_mask(vpi->DstReg.WriteMask),
421 t_dst_class(vpi->DstReg.File));
422 inst[1] = t_src(vp, &src[0]);
423 inst[2] = t_src(vp, &src[1]);
424 inst[3] = ZERO_SRC_1;
425
426 #endif
427
428 return inst;
429 }
430
431 static GLuint *t_opcode_arl(struct r300_vertex_program *vp,
432 struct prog_instruction *vpi, GLuint * inst,
433 struct prog_src_register src[3])
434 {
435 inst[0] =
436 MAKE_VSF_OP(R300_VPI_OUT_OP_ARL, t_dst_index(vp, &vpi->DstReg),
437 t_dst_mask(vpi->DstReg.WriteMask),
438 t_dst_class(vpi->DstReg.File));
439
440 inst[1] = t_src(vp, &src[0]);
441 inst[2] = ZERO_SRC_0;
442 inst[3] = ZERO_SRC_0;
443
444 return inst;
445 }
446
447 static GLuint *t_opcode_dp3(struct r300_vertex_program *vp,
448 struct prog_instruction *vpi, GLuint * inst,
449 struct prog_src_register src[3])
450 {
451 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
452
453 inst[0] =
454 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
455 t_dst_mask(vpi->DstReg.WriteMask),
456 t_dst_class(vpi->DstReg.File));
457
458 inst[1] =
459 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
460 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
461 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
462 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
463 SWIZZLE_ZERO, t_src_class(src[0].File),
464 src[0].
465 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
466 (src[0].RelAddr << 4);
467
468 inst[2] =
469 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
470 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
471 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
472 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
473 SWIZZLE_ZERO, t_src_class(src[1].File),
474 src[1].
475 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
476 (src[1].RelAddr << 4);
477
478 inst[3] = ZERO_SRC_1;
479
480 return inst;
481 }
482
483 static GLuint *t_opcode_dp4(struct r300_vertex_program *vp,
484 struct prog_instruction *vpi, GLuint * inst,
485 struct prog_src_register src[3])
486 {
487 inst[0] =
488 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
489 t_dst_mask(vpi->DstReg.WriteMask),
490 t_dst_class(vpi->DstReg.File));
491
492 inst[1] = t_src(vp, &src[0]);
493 inst[2] = t_src(vp, &src[1]);
494 inst[3] = ZERO_SRC_1;
495
496 return inst;
497 }
498
499 static GLuint *t_opcode_dph(struct r300_vertex_program *vp,
500 struct prog_instruction *vpi, GLuint * inst,
501 struct prog_src_register src[3])
502 {
503 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
504 inst[0] =
505 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
506 t_dst_mask(vpi->DstReg.WriteMask),
507 t_dst_class(vpi->DstReg.File));
508
509 inst[1] =
510 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
511 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
512 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
513 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
514 VSF_IN_COMPONENT_ONE, t_src_class(src[0].File),
515 src[0].
516 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
517 (src[0].RelAddr << 4);
518 inst[2] = t_src(vp, &src[1]);
519 inst[3] = ZERO_SRC_1;
520
521 return inst;
522 }
523
524 static GLuint *t_opcode_dst(struct r300_vertex_program *vp,
525 struct prog_instruction *vpi, GLuint * inst,
526 struct prog_src_register src[3])
527 {
528 inst[0] =
529 MAKE_VSF_OP(R300_VPI_OUT_OP_DST, t_dst_index(vp, &vpi->DstReg),
530 t_dst_mask(vpi->DstReg.WriteMask),
531 t_dst_class(vpi->DstReg.File));
532
533 inst[1] = t_src(vp, &src[0]);
534 inst[2] = t_src(vp, &src[1]);
535 inst[3] = ZERO_SRC_1;
536
537 return inst;
538 }
539
540 static GLuint *t_opcode_ex2(struct r300_vertex_program *vp,
541 struct prog_instruction *vpi, GLuint * inst,
542 struct prog_src_register src[3])
543 {
544 inst[0] =
545 MAKE_VSF_OP(R300_VPI_OUT_OP_EX2, t_dst_index(vp, &vpi->DstReg),
546 t_dst_mask(vpi->DstReg.WriteMask),
547 t_dst_class(vpi->DstReg.File));
548
549 inst[1] = t_src_scalar(vp, &src[0]);
550 inst[2] = ZERO_SRC_0;
551 inst[3] = ZERO_SRC_0;
552
553 return inst;
554 }
555
556 static GLuint *t_opcode_exp(struct r300_vertex_program *vp,
557 struct prog_instruction *vpi, GLuint * inst,
558 struct prog_src_register src[3])
559 {
560 inst[0] =
561 MAKE_VSF_OP(R300_VPI_OUT_OP_EXP, t_dst_index(vp, &vpi->DstReg),
562 t_dst_mask(vpi->DstReg.WriteMask),
563 t_dst_class(vpi->DstReg.File));
564
565 inst[1] = t_src_scalar(vp, &src[0]);
566 inst[2] = ZERO_SRC_0;
567 inst[3] = ZERO_SRC_0;
568
569 return inst;
570 }
571
572 static GLuint *t_opcode_flr(struct r300_vertex_program *vp,
573 struct prog_instruction *vpi, GLuint * inst,
574 struct prog_src_register src[3], int *u_temp_i)
575 {
576 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
577 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
578
579 inst[0] =
580 MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, *u_temp_i,
581 t_dst_mask(vpi->DstReg.WriteMask),
582 VSF_OUT_CLASS_TMP);
583
584 inst[1] = t_src(vp, &src[0]);
585 inst[2] = ZERO_SRC_0;
586 inst[3] = ZERO_SRC_0;
587 inst += 4;
588
589 inst[0] =
590 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
591 t_dst_mask(vpi->DstReg.WriteMask),
592 t_dst_class(vpi->DstReg.File));
593
594 inst[1] = t_src(vp, &src[0]);
595 inst[2] =
596 MAKE_VSF_SOURCE(*u_temp_i, VSF_IN_COMPONENT_X,
597 VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
598 VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
599 /* Not 100% sure about this */
600 (!src[0].
601 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
602 /*VSF_FLAG_ALL */ );
603
604 inst[3] = ZERO_SRC_0;
605 (*u_temp_i)--;
606
607 return inst;
608 }
609
610 static GLuint *t_opcode_frc(struct r300_vertex_program *vp,
611 struct prog_instruction *vpi, GLuint * inst,
612 struct prog_src_register src[3])
613 {
614 inst[0] =
615 MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, t_dst_index(vp, &vpi->DstReg),
616 t_dst_mask(vpi->DstReg.WriteMask),
617 t_dst_class(vpi->DstReg.File));
618
619 inst[1] = t_src(vp, &src[0]);
620 inst[2] = ZERO_SRC_0;
621 inst[3] = ZERO_SRC_0;
622
623 return inst;
624 }
625
626 static GLuint *t_opcode_lg2(struct r300_vertex_program *vp,
627 struct prog_instruction *vpi, GLuint * inst,
628 struct prog_src_register src[3])
629 {
630 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
631
632 inst[0] =
633 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
634 t_dst_mask(vpi->DstReg.WriteMask),
635 t_dst_class(vpi->DstReg.File));
636
637 inst[1] =
638 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
639 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
640 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
641 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
642 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
643 t_src_class(src[0].File),
644 src[0].
645 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
646 (src[0].RelAddr << 4);
647 inst[2] = ZERO_SRC_0;
648 inst[3] = ZERO_SRC_0;
649
650 return inst;
651 }
652
653 static GLuint *t_opcode_lit(struct r300_vertex_program *vp,
654 struct prog_instruction *vpi, GLuint * inst,
655 struct prog_src_register src[3])
656 {
657 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
658
659 inst[0] =
660 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
661 t_dst_mask(vpi->DstReg.WriteMask),
662 t_dst_class(vpi->DstReg.File));
663 /* NOTE: Users swizzling might not work. */
664 inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
665 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
666 VSF_IN_COMPONENT_ZERO, // z
667 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
668 t_src_class(src[0].File),
669 src[0].
670 NegateBase ? VSF_FLAG_ALL :
671 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
672 inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
673 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
674 VSF_IN_COMPONENT_ZERO, // z
675 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
676 t_src_class(src[0].File),
677 src[0].
678 NegateBase ? VSF_FLAG_ALL :
679 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
680 inst[3] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
681 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
682 VSF_IN_COMPONENT_ZERO, // z
683 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
684 t_src_class(src[0].File),
685 src[0].
686 NegateBase ? VSF_FLAG_ALL :
687 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
688
689 return inst;
690 }
691
692 static GLuint *t_opcode_log(struct r300_vertex_program *vp,
693 struct prog_instruction *vpi, GLuint * inst,
694 struct prog_src_register src[3])
695 {
696 inst[0] =
697 MAKE_VSF_OP(R300_VPI_OUT_OP_LOG, t_dst_index(vp, &vpi->DstReg),
698 t_dst_mask(vpi->DstReg.WriteMask),
699 t_dst_class(vpi->DstReg.File));
700
701 inst[1] = t_src_scalar(vp, &src[0]);
702 inst[2] = ZERO_SRC_0;
703 inst[3] = ZERO_SRC_0;
704
705 return inst;
706 }
707
708 static GLuint *t_opcode_mad(struct r300_vertex_program *vp,
709 struct prog_instruction *vpi, GLuint * inst,
710 struct prog_src_register src[3])
711 {
712 unsigned long hw_op;
713
714 hw_op = (src[0].File == PROGRAM_TEMPORARY
715 && src[1].File == PROGRAM_TEMPORARY
716 && src[2].File ==
717 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
718 R300_VPI_OUT_OP_MAD;
719
720 inst[0] =
721 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
722 t_dst_mask(vpi->DstReg.WriteMask),
723 t_dst_class(vpi->DstReg.File));
724 inst[1] = t_src(vp, &src[0]);
725 inst[2] = t_src(vp, &src[1]);
726 inst[3] = t_src(vp, &src[2]);
727
728 return inst;
729 }
730
731 static GLuint *t_opcode_max(struct r300_vertex_program *vp,
732 struct prog_instruction *vpi, GLuint * inst,
733 struct prog_src_register src[3])
734 {
735 inst[0] =
736 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
737 t_dst_mask(vpi->DstReg.WriteMask),
738 t_dst_class(vpi->DstReg.File));
739
740 inst[1] = t_src(vp, &src[0]);
741 inst[2] = t_src(vp, &src[1]);
742 inst[3] = ZERO_SRC_1;
743
744 return inst;
745 }
746
747 static GLuint *t_opcode_min(struct r300_vertex_program *vp,
748 struct prog_instruction *vpi, GLuint * inst,
749 struct prog_src_register src[3])
750 {
751 inst[0] =
752 MAKE_VSF_OP(R300_VPI_OUT_OP_MIN, t_dst_index(vp, &vpi->DstReg),
753 t_dst_mask(vpi->DstReg.WriteMask),
754 t_dst_class(vpi->DstReg.File));
755
756 inst[1] = t_src(vp, &src[0]);
757 inst[2] = t_src(vp, &src[1]);
758 inst[3] = ZERO_SRC_1;
759
760 return inst;
761 }
762
763 static GLuint *t_opcode_mov(struct r300_vertex_program *vp,
764 struct prog_instruction *vpi, GLuint * inst,
765 struct prog_src_register src[3])
766 {
767 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
768
769 #if 1
770 inst[0] =
771 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
772 t_dst_mask(vpi->DstReg.WriteMask),
773 t_dst_class(vpi->DstReg.File));
774 inst[1] = t_src(vp, &src[0]);
775 inst[2] = ZERO_SRC_0;
776 inst[3] = ZERO_SRC_0;
777 #else
778 hw_op =
779 (src[0].File ==
780 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
781 R300_VPI_OUT_OP_MAD;
782
783 inst[0] =
784 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
785 t_dst_mask(vpi->DstReg.WriteMask),
786 t_dst_class(vpi->DstReg.File));
787 inst[1] = t_src(vp, &src[0]);
788 inst[2] = ONE_SRC_0;
789 inst[3] = ZERO_SRC_0;
790 #endif
791
792 return inst;
793 }
794
795 static GLuint *t_opcode_mul(struct r300_vertex_program *vp,
796 struct prog_instruction *vpi, GLuint * inst,
797 struct prog_src_register src[3])
798 {
799 unsigned long hw_op;
800
801 // HW mul can take third arg but appears to have some other limitations.
802
803 hw_op = (src[0].File == PROGRAM_TEMPORARY
804 && src[1].File ==
805 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
806 R300_VPI_OUT_OP_MAD;
807
808 inst[0] =
809 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
810 t_dst_mask(vpi->DstReg.WriteMask),
811 t_dst_class(vpi->DstReg.File));
812 inst[1] = t_src(vp, &src[0]);
813 inst[2] = t_src(vp, &src[1]);
814
815 inst[3] = ZERO_SRC_1;
816
817 return inst;
818 }
819
820 static GLuint *t_opcode_pow(struct r300_vertex_program *vp,
821 struct prog_instruction *vpi, GLuint * inst,
822 struct prog_src_register src[3])
823 {
824 inst[0] =
825 MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
826 t_dst_mask(vpi->DstReg.WriteMask),
827 t_dst_class(vpi->DstReg.File));
828 inst[1] = t_src_scalar(vp, &src[0]);
829 inst[2] = ZERO_SRC_0;
830 inst[3] = t_src_scalar(vp, &src[1]);
831
832 return inst;
833 }
834
835 static GLuint *t_opcode_rcp(struct r300_vertex_program *vp,
836 struct prog_instruction *vpi, GLuint * inst,
837 struct prog_src_register src[3])
838 {
839 inst[0] =
840 MAKE_VSF_OP(R300_VPI_OUT_OP_RCP, t_dst_index(vp, &vpi->DstReg),
841 t_dst_mask(vpi->DstReg.WriteMask),
842 t_dst_class(vpi->DstReg.File));
843
844 inst[1] = t_src_scalar(vp, &src[0]);
845 inst[2] = ZERO_SRC_0;
846 inst[3] = ZERO_SRC_0;
847
848 return inst;
849 }
850
851 static GLuint *t_opcode_rsq(struct r300_vertex_program *vp,
852 struct prog_instruction *vpi, GLuint * inst,
853 struct prog_src_register src[3])
854 {
855 inst[0] =
856 MAKE_VSF_OP(R300_VPI_OUT_OP_RSQ, t_dst_index(vp, &vpi->DstReg),
857 t_dst_mask(vpi->DstReg.WriteMask),
858 t_dst_class(vpi->DstReg.File));
859
860 inst[1] = t_src_scalar(vp, &src[0]);
861 inst[2] = ZERO_SRC_0;
862 inst[3] = ZERO_SRC_0;
863
864 return inst;
865 }
866
867 static GLuint *t_opcode_sge(struct r300_vertex_program *vp,
868 struct prog_instruction *vpi, GLuint * inst,
869 struct prog_src_register src[3])
870 {
871 inst[0] =
872 MAKE_VSF_OP(R300_VPI_OUT_OP_SGE, t_dst_index(vp, &vpi->DstReg),
873 t_dst_mask(vpi->DstReg.WriteMask),
874 t_dst_class(vpi->DstReg.File));
875
876 inst[1] = t_src(vp, &src[0]);
877 inst[2] = t_src(vp, &src[1]);
878 inst[3] = ZERO_SRC_1;
879
880 return inst;
881 }
882
883 static GLuint *t_opcode_slt(struct r300_vertex_program *vp,
884 struct prog_instruction *vpi, GLuint * inst,
885 struct prog_src_register src[3])
886 {
887 inst[0] =
888 MAKE_VSF_OP(R300_VPI_OUT_OP_SLT, t_dst_index(vp, &vpi->DstReg),
889 t_dst_mask(vpi->DstReg.WriteMask),
890 t_dst_class(vpi->DstReg.File));
891
892 inst[1] = t_src(vp, &src[0]);
893 inst[2] = t_src(vp, &src[1]);
894 inst[3] = ZERO_SRC_1;
895
896 return inst;
897 }
898
899 static GLuint *t_opcode_sub(struct r300_vertex_program *vp,
900 struct prog_instruction *vpi, GLuint * inst,
901 struct prog_src_register src[3])
902 {
903 unsigned long hw_op;
904
905 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
906
907 #if 1
908 hw_op = (src[0].File == PROGRAM_TEMPORARY
909 && src[1].File ==
910 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
911 R300_VPI_OUT_OP_MAD;
912
913 inst[0] =
914 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
915 t_dst_mask(vpi->DstReg.WriteMask),
916 t_dst_class(vpi->DstReg.File));
917 inst[1] = t_src(vp, &src[0]);
918 inst[2] = ONE_SRC_0;
919 inst[3] =
920 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
921 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
922 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
923 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
924 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
925 t_src_class(src[1].File),
926 (!src[1].
927 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
928 (src[1].RelAddr << 4);
929 #else
930 inst[0] =
931 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
932 t_dst_mask(vpi->DstReg.WriteMask),
933 t_dst_class(vpi->DstReg.File));
934
935 inst[1] = t_src(vp, &src[0]);
936 inst[2] =
937 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
938 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
939 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
940 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
941 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
942 t_src_class(src[1].File),
943 (!src[1].
944 NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
945 (src[1].RelAddr << 4);
946 inst[3] = 0;
947 #endif
948
949 return inst;
950 }
951
952 static GLuint *t_opcode_swz(struct r300_vertex_program *vp,
953 struct prog_instruction *vpi, GLuint * inst,
954 struct prog_src_register src[3])
955 {
956 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
957
958 #if 1
959 inst[0] =
960 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
961 t_dst_mask(vpi->DstReg.WriteMask),
962 t_dst_class(vpi->DstReg.File));
963 inst[1] = t_src(vp, &src[0]);
964 inst[2] = ZERO_SRC_0;
965 inst[3] = ZERO_SRC_0;
966 #else
967 hw_op =
968 (src[0].File ==
969 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
970 R300_VPI_OUT_OP_MAD;
971
972 inst[0] =
973 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
974 t_dst_mask(vpi->DstReg.WriteMask),
975 t_dst_class(vpi->DstReg.File));
976 inst[1] = t_src(vp, &src[0]);
977 inst[2] = ONE_SRC_0;
978 inst[3] = ZERO_SRC_0;
979 #endif
980
981 return inst;
982 }
983
984 static GLuint *t_opcode_xpd(struct r300_vertex_program *vp,
985 struct prog_instruction *vpi, GLuint * inst,
986 struct prog_src_register src[3], int *u_temp_i)
987 {
988 /* mul r0, r1.yzxw, r2.zxyw
989 mad r0, -r2.yzxw, r1.zxyw, r0
990 NOTE: might need MAD_2
991 */
992
993 inst[0] =
994 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, *u_temp_i,
995 t_dst_mask(vpi->DstReg.WriteMask),
996 VSF_OUT_CLASS_TMP);
997
998 inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
999 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
1000 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
1001 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
1002 t_src_class(src[0].File),
1003 src[0].
1004 NegateBase ? VSF_FLAG_ALL :
1005 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
1006
1007 inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
1008 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
1009 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
1010 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
1011 t_src_class(src[1].File),
1012 src[1].
1013 NegateBase ? VSF_FLAG_ALL :
1014 VSF_FLAG_NONE) | (src[1].RelAddr << 4);
1015
1016 inst[3] = ZERO_SRC_1;
1017 inst += 4;
1018 (*u_temp_i)--;
1019
1020 inst[0] =
1021 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
1022 t_dst_mask(vpi->DstReg.WriteMask),
1023 t_dst_class(vpi->DstReg.File));
1024
1025 inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
1026 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
1027 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
1028 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
1029 t_src_class(src[1].File),
1030 (!src[1].
1031 NegateBase) ? VSF_FLAG_ALL :
1032 VSF_FLAG_NONE) | (src[1].RelAddr << 4);
1033
1034 inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
1035 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
1036 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
1037 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
1038 t_src_class(src[0].File),
1039 src[0].
1040 NegateBase ? VSF_FLAG_ALL :
1041 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
1042
1043 inst[3] =
1044 MAKE_VSF_SOURCE(*u_temp_i + 1, VSF_IN_COMPONENT_X,
1045 VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
1046 VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
1047 VSF_FLAG_NONE);
1048
1049 return inst;
1050 }
1051
1052 static void t_inputs_outputs(struct r300_vertex_program *vp)
1053 {
1054 int i;
1055 int cur_reg = 0;
1056
1057 for (i = 0; i < VERT_ATTRIB_MAX; i++)
1058 vp->inputs[i] = -1;
1059
1060 for (i = 0; i < VERT_RESULT_MAX; i++)
1061 vp->outputs[i] = -1;
1062
1063 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
1064
1065 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
1066 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
1067 }
1068
1069 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
1070 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
1071 }
1072
1073 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
1074 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
1075 }
1076
1077 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
1078 vp->outputs[VERT_RESULT_COL1] =
1079 vp->outputs[VERT_RESULT_COL0] + 1;
1080 cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
1081 }
1082
1083 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
1084 vp->outputs[VERT_RESULT_BFC0] =
1085 vp->outputs[VERT_RESULT_COL0] + 2;
1086 cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
1087 }
1088
1089 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
1090 vp->outputs[VERT_RESULT_BFC1] =
1091 vp->outputs[VERT_RESULT_COL0] + 3;
1092 cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
1093 }
1094 #if 0
1095 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
1096 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
1097 }
1098 #endif
1099
1100 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
1101 if (vp->key.OutputsWritten & (1 << i)) {
1102 vp->outputs[i] = cur_reg++;
1103 }
1104 }
1105 }
1106
1107 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
1108 struct prog_instruction *vpi)
1109 {
1110 int i;
1111 GLuint *inst;
1112 unsigned long num_operands;
1113 /* Initial value should be last tmp reg that hw supports.
1114 Strangely enough r300 doesnt mind even though these would be out of range.
1115 Smart enough to realize that it doesnt need it? */
1116 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
1117 struct prog_src_register src[3];
1118
1119 vp->pos_end = 0; /* Not supported yet */
1120 vp->program.length = 0;
1121 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
1122 vp->translated = GL_TRUE;
1123 vp->native = GL_TRUE;
1124
1125 t_inputs_outputs(vp);
1126
1127 for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
1128 vpi++, inst += 4) {
1129
1130 FREE_TEMPS();
1131
1132 if (!valid_dst(vp, &vpi->DstReg)) {
1133 /* redirect result to unused temp */
1134 vpi->DstReg.File = PROGRAM_TEMPORARY;
1135 vpi->DstReg.Index = u_temp_i;
1136 }
1137
1138 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
1139
1140 /* copy the sources (src) from mesa into a local variable... is this needed? */
1141 for (i = 0; i < num_operands; i++) {
1142 src[i] = vpi->SrcReg[i];
1143 }
1144
1145 if (num_operands == 3) { /* TODO: scalars */
1146 if (CMP_SRCS(src[1], src[2])
1147 || CMP_SRCS(src[0], src[2])) {
1148 inst[0] =
1149 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
1150 u_temp_i, VSF_FLAG_ALL,
1151 VSF_OUT_CLASS_TMP);
1152
1153 inst[1] =
1154 MAKE_VSF_SOURCE(t_src_index
1155 (vp, &src[2]),
1156 SWIZZLE_X, SWIZZLE_Y,
1157 SWIZZLE_Z, SWIZZLE_W,
1158 t_src_class(src[2].
1159 File),
1160 VSF_FLAG_NONE) |
1161 (src[2].RelAddr << 4);
1162
1163 inst[2] = ZERO_SRC_2;
1164 inst[3] = ZERO_SRC_2;
1165 inst += 4;
1166
1167 src[2].File = PROGRAM_TEMPORARY;
1168 src[2].Index = u_temp_i;
1169 src[2].RelAddr = 0;
1170 u_temp_i--;
1171 }
1172 }
1173
1174 if (num_operands >= 2) {
1175 if (CMP_SRCS(src[1], src[0])) {
1176 inst[0] =
1177 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
1178 u_temp_i, VSF_FLAG_ALL,
1179 VSF_OUT_CLASS_TMP);
1180
1181 inst[1] =
1182 MAKE_VSF_SOURCE(t_src_index
1183 (vp, &src[0]),
1184 SWIZZLE_X, SWIZZLE_Y,
1185 SWIZZLE_Z, SWIZZLE_W,
1186 t_src_class(src[0].
1187 File),
1188 VSF_FLAG_NONE) |
1189 (src[0].RelAddr << 4);
1190
1191 inst[2] = ZERO_SRC_0;
1192 inst[3] = ZERO_SRC_0;
1193 inst += 4;
1194
1195 src[0].File = PROGRAM_TEMPORARY;
1196 src[0].Index = u_temp_i;
1197 src[0].RelAddr = 0;
1198 u_temp_i--;
1199 }
1200 }
1201
1202 switch (vpi->Opcode) {
1203 case OPCODE_ABS:
1204 inst = t_opcode_abs(vp, vpi, inst, src);
1205 break;
1206 case OPCODE_ADD:
1207 inst = t_opcode_add(vp, vpi, inst, src);
1208 break;
1209 case OPCODE_ARL:
1210 inst = t_opcode_arl(vp, vpi, inst, src);
1211 break;
1212 case OPCODE_DP3:
1213 inst = t_opcode_dp3(vp, vpi, inst, src);
1214 break;
1215 case OPCODE_DP4:
1216 inst = t_opcode_dp4(vp, vpi, inst, src);
1217 break;
1218 case OPCODE_DPH:
1219 inst = t_opcode_dph(vp, vpi, inst, src);
1220 break;
1221 case OPCODE_DST:
1222 inst = t_opcode_dst(vp, vpi, inst, src);
1223 break;
1224 case OPCODE_EX2:
1225 inst = t_opcode_ex2(vp, vpi, inst, src);
1226 break;
1227 case OPCODE_EXP:
1228 inst = t_opcode_exp(vp, vpi, inst, src);
1229 break;
1230 case OPCODE_FLR:
1231 inst =
1232 t_opcode_flr(vp, vpi, inst, src, /* FIXME */
1233 &u_temp_i);
1234 break;
1235 case OPCODE_FRC:
1236 inst = t_opcode_frc(vp, vpi, inst, src);
1237 break;
1238 case OPCODE_LG2:
1239 inst = t_opcode_lg2(vp, vpi, inst, src);
1240 break;
1241 case OPCODE_LIT:
1242 inst = t_opcode_lit(vp, vpi, inst, src);
1243 break;
1244 case OPCODE_LOG:
1245 inst = t_opcode_log(vp, vpi, inst, src);
1246 break;
1247 case OPCODE_MAD:
1248 inst = t_opcode_mad(vp, vpi, inst, src);
1249 break;
1250 case OPCODE_MAX:
1251 inst = t_opcode_max(vp, vpi, inst, src);
1252 break;
1253 case OPCODE_MIN:
1254 inst = t_opcode_min(vp, vpi, inst, src);
1255 break;
1256 case OPCODE_MOV:
1257 inst = t_opcode_mov(vp, vpi, inst, src);
1258 break;
1259 case OPCODE_MUL:
1260 inst = t_opcode_mul(vp, vpi, inst, src);
1261 break;
1262 case OPCODE_POW:
1263 inst = t_opcode_pow(vp, vpi, inst, src);
1264 break;
1265 case OPCODE_RCP:
1266 inst = t_opcode_rcp(vp, vpi, inst, src);
1267 break;
1268 case OPCODE_RSQ:
1269 inst = t_opcode_rsq(vp, vpi, inst, src);
1270 break;
1271 case OPCODE_SGE:
1272 inst = t_opcode_sge(vp, vpi, inst, src);
1273 break;
1274 case OPCODE_SLT:
1275 inst = t_opcode_slt(vp, vpi, inst, src);
1276 break;
1277 case OPCODE_SUB:
1278 inst = t_opcode_sub(vp, vpi, inst, src);
1279 break;
1280 case OPCODE_SWZ:
1281 inst = t_opcode_swz(vp, vpi, inst, src);
1282 break;
1283 case OPCODE_XPD:
1284 inst =
1285 t_opcode_xpd(vp, vpi, inst, src, /* FIXME */
1286 &u_temp_i);
1287 break;
1288 default:
1289 assert(0);
1290 break;
1291 }
1292 }
1293
1294 vp->program.length = (inst - vp->program.body.i);
1295 if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
1296 vp->program.length = 0;
1297 vp->native = GL_FALSE;
1298 }
1299 #if 0
1300 fprintf(stderr, "hw program:\n");
1301 for (i = 0; i < vp->program.length; i++)
1302 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1303 #endif
1304 }
1305
1306 static void position_invariant(struct gl_program *prog)
1307 {
1308 struct prog_instruction *vpi;
1309 struct gl_program_parameter_list *paramList;
1310 int i;
1311
1312 gl_state_index tokens[STATE_LENGTH] =
1313 { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1314
1315 /* tokens[4] = matrix modifier */
1316 #ifdef PREFER_DP4
1317 tokens[4] = 0; /* not transposed or inverted */
1318 #else
1319 tokens[4] = STATE_MATRIX_TRANSPOSE;
1320 #endif
1321 paramList = prog->Parameters;
1322
1323 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1324 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1325
1326 for (i = 0; i < 4; i++) {
1327 GLint idx;
1328 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1329 idx = _mesa_add_state_reference(paramList, tokens);
1330 #ifdef PREFER_DP4
1331 vpi[i].Opcode = OPCODE_DP4;
1332 vpi[i].StringPos = 0;
1333 vpi[i].Data = 0;
1334
1335 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1336 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1337 vpi[i].DstReg.WriteMask = 1 << i;
1338 vpi[i].DstReg.CondMask = COND_TR;
1339
1340 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1341 vpi[i].SrcReg[0].Index = idx;
1342 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1343
1344 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1345 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1346 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1347 #else
1348 if (i == 0)
1349 vpi[i].Opcode = OPCODE_MUL;
1350 else
1351 vpi[i].Opcode = OPCODE_MAD;
1352
1353 vpi[i].StringPos = 0;
1354 vpi[i].Data = 0;
1355
1356 if (i == 3)
1357 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1358 else
1359 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1360 vpi[i].DstReg.Index = 0;
1361 vpi[i].DstReg.WriteMask = 0xf;
1362 vpi[i].DstReg.CondMask = COND_TR;
1363
1364 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1365 vpi[i].SrcReg[0].Index = idx;
1366 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1367
1368 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1369 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1370 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1371
1372 if (i > 0) {
1373 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1374 vpi[i].SrcReg[2].Index = 0;
1375 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1376 }
1377 #endif
1378 }
1379
1380 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1381 prog->NumInstructions);
1382
1383 free(prog->Instructions);
1384
1385 prog->Instructions = vpi;
1386
1387 prog->NumInstructions += 4;
1388 vpi = &prog->Instructions[prog->NumInstructions - 1];
1389
1390 assert(vpi->Opcode == OPCODE_END);
1391 }
1392
1393 static void insert_wpos(struct r300_vertex_program *vp,
1394 struct gl_program *prog, GLuint temp_index)
1395 {
1396 struct prog_instruction *vpi;
1397 struct prog_instruction *vpi_insert;
1398 int i = 0;
1399
1400 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1401 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1402 /* all but END */
1403 _mesa_copy_instructions(vpi, prog->Instructions,
1404 prog->NumInstructions - 1);
1405 /* END */
1406 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1407 &prog->Instructions[prog->NumInstructions -
1408 1], 1);
1409 vpi_insert = &vpi[prog->NumInstructions - 1];
1410
1411 vpi_insert[i].Opcode = OPCODE_MOV;
1412
1413 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1414 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1415 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1416 vpi_insert[i].DstReg.CondMask = COND_TR;
1417
1418 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1419 vpi_insert[i].SrcReg[0].Index = temp_index;
1420 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1421 i++;
1422
1423 vpi_insert[i].Opcode = OPCODE_MOV;
1424
1425 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1426 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1427 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1428 vpi_insert[i].DstReg.CondMask = COND_TR;
1429
1430 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1431 vpi_insert[i].SrcReg[0].Index = temp_index;
1432 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1433 i++;
1434
1435 free(prog->Instructions);
1436
1437 prog->Instructions = vpi;
1438
1439 prog->NumInstructions += i;
1440 vpi = &prog->Instructions[prog->NumInstructions - 1];
1441
1442 assert(vpi->Opcode == OPCODE_END);
1443 }
1444
1445 static void pos_as_texcoord(struct r300_vertex_program *vp,
1446 struct gl_program *prog)
1447 {
1448 struct prog_instruction *vpi;
1449 GLuint tempregi = prog->NumTemporaries;
1450 /* should do something else if no temps left... */
1451 prog->NumTemporaries++;
1452
1453 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1454 if (vpi->DstReg.File == PROGRAM_OUTPUT
1455 && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1456 vpi->DstReg.File = PROGRAM_TEMPORARY;
1457 vpi->DstReg.Index = tempregi;
1458 }
1459 }
1460 insert_wpos(vp, prog, tempregi);
1461 }
1462
1463 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1464 *wanted_key, struct gl_vertex_program
1465 *mesa_vp, GLint wpos_idx)
1466 {
1467 struct r300_vertex_program *vp;
1468
1469 vp = _mesa_calloc(sizeof(*vp));
1470 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1471 vp->wpos_idx = wpos_idx;
1472
1473 if (mesa_vp->IsPositionInvariant) {
1474 position_invariant(&mesa_vp->Base);
1475 }
1476
1477 if (wpos_idx > -1) {
1478 pos_as_texcoord(vp, &mesa_vp->Base);
1479 }
1480
1481 assert(mesa_vp->Base.NumInstructions);
1482 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1483 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1484
1485 return vp;
1486 }
1487
1488 void r300SelectVertexShader(r300ContextPtr r300)
1489 {
1490 GLcontext *ctx = ctx = r300->radeon.glCtx;
1491 GLuint InputsRead;
1492 struct r300_vertex_program_key wanted_key = { 0 };
1493 GLint i;
1494 struct r300_vertex_program_cont *vpc;
1495 struct r300_vertex_program *vp;
1496 GLint wpos_idx;
1497
1498 vpc =
1499 (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1500 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1501
1502 wpos_idx = -1;
1503 if (InputsRead & FRAG_BIT_WPOS) {
1504 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1505 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1506 break;
1507
1508 if (i == ctx->Const.MaxTextureUnits) {
1509 fprintf(stderr, "\tno free texcoord found\n");
1510 _mesa_exit(-1);
1511 }
1512
1513 InputsRead |= (FRAG_BIT_TEX0 << i);
1514 wpos_idx = i;
1515 }
1516 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1517 wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
1518
1519 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1520
1521 if (InputsRead & FRAG_BIT_COL0) {
1522 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1523 }
1524
1525 if ((InputsRead & FRAG_BIT_COL1)) {
1526 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1527 }
1528
1529 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1530 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1531 wanted_key.OutputsWritten |=
1532 1 << (VERT_RESULT_TEX0 + i);
1533 }
1534 }
1535
1536 if (vpc->mesa_program.IsPositionInvariant) {
1537 /* we wan't position don't we ? */
1538 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1539 wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
1540 }
1541
1542 for (vp = vpc->progs; vp; vp = vp->next)
1543 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
1544 == 0) {
1545 r300->selected_vp = vp;
1546 return;
1547 }
1548 //_mesa_print_program(&vpc->mesa_program.Base);
1549
1550 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1551 vp->next = vpc->progs;
1552 vpc->progs = vp;
1553 r300->selected_vp = vp;
1554 }