Minor tweaks to help out at a driver level.
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.0
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
43
44
45 /**
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
48 */
49 void
50 _mesa_init_vp_registers(GLcontext *ctx)
51 {
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
56 VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
63 ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
64 }
65
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
68 * here - Karl
69 */
70 if (ctx->VertexProgram.Current->Parameters) {
71 /* Grab the state */
72 _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
73
74 /* And copy it into the program state */
75 for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
76 MEMCPY(ctx->VertexProgram.Parameters[i],
77 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
78 4*sizeof(GLfloat));
79 }
80 }
81 }
82
83
84
85 /**
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
88 */
89 static void
90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
91 {
92 GLuint i;
93 for (i = 0; i < 4; i++) {
94 registers[pos + i][0] = mat[0 + i];
95 registers[pos + i][1] = mat[4 + i];
96 registers[pos + i][2] = mat[8 + i];
97 registers[pos + i][3] = mat[12 + i];
98 }
99 }
100
101
102 /**
103 * As above, but transpose the matrix.
104 */
105 static void
106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
107 const GLfloat mat[16])
108 {
109 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
110 }
111
112
113 /**
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
116 */
117 void
118 _mesa_init_tracked_matrices(GLcontext *ctx)
119 {
120 GLuint i;
121
122 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
123 /* point 'mat' at source matrix */
124 GLmatrix *mat;
125 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
126 mat = ctx->ModelviewMatrixStack.Top;
127 }
128 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
129 mat = ctx->ProjectionMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
132 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
133 }
134 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
135 mat = ctx->ColorMatrixStack.Top;
136 }
137 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
138 /* XXX verify the combined matrix is up to date */
139 mat = &ctx->_ModelProjectMatrix;
140 }
141 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
142 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
143 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
144 ASSERT(n < MAX_PROGRAM_MATRICES);
145 mat = ctx->ProgramMatrixStack[n].Top;
146 }
147 else {
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
150 continue;
151 }
152
153 /* load the matrix */
154 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
155 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
156 }
157 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
158 _math_matrix_analyse(mat); /* update the inverse */
159 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
160 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
161 }
162 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
163 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
164 }
165 else {
166 assert(ctx->VertexProgram.TrackMatrixTransform[i]
167 == GL_INVERSE_TRANSPOSE_NV);
168 _math_matrix_analyse(mat); /* update the inverse */
169 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
170 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
171 }
172 }
173 }
174
175
176
177 /**
178 * For debugging. Dump the current vertex program machine registers.
179 */
180 void
181 _mesa_dump_vp_state( const struct vertex_program_state *state )
182 {
183 int i;
184 _mesa_printf("VertexIn:\n");
185 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
186 _mesa_printf("%d: %f %f %f %f ", i,
187 state->Inputs[i][0],
188 state->Inputs[i][1],
189 state->Inputs[i][2],
190 state->Inputs[i][3]);
191 }
192 _mesa_printf("\n");
193
194 _mesa_printf("VertexOut:\n");
195 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
196 _mesa_printf("%d: %f %f %f %f ", i,
197 state->Outputs[i][0],
198 state->Outputs[i][1],
199 state->Outputs[i][2],
200 state->Outputs[i][3]);
201 }
202 _mesa_printf("\n");
203
204 _mesa_printf("Registers:\n");
205 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
206 _mesa_printf("%d: %f %f %f %f ", i,
207 state->Temporaries[i][0],
208 state->Temporaries[i][1],
209 state->Temporaries[i][2],
210 state->Temporaries[i][3]);
211 }
212 _mesa_printf("\n");
213
214 _mesa_printf("Parameters:\n");
215 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
216 _mesa_printf("%d: %f %f %f %f ", i,
217 state->Parameters[i][0],
218 state->Parameters[i][1],
219 state->Parameters[i][2],
220 state->Parameters[i][3]);
221 }
222 _mesa_printf("\n");
223 }
224
225
226
227 /**
228 * Return a pointer to the 4-element float vector specified by the given
229 * source register.
230 */
231 static INLINE const GLfloat *
232 get_register_pointer( const struct vp_src_register *source,
233 const struct vertex_program_state *state )
234 {
235 if (source->RelAddr) {
236 const GLint reg = source->Index + state->AddressReg[0];
237 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
238 (source->File == PROGRAM_STATE_VAR) );
239 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
240 return zeroVec;
241 else
242 return state->Parameters[reg];
243 }
244 else {
245 switch (source->File) {
246 case PROGRAM_TEMPORARY:
247 return state->Temporaries[source->Index];
248 case PROGRAM_INPUT:
249 return state->Inputs[source->Index];
250 case PROGRAM_LOCAL_PARAM:
251 /* XXX fix */
252 return state->Temporaries[source->Index];
253 case PROGRAM_ENV_PARAM:
254 return state->Parameters[source->Index];
255 case PROGRAM_STATE_VAR:
256 return state->Parameters[source->Index];
257 default:
258 _mesa_problem(NULL,
259 "Bad source register file in fetch_vector4(vp)");
260 return NULL;
261 }
262 }
263 return NULL;
264 }
265
266
267 /**
268 * Fetch a 4-element float vector from the given source register.
269 * Apply swizzling and negating as needed.
270 */
271 static INLINE void
272 fetch_vector4( const struct vp_src_register *source,
273 const struct vertex_program_state *state,
274 GLfloat result[4] )
275 {
276 const GLfloat *src = get_register_pointer(source, state);
277
278 if (source->Negate) {
279 result[0] = -src[source->Swizzle[0]];
280 result[1] = -src[source->Swizzle[1]];
281 result[2] = -src[source->Swizzle[2]];
282 result[3] = -src[source->Swizzle[3]];
283 }
284 else {
285 result[0] = src[source->Swizzle[0]];
286 result[1] = src[source->Swizzle[1]];
287 result[2] = src[source->Swizzle[2]];
288 result[3] = src[source->Swizzle[3]];
289 }
290 }
291
292
293
294 /**
295 * As above, but only return result[0] element.
296 */
297 static INLINE void
298 fetch_vector1( const struct vp_src_register *source,
299 const struct vertex_program_state *state,
300 GLfloat result[4] )
301 {
302 const GLfloat *src = get_register_pointer(source, state);
303
304 if (source->Negate) {
305 result[0] = -src[source->Swizzle[0]];
306 }
307 else {
308 result[0] = src[source->Swizzle[0]];
309 }
310 }
311
312
313 /**
314 * Store 4 floats into a register.
315 */
316 static void
317 store_vector4( const struct vp_dst_register *dest,
318 struct vertex_program_state *state,
319 const GLfloat value[4] )
320 {
321 GLfloat *dst;
322 switch (dest->File) {
323 case PROGRAM_TEMPORARY:
324 dst = state->Temporaries[dest->Index];
325 break;
326 case PROGRAM_OUTPUT:
327 dst = state->Outputs[dest->Index];
328 break;
329 default:
330 _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
331 return;
332 }
333
334 if (dest->WriteMask[0])
335 dst[0] = value[0];
336 if (dest->WriteMask[1])
337 dst[1] = value[1];
338 if (dest->WriteMask[2])
339 dst[2] = value[2];
340 if (dest->WriteMask[3])
341 dst[3] = value[3];
342 }
343
344
/**
 * Set x to positive or negative infinity.
 *
 * x must be a GLfloat lvalue.  On IEEE/Win32 builds the bit pattern is
 * written directly; on VMS __MAXFLOAT stands in for infinity; elsewhere
 * HUGE_VAL is used.  Arguments and expansions are fully parenthesised so
 * the macros behave as ordinary expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the raw bits of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
360
361
362 /**
363 * Execute the given vertex program
364 */
365 void
366 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
367 {
368 struct vertex_program_state *state = &ctx->VertexProgram;
369 const struct vp_instruction *inst;
370
371 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
372
373 /* If the program is position invariant, multiply the input
374 * position and the MVP matrix and stick it into the output pos slot
375 */
376 if (ctx->VertexProgram.Current->IsPositionInvariant) {
377 TRANSFORM_POINT( ctx->VertexProgram.Outputs[0],
378 ctx->_ModelProjectMatrix.m,
379 ctx->VertexProgram.Inputs[0]);
380
381 /* XXX: This could go elsewhere */
382 ctx->VertexProgram.Current->OutputsWritten |= 0x1;
383 }
384
385 for (inst = program->Instructions; /*inst->Opcode != VP_OPCODE_END*/; inst++) {
386
387 if (ctx->VertexProgram.CallbackEnabled &&
388 ctx->VertexProgram.Callback) {
389 ctx->VertexProgram.CurrentPosition = inst->StringPos;
390 ctx->VertexProgram.Callback(program->Base.Target,
391 ctx->VertexProgram.CallbackData);
392 }
393
394 switch (inst->Opcode) {
395 case VP_OPCODE_MOV:
396 {
397 GLfloat t[4];
398 fetch_vector4( &inst->SrcReg[0], state, t );
399 store_vector4( &inst->DstReg, state, t );
400 }
401 break;
402 case VP_OPCODE_LIT:
403 {
404 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
405 GLfloat t[4], lit[4];
406 fetch_vector4( &inst->SrcReg[0], state, t );
407 if (t[3] < -(128.0F - epsilon))
408 t[3] = - (128.0F - epsilon);
409 else if (t[3] > 128.0F - epsilon)
410 t[3] = 128.0F - epsilon;
411 if (t[0] < 0.0)
412 t[0] = 0.0;
413 if (t[1] < 0.0)
414 t[1] = 0.0;
415 lit[0] = 1.0;
416 lit[1] = t[0];
417 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
418 lit[3] = 1.0;
419 store_vector4( &inst->DstReg, state, lit );
420 }
421 break;
422 case VP_OPCODE_RCP:
423 {
424 GLfloat t[4];
425 fetch_vector1( &inst->SrcReg[0], state, t );
426 if (t[0] != 1.0F)
427 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
428 t[1] = t[2] = t[3] = t[0];
429 store_vector4( &inst->DstReg, state, t );
430 }
431 break;
432 case VP_OPCODE_RSQ:
433 {
434 GLfloat t[4];
435 fetch_vector1( &inst->SrcReg[0], state, t );
436 t[0] = INV_SQRTF(FABSF(t[0]));
437 t[1] = t[2] = t[3] = t[0];
438 store_vector4( &inst->DstReg, state, t );
439 }
440 break;
441 case VP_OPCODE_EXP:
442 {
443 GLfloat t[4], q[4], floor_t0;
444 fetch_vector1( &inst->SrcReg[0], state, t );
445 floor_t0 = (float) floor(t[0]);
446 if (floor_t0 > FLT_MAX_EXP) {
447 SET_POS_INFINITY(q[0]);
448 SET_POS_INFINITY(q[2]);
449 }
450 else if (floor_t0 < FLT_MIN_EXP) {
451 q[0] = 0.0F;
452 q[2] = 0.0F;
453 }
454 else {
455 #ifdef USE_IEEE
456 GLint ii = (GLint) floor_t0;
457 ii = (ii < 23) + 0x3f800000;
458 SET_FLOAT_BITS(q[0], ii);
459 q[0] = *((GLfloat *) &ii);
460 #else
461 q[0] = (GLfloat) pow(2.0, floor_t0);
462 #endif
463 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
464 }
465 q[1] = t[0] - floor_t0;
466 q[3] = 1.0F;
467 store_vector4( &inst->DstReg, state, q );
468 }
469 break;
470 case VP_OPCODE_LOG:
471 {
472 GLfloat t[4], q[4], abs_t0;
473 fetch_vector1( &inst->SrcReg[0], state, t );
474 abs_t0 = (GLfloat) fabs(t[0]);
475 if (abs_t0 != 0.0F) {
476 /* Since we really can't handle infinite values on VMS
477 * like other OSes we'll use __MAXFLOAT to represent
478 * infinity. This may need some tweaking.
479 */
480 #ifdef VMS
481 if (abs_t0 == __MAXFLOAT)
482 #else
483 if (IS_INF_OR_NAN(abs_t0))
484 #endif
485 {
486 SET_POS_INFINITY(q[0]);
487 q[1] = 1.0F;
488 SET_POS_INFINITY(q[2]);
489 }
490 else {
491 int exponent;
492 double mantissa = frexp(t[0], &exponent);
493 q[0] = (GLfloat) (exponent - 1);
494 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
495 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
496 }
497 }
498 else {
499 SET_NEG_INFINITY(q[0]);
500 q[1] = 1.0F;
501 SET_NEG_INFINITY(q[2]);
502 }
503 q[3] = 1.0;
504 store_vector4( &inst->DstReg, state, q );
505 }
506 break;
507 case VP_OPCODE_MUL:
508 {
509 GLfloat t[4], u[4], prod[4];
510 fetch_vector4( &inst->SrcReg[0], state, t );
511 fetch_vector4( &inst->SrcReg[1], state, u );
512 prod[0] = t[0] * u[0];
513 prod[1] = t[1] * u[1];
514 prod[2] = t[2] * u[2];
515 prod[3] = t[3] * u[3];
516 store_vector4( &inst->DstReg, state, prod );
517 }
518 break;
519 case VP_OPCODE_ADD:
520 {
521 GLfloat t[4], u[4], sum[4];
522 fetch_vector4( &inst->SrcReg[0], state, t );
523 fetch_vector4( &inst->SrcReg[1], state, u );
524 sum[0] = t[0] + u[0];
525 sum[1] = t[1] + u[1];
526 sum[2] = t[2] + u[2];
527 sum[3] = t[3] + u[3];
528 store_vector4( &inst->DstReg, state, sum );
529 }
530 break;
531 case VP_OPCODE_DP3:
532 {
533 GLfloat t[4], u[4], dot[4];
534 fetch_vector4( &inst->SrcReg[0], state, t );
535 fetch_vector4( &inst->SrcReg[1], state, u );
536 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
537 dot[1] = dot[2] = dot[3] = dot[0];
538 store_vector4( &inst->DstReg, state, dot );
539 }
540 break;
541 case VP_OPCODE_DP4:
542 {
543 GLfloat t[4], u[4], dot[4];
544 fetch_vector4( &inst->SrcReg[0], state, t );
545 fetch_vector4( &inst->SrcReg[1], state, u );
546 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
547 dot[1] = dot[2] = dot[3] = dot[0];
548 store_vector4( &inst->DstReg, state, dot );
549 }
550 break;
551 case VP_OPCODE_DST:
552 {
553 GLfloat t[4], u[4], dst[4];
554 fetch_vector4( &inst->SrcReg[0], state, t );
555 fetch_vector4( &inst->SrcReg[1], state, u );
556 dst[0] = 1.0F;
557 dst[1] = t[1] * u[1];
558 dst[2] = t[2];
559 dst[3] = u[3];
560 store_vector4( &inst->DstReg, state, dst );
561 }
562 break;
563 case VP_OPCODE_MIN:
564 {
565 GLfloat t[4], u[4], min[4];
566 fetch_vector4( &inst->SrcReg[0], state, t );
567 fetch_vector4( &inst->SrcReg[1], state, u );
568 min[0] = (t[0] < u[0]) ? t[0] : u[0];
569 min[1] = (t[1] < u[1]) ? t[1] : u[1];
570 min[2] = (t[2] < u[2]) ? t[2] : u[2];
571 min[3] = (t[3] < u[3]) ? t[3] : u[3];
572 store_vector4( &inst->DstReg, state, min );
573 }
574 break;
575 case VP_OPCODE_MAX:
576 {
577 GLfloat t[4], u[4], max[4];
578 fetch_vector4( &inst->SrcReg[0], state, t );
579 fetch_vector4( &inst->SrcReg[1], state, u );
580 max[0] = (t[0] > u[0]) ? t[0] : u[0];
581 max[1] = (t[1] > u[1]) ? t[1] : u[1];
582 max[2] = (t[2] > u[2]) ? t[2] : u[2];
583 max[3] = (t[3] > u[3]) ? t[3] : u[3];
584 store_vector4( &inst->DstReg, state, max );
585 }
586 break;
587 case VP_OPCODE_SLT:
588 {
589 GLfloat t[4], u[4], slt[4];
590 fetch_vector4( &inst->SrcReg[0], state, t );
591 fetch_vector4( &inst->SrcReg[1], state, u );
592 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
593 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
594 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
595 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
596 store_vector4( &inst->DstReg, state, slt );
597 }
598 break;
599 case VP_OPCODE_SGE:
600 {
601 GLfloat t[4], u[4], sge[4];
602 fetch_vector4( &inst->SrcReg[0], state, t );
603 fetch_vector4( &inst->SrcReg[1], state, u );
604 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
605 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
606 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
607 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
608 store_vector4( &inst->DstReg, state, sge );
609 }
610 break;
611 case VP_OPCODE_MAD:
612 {
613 GLfloat t[4], u[4], v[4], sum[4];
614 fetch_vector4( &inst->SrcReg[0], state, t );
615 fetch_vector4( &inst->SrcReg[1], state, u );
616 fetch_vector4( &inst->SrcReg[2], state, v );
617 sum[0] = t[0] * u[0] + v[0];
618 sum[1] = t[1] * u[1] + v[1];
619 sum[2] = t[2] * u[2] + v[2];
620 sum[3] = t[3] * u[3] + v[3];
621 store_vector4( &inst->DstReg, state, sum );
622 }
623 break;
624 case VP_OPCODE_ARL:
625 {
626 GLfloat t[4];
627 fetch_vector4( &inst->SrcReg[0], state, t );
628 state->AddressReg[0] = (GLint) floor(t[0]);
629 }
630 break;
631 case VP_OPCODE_DPH:
632 {
633 GLfloat t[4], u[4], dot[4];
634 fetch_vector4( &inst->SrcReg[0], state, t );
635 fetch_vector4( &inst->SrcReg[1], state, u );
636 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
637 dot[1] = dot[2] = dot[3] = dot[0];
638 store_vector4( &inst->DstReg, state, dot );
639 }
640 break;
641 case VP_OPCODE_RCC:
642 {
643 GLfloat t[4], u;
644 fetch_vector1( &inst->SrcReg[0], state, t );
645 if (t[0] == 1.0F)
646 u = 1.0F;
647 else
648 u = 1.0F / t[0];
649 if (u > 0.0F) {
650 if (u > 1.884467e+019F) {
651 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
652 }
653 else if (u < 5.42101e-020F) {
654 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
655 }
656 }
657 else {
658 if (u < -1.884467e+019F) {
659 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
660 }
661 else if (u > -5.42101e-020F) {
662 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
663 }
664 }
665 t[0] = t[1] = t[2] = t[3] = u;
666 store_vector4( &inst->DstReg, state, t );
667 }
668 break;
669 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
670 {
671 GLfloat t[4], u[4], sum[4];
672 fetch_vector4( &inst->SrcReg[0], state, t );
673 fetch_vector4( &inst->SrcReg[1], state, u );
674 sum[0] = t[0] - u[0];
675 sum[1] = t[1] - u[1];
676 sum[2] = t[2] - u[2];
677 sum[3] = t[3] - u[3];
678 store_vector4( &inst->DstReg, state, sum );
679 }
680 break;
681 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
682 {
683 GLfloat t[4];
684 fetch_vector4( &inst->SrcReg[0], state, t );
685 if (t[0] < 0.0) t[0] = -t[0];
686 if (t[1] < 0.0) t[1] = -t[1];
687 if (t[2] < 0.0) t[2] = -t[2];
688 if (t[3] < 0.0) t[3] = -t[3];
689 store_vector4( &inst->DstReg, state, t );
690 }
691 break;
692 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
693 {
694 GLfloat t[4];
695 fetch_vector4( &inst->SrcReg[0], state, t );
696 t[0] = FLOORF(t[0]);
697 t[1] = FLOORF(t[1]);
698 t[2] = FLOORF(t[2]);
699 t[3] = FLOORF(t[3]);
700 store_vector4( &inst->DstReg, state, t );
701 }
702 break;
703 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
704 {
705 GLfloat t[4];
706 fetch_vector4( &inst->SrcReg[0], state, t );
707 t[0] = t[0] - FLOORF(t[0]);
708 t[1] = t[1] - FLOORF(t[1]);
709 t[2] = t[2] - FLOORF(t[2]);
710 t[3] = t[3] - FLOORF(t[3]);
711 store_vector4( &inst->DstReg, state, t );
712 }
713 break;
714 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
715 {
716 GLfloat t[4];
717 fetch_vector1( &inst->SrcReg[0], state, t );
718 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
719 store_vector4( &inst->DstReg, state, t );
720 }
721 break;
722 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
723 {
724 GLfloat t[4];
725 fetch_vector1( &inst->SrcReg[0], state, t );
726 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
727 store_vector4( &inst->DstReg, state, t );
728 }
729 break;
730 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
731 {
732 GLfloat t[4], u[4];
733 fetch_vector1( &inst->SrcReg[0], state, t );
734 fetch_vector1( &inst->SrcReg[1], state, u );
735 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
736 store_vector4( &inst->DstReg, state, t );
737 }
738 break;
739 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
740 {
741 GLfloat t[4], u[4], cross[4];
742 fetch_vector4( &inst->SrcReg[0], state, t );
743 fetch_vector4( &inst->SrcReg[1], state, u );
744 cross[0] = t[1] * u[2] - t[2] * u[1];
745 cross[1] = t[2] * u[0] - t[0] * u[2];
746 cross[2] = t[0] * u[1] - t[1] * u[0];
747 store_vector4( &inst->DstReg, state, cross );
748 }
749 break;
750 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
751 {
752 const struct vp_src_register *source = &inst->SrcReg[0];
753 const GLfloat *src = get_register_pointer(source, state);
754 GLfloat result[4];
755 GLuint i;
756
757 /* do extended swizzling here */
758 for (i = 0; i < 3; i++) {
759 if (source->Swizzle[i] == SWIZZLE_ZERO)
760 result[i] = 0.0;
761 else if (source->Swizzle[i] == SWIZZLE_ONE)
762 result[i] = -1.0;
763 else
764 result[i] = -src[source->Swizzle[i]];
765 if (source->Negate)
766 result[i] = -result[i];
767 }
768 store_vector4( &inst->DstReg, state, result );
769 }
770 break;
771
772 case VP_OPCODE_END:
773 ctx->_CurrentProgram = 0;
774 return;
775 default:
776 /* bad instruction opcode */
777 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
778 ctx->_CurrentProgram = 0;
779 return;
780 } /* switch */
781 } /* for */
782
783 ctx->_CurrentProgram = 0;
784 }
785
786
787
788 /**
789 Thoughts on vertex program optimization:
790
791 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
792 assembly code. That will probably be a lot of work.
793
794 Another approach might be to replace the vp_instruction->Opcode field with
795 a pointer to a specialized C function which executes the instruction.
796 In particular we can write functions which skip swizzling, negating,
797 masking, relative addressing, etc. when they're not needed.
798
799 For example:
800
801 void simple_add( struct vp_instruction *inst )
802 {
803 GLfloat *sum = machine->Registers[inst->DstReg.Register];
804 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
805 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
806 sum[0] = a[0] + b[0];
807 sum[1] = a[1] + b[1];
808 sum[2] = a[2] + b[2];
809 sum[3] = a[3] + b[3];
810 }
811
812 */
813
814 /*
815
816 KW:
817
818 A first step would be to 'vectorize' the programs in the same way as
819 the normal transformation code in the tnl module. Thus each opcode
820 takes zero or more input vectors (registers) and produces one or more
821 output vectors.
822
823 These operations would initially be coded in C, with machine-specific
824 assembly following, as is currently the case for matrix
825 transformations in the math/ directory. The preprocessing scheme for
826 selecting simpler operations Brian describes above would also work
827 here.
828
829 This should give reasonable performance without excessive effort.
830
831 */