- handle IsPositionInvariant
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
43
44
45 /**
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
48 */
49 void
50 _mesa_init_vp_registers(GLcontext *ctx)
51 {
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
56 VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
63 ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
64 }
65
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
68 * here - Karl
69 */
70 if (ctx->VertexProgram.Current->Parameters) {
71 /* Grab the state */
72 _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
73
74 /* And copy it into the program state */
75 for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
76 MEMCPY(ctx->VertexProgram.Parameters[i],
77 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
78 4*sizeof(GLfloat));
79 }
80 }
81 }
82
83
84
85 /**
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
88 */
89 static void
90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
91 {
92 GLuint i;
93 for (i = 0; i < 4; i++) {
94 registers[pos + i][0] = mat[0 + i];
95 registers[pos + i][1] = mat[4 + i];
96 registers[pos + i][2] = mat[8 + i];
97 registers[pos + i][3] = mat[12 + i];
98 }
99 }
100
101
102 /**
103 * As above, but transpose the matrix.
104 */
105 static void
106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
107 const GLfloat mat[16])
108 {
109 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
110 }
111
112
113 /**
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
116 */
117 void
118 _mesa_init_tracked_matrices(GLcontext *ctx)
119 {
120 GLuint i;
121
122 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
123 /* point 'mat' at source matrix */
124 GLmatrix *mat;
125 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
126 mat = ctx->ModelviewMatrixStack.Top;
127 }
128 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
129 mat = ctx->ProjectionMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
132 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
133 }
134 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
135 mat = ctx->ColorMatrixStack.Top;
136 }
137 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
138 /* XXX verify the combined matrix is up to date */
139 mat = &ctx->_ModelProjectMatrix;
140 }
141 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
142 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
143 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
144 ASSERT(n < MAX_PROGRAM_MATRICES);
145 mat = ctx->ProgramMatrixStack[n].Top;
146 }
147 else {
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
150 continue;
151 }
152
153 /* load the matrix */
154 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
155 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
156 }
157 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
158 _math_matrix_analyse(mat); /* update the inverse */
159 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
160 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
161 }
162 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
163 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
164 }
165 else {
166 assert(ctx->VertexProgram.TrackMatrixTransform[i]
167 == GL_INVERSE_TRANSPOSE_NV);
168 _math_matrix_analyse(mat); /* update the inverse */
169 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
170 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
171 }
172 }
173 }
174
175
176
177 /**
178 * For debugging. Dump the current vertex program machine registers.
179 */
180 void
181 _mesa_dump_vp_state( const struct vertex_program_state *state )
182 {
183 int i;
184 _mesa_printf("VertexIn:\n");
185 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
186 _mesa_printf("%d: %f %f %f %f ", i,
187 state->Inputs[i][0],
188 state->Inputs[i][1],
189 state->Inputs[i][2],
190 state->Inputs[i][3]);
191 }
192 _mesa_printf("\n");
193
194 _mesa_printf("VertexOut:\n");
195 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
196 _mesa_printf("%d: %f %f %f %f ", i,
197 state->Outputs[i][0],
198 state->Outputs[i][1],
199 state->Outputs[i][2],
200 state->Outputs[i][3]);
201 }
202 _mesa_printf("\n");
203
204 _mesa_printf("Registers:\n");
205 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
206 _mesa_printf("%d: %f %f %f %f ", i,
207 state->Temporaries[i][0],
208 state->Temporaries[i][1],
209 state->Temporaries[i][2],
210 state->Temporaries[i][3]);
211 }
212 _mesa_printf("\n");
213
214 _mesa_printf("Parameters:\n");
215 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
216 _mesa_printf("%d: %f %f %f %f ", i,
217 state->Parameters[i][0],
218 state->Parameters[i][1],
219 state->Parameters[i][2],
220 state->Parameters[i][3]);
221 }
222 _mesa_printf("\n");
223 }
224
225
226
227 /**
228 * Return a pointer to the 4-element float vector specified by the given
229 * source register.
230 */
231 static INLINE const GLfloat *
232 get_register_pointer( const struct vp_src_register *source,
233 const struct vertex_program_state *state )
234 {
235 if (source->RelAddr) {
236 const GLint reg = source->Index + state->AddressReg[0];
237 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
238 (source->File == PROGRAM_STATE_VAR) );
239 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
240 return zeroVec;
241 else
242 return state->Parameters[reg];
243 }
244 else {
245 switch (source->File) {
246 case PROGRAM_TEMPORARY:
247 return state->Temporaries[source->Index];
248 case PROGRAM_INPUT:
249 return state->Inputs[source->Index];
250 case PROGRAM_LOCAL_PARAM:
251 /* XXX fix */
252 return state->Temporaries[source->Index];
253 case PROGRAM_ENV_PARAM:
254 return state->Parameters[source->Index];
255 case PROGRAM_STATE_VAR:
256 return state->Parameters[source->Index];
257 default:
258 _mesa_problem(NULL,
259 "Bad source register file in fetch_vector4(vp)");
260 return NULL;
261 }
262 }
263 return NULL;
264 }
265
266
267 /**
268 * Fetch a 4-element float vector from the given source register.
269 * Apply swizzling and negating as needed.
270 */
271 static INLINE void
272 fetch_vector4( const struct vp_src_register *source,
273 const struct vertex_program_state *state,
274 GLfloat result[4] )
275 {
276 const GLfloat *src = get_register_pointer(source, state);
277
278 if (source->Negate) {
279 result[0] = -src[source->Swizzle[0]];
280 result[1] = -src[source->Swizzle[1]];
281 result[2] = -src[source->Swizzle[2]];
282 result[3] = -src[source->Swizzle[3]];
283 }
284 else {
285 result[0] = src[source->Swizzle[0]];
286 result[1] = src[source->Swizzle[1]];
287 result[2] = src[source->Swizzle[2]];
288 result[3] = src[source->Swizzle[3]];
289 }
290 }
291
292
293
294 /**
295 * As above, but only return result[0] element.
296 */
297 static INLINE void
298 fetch_vector1( const struct vp_src_register *source,
299 const struct vertex_program_state *state,
300 GLfloat result[4] )
301 {
302 const GLfloat *src = get_register_pointer(source, state);
303
304 if (source->Negate) {
305 result[0] = -src[source->Swizzle[0]];
306 }
307 else {
308 result[0] = src[source->Swizzle[0]];
309 }
310 }
311
312
313 /**
314 * Store 4 floats into a register.
315 */
316 static void
317 store_vector4( const struct vp_dst_register *dest,
318 struct vertex_program_state *state,
319 const GLfloat value[4] )
320 {
321 GLfloat *dst;
322 switch (dest->File) {
323 case PROGRAM_TEMPORARY:
324 dst = state->Temporaries[dest->Index];
325 break;
326 case PROGRAM_OUTPUT:
327 dst = state->Outputs[dest->Index];
328 break;
329 default:
330 _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
331 return;
332 }
333
334 if (dest->WriteMask[0])
335 dst[0] = value[0];
336 if (dest->WriteMask[1])
337 dst[1] = value[1];
338 if (dest->WriteMask[2])
339 dst[2] = value[2];
340 if (dest->WriteMask[3])
341 dst[3] = value[3];
342 }
343
344
/**
 * Set x to positive or negative infinity.
 *
 * x must be a GLfloat lvalue.  Depending on the platform the value is
 * produced by writing the IEEE single-precision bit pattern directly,
 * by using __MAXFLOAT as a stand-in (VMS), or by converting HUGE_VAL.
 *
 * The macro argument is parenthesized and every expansion is a single
 * parenthesized expression, so the macros behave the same regardless of
 * the expression passed in or the context they are used in.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bits of the float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
360
361
362 /**
363 * Execute the given vertex program
364 */
365 void
366 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
367 {
368 struct vertex_program_state *state = &ctx->VertexProgram;
369 const struct vp_instruction *inst;
370
371 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
372
373 /* If the program is position invariant, multiply the input
374 * position and the MVP matrix and stick it into the output pos slot
375 */
376 if (ctx->VertexProgram.Current->IsPositionInvariant) {
377 TRANSFORM_POINT( ctx->VertexProgram.Outputs[0],
378 ctx->_ModelProjectMatrix.m,
379 ctx->VertexProgram.Inputs[0]);
380
381 /* XXX: This could go elsewhere */
382 ctx->VertexProgram.Current->OutputsWritten |= 0x1;
383 }
384
385
386
387 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
388
389 if (ctx->VertexProgram.CallbackEnabled &&
390 ctx->VertexProgram.Callback) {
391 ctx->VertexProgram.CurrentPosition = inst->StringPos;
392 ctx->VertexProgram.Callback(program->Base.Target,
393 ctx->VertexProgram.CallbackData);
394 }
395
396 switch (inst->Opcode) {
397 case VP_OPCODE_MOV:
398 {
399 GLfloat t[4];
400 fetch_vector4( &inst->SrcReg[0], state, t );
401 store_vector4( &inst->DstReg, state, t );
402 }
403 break;
404 case VP_OPCODE_LIT:
405 {
406 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
407 GLfloat t[4], lit[4];
408 fetch_vector4( &inst->SrcReg[0], state, t );
409 if (t[3] < -(128.0F - epsilon))
410 t[3] = - (128.0F - epsilon);
411 else if (t[3] > 128.0F - epsilon)
412 t[3] = 128.0F - epsilon;
413 if (t[0] < 0.0)
414 t[0] = 0.0;
415 if (t[1] < 0.0)
416 t[1] = 0.0;
417 lit[0] = 1.0;
418 lit[1] = t[0];
419 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
420 lit[3] = 1.0;
421 store_vector4( &inst->DstReg, state, lit );
422 }
423 break;
424 case VP_OPCODE_RCP:
425 {
426 GLfloat t[4];
427 fetch_vector1( &inst->SrcReg[0], state, t );
428 if (t[0] != 1.0F)
429 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
430 t[1] = t[2] = t[3] = t[0];
431 store_vector4( &inst->DstReg, state, t );
432 }
433 break;
434 case VP_OPCODE_RSQ:
435 {
436 GLfloat t[4];
437 fetch_vector1( &inst->SrcReg[0], state, t );
438 t[0] = INV_SQRTF(FABSF(t[0]));
439 t[1] = t[2] = t[3] = t[0];
440 store_vector4( &inst->DstReg, state, t );
441 }
442 break;
443 case VP_OPCODE_EXP:
444 {
445 GLfloat t[4], q[4], floor_t0;
446 fetch_vector1( &inst->SrcReg[0], state, t );
447 floor_t0 = (float) floor(t[0]);
448 if (floor_t0 > FLT_MAX_EXP) {
449 SET_POS_INFINITY(q[0]);
450 SET_POS_INFINITY(q[2]);
451 }
452 else if (floor_t0 < FLT_MIN_EXP) {
453 q[0] = 0.0F;
454 q[2] = 0.0F;
455 }
456 else {
457 #ifdef USE_IEEE
458 GLint ii = (GLint) floor_t0;
459 ii = (ii < 23) + 0x3f800000;
460 SET_FLOAT_BITS(q[0], ii);
461 q[0] = *((GLfloat *) &ii);
462 #else
463 q[0] = (GLfloat) pow(2.0, floor_t0);
464 #endif
465 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
466 }
467 q[1] = t[0] - floor_t0;
468 q[3] = 1.0F;
469 store_vector4( &inst->DstReg, state, q );
470 }
471 break;
472 case VP_OPCODE_LOG:
473 {
474 GLfloat t[4], q[4], abs_t0;
475 fetch_vector1( &inst->SrcReg[0], state, t );
476 abs_t0 = (GLfloat) fabs(t[0]);
477 if (abs_t0 != 0.0F) {
478 /* Since we really can't handle infinite values on VMS
479 * like other OSes we'll use __MAXFLOAT to represent
480 * infinity. This may need some tweaking.
481 */
482 #ifdef VMS
483 if (abs_t0 == __MAXFLOAT)
484 #else
485 if (IS_INF_OR_NAN(abs_t0))
486 #endif
487 {
488 SET_POS_INFINITY(q[0]);
489 q[1] = 1.0F;
490 SET_POS_INFINITY(q[2]);
491 }
492 else {
493 int exponent;
494 double mantissa = frexp(t[0], &exponent);
495 q[0] = (GLfloat) (exponent - 1);
496 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
497 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
498 }
499 }
500 else {
501 SET_NEG_INFINITY(q[0]);
502 q[1] = 1.0F;
503 SET_NEG_INFINITY(q[2]);
504 }
505 q[3] = 1.0;
506 store_vector4( &inst->DstReg, state, q );
507 }
508 break;
509 case VP_OPCODE_MUL:
510 {
511 GLfloat t[4], u[4], prod[4];
512 fetch_vector4( &inst->SrcReg[0], state, t );
513 fetch_vector4( &inst->SrcReg[1], state, u );
514 prod[0] = t[0] * u[0];
515 prod[1] = t[1] * u[1];
516 prod[2] = t[2] * u[2];
517 prod[3] = t[3] * u[3];
518 store_vector4( &inst->DstReg, state, prod );
519 }
520 break;
521 case VP_OPCODE_ADD:
522 {
523 GLfloat t[4], u[4], sum[4];
524 fetch_vector4( &inst->SrcReg[0], state, t );
525 fetch_vector4( &inst->SrcReg[1], state, u );
526 sum[0] = t[0] + u[0];
527 sum[1] = t[1] + u[1];
528 sum[2] = t[2] + u[2];
529 sum[3] = t[3] + u[3];
530 store_vector4( &inst->DstReg, state, sum );
531 }
532 break;
533 case VP_OPCODE_DP3:
534 {
535 GLfloat t[4], u[4], dot[4];
536 fetch_vector4( &inst->SrcReg[0], state, t );
537 fetch_vector4( &inst->SrcReg[1], state, u );
538 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
539 dot[1] = dot[2] = dot[3] = dot[0];
540 store_vector4( &inst->DstReg, state, dot );
541 }
542 break;
543 case VP_OPCODE_DP4:
544 {
545 GLfloat t[4], u[4], dot[4];
546 fetch_vector4( &inst->SrcReg[0], state, t );
547 fetch_vector4( &inst->SrcReg[1], state, u );
548 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
549 dot[1] = dot[2] = dot[3] = dot[0];
550 store_vector4( &inst->DstReg, state, dot );
551 }
552 break;
553 case VP_OPCODE_DST:
554 {
555 GLfloat t[4], u[4], dst[4];
556 fetch_vector4( &inst->SrcReg[0], state, t );
557 fetch_vector4( &inst->SrcReg[1], state, u );
558 dst[0] = 1.0F;
559 dst[1] = t[1] * u[1];
560 dst[2] = t[2];
561 dst[3] = u[3];
562 store_vector4( &inst->DstReg, state, dst );
563 }
564 break;
565 case VP_OPCODE_MIN:
566 {
567 GLfloat t[4], u[4], min[4];
568 fetch_vector4( &inst->SrcReg[0], state, t );
569 fetch_vector4( &inst->SrcReg[1], state, u );
570 min[0] = (t[0] < u[0]) ? t[0] : u[0];
571 min[1] = (t[1] < u[1]) ? t[1] : u[1];
572 min[2] = (t[2] < u[2]) ? t[2] : u[2];
573 min[3] = (t[3] < u[3]) ? t[3] : u[3];
574 store_vector4( &inst->DstReg, state, min );
575 }
576 break;
577 case VP_OPCODE_MAX:
578 {
579 GLfloat t[4], u[4], max[4];
580 fetch_vector4( &inst->SrcReg[0], state, t );
581 fetch_vector4( &inst->SrcReg[1], state, u );
582 max[0] = (t[0] > u[0]) ? t[0] : u[0];
583 max[1] = (t[1] > u[1]) ? t[1] : u[1];
584 max[2] = (t[2] > u[2]) ? t[2] : u[2];
585 max[3] = (t[3] > u[3]) ? t[3] : u[3];
586 store_vector4( &inst->DstReg, state, max );
587 }
588 break;
589 case VP_OPCODE_SLT:
590 {
591 GLfloat t[4], u[4], slt[4];
592 fetch_vector4( &inst->SrcReg[0], state, t );
593 fetch_vector4( &inst->SrcReg[1], state, u );
594 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
595 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
596 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
597 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
598 store_vector4( &inst->DstReg, state, slt );
599 }
600 break;
601 case VP_OPCODE_SGE:
602 {
603 GLfloat t[4], u[4], sge[4];
604 fetch_vector4( &inst->SrcReg[0], state, t );
605 fetch_vector4( &inst->SrcReg[1], state, u );
606 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
607 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
608 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
609 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
610 store_vector4( &inst->DstReg, state, sge );
611 }
612 break;
613 case VP_OPCODE_MAD:
614 {
615 GLfloat t[4], u[4], v[4], sum[4];
616 fetch_vector4( &inst->SrcReg[0], state, t );
617 fetch_vector4( &inst->SrcReg[1], state, u );
618 fetch_vector4( &inst->SrcReg[2], state, v );
619 sum[0] = t[0] * u[0] + v[0];
620 sum[1] = t[1] * u[1] + v[1];
621 sum[2] = t[2] * u[2] + v[2];
622 sum[3] = t[3] * u[3] + v[3];
623 store_vector4( &inst->DstReg, state, sum );
624 }
625 break;
626 case VP_OPCODE_ARL:
627 {
628 GLfloat t[4];
629 fetch_vector4( &inst->SrcReg[0], state, t );
630 state->AddressReg[0] = (GLint) floor(t[0]);
631 }
632 break;
633 case VP_OPCODE_DPH:
634 {
635 GLfloat t[4], u[4], dot[4];
636 fetch_vector4( &inst->SrcReg[0], state, t );
637 fetch_vector4( &inst->SrcReg[1], state, u );
638 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
639 dot[1] = dot[2] = dot[3] = dot[0];
640 store_vector4( &inst->DstReg, state, dot );
641 }
642 break;
643 case VP_OPCODE_RCC:
644 {
645 GLfloat t[4], u;
646 fetch_vector1( &inst->SrcReg[0], state, t );
647 if (t[0] == 1.0F)
648 u = 1.0F;
649 else
650 u = 1.0F / t[0];
651 if (u > 0.0F) {
652 if (u > 1.884467e+019F) {
653 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
654 }
655 else if (u < 5.42101e-020F) {
656 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
657 }
658 }
659 else {
660 if (u < -1.884467e+019F) {
661 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
662 }
663 else if (u > -5.42101e-020F) {
664 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
665 }
666 }
667 t[0] = t[1] = t[2] = t[3] = u;
668 store_vector4( &inst->DstReg, state, t );
669 }
670 break;
671 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
672 {
673 GLfloat t[4], u[4], sum[4];
674 fetch_vector4( &inst->SrcReg[0], state, t );
675 fetch_vector4( &inst->SrcReg[1], state, u );
676 sum[0] = t[0] - u[0];
677 sum[1] = t[1] - u[1];
678 sum[2] = t[2] - u[2];
679 sum[3] = t[3] - u[3];
680 store_vector4( &inst->DstReg, state, sum );
681 }
682 break;
683 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
684 {
685 GLfloat t[4];
686 fetch_vector4( &inst->SrcReg[0], state, t );
687 if (t[0] < 0.0) t[0] = -t[0];
688 if (t[1] < 0.0) t[1] = -t[1];
689 if (t[2] < 0.0) t[2] = -t[2];
690 if (t[3] < 0.0) t[3] = -t[3];
691 store_vector4( &inst->DstReg, state, t );
692 }
693 break;
694 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
695 {
696 GLfloat t[4];
697 fetch_vector4( &inst->SrcReg[0], state, t );
698 t[0] = FLOORF(t[0]);
699 t[1] = FLOORF(t[1]);
700 t[2] = FLOORF(t[2]);
701 t[3] = FLOORF(t[3]);
702 store_vector4( &inst->DstReg, state, t );
703 }
704 break;
705 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
706 {
707 GLfloat t[4];
708 fetch_vector4( &inst->SrcReg[0], state, t );
709 t[0] = t[0] - FLOORF(t[0]);
710 t[1] = t[1] - FLOORF(t[1]);
711 t[2] = t[2] - FLOORF(t[2]);
712 t[3] = t[3] - FLOORF(t[3]);
713 store_vector4( &inst->DstReg, state, t );
714 }
715 break;
716 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
717 {
718 GLfloat t[4];
719 fetch_vector1( &inst->SrcReg[0], state, t );
720 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
721 store_vector4( &inst->DstReg, state, t );
722 }
723 break;
724 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
725 {
726 GLfloat t[4];
727 fetch_vector1( &inst->SrcReg[0], state, t );
728 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
729 store_vector4( &inst->DstReg, state, t );
730 }
731 break;
732 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
733 {
734 GLfloat t[4], u[4];
735 fetch_vector1( &inst->SrcReg[0], state, t );
736 fetch_vector1( &inst->SrcReg[1], state, u );
737 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
738 store_vector4( &inst->DstReg, state, t );
739 }
740 break;
741 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
742 {
743 GLfloat t[4], u[4], cross[4];
744 fetch_vector4( &inst->SrcReg[0], state, t );
745 fetch_vector4( &inst->SrcReg[1], state, u );
746 cross[0] = t[1] * u[2] - t[2] * u[1];
747 cross[1] = t[2] * u[0] - t[0] * u[2];
748 cross[2] = t[0] * u[1] - t[1] * u[0];
749 store_vector4( &inst->DstReg, state, cross );
750 }
751 break;
752 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
753 {
754 const struct vp_src_register *source = &inst->SrcReg[0];
755 const GLfloat *src = get_register_pointer(source, state);
756 GLfloat result[4];
757 GLuint i;
758
759 /* do extended swizzling here */
760 for (i = 0; i < 3; i++) {
761 if (source->Swizzle[i] == SWIZZLE_ZERO)
762 result[i] = 0.0;
763 else if (source->Swizzle[i] == SWIZZLE_ONE)
764 result[i] = -1.0;
765 else
766 result[i] = -src[source->Swizzle[i]];
767 if (source->Negate)
768 result[i] = -result[i];
769 }
770 store_vector4( &inst->DstReg, state, result );
771 }
772 break;
773
774 case VP_OPCODE_END:
775 ctx->_CurrentProgram = 0;
776 return;
777 default:
778 /* bad instruction opcode */
779 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
780 ctx->_CurrentProgram = 0;
781 return;
782 } /* switch */
783 } /* for */
784
785 ctx->_CurrentProgram = 0;
786 }
787
788
789
790 /**
791 Thoughts on vertex program optimization:
792
793 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
794 assembly code. That will probably be a lot of work.
795
796 Another approach might be to replace the vp_instruction->Opcode field with
797 a pointer to a specialized C function which executes the instruction.
798 In particular we can write functions which skip swizzling, negating,
799 masking, relative addressing, etc. when they're not needed.
800
801 For example:
802
803 void simple_add( struct vp_instruction *inst )
804 {
805 GLfloat *sum = machine->Registers[inst->DstReg.Register];
806 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
807 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
808 sum[0] = a[0] + b[0];
809 sum[1] = a[1] + b[1];
810 sum[2] = a[2] + b[2];
811 sum[3] = a[3] + b[3];
812 }
813
814 */
815
816 /*
817
818 KW:
819
820 A first step would be to 'vectorize' the programs in the same way as
821 the normal transformation code in the tnl module. Thus each opcode
822 takes zero or more input vectors (registers) and produces one or more
823 output vectors.
824
825 These operations would initially be coded in C, with machine-specific
826 assembly following, as is currently the case for matrix
827 transformations in the math/ directory. The preprocessing scheme for
828 selecting simpler operations Brian describes above would also work
829 here.
830
831 This should give reasonable performance without excessive effort.
832
833 */