remove duplicate declaration
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
43
44
45 /**
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
48 */
49 void
50 _mesa_init_vp_registers(GLcontext *ctx)
51 {
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
56 VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
63 ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
64 }
65
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
68 * here - Karl
69 */
70 if (ctx->VertexProgram.Current->Parameters) {
71 /* Grab the state */
72 _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
73
74 /* And copy it into the program state */
75 for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
76 MEMCPY(ctx->VertexProgram.Parameters[i],
77 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
78 4*sizeof(GLfloat));
79 }
80 }
81 }
82
83
84
85 /**
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
88 */
89 static void
90 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
91 {
92 GLuint i;
93 for (i = 0; i < 4; i++) {
94 registers[pos + i][0] = mat[0 + i];
95 registers[pos + i][1] = mat[4 + i];
96 registers[pos + i][2] = mat[8 + i];
97 registers[pos + i][3] = mat[12 + i];
98 }
99 }
100
101
102 /**
103 * As above, but transpose the matrix.
104 */
105 static void
106 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
107 const GLfloat mat[16])
108 {
109 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
110 }
111
112
113 /**
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
116 */
117 void
118 _mesa_init_tracked_matrices(GLcontext *ctx)
119 {
120 GLuint i;
121
122 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
123 /* point 'mat' at source matrix */
124 GLmatrix *mat;
125 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
126 mat = ctx->ModelviewMatrixStack.Top;
127 }
128 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
129 mat = ctx->ProjectionMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
132 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
133 }
134 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
135 mat = ctx->ColorMatrixStack.Top;
136 }
137 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
138 /* XXX verify the combined matrix is up to date */
139 mat = &ctx->_ModelProjectMatrix;
140 }
141 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
142 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
143 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
144 ASSERT(n < MAX_PROGRAM_MATRICES);
145 mat = ctx->ProgramMatrixStack[n].Top;
146 }
147 else {
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
150 continue;
151 }
152
153 /* load the matrix */
154 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
155 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
156 }
157 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
158 _math_matrix_analyse(mat); /* update the inverse */
159 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
160 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
161 }
162 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
163 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
164 }
165 else {
166 assert(ctx->VertexProgram.TrackMatrixTransform[i]
167 == GL_INVERSE_TRANSPOSE_NV);
168 _math_matrix_analyse(mat); /* update the inverse */
169 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
170 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
171 }
172 }
173 }
174
175
176
177 /**
178 * For debugging. Dump the current vertex program machine registers.
179 */
180 void
181 _mesa_dump_vp_state( const struct vertex_program_state *state )
182 {
183 int i;
184 _mesa_printf("VertexIn:\n");
185 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
186 _mesa_printf("%d: %f %f %f %f ", i,
187 state->Inputs[i][0],
188 state->Inputs[i][1],
189 state->Inputs[i][2],
190 state->Inputs[i][3]);
191 }
192 _mesa_printf("\n");
193
194 _mesa_printf("VertexOut:\n");
195 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
196 _mesa_printf("%d: %f %f %f %f ", i,
197 state->Outputs[i][0],
198 state->Outputs[i][1],
199 state->Outputs[i][2],
200 state->Outputs[i][3]);
201 }
202 _mesa_printf("\n");
203
204 _mesa_printf("Registers:\n");
205 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
206 _mesa_printf("%d: %f %f %f %f ", i,
207 state->Temporaries[i][0],
208 state->Temporaries[i][1],
209 state->Temporaries[i][2],
210 state->Temporaries[i][3]);
211 }
212 _mesa_printf("\n");
213
214 _mesa_printf("Parameters:\n");
215 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
216 _mesa_printf("%d: %f %f %f %f ", i,
217 state->Parameters[i][0],
218 state->Parameters[i][1],
219 state->Parameters[i][2],
220 state->Parameters[i][3]);
221 }
222 _mesa_printf("\n");
223 }
224
225
226
227 /**
228 * Return a pointer to the 4-element float vector specified by the given
229 * source register.
230 */
231 static INLINE const GLfloat *
232 get_register_pointer( const struct vp_src_register *source,
233 const struct vertex_program_state *state )
234 {
235 if (source->RelAddr) {
236 const GLint reg = source->Index + state->AddressReg[0];
237 ASSERT(source->File == PROGRAM_ENV_PARAM);
238 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
239 return zeroVec;
240 else
241 return state->Parameters[reg];
242 }
243 else {
244 switch (source->File) {
245 case PROGRAM_TEMPORARY:
246 return state->Temporaries[source->Index];
247 case PROGRAM_INPUT:
248 return state->Inputs[source->Index];
249 case PROGRAM_LOCAL_PARAM:
250 /* XXX fix */
251 return state->Temporaries[source->Index];
252 case PROGRAM_ENV_PARAM:
253 return state->Parameters[source->Index];
254 case PROGRAM_STATE_VAR:
255 return state->Parameters[source->Index];
256 default:
257 _mesa_problem(NULL,
258 "Bad source register file in fetch_vector4(vp)");
259 return NULL;
260 }
261 }
262 return NULL;
263 }
264
265
266 /**
267 * Fetch a 4-element float vector from the given source register.
268 * Apply swizzling and negating as needed.
269 */
270 static INLINE void
271 fetch_vector4( const struct vp_src_register *source,
272 const struct vertex_program_state *state,
273 GLfloat result[4] )
274 {
275 const GLfloat *src = get_register_pointer(source, state);
276
277 if (source->Negate) {
278 result[0] = -src[source->Swizzle[0]];
279 result[1] = -src[source->Swizzle[1]];
280 result[2] = -src[source->Swizzle[2]];
281 result[3] = -src[source->Swizzle[3]];
282 }
283 else {
284 result[0] = src[source->Swizzle[0]];
285 result[1] = src[source->Swizzle[1]];
286 result[2] = src[source->Swizzle[2]];
287 result[3] = src[source->Swizzle[3]];
288 }
289 }
290
291
292
293 /**
294 * As above, but only return result[0] element.
295 */
296 static INLINE void
297 fetch_vector1( const struct vp_src_register *source,
298 const struct vertex_program_state *state,
299 GLfloat result[4] )
300 {
301 const GLfloat *src = get_register_pointer(source, state);
302
303 if (source->Negate) {
304 result[0] = -src[source->Swizzle[0]];
305 }
306 else {
307 result[0] = src[source->Swizzle[0]];
308 }
309 }
310
311
312 /**
313 * Store 4 floats into a register.
314 */
315 static void
316 store_vector4( const struct vp_dst_register *dest,
317 struct vertex_program_state *state,
318 const GLfloat value[4] )
319 {
320 GLfloat *dst;
321 switch (dest->File) {
322 case PROGRAM_TEMPORARY:
323 dst = state->Temporaries[dest->Index];
324 break;
325 case PROGRAM_OUTPUT:
326 dst = state->Outputs[dest->Index];
327 break;
328 default:
329 _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
330 return;
331 }
332
333 if (dest->WriteMask[0])
334 dst[0] = value[0];
335 if (dest->WriteMask[1])
336 dst[1] = value[1];
337 if (dest->WriteMask[2])
338 dst[2] = value[2];
339 if (dest->WriteMask[3])
340 dst[3] = value[3];
341 }
342
343
/**
 * Set x to positive or negative infinity.
 *
 * With USE_IEEE (or on _WIN32) the IEEE single-precision bit pattern is
 * written directly; on VMS __MAXFLOAT stands in for infinity; otherwise
 * HUGE_VAL is used.  'x' must be an lvalue.  Arguments and expansions are
 * fully parenthesized so the macros behave as ordinary expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bits of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
359
360
361 /**
362 * Execute the given vertex program
363 */
364 void
365 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
366 {
367 struct vertex_program_state *state = &ctx->VertexProgram;
368 const struct vp_instruction *inst;
369
370 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
371
372 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
373
374 if (ctx->VertexProgram.CallbackEnabled &&
375 ctx->VertexProgram.Callback) {
376 ctx->VertexProgram.CurrentPosition = inst->StringPos;
377 ctx->VertexProgram.Callback(program->Base.Target,
378 ctx->VertexProgram.CallbackData);
379 }
380
381 switch (inst->Opcode) {
382 case VP_OPCODE_MOV:
383 {
384 GLfloat t[4];
385 fetch_vector4( &inst->SrcReg[0], state, t );
386 store_vector4( &inst->DstReg, state, t );
387 }
388 break;
389 case VP_OPCODE_LIT:
390 {
391 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
392 GLfloat t[4], lit[4];
393 fetch_vector4( &inst->SrcReg[0], state, t );
394 if (t[3] < -(128.0F - epsilon))
395 t[3] = - (128.0F - epsilon);
396 else if (t[3] > 128.0F - epsilon)
397 t[3] = 128.0F - epsilon;
398 if (t[0] < 0.0)
399 t[0] = 0.0;
400 if (t[1] < 0.0)
401 t[1] = 0.0;
402 lit[0] = 1.0;
403 lit[1] = t[0];
404 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
405 lit[3] = 1.0;
406 store_vector4( &inst->DstReg, state, lit );
407 }
408 break;
409 case VP_OPCODE_RCP:
410 {
411 GLfloat t[4];
412 fetch_vector1( &inst->SrcReg[0], state, t );
413 if (t[0] != 1.0F)
414 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
415 t[1] = t[2] = t[3] = t[0];
416 store_vector4( &inst->DstReg, state, t );
417 }
418 break;
419 case VP_OPCODE_RSQ:
420 {
421 GLfloat t[4];
422 fetch_vector1( &inst->SrcReg[0], state, t );
423 t[0] = INV_SQRTF(FABSF(t[0]));
424 t[1] = t[2] = t[3] = t[0];
425 store_vector4( &inst->DstReg, state, t );
426 }
427 break;
428 case VP_OPCODE_EXP:
429 {
430 GLfloat t[4], q[4], floor_t0;
431 fetch_vector1( &inst->SrcReg[0], state, t );
432 floor_t0 = (float) floor(t[0]);
433 if (floor_t0 > FLT_MAX_EXP) {
434 SET_POS_INFINITY(q[0]);
435 SET_POS_INFINITY(q[2]);
436 }
437 else if (floor_t0 < FLT_MIN_EXP) {
438 q[0] = 0.0F;
439 q[2] = 0.0F;
440 }
441 else {
442 #ifdef USE_IEEE
443 GLint ii = (GLint) floor_t0;
444 ii = (ii < 23) + 0x3f800000;
445 SET_FLOAT_BITS(q[0], ii);
446 q[0] = *((GLfloat *) &ii);
447 #else
448 q[0] = (GLfloat) pow(2.0, floor_t0);
449 #endif
450 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
451 }
452 q[1] = t[0] - floor_t0;
453 q[3] = 1.0F;
454 store_vector4( &inst->DstReg, state, q );
455 }
456 break;
457 case VP_OPCODE_LOG:
458 {
459 GLfloat t[4], q[4], abs_t0;
460 fetch_vector1( &inst->SrcReg[0], state, t );
461 abs_t0 = (GLfloat) fabs(t[0]);
462 if (abs_t0 != 0.0F) {
463 /* Since we really can't handle infinite values on VMS
464 * like other OSes we'll use __MAXFLOAT to represent
465 * infinity. This may need some tweaking.
466 */
467 #ifdef VMS
468 if (abs_t0 == __MAXFLOAT)
469 #else
470 if (IS_INF_OR_NAN(abs_t0))
471 #endif
472 {
473 SET_POS_INFINITY(q[0]);
474 q[1] = 1.0F;
475 SET_POS_INFINITY(q[2]);
476 }
477 else {
478 int exponent;
479 double mantissa = frexp(t[0], &exponent);
480 q[0] = (GLfloat) (exponent - 1);
481 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
482 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
483 }
484 }
485 else {
486 SET_NEG_INFINITY(q[0]);
487 q[1] = 1.0F;
488 SET_NEG_INFINITY(q[2]);
489 }
490 q[3] = 1.0;
491 store_vector4( &inst->DstReg, state, q );
492 }
493 break;
494 case VP_OPCODE_MUL:
495 {
496 GLfloat t[4], u[4], prod[4];
497 fetch_vector4( &inst->SrcReg[0], state, t );
498 fetch_vector4( &inst->SrcReg[1], state, u );
499 prod[0] = t[0] * u[0];
500 prod[1] = t[1] * u[1];
501 prod[2] = t[2] * u[2];
502 prod[3] = t[3] * u[3];
503 store_vector4( &inst->DstReg, state, prod );
504 }
505 break;
506 case VP_OPCODE_ADD:
507 {
508 GLfloat t[4], u[4], sum[4];
509 fetch_vector4( &inst->SrcReg[0], state, t );
510 fetch_vector4( &inst->SrcReg[1], state, u );
511 sum[0] = t[0] + u[0];
512 sum[1] = t[1] + u[1];
513 sum[2] = t[2] + u[2];
514 sum[3] = t[3] + u[3];
515 store_vector4( &inst->DstReg, state, sum );
516 }
517 break;
518 case VP_OPCODE_DP3:
519 {
520 GLfloat t[4], u[4], dot[4];
521 fetch_vector4( &inst->SrcReg[0], state, t );
522 fetch_vector4( &inst->SrcReg[1], state, u );
523 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
524 dot[1] = dot[2] = dot[3] = dot[0];
525 store_vector4( &inst->DstReg, state, dot );
526 }
527 break;
528 case VP_OPCODE_DP4:
529 {
530 GLfloat t[4], u[4], dot[4];
531 fetch_vector4( &inst->SrcReg[0], state, t );
532 fetch_vector4( &inst->SrcReg[1], state, u );
533 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
534 dot[1] = dot[2] = dot[3] = dot[0];
535 store_vector4( &inst->DstReg, state, dot );
536 }
537 break;
538 case VP_OPCODE_DST:
539 {
540 GLfloat t[4], u[4], dst[4];
541 fetch_vector4( &inst->SrcReg[0], state, t );
542 fetch_vector4( &inst->SrcReg[1], state, u );
543 dst[0] = 1.0F;
544 dst[1] = t[1] * u[1];
545 dst[2] = t[2];
546 dst[3] = u[3];
547 store_vector4( &inst->DstReg, state, dst );
548 }
549 break;
550 case VP_OPCODE_MIN:
551 {
552 GLfloat t[4], u[4], min[4];
553 fetch_vector4( &inst->SrcReg[0], state, t );
554 fetch_vector4( &inst->SrcReg[1], state, u );
555 min[0] = (t[0] < u[0]) ? t[0] : u[0];
556 min[1] = (t[1] < u[1]) ? t[1] : u[1];
557 min[2] = (t[2] < u[2]) ? t[2] : u[2];
558 min[3] = (t[3] < u[3]) ? t[3] : u[3];
559 store_vector4( &inst->DstReg, state, min );
560 }
561 break;
562 case VP_OPCODE_MAX:
563 {
564 GLfloat t[4], u[4], max[4];
565 fetch_vector4( &inst->SrcReg[0], state, t );
566 fetch_vector4( &inst->SrcReg[1], state, u );
567 max[0] = (t[0] > u[0]) ? t[0] : u[0];
568 max[1] = (t[1] > u[1]) ? t[1] : u[1];
569 max[2] = (t[2] > u[2]) ? t[2] : u[2];
570 max[3] = (t[3] > u[3]) ? t[3] : u[3];
571 store_vector4( &inst->DstReg, state, max );
572 }
573 break;
574 case VP_OPCODE_SLT:
575 {
576 GLfloat t[4], u[4], slt[4];
577 fetch_vector4( &inst->SrcReg[0], state, t );
578 fetch_vector4( &inst->SrcReg[1], state, u );
579 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
580 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
581 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
582 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
583 store_vector4( &inst->DstReg, state, slt );
584 }
585 break;
586 case VP_OPCODE_SGE:
587 {
588 GLfloat t[4], u[4], sge[4];
589 fetch_vector4( &inst->SrcReg[0], state, t );
590 fetch_vector4( &inst->SrcReg[1], state, u );
591 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
592 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
593 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
594 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
595 store_vector4( &inst->DstReg, state, sge );
596 }
597 break;
598 case VP_OPCODE_MAD:
599 {
600 GLfloat t[4], u[4], v[4], sum[4];
601 fetch_vector4( &inst->SrcReg[0], state, t );
602 fetch_vector4( &inst->SrcReg[1], state, u );
603 fetch_vector4( &inst->SrcReg[2], state, v );
604 sum[0] = t[0] * u[0] + v[0];
605 sum[1] = t[1] * u[1] + v[1];
606 sum[2] = t[2] * u[2] + v[2];
607 sum[3] = t[3] * u[3] + v[3];
608 store_vector4( &inst->DstReg, state, sum );
609 }
610 break;
611 case VP_OPCODE_ARL:
612 {
613 GLfloat t[4];
614 fetch_vector4( &inst->SrcReg[0], state, t );
615 state->AddressReg[0] = (GLint) floor(t[0]);
616 }
617 break;
618 case VP_OPCODE_DPH:
619 {
620 GLfloat t[4], u[4], dot[4];
621 fetch_vector4( &inst->SrcReg[0], state, t );
622 fetch_vector4( &inst->SrcReg[1], state, u );
623 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
624 dot[1] = dot[2] = dot[3] = dot[0];
625 store_vector4( &inst->DstReg, state, dot );
626 }
627 break;
628 case VP_OPCODE_RCC:
629 {
630 GLfloat t[4], u;
631 fetch_vector1( &inst->SrcReg[0], state, t );
632 if (t[0] == 1.0F)
633 u = 1.0F;
634 else
635 u = 1.0F / t[0];
636 if (u > 0.0F) {
637 if (u > 1.884467e+019F) {
638 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
639 }
640 else if (u < 5.42101e-020F) {
641 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
642 }
643 }
644 else {
645 if (u < -1.884467e+019F) {
646 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
647 }
648 else if (u > -5.42101e-020F) {
649 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
650 }
651 }
652 t[0] = t[1] = t[2] = t[3] = u;
653 store_vector4( &inst->DstReg, state, t );
654 }
655 break;
656 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
657 {
658 GLfloat t[4], u[4], sum[4];
659 fetch_vector4( &inst->SrcReg[0], state, t );
660 fetch_vector4( &inst->SrcReg[1], state, u );
661 sum[0] = t[0] - u[0];
662 sum[1] = t[1] - u[1];
663 sum[2] = t[2] - u[2];
664 sum[3] = t[3] - u[3];
665 store_vector4( &inst->DstReg, state, sum );
666 }
667 break;
668 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
669 {
670 GLfloat t[4];
671 fetch_vector4( &inst->SrcReg[0], state, t );
672 if (t[0] < 0.0) t[0] = -t[0];
673 if (t[1] < 0.0) t[1] = -t[1];
674 if (t[2] < 0.0) t[2] = -t[2];
675 if (t[3] < 0.0) t[3] = -t[3];
676 store_vector4( &inst->DstReg, state, t );
677 }
678 break;
679 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
680 {
681 GLfloat t[4];
682 fetch_vector4( &inst->SrcReg[0], state, t );
683 t[0] = FLOORF(t[0]);
684 t[1] = FLOORF(t[1]);
685 t[2] = FLOORF(t[2]);
686 t[3] = FLOORF(t[3]);
687 store_vector4( &inst->DstReg, state, t );
688 }
689 break;
690 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
691 {
692 GLfloat t[4];
693 fetch_vector4( &inst->SrcReg[0], state, t );
694 t[0] = t[0] - FLOORF(t[0]);
695 t[1] = t[1] - FLOORF(t[1]);
696 t[2] = t[2] - FLOORF(t[2]);
697 t[3] = t[3] - FLOORF(t[3]);
698 store_vector4( &inst->DstReg, state, t );
699 }
700 break;
701 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
702 {
703 GLfloat t[4];
704 fetch_vector1( &inst->SrcReg[0], state, t );
705 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
706 store_vector4( &inst->DstReg, state, t );
707 }
708 break;
709 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
710 {
711 GLfloat t[4];
712 fetch_vector1( &inst->SrcReg[0], state, t );
713 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
714 store_vector4( &inst->DstReg, state, t );
715 }
716 break;
717 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
718 {
719 GLfloat t[4], u[4];
720 fetch_vector1( &inst->SrcReg[0], state, t );
721 fetch_vector1( &inst->SrcReg[1], state, u );
722 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
723 store_vector4( &inst->DstReg, state, t );
724 }
725 break;
726 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
727 {
728 GLfloat t[4], u[4], cross[4];
729 fetch_vector4( &inst->SrcReg[0], state, t );
730 fetch_vector4( &inst->SrcReg[1], state, u );
731 cross[0] = t[1] * u[2] - t[2] * u[1];
732 cross[1] = t[2] * u[0] - t[0] * u[2];
733 cross[2] = t[0] * u[1] - t[1] * u[0];
734 store_vector4( &inst->DstReg, state, cross );
735 }
736 break;
737 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
738 {
739 const struct vp_src_register *source = &inst->SrcReg[0];
740 const GLfloat *src = get_register_pointer(source, state);
741 GLfloat result[4];
742 GLuint i;
743
744 /* do extended swizzling here */
745 for (i = 0; i < 3; i++) {
746 if (source->Swizzle[i] == SWIZZLE_ZERO)
747 result[i] = 0.0;
748 else if (source->Swizzle[i] == SWIZZLE_ONE)
749 result[i] = -1.0;
750 else
751 result[i] = -src[source->Swizzle[i]];
752 if (source->Negate)
753 result[i] = -result[i];
754 }
755 store_vector4( &inst->DstReg, state, result );
756 }
757 break;
758
759 case VP_OPCODE_END:
760 ctx->_CurrentProgram = 0;
761 return;
762 default:
763 /* bad instruction opcode */
764 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
765 ctx->_CurrentProgram = 0;
766 return;
767 } /* switch */
768 } /* for */
769
770 ctx->_CurrentProgram = 0;
771 }
772
773
774
775 /**
776 Thoughts on vertex program optimization:
777
778 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
779 assembly code. That will probably be a lot of work.
780
781 Another approach might be to replace the vp_instruction->Opcode field with
782 a pointer to a specialized C function which executes the instruction.
783 In particular we can write functions which skip swizzling, negating,
784 masking, relative addressing, etc. when they're not needed.
785
786 For example:
787
788 void simple_add( struct vp_instruction *inst )
789 {
790 GLfloat *sum = machine->Registers[inst->DstReg.Register];
791 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
792 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
793 sum[0] = a[0] + b[0];
794 sum[1] = a[1] + b[1];
795 sum[2] = a[2] + b[2];
796 sum[3] = a[3] + b[3];
797 }
798
799 */
800
801 /*
802
803 KW:
804
805 A first step would be to 'vectorize' the programs in the same way as
806 the normal transformation code in the tnl module. Thus each opcode
807 takes zero or more input vectors (registers) and produces one or more
808 output vectors.
809
810 These operations would initially be coded in C, with machine-specific
811 assembly following, as is currently the case for matrix
812 transformations in the math/ directory. The preprocessing scheme for
813 selecting simpler operations Brian describes above would also work
814 here.
815
816 This should give reasonable performance without excessive effort.
817
818 */