Initial checking of new ARB_frag/vertex program parser
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
43
44
45 /**
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
48 */
49 void
50 _mesa_init_vp_registers(GLcontext *ctx)
51 {
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
56 VERT_ATTRIB_MAX * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
63 ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 1.0F);
64 }
65
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
68 * here - Karl
69 */
70 if (ctx->VertexProgram.Current->Parameters) {
71
72 /* Grab the state */
73 _mesa_load_state_parameters(ctx, ctx->VertexProgram.Current->Parameters);
74
75 /* And copy it into the program state */
76 for (i=0; i<ctx->VertexProgram.Current->Parameters->NumParameters; i++) {
77 MEMCPY(ctx->VertexProgram.Parameters[i],
78 &ctx->VertexProgram.Current->Parameters->Parameters[i].Values,
79 4*sizeof(GLfloat));
80 }
81
82 }
83
84 }
85
86
87
88 /**
89 * Copy the 16 elements of a matrix into four consecutive program
90 * registers starting at 'pos'.
91 */
92 static void
93 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
94 {
95 GLuint i;
96 for (i = 0; i < 4; i++) {
97 registers[pos + i][0] = mat[0 + i];
98 registers[pos + i][1] = mat[4 + i];
99 registers[pos + i][2] = mat[8 + i];
100 registers[pos + i][3] = mat[12 + i];
101 }
102 }
103
104
105 /**
106 * As above, but transpose the matrix.
107 */
108 static void
109 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
110 const GLfloat mat[16])
111 {
112 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
113 }
114
115
116 /**
117 * Load all currently tracked matrices into the program registers.
118 * This needs to be done per glBegin/glEnd.
119 */
120 void
121 _mesa_init_tracked_matrices(GLcontext *ctx)
122 {
123 GLuint i;
124
125 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
126 /* point 'mat' at source matrix */
127 GLmatrix *mat;
128 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
129 mat = ctx->ModelviewMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
132 mat = ctx->ProjectionMatrixStack.Top;
133 }
134 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
135 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
136 }
137 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
138 mat = ctx->ColorMatrixStack.Top;
139 }
140 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
141 /* XXX verify the combined matrix is up to date */
142 mat = &ctx->_ModelProjectMatrix;
143 }
144 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
145 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
146 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
147 ASSERT(n < MAX_PROGRAM_MATRICES);
148 mat = ctx->ProgramMatrixStack[n].Top;
149 }
150 else {
151 /* no matrix is tracked, but we leave the register values as-is */
152 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
153 continue;
154 }
155
156 /* load the matrix */
157 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
158 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
159 }
160 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
161 _math_matrix_analyse(mat); /* update the inverse */
162 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
163 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
164 }
165 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
166 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
167 }
168 else {
169 assert(ctx->VertexProgram.TrackMatrixTransform[i]
170 == GL_INVERSE_TRANSPOSE_NV);
171 _math_matrix_analyse(mat); /* update the inverse */
172 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
173 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
174 }
175 }
176 }
177
178
179
180 /**
181 * For debugging. Dump the current vertex program machine registers.
182 */
183 void
184 _mesa_dump_vp_state( const struct vertex_program_state *state )
185 {
186 int i;
187 _mesa_printf("VertexIn:\n");
188 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
189 _mesa_printf("%d: %f %f %f %f ", i,
190 state->Inputs[i][0],
191 state->Inputs[i][1],
192 state->Inputs[i][2],
193 state->Inputs[i][3]);
194 }
195 _mesa_printf("\n");
196
197 _mesa_printf("VertexOut:\n");
198 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
199 _mesa_printf("%d: %f %f %f %f ", i,
200 state->Outputs[i][0],
201 state->Outputs[i][1],
202 state->Outputs[i][2],
203 state->Outputs[i][3]);
204 }
205 _mesa_printf("\n");
206
207 _mesa_printf("Registers:\n");
208 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
209 _mesa_printf("%d: %f %f %f %f ", i,
210 state->Temporaries[i][0],
211 state->Temporaries[i][1],
212 state->Temporaries[i][2],
213 state->Temporaries[i][3]);
214 }
215 _mesa_printf("\n");
216
217 _mesa_printf("Parameters:\n");
218 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
219 _mesa_printf("%d: %f %f %f %f ", i,
220 state->Parameters[i][0],
221 state->Parameters[i][1],
222 state->Parameters[i][2],
223 state->Parameters[i][3]);
224 }
225 _mesa_printf("\n");
226 }
227
228
229
230 /**
231 * Return a pointer to the 4-element float vector specified by the given
232 * source register.
233 */
234 static INLINE const GLfloat *
235 get_register_pointer( const struct vp_src_register *source,
236 const struct vertex_program_state *state )
237 {
238 if (source->RelAddr) {
239 const GLint reg = source->Index + state->AddressReg[0];
240 ASSERT(source->File == PROGRAM_ENV_PARAM);
241 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
242 return zeroVec;
243 else
244 return state->Parameters[reg];
245 }
246 else {
247 switch (source->File) {
248 case PROGRAM_TEMPORARY:
249 return state->Temporaries[source->Index];
250 case PROGRAM_INPUT:
251 return state->Inputs[source->Index];
252 case PROGRAM_LOCAL_PARAM:
253 /* XXX fix */
254 return state->Temporaries[source->Index];
255 case PROGRAM_ENV_PARAM:
256 return state->Parameters[source->Index];
257 case PROGRAM_STATE_VAR:
258 return state->Parameters[source->Index];
259 default:
260 _mesa_problem(NULL,
261 "Bad source register file in fetch_vector4(vp)");
262 return NULL;
263 }
264 }
265 return NULL;
266 }
267
268
269 /**
270 * Fetch a 4-element float vector from the given source register.
271 * Apply swizzling and negating as needed.
272 */
273 static INLINE void
274 fetch_vector4( const struct vp_src_register *source,
275 const struct vertex_program_state *state,
276 GLfloat result[4] )
277 {
278 const GLfloat *src = get_register_pointer(source, state);
279
280 if (source->Negate) {
281 result[0] = -src[source->Swizzle[0]];
282 result[1] = -src[source->Swizzle[1]];
283 result[2] = -src[source->Swizzle[2]];
284 result[3] = -src[source->Swizzle[3]];
285 }
286 else {
287 result[0] = src[source->Swizzle[0]];
288 result[1] = src[source->Swizzle[1]];
289 result[2] = src[source->Swizzle[2]];
290 result[3] = src[source->Swizzle[3]];
291 }
292 }
293
294
295
296 /**
297 * As above, but only return result[0] element.
298 */
299 static INLINE void
300 fetch_vector1( const struct vp_src_register *source,
301 const struct vertex_program_state *state,
302 GLfloat result[4] )
303 {
304 const GLfloat *src = get_register_pointer(source, state);
305
306 if (source->Negate) {
307 result[0] = -src[source->Swizzle[0]];
308 }
309 else {
310 result[0] = src[source->Swizzle[0]];
311 }
312 }
313
314
315 /**
316 * Store 4 floats into a register.
317 */
318 static void
319 store_vector4( const struct vp_dst_register *dest,
320 struct vertex_program_state *state,
321 const GLfloat value[4] )
322 {
323 GLfloat *dst;
324 switch (dest->File) {
325 case PROGRAM_TEMPORARY:
326 dst = state->Temporaries[dest->Index];
327 break;
328 case PROGRAM_OUTPUT:
329 dst = state->Outputs[dest->Index];
330 break;
331 default:
332 _mesa_problem(NULL, "Invalid register file in fetch_vector1(vp)");
333 return;
334 }
335
336 if (dest->WriteMask[0])
337 dst[0] = value[0];
338 if (dest->WriteMask[1])
339 dst[1] = value[1];
340 if (dest->WriteMask[2])
341 dst[2] = value[2];
342 if (dest->WriteMask[3])
343 dst[3] = value[3];
344 }
345
346
/**
 * Set x to positive or negative infinity.
 *
 * With USE_IEEE or on _WIN32 the IEEE-754 single precision bit patterns
 * for +/-infinity are written directly; on VMS __MAXFLOAT stands in for
 * infinity; everywhere else HUGE_VAL is used.
 *
 * 'x' must be an lvalue.  Arguments and expansions are fully
 * parenthesized so the macros behave as single expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the storage of x with 'bits' through the 'i' member of fi_type. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
362
363
364 /**
365 * Execute the given vertex program
366 */
367 void
368 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
369 {
370 struct vertex_program_state *state = &ctx->VertexProgram;
371 const struct vp_instruction *inst;
372
373 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
374
375 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
376
377 if (ctx->VertexProgram.CallbackEnabled &&
378 ctx->VertexProgram.Callback) {
379 ctx->VertexProgram.CurrentPosition = inst->StringPos;
380 ctx->VertexProgram.Callback(program->Base.Target,
381 ctx->VertexProgram.CallbackData);
382 }
383
384 switch (inst->Opcode) {
385 case VP_OPCODE_MOV:
386 {
387 GLfloat t[4];
388 fetch_vector4( &inst->SrcReg[0], state, t );
389 store_vector4( &inst->DstReg, state, t );
390 }
391 break;
392 case VP_OPCODE_LIT:
393 {
394 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
395 GLfloat t[4], lit[4];
396 fetch_vector4( &inst->SrcReg[0], state, t );
397 if (t[3] < -(128.0F - epsilon))
398 t[3] = - (128.0F - epsilon);
399 else if (t[3] > 128.0F - epsilon)
400 t[3] = 128.0F - epsilon;
401 if (t[0] < 0.0)
402 t[0] = 0.0;
403 if (t[1] < 0.0)
404 t[1] = 0.0;
405 lit[0] = 1.0;
406 lit[1] = t[0];
407 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
408 lit[3] = 1.0;
409 store_vector4( &inst->DstReg, state, lit );
410 }
411 break;
412 case VP_OPCODE_RCP:
413 {
414 GLfloat t[4];
415 fetch_vector1( &inst->SrcReg[0], state, t );
416 if (t[0] != 1.0F)
417 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
418 t[1] = t[2] = t[3] = t[0];
419 store_vector4( &inst->DstReg, state, t );
420 }
421 break;
422 case VP_OPCODE_RSQ:
423 {
424 GLfloat t[4];
425 fetch_vector1( &inst->SrcReg[0], state, t );
426 t[0] = INV_SQRTF(FABSF(t[0]));
427 t[1] = t[2] = t[3] = t[0];
428 store_vector4( &inst->DstReg, state, t );
429 }
430 break;
431 case VP_OPCODE_EXP:
432 {
433 GLfloat t[4], q[4], floor_t0;
434 fetch_vector1( &inst->SrcReg[0], state, t );
435 floor_t0 = (float) floor(t[0]);
436 if (floor_t0 > FLT_MAX_EXP) {
437 SET_POS_INFINITY(q[0]);
438 SET_POS_INFINITY(q[2]);
439 }
440 else if (floor_t0 < FLT_MIN_EXP) {
441 q[0] = 0.0F;
442 q[2] = 0.0F;
443 }
444 else {
445 #ifdef USE_IEEE
446 GLint ii = (GLint) floor_t0;
447 ii = (ii < 23) + 0x3f800000;
448 SET_FLOAT_BITS(q[0], ii);
449 q[0] = *((GLfloat *) &ii);
450 #else
451 q[0] = (GLfloat) pow(2.0, floor_t0);
452 #endif
453 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
454 }
455 q[1] = t[0] - floor_t0;
456 q[3] = 1.0F;
457 store_vector4( &inst->DstReg, state, q );
458 }
459 break;
460 case VP_OPCODE_LOG:
461 {
462 GLfloat t[4], q[4], abs_t0;
463 fetch_vector1( &inst->SrcReg[0], state, t );
464 abs_t0 = (GLfloat) fabs(t[0]);
465 if (abs_t0 != 0.0F) {
466 /* Since we really can't handle infinite values on VMS
467 * like other OSes we'll use __MAXFLOAT to represent
468 * infinity. This may need some tweaking.
469 */
470 #ifdef VMS
471 if (abs_t0 == __MAXFLOAT)
472 #else
473 if (IS_INF_OR_NAN(abs_t0))
474 #endif
475 {
476 SET_POS_INFINITY(q[0]);
477 q[1] = 1.0F;
478 SET_POS_INFINITY(q[2]);
479 }
480 else {
481 int exponent;
482 double mantissa = frexp(t[0], &exponent);
483 q[0] = (GLfloat) (exponent - 1);
484 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
485 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
486 }
487 }
488 else {
489 SET_NEG_INFINITY(q[0]);
490 q[1] = 1.0F;
491 SET_NEG_INFINITY(q[2]);
492 }
493 q[3] = 1.0;
494 store_vector4( &inst->DstReg, state, q );
495 }
496 break;
497 case VP_OPCODE_MUL:
498 {
499 GLfloat t[4], u[4], prod[4];
500 fetch_vector4( &inst->SrcReg[0], state, t );
501 fetch_vector4( &inst->SrcReg[1], state, u );
502 prod[0] = t[0] * u[0];
503 prod[1] = t[1] * u[1];
504 prod[2] = t[2] * u[2];
505 prod[3] = t[3] * u[3];
506 store_vector4( &inst->DstReg, state, prod );
507 }
508 break;
509 case VP_OPCODE_ADD:
510 {
511 GLfloat t[4], u[4], sum[4];
512 fetch_vector4( &inst->SrcReg[0], state, t );
513 fetch_vector4( &inst->SrcReg[1], state, u );
514 sum[0] = t[0] + u[0];
515 sum[1] = t[1] + u[1];
516 sum[2] = t[2] + u[2];
517 sum[3] = t[3] + u[3];
518 store_vector4( &inst->DstReg, state, sum );
519 }
520 break;
521 case VP_OPCODE_DP3:
522 {
523 GLfloat t[4], u[4], dot[4];
524 fetch_vector4( &inst->SrcReg[0], state, t );
525 fetch_vector4( &inst->SrcReg[1], state, u );
526 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
527 dot[1] = dot[2] = dot[3] = dot[0];
528 store_vector4( &inst->DstReg, state, dot );
529 }
530 break;
531 case VP_OPCODE_DP4:
532 {
533 GLfloat t[4], u[4], dot[4];
534 fetch_vector4( &inst->SrcReg[0], state, t );
535 fetch_vector4( &inst->SrcReg[1], state, u );
536 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
537 dot[1] = dot[2] = dot[3] = dot[0];
538 store_vector4( &inst->DstReg, state, dot );
539 }
540 break;
541 case VP_OPCODE_DST:
542 {
543 GLfloat t[4], u[4], dst[4];
544 fetch_vector4( &inst->SrcReg[0], state, t );
545 fetch_vector4( &inst->SrcReg[1], state, u );
546 dst[0] = 1.0F;
547 dst[1] = t[1] * u[1];
548 dst[2] = t[2];
549 dst[3] = u[3];
550 store_vector4( &inst->DstReg, state, dst );
551 }
552 break;
553 case VP_OPCODE_MIN:
554 {
555 GLfloat t[4], u[4], min[4];
556 fetch_vector4( &inst->SrcReg[0], state, t );
557 fetch_vector4( &inst->SrcReg[1], state, u );
558 min[0] = (t[0] < u[0]) ? t[0] : u[0];
559 min[1] = (t[1] < u[1]) ? t[1] : u[1];
560 min[2] = (t[2] < u[2]) ? t[2] : u[2];
561 min[3] = (t[3] < u[3]) ? t[3] : u[3];
562 store_vector4( &inst->DstReg, state, min );
563 }
564 break;
565 case VP_OPCODE_MAX:
566 {
567 GLfloat t[4], u[4], max[4];
568 fetch_vector4( &inst->SrcReg[0], state, t );
569 fetch_vector4( &inst->SrcReg[1], state, u );
570 max[0] = (t[0] > u[0]) ? t[0] : u[0];
571 max[1] = (t[1] > u[1]) ? t[1] : u[1];
572 max[2] = (t[2] > u[2]) ? t[2] : u[2];
573 max[3] = (t[3] > u[3]) ? t[3] : u[3];
574 store_vector4( &inst->DstReg, state, max );
575 }
576 break;
577 case VP_OPCODE_SLT:
578 {
579 GLfloat t[4], u[4], slt[4];
580 fetch_vector4( &inst->SrcReg[0], state, t );
581 fetch_vector4( &inst->SrcReg[1], state, u );
582 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
583 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
584 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
585 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
586 store_vector4( &inst->DstReg, state, slt );
587 }
588 break;
589 case VP_OPCODE_SGE:
590 {
591 GLfloat t[4], u[4], sge[4];
592 fetch_vector4( &inst->SrcReg[0], state, t );
593 fetch_vector4( &inst->SrcReg[1], state, u );
594 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
595 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
596 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
597 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
598 store_vector4( &inst->DstReg, state, sge );
599 }
600 break;
601 case VP_OPCODE_MAD:
602 {
603 GLfloat t[4], u[4], v[4], sum[4];
604 fetch_vector4( &inst->SrcReg[0], state, t );
605 fetch_vector4( &inst->SrcReg[1], state, u );
606 fetch_vector4( &inst->SrcReg[2], state, v );
607 sum[0] = t[0] * u[0] + v[0];
608 sum[1] = t[1] * u[1] + v[1];
609 sum[2] = t[2] * u[2] + v[2];
610 sum[3] = t[3] * u[3] + v[3];
611 store_vector4( &inst->DstReg, state, sum );
612 }
613 break;
614 case VP_OPCODE_ARL:
615 {
616 GLfloat t[4];
617 fetch_vector4( &inst->SrcReg[0], state, t );
618 state->AddressReg[0] = (GLint) floor(t[0]);
619 }
620 break;
621 case VP_OPCODE_DPH:
622 {
623 GLfloat t[4], u[4], dot[4];
624 fetch_vector4( &inst->SrcReg[0], state, t );
625 fetch_vector4( &inst->SrcReg[1], state, u );
626 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
627 dot[1] = dot[2] = dot[3] = dot[0];
628 store_vector4( &inst->DstReg, state, dot );
629 }
630 break;
631 case VP_OPCODE_RCC:
632 {
633 GLfloat t[4], u;
634 fetch_vector1( &inst->SrcReg[0], state, t );
635 if (t[0] == 1.0F)
636 u = 1.0F;
637 else
638 u = 1.0F / t[0];
639 if (u > 0.0F) {
640 if (u > 1.884467e+019F) {
641 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
642 }
643 else if (u < 5.42101e-020F) {
644 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
645 }
646 }
647 else {
648 if (u < -1.884467e+019F) {
649 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
650 }
651 else if (u > -5.42101e-020F) {
652 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
653 }
654 }
655 t[0] = t[1] = t[2] = t[3] = u;
656 store_vector4( &inst->DstReg, state, t );
657 }
658 break;
659 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
660 {
661 GLfloat t[4], u[4], sum[4];
662 fetch_vector4( &inst->SrcReg[0], state, t );
663 fetch_vector4( &inst->SrcReg[1], state, u );
664 sum[0] = t[0] - u[0];
665 sum[1] = t[1] - u[1];
666 sum[2] = t[2] - u[2];
667 sum[3] = t[3] - u[3];
668 store_vector4( &inst->DstReg, state, sum );
669 }
670 break;
671 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
672 {
673 GLfloat t[4];
674 fetch_vector4( &inst->SrcReg[0], state, t );
675 if (t[0] < 0.0) t[0] = -t[0];
676 if (t[1] < 0.0) t[1] = -t[1];
677 if (t[2] < 0.0) t[2] = -t[2];
678 if (t[3] < 0.0) t[3] = -t[3];
679 store_vector4( &inst->DstReg, state, t );
680 }
681 break;
682 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
683 {
684 GLfloat t[4];
685 fetch_vector4( &inst->SrcReg[0], state, t );
686 t[0] = FLOORF(t[0]);
687 t[1] = FLOORF(t[1]);
688 t[2] = FLOORF(t[2]);
689 t[3] = FLOORF(t[3]);
690 store_vector4( &inst->DstReg, state, t );
691 }
692 break;
693 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
694 {
695 GLfloat t[4];
696 fetch_vector4( &inst->SrcReg[0], state, t );
697 t[0] = t[0] - FLOORF(t[0]);
698 t[1] = t[1] - FLOORF(t[1]);
699 t[2] = t[2] - FLOORF(t[2]);
700 t[3] = t[3] - FLOORF(t[3]);
701 store_vector4( &inst->DstReg, state, t );
702 }
703 break;
704 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
705 {
706 GLfloat t[4];
707 fetch_vector1( &inst->SrcReg[0], state, t );
708 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
709 store_vector4( &inst->DstReg, state, t );
710 }
711 break;
712 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
713 {
714 GLfloat t[4];
715 fetch_vector1( &inst->SrcReg[0], state, t );
716 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
717 store_vector4( &inst->DstReg, state, t );
718 }
719 break;
720 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
721 {
722 GLfloat t[4], u[4];
723 fetch_vector1( &inst->SrcReg[0], state, t );
724 fetch_vector1( &inst->SrcReg[1], state, u );
725 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
726 store_vector4( &inst->DstReg, state, t );
727 }
728 break;
729 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
730 {
731 GLfloat t[4], u[4], cross[4];
732 fetch_vector4( &inst->SrcReg[0], state, t );
733 fetch_vector4( &inst->SrcReg[1], state, u );
734 cross[0] = t[1] * u[2] - t[2] * u[1];
735 cross[1] = t[2] * u[0] - t[0] * u[2];
736 cross[2] = t[0] * u[1] - t[1] * u[0];
737 store_vector4( &inst->DstReg, state, cross );
738 }
739 break;
740 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
741 {
742 const struct vp_src_register *source = &inst->SrcReg[0];
743 const GLfloat *src = get_register_pointer(source, state);
744 GLfloat result[4];
745 GLuint i;
746
747 /* do extended swizzling here */
748 for (i = 0; i < 3; i++) {
749 if (source->Swizzle[i] == SWIZZLE_ZERO)
750 result[i] = 0.0;
751 else if (source->Swizzle[i] == SWIZZLE_ONE)
752 result[i] = -1.0;
753 else
754 result[i] = -src[source->Swizzle[i]];
755 if (source->Negate)
756 result[i] = -result[i];
757 }
758 store_vector4( &inst->DstReg, state, result );
759 }
760 break;
761
762 case VP_OPCODE_END:
763 ctx->_CurrentProgram = 0;
764 return;
765 default:
766 /* bad instruction opcode */
767 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
768 ctx->_CurrentProgram = 0;
769 return;
770 } /* switch */
771 } /* for */
772
773 ctx->_CurrentProgram = 0;
774 }
775
776
777
778 /**
779 Thoughts on vertex program optimization:
780
781 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
782 assembly code. That will probably be a lot of work.
783
784 Another approach might be to replace the vp_instruction->Opcode field with
785 a pointer to a specialized C function which executes the instruction.
786 In particular we can write functions which skip swizzling, negating,
787 masking, relative addressing, etc. when they're not needed.
788
789 For example:
790
791 void simple_add( struct vp_instruction *inst )
792 {
793 GLfloat *sum = machine->Registers[inst->DstReg.Register];
794 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
795 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
796 sum[0] = a[0] + b[0];
797 sum[1] = a[1] + b[1];
798 sum[2] = a[2] + b[2];
799 sum[3] = a[3] + b[3];
800 }
801
802 */
803
804 /*
805
806 KW:
807
808 A first step would be to 'vectorize' the programs in the same way as
809 the normal transformation code in the tnl module. Thus each opcode
810 takes zero or more input vectors (registers) and produces one or more
811 output vectors.
812
813 These operations would initially be coded in C, with machine-specific
814 assembly following, as is currently the case for matrix
815 transformations in the math/ directory. The preprocessing scheme for
816 selecting simpler operations Brian describes above would also work
817 here.
818
819 This should give reasonable performance without excessive effort.
820
821 */