check glLoadProgramNV len < 0 (bug 6679)
[mesa.git] / src / mesa / shader / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "program_instruction.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
43
44
45 /**
46 * Load/initialize the vertex program registers which need to be set
47 * per-vertex.
48 */
49 void
50 _mesa_init_vp_per_vertex_registers(GLcontext *ctx)
51 {
52 /* Input registers get initialized from the current vertex attribs */
53 MEMCPY(ctx->VertexProgram.Inputs, ctx->Current.Attrib,
54 MAX_VERTEX_PROGRAM_ATTRIBS * 4 * sizeof(GLfloat));
55
56 if (ctx->VertexProgram.Current->IsNVProgram) {
57 GLuint i;
58 /* Output/result regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(ctx->VertexProgram.Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 /* Temp regs are initialized to [0,0,0,0] */
63 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
64 ASSIGN_4V(ctx->VertexProgram.Temporaries[i], 0.0F, 0.0F, 0.0F, 0.0F);
65 }
66 ASSIGN_4V(ctx->VertexProgram.AddressReg, 0, 0, 0, 0);
67 }
68 }
69
70
71
72 /**
73 * Copy the 16 elements of a matrix into four consecutive program
74 * registers starting at 'pos'.
75 */
76 static void
77 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
78 {
79 GLuint i;
80 for (i = 0; i < 4; i++) {
81 registers[pos + i][0] = mat[0 + i];
82 registers[pos + i][1] = mat[4 + i];
83 registers[pos + i][2] = mat[8 + i];
84 registers[pos + i][3] = mat[12 + i];
85 }
86 }
87
88
89 /**
90 * As above, but transpose the matrix.
91 */
92 static void
93 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
94 const GLfloat mat[16])
95 {
96 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
97 }
98
99
100 /**
101 * Load program parameter registers with tracked matrices (if NV program)
102 * or GL state values (if ARB program).
103 * This needs to be done per glBegin/glEnd, not per-vertex.
104 */
105 void
106 _mesa_init_vp_per_primitive_registers(GLcontext *ctx)
107 {
108 if (ctx->VertexProgram.Current->IsNVProgram) {
109 GLuint i;
110
111 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
112 /* point 'mat' at source matrix */
113 GLmatrix *mat;
114 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
115 mat = ctx->ModelviewMatrixStack.Top;
116 }
117 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
118 mat = ctx->ProjectionMatrixStack.Top;
119 }
120 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
121 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
122 }
123 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
124 mat = ctx->ColorMatrixStack.Top;
125 }
126 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
127 /* XXX verify the combined matrix is up to date */
128 mat = &ctx->_ModelProjectMatrix;
129 }
130 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
131 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
132 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
133 ASSERT(n < MAX_PROGRAM_MATRICES);
134 mat = ctx->ProgramMatrixStack[n].Top;
135 }
136 else {
137 /* no matrix is tracked, but we leave the register values as-is */
138 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
139 continue;
140 }
141
142 /* load the matrix */
143 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
144 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
145 }
146 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
147 _math_matrix_analyse(mat); /* update the inverse */
148 ASSERT(!_math_matrix_is_dirty(mat));
149 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
150 }
151 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
152 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
153 }
154 else {
155 assert(ctx->VertexProgram.TrackMatrixTransform[i]
156 == GL_INVERSE_TRANSPOSE_NV);
157 _math_matrix_analyse(mat); /* update the inverse */
158 ASSERT(!_math_matrix_is_dirty(mat));
159 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
160 }
161 }
162 }
163 else {
164 /* Using and ARB vertex program */
165 if (ctx->VertexProgram.Current->Base.Parameters) {
166 /* Grab the state GL state and put into registers */
167 _mesa_load_state_parameters(ctx,
168 ctx->VertexProgram.Current->Base.Parameters);
169 }
170 }
171 }
172
173
174
175 /**
176 * For debugging. Dump the current vertex program machine registers.
177 */
178 void
179 _mesa_dump_vp_state( const struct gl_vertex_program_state *state )
180 {
181 int i;
182 _mesa_printf("VertexIn:\n");
183 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
184 _mesa_printf("%d: %f %f %f %f ", i,
185 state->Inputs[i][0],
186 state->Inputs[i][1],
187 state->Inputs[i][2],
188 state->Inputs[i][3]);
189 }
190 _mesa_printf("\n");
191
192 _mesa_printf("VertexOut:\n");
193 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
194 _mesa_printf("%d: %f %f %f %f ", i,
195 state->Outputs[i][0],
196 state->Outputs[i][1],
197 state->Outputs[i][2],
198 state->Outputs[i][3]);
199 }
200 _mesa_printf("\n");
201
202 _mesa_printf("Registers:\n");
203 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
204 _mesa_printf("%d: %f %f %f %f ", i,
205 state->Temporaries[i][0],
206 state->Temporaries[i][1],
207 state->Temporaries[i][2],
208 state->Temporaries[i][3]);
209 }
210 _mesa_printf("\n");
211
212 _mesa_printf("Parameters:\n");
213 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
214 _mesa_printf("%d: %f %f %f %f ", i,
215 state->Parameters[i][0],
216 state->Parameters[i][1],
217 state->Parameters[i][2],
218 state->Parameters[i][3]);
219 }
220 _mesa_printf("\n");
221 }
222
223
224
225 /**
226 * Return a pointer to the 4-element float vector specified by the given
227 * source register.
228 */
229 static INLINE const GLfloat *
230 get_register_pointer( const struct prog_src_register *source,
231 const struct gl_vertex_program_state *state )
232 {
233 if (source->RelAddr) {
234 const GLint reg = source->Index + state->AddressReg[0];
235 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
236 (source->File == PROGRAM_STATE_VAR) );
237 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
238 return ZeroVec;
239 else if (source->File == PROGRAM_ENV_PARAM)
240 return state->Parameters[reg];
241 else
242 return state->Current->Base.Parameters->ParameterValues[reg];
243 }
244 else {
245 switch (source->File) {
246 case PROGRAM_TEMPORARY:
247 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_TEMPS);
248 return state->Temporaries[source->Index];
249 case PROGRAM_INPUT:
250 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_INPUTS);
251 return state->Inputs[source->Index];
252 case PROGRAM_OUTPUT:
253 /* This is only needed for the PRINT instruction */
254 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_OUTPUTS);
255 return state->Outputs[source->Index];
256 case PROGRAM_LOCAL_PARAM:
257 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
258 return state->Current->Base.LocalParams[source->Index];
259 case PROGRAM_ENV_PARAM:
260 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_PARAMS);
261 return state->Parameters[source->Index];
262 case PROGRAM_STATE_VAR:
263 ASSERT(source->Index < state->Current->Base.Parameters->NumParameters);
264 return state->Current->Base.Parameters->ParameterValues[source->Index];
265 default:
266 _mesa_problem(NULL,
267 "Bad source register file in get_register_pointer");
268 return NULL;
269 }
270 }
271 return NULL;
272 }
273
274
275 /**
276 * Fetch a 4-element float vector from the given source register.
277 * Apply swizzling and negating as needed.
278 */
279 static INLINE void
280 fetch_vector4( const struct prog_src_register *source,
281 const struct gl_vertex_program_state *state,
282 GLfloat result[4] )
283 {
284 const GLfloat *src = get_register_pointer(source, state);
285
286 if (source->NegateBase) {
287 result[0] = -src[GET_SWZ(source->Swizzle, 0)];
288 result[1] = -src[GET_SWZ(source->Swizzle, 1)];
289 result[2] = -src[GET_SWZ(source->Swizzle, 2)];
290 result[3] = -src[GET_SWZ(source->Swizzle, 3)];
291 }
292 else {
293 result[0] = src[GET_SWZ(source->Swizzle, 0)];
294 result[1] = src[GET_SWZ(source->Swizzle, 1)];
295 result[2] = src[GET_SWZ(source->Swizzle, 2)];
296 result[3] = src[GET_SWZ(source->Swizzle, 3)];
297 }
298 }
299
300
301
302 /**
303 * As above, but only return result[0] element.
304 */
305 static INLINE void
306 fetch_vector1( const struct prog_src_register *source,
307 const struct gl_vertex_program_state *state,
308 GLfloat result[4] )
309 {
310 const GLfloat *src = get_register_pointer(source, state);
311
312 if (source->NegateBase) {
313 result[0] = -src[GET_SWZ(source->Swizzle, 0)];
314 }
315 else {
316 result[0] = src[GET_SWZ(source->Swizzle, 0)];
317 }
318 }
319
320
321 /**
322 * Store 4 floats into a register.
323 */
324 static void
325 store_vector4( const struct prog_dst_register *dest,
326 struct gl_vertex_program_state *state,
327 const GLfloat value[4] )
328 {
329 GLfloat *dst;
330 switch (dest->File) {
331 case PROGRAM_TEMPORARY:
332 dst = state->Temporaries[dest->Index];
333 break;
334 case PROGRAM_OUTPUT:
335 dst = state->Outputs[dest->Index];
336 break;
337 case PROGRAM_ENV_PARAM:
338 {
339 /* a slight hack */
340 GET_CURRENT_CONTEXT(ctx);
341 dst = ctx->VertexProgram.Parameters[dest->Index];
342 }
343 break;
344 default:
345 _mesa_problem(NULL, "Invalid register file in store_vector4(file=%d)",
346 dest->File);
347 return;
348 }
349
350 if (dest->WriteMask & WRITEMASK_X)
351 dst[0] = value[0];
352 if (dest->WriteMask & WRITEMASK_Y)
353 dst[1] = value[1];
354 if (dest->WriteMask & WRITEMASK_Z)
355 dst[2] = value[2];
356 if (dest->WriteMask & WRITEMASK_W)
357 dst[3] = value[3];
358 }
359
360
/**
 * Set x to positive or negative infinity.
 *
 * On IEEE/Win32 builds the float's bit pattern is written directly
 * (0x7F800000 / 0xFF800000).  On VMS +/-__MAXFLOAT stands in for
 * infinity; elsewhere +/-HUGE_VAL is used.
 *
 * x must be an lvalue of float type.  Arguments and expansions are
 * fully parenthesized so the macros behave as single expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bit pattern of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
376
377
378 /**
379 * Execute the given vertex program
380 */
381 void
382 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
383 {
384 struct gl_vertex_program_state *state = &ctx->VertexProgram;
385 const struct prog_instruction *inst;
386
387 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
388
389 /* If the program is position invariant, multiply the input position
390 * by the MVP matrix and store in the vertex position result register.
391 */
392 if (ctx->VertexProgram.Current->IsPositionInvariant) {
393 TRANSFORM_POINT( ctx->VertexProgram.Outputs[VERT_RESULT_HPOS],
394 ctx->_ModelProjectMatrix.m,
395 ctx->VertexProgram.Inputs[VERT_ATTRIB_POS]);
396
397 /* XXX: This could go elsewhere */
398 ctx->VertexProgram.Current->Base.OutputsWritten |= VERT_BIT_POS;
399 }
400
401 for (inst = program->Base.Instructions; ; inst++) {
402
403 if (ctx->VertexProgram.CallbackEnabled &&
404 ctx->VertexProgram.Callback) {
405 ctx->VertexProgram.CurrentPosition = inst->StringPos;
406 ctx->VertexProgram.Callback(program->Base.Target,
407 ctx->VertexProgram.CallbackData);
408 }
409
410 switch (inst->Opcode) {
411 case OPCODE_MOV:
412 {
413 GLfloat t[4];
414 fetch_vector4( &inst->SrcReg[0], state, t );
415 store_vector4( &inst->DstReg, state, t );
416 }
417 break;
418 case OPCODE_LIT:
419 {
420 const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
421 GLfloat t[4], lit[4];
422 fetch_vector4( &inst->SrcReg[0], state, t );
423 t[0] = MAX2(t[0], 0.0F);
424 t[1] = MAX2(t[1], 0.0F);
425 t[3] = CLAMP(t[3], -(128.0F - epsilon), (128.0F - epsilon));
426 lit[0] = 1.0;
427 lit[1] = t[0];
428 lit[2] = (t[0] > 0.0) ? (GLfloat) _mesa_pow(t[1], t[3]) : 0.0F;
429 lit[3] = 1.0;
430 store_vector4( &inst->DstReg, state, lit );
431 }
432 break;
433 case OPCODE_RCP:
434 {
435 GLfloat t[4];
436 fetch_vector1( &inst->SrcReg[0], state, t );
437 if (t[0] != 1.0F)
438 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
439 t[1] = t[2] = t[3] = t[0];
440 store_vector4( &inst->DstReg, state, t );
441 }
442 break;
443 case OPCODE_RSQ:
444 {
445 GLfloat t[4];
446 fetch_vector1( &inst->SrcReg[0], state, t );
447 t[0] = INV_SQRTF(FABSF(t[0]));
448 t[1] = t[2] = t[3] = t[0];
449 store_vector4( &inst->DstReg, state, t );
450 }
451 break;
452 case OPCODE_EXP:
453 {
454 GLfloat t[4], q[4], floor_t0;
455 fetch_vector1( &inst->SrcReg[0], state, t );
456 floor_t0 = FLOORF(t[0]);
457 if (floor_t0 > FLT_MAX_EXP) {
458 SET_POS_INFINITY(q[0]);
459 SET_POS_INFINITY(q[2]);
460 }
461 else if (floor_t0 < FLT_MIN_EXP) {
462 q[0] = 0.0F;
463 q[2] = 0.0F;
464 }
465 else {
466 #ifdef USE_IEEE
467 GLint ii = (GLint) floor_t0;
468 ii = (ii < 23) + 0x3f800000;
469 SET_FLOAT_BITS(q[0], ii);
470 q[0] = *((GLfloat *) (void *)&ii);
471 #else
472 q[0] = (GLfloat) pow(2.0, floor_t0);
473 #endif
474 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
475 }
476 q[1] = t[0] - floor_t0;
477 q[3] = 1.0F;
478 store_vector4( &inst->DstReg, state, q );
479 }
480 break;
481 case OPCODE_LOG:
482 {
483 GLfloat t[4], q[4], abs_t0;
484 fetch_vector1( &inst->SrcReg[0], state, t );
485 abs_t0 = FABSF(t[0]);
486 if (abs_t0 != 0.0F) {
487 /* Since we really can't handle infinite values on VMS
488 * like other OSes we'll use __MAXFLOAT to represent
489 * infinity. This may need some tweaking.
490 */
491 #ifdef VMS
492 if (abs_t0 == __MAXFLOAT)
493 #else
494 if (IS_INF_OR_NAN(abs_t0))
495 #endif
496 {
497 SET_POS_INFINITY(q[0]);
498 q[1] = 1.0F;
499 SET_POS_INFINITY(q[2]);
500 }
501 else {
502 int exponent;
503 GLfloat mantissa = FREXPF(t[0], &exponent);
504 q[0] = (GLfloat) (exponent - 1);
505 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
506 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
507 }
508 }
509 else {
510 SET_NEG_INFINITY(q[0]);
511 q[1] = 1.0F;
512 SET_NEG_INFINITY(q[2]);
513 }
514 q[3] = 1.0;
515 store_vector4( &inst->DstReg, state, q );
516 }
517 break;
518 case OPCODE_MUL:
519 {
520 GLfloat t[4], u[4], prod[4];
521 fetch_vector4( &inst->SrcReg[0], state, t );
522 fetch_vector4( &inst->SrcReg[1], state, u );
523 prod[0] = t[0] * u[0];
524 prod[1] = t[1] * u[1];
525 prod[2] = t[2] * u[2];
526 prod[3] = t[3] * u[3];
527 store_vector4( &inst->DstReg, state, prod );
528 }
529 break;
530 case OPCODE_ADD:
531 {
532 GLfloat t[4], u[4], sum[4];
533 fetch_vector4( &inst->SrcReg[0], state, t );
534 fetch_vector4( &inst->SrcReg[1], state, u );
535 sum[0] = t[0] + u[0];
536 sum[1] = t[1] + u[1];
537 sum[2] = t[2] + u[2];
538 sum[3] = t[3] + u[3];
539 store_vector4( &inst->DstReg, state, sum );
540 }
541 break;
542 case OPCODE_DP3:
543 {
544 GLfloat t[4], u[4], dot[4];
545 fetch_vector4( &inst->SrcReg[0], state, t );
546 fetch_vector4( &inst->SrcReg[1], state, u );
547 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
548 dot[1] = dot[2] = dot[3] = dot[0];
549 store_vector4( &inst->DstReg, state, dot );
550 }
551 break;
552 case OPCODE_DP4:
553 {
554 GLfloat t[4], u[4], dot[4];
555 fetch_vector4( &inst->SrcReg[0], state, t );
556 fetch_vector4( &inst->SrcReg[1], state, u );
557 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
558 dot[1] = dot[2] = dot[3] = dot[0];
559 store_vector4( &inst->DstReg, state, dot );
560 }
561 break;
562 case OPCODE_DST:
563 {
564 GLfloat t[4], u[4], dst[4];
565 fetch_vector4( &inst->SrcReg[0], state, t );
566 fetch_vector4( &inst->SrcReg[1], state, u );
567 dst[0] = 1.0F;
568 dst[1] = t[1] * u[1];
569 dst[2] = t[2];
570 dst[3] = u[3];
571 store_vector4( &inst->DstReg, state, dst );
572 }
573 break;
574 case OPCODE_MIN:
575 {
576 GLfloat t[4], u[4], min[4];
577 fetch_vector4( &inst->SrcReg[0], state, t );
578 fetch_vector4( &inst->SrcReg[1], state, u );
579 min[0] = (t[0] < u[0]) ? t[0] : u[0];
580 min[1] = (t[1] < u[1]) ? t[1] : u[1];
581 min[2] = (t[2] < u[2]) ? t[2] : u[2];
582 min[3] = (t[3] < u[3]) ? t[3] : u[3];
583 store_vector4( &inst->DstReg, state, min );
584 }
585 break;
586 case OPCODE_MAX:
587 {
588 GLfloat t[4], u[4], max[4];
589 fetch_vector4( &inst->SrcReg[0], state, t );
590 fetch_vector4( &inst->SrcReg[1], state, u );
591 max[0] = (t[0] > u[0]) ? t[0] : u[0];
592 max[1] = (t[1] > u[1]) ? t[1] : u[1];
593 max[2] = (t[2] > u[2]) ? t[2] : u[2];
594 max[3] = (t[3] > u[3]) ? t[3] : u[3];
595 store_vector4( &inst->DstReg, state, max );
596 }
597 break;
598 case OPCODE_SLT:
599 {
600 GLfloat t[4], u[4], slt[4];
601 fetch_vector4( &inst->SrcReg[0], state, t );
602 fetch_vector4( &inst->SrcReg[1], state, u );
603 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
604 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
605 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
606 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
607 store_vector4( &inst->DstReg, state, slt );
608 }
609 break;
610 case OPCODE_SGE:
611 {
612 GLfloat t[4], u[4], sge[4];
613 fetch_vector4( &inst->SrcReg[0], state, t );
614 fetch_vector4( &inst->SrcReg[1], state, u );
615 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
616 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
617 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
618 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
619 store_vector4( &inst->DstReg, state, sge );
620 }
621 break;
622 case OPCODE_MAD:
623 {
624 GLfloat t[4], u[4], v[4], sum[4];
625 fetch_vector4( &inst->SrcReg[0], state, t );
626 fetch_vector4( &inst->SrcReg[1], state, u );
627 fetch_vector4( &inst->SrcReg[2], state, v );
628 sum[0] = t[0] * u[0] + v[0];
629 sum[1] = t[1] * u[1] + v[1];
630 sum[2] = t[2] * u[2] + v[2];
631 sum[3] = t[3] * u[3] + v[3];
632 store_vector4( &inst->DstReg, state, sum );
633 }
634 break;
635 case OPCODE_ARL:
636 {
637 GLfloat t[4];
638 fetch_vector4( &inst->SrcReg[0], state, t );
639 state->AddressReg[0] = (GLint) FLOORF(t[0]);
640 }
641 break;
642 case OPCODE_DPH:
643 {
644 GLfloat t[4], u[4], dot[4];
645 fetch_vector4( &inst->SrcReg[0], state, t );
646 fetch_vector4( &inst->SrcReg[1], state, u );
647 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
648 dot[1] = dot[2] = dot[3] = dot[0];
649 store_vector4( &inst->DstReg, state, dot );
650 }
651 break;
652 case OPCODE_RCC:
653 {
654 GLfloat t[4], u;
655 fetch_vector1( &inst->SrcReg[0], state, t );
656 if (t[0] == 1.0F)
657 u = 1.0F;
658 else
659 u = 1.0F / t[0];
660 if (u > 0.0F) {
661 if (u > 1.884467e+019F) {
662 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
663 }
664 else if (u < 5.42101e-020F) {
665 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
666 }
667 }
668 else {
669 if (u < -1.884467e+019F) {
670 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
671 }
672 else if (u > -5.42101e-020F) {
673 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
674 }
675 }
676 t[0] = t[1] = t[2] = t[3] = u;
677 store_vector4( &inst->DstReg, state, t );
678 }
679 break;
680 case OPCODE_SUB: /* GL_NV_vertex_program1_1 */
681 {
682 GLfloat t[4], u[4], sum[4];
683 fetch_vector4( &inst->SrcReg[0], state, t );
684 fetch_vector4( &inst->SrcReg[1], state, u );
685 sum[0] = t[0] - u[0];
686 sum[1] = t[1] - u[1];
687 sum[2] = t[2] - u[2];
688 sum[3] = t[3] - u[3];
689 store_vector4( &inst->DstReg, state, sum );
690 }
691 break;
692 case OPCODE_ABS: /* GL_NV_vertex_program1_1 */
693 {
694 GLfloat t[4];
695 fetch_vector4( &inst->SrcReg[0], state, t );
696 if (t[0] < 0.0) t[0] = -t[0];
697 if (t[1] < 0.0) t[1] = -t[1];
698 if (t[2] < 0.0) t[2] = -t[2];
699 if (t[3] < 0.0) t[3] = -t[3];
700 store_vector4( &inst->DstReg, state, t );
701 }
702 break;
703 case OPCODE_FLR: /* GL_ARB_vertex_program */
704 {
705 GLfloat t[4];
706 fetch_vector4( &inst->SrcReg[0], state, t );
707 t[0] = FLOORF(t[0]);
708 t[1] = FLOORF(t[1]);
709 t[2] = FLOORF(t[2]);
710 t[3] = FLOORF(t[3]);
711 store_vector4( &inst->DstReg, state, t );
712 }
713 break;
714 case OPCODE_FRC: /* GL_ARB_vertex_program */
715 {
716 GLfloat t[4];
717 fetch_vector4( &inst->SrcReg[0], state, t );
718 t[0] = t[0] - FLOORF(t[0]);
719 t[1] = t[1] - FLOORF(t[1]);
720 t[2] = t[2] - FLOORF(t[2]);
721 t[3] = t[3] - FLOORF(t[3]);
722 store_vector4( &inst->DstReg, state, t );
723 }
724 break;
725 case OPCODE_EX2: /* GL_ARB_vertex_program */
726 {
727 GLfloat t[4];
728 fetch_vector1( &inst->SrcReg[0], state, t );
729 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
730 store_vector4( &inst->DstReg, state, t );
731 }
732 break;
733 case OPCODE_LG2: /* GL_ARB_vertex_program */
734 {
735 GLfloat t[4];
736 fetch_vector1( &inst->SrcReg[0], state, t );
737 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
738 store_vector4( &inst->DstReg, state, t );
739 }
740 break;
741 case OPCODE_POW: /* GL_ARB_vertex_program */
742 {
743 GLfloat t[4], u[4];
744 fetch_vector1( &inst->SrcReg[0], state, t );
745 fetch_vector1( &inst->SrcReg[1], state, u );
746 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
747 store_vector4( &inst->DstReg, state, t );
748 }
749 break;
750 case OPCODE_XPD: /* GL_ARB_vertex_program */
751 {
752 GLfloat t[4], u[4], cross[4];
753 fetch_vector4( &inst->SrcReg[0], state, t );
754 fetch_vector4( &inst->SrcReg[1], state, u );
755 cross[0] = t[1] * u[2] - t[2] * u[1];
756 cross[1] = t[2] * u[0] - t[0] * u[2];
757 cross[2] = t[0] * u[1] - t[1] * u[0];
758 store_vector4( &inst->DstReg, state, cross );
759 }
760 break;
761 case OPCODE_SWZ: /* GL_ARB_vertex_program */
762 {
763 const struct prog_src_register *source = &inst->SrcReg[0];
764 const GLfloat *src = get_register_pointer(source, state);
765 GLfloat result[4];
766 GLuint i;
767
768 /* do extended swizzling here */
769 for (i = 0; i < 4; i++) {
770 if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO)
771 result[i] = 0.0;
772 else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE)
773 result[i] = 1.0;
774 else
775 result[i] = src[GET_SWZ(source->Swizzle, i)];
776 if (source->NegateBase & (1 << i))
777 result[i] = -result[i];
778 }
779 store_vector4( &inst->DstReg, state, result );
780 }
781 break;
782 case OPCODE_PRINT:
783 if (inst->SrcReg[0].File) {
784 GLfloat t[4];
785 fetch_vector4( &inst->SrcReg[0], state, t );
786 _mesa_printf("%s%g, %g, %g, %g\n",
787 (char *) inst->Data, t[0], t[1], t[2], t[3]);
788 }
789 else {
790 _mesa_printf("%s\n", (char *) inst->Data);
791 }
792 break;
793 case OPCODE_END:
794 ctx->_CurrentProgram = 0;
795 return;
796 default:
797 /* bad instruction opcode */
798 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
799 ctx->_CurrentProgram = 0;
800 return;
801 } /* switch */
802 } /* for */
803
804 ctx->_CurrentProgram = 0;
805 }
806
807
808
809 /**
810 Thoughts on vertex program optimization:
811
812 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
813 assembly code. That will probably be a lot of work.
814
815 Another approach might be to replace the vp_instruction->Opcode field with
816 a pointer to a specialized C function which executes the instruction.
817 In particular we can write functions which skip swizzling, negating,
818 masking, relative addressing, etc. when they're not needed.
819
820 For example:
821
822 void simple_add( struct prog_instruction *inst )
823 {
824 GLfloat *sum = machine->Registers[inst->DstReg.Register];
825 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
826 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
827 sum[0] = a[0] + b[0];
828 sum[1] = a[1] + b[1];
829 sum[2] = a[2] + b[2];
830 sum[3] = a[3] + b[3];
831 }
832
833 */
834
835 /*
836
837 KW:
838
839 A first step would be to 'vectorize' the programs in the same way as
840 the normal transformation code in the tnl module. Thus each opcode
841 takes zero or more input vectors (registers) and produces one or more
842 output vectors.
843
844 These operations would initially be coded in C, with machine-specific
845 assembly following, as is currently the case for matrix
846 transformations in the math/ directory. The preprocessing scheme for
847 selecting simpler operations Brian describes above would also work
848 here.
849
850 This should give reasonable performance without excessive effort.
851
852 */