code tweaks, remove old comments
[mesa.git] / src / mesa / shader / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.2
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "program_instruction.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40
41
42 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
43
44
45 /**
46 * Load/initialize the vertex program registers which need to be set
47 * per-vertex.
48 */
49 void
50 _mesa_init_vp_per_vertex_registers(GLcontext *ctx, struct vp_machine *machine)
51 {
52 /* Input registers get initialized from the current vertex attribs */
53 MEMCPY(machine->Inputs, ctx->Current.Attrib,
54 MAX_VERTEX_PROGRAM_ATTRIBS * 4 * sizeof(GLfloat));
55
56 if (ctx->VertexProgram.Current->IsNVProgram) {
57 GLuint i;
58 /* Output/result regs are initialized to [0,0,0,1] */
59 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
60 ASSIGN_4V(machine->Outputs[i], 0.0F, 0.0F, 0.0F, 1.0F);
61 }
62 /* Temp regs are initialized to [0,0,0,0] */
63 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
64 ASSIGN_4V(machine->Temporaries[i], 0.0F, 0.0F, 0.0F, 0.0F);
65 }
66 ASSIGN_4V(machine->AddressReg, 0, 0, 0, 0);
67 }
68 }
69
70
71
72 /**
73 * Copy the 16 elements of a matrix into four consecutive program
74 * registers starting at 'pos'.
75 */
76 static void
77 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
78 {
79 GLuint i;
80 for (i = 0; i < 4; i++) {
81 registers[pos + i][0] = mat[0 + i];
82 registers[pos + i][1] = mat[4 + i];
83 registers[pos + i][2] = mat[8 + i];
84 registers[pos + i][3] = mat[12 + i];
85 }
86 }
87
88
89 /**
90 * As above, but transpose the matrix.
91 */
92 static void
93 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
94 const GLfloat mat[16])
95 {
96 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
97 }
98
99
100 /**
101 * Load program parameter registers with tracked matrices (if NV program)
102 * or GL state values (if ARB program).
103 * This needs to be done per glBegin/glEnd, not per-vertex.
104 */
105 void
106 _mesa_init_vp_per_primitive_registers(GLcontext *ctx)
107 {
108 if (ctx->VertexProgram.Current->IsNVProgram) {
109 GLuint i;
110
111 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) {
112 /* point 'mat' at source matrix */
113 GLmatrix *mat;
114 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
115 mat = ctx->ModelviewMatrixStack.Top;
116 }
117 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
118 mat = ctx->ProjectionMatrixStack.Top;
119 }
120 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
121 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
122 }
123 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
124 mat = ctx->ColorMatrixStack.Top;
125 }
126 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
127 /* XXX verify the combined matrix is up to date */
128 mat = &ctx->_ModelProjectMatrix;
129 }
130 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
131 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
132 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
133 ASSERT(n < MAX_PROGRAM_MATRICES);
134 mat = ctx->ProgramMatrixStack[n].Top;
135 }
136 else {
137 /* no matrix is tracked, but we leave the register values as-is */
138 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
139 continue;
140 }
141
142 /* load the matrix values into sequential registers */
143 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
144 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
145 }
146 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
147 _math_matrix_analyse(mat); /* update the inverse */
148 ASSERT(!_math_matrix_is_dirty(mat));
149 load_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
150 }
151 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
152 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->m);
153 }
154 else {
155 assert(ctx->VertexProgram.TrackMatrixTransform[i]
156 == GL_INVERSE_TRANSPOSE_NV);
157 _math_matrix_analyse(mat); /* update the inverse */
158 ASSERT(!_math_matrix_is_dirty(mat));
159 load_transpose_matrix(ctx->VertexProgram.Parameters, i*4, mat->inv);
160 }
161 }
162 }
163 else {
164 /* ARB vertex program */
165 if (ctx->VertexProgram.Current->Base.Parameters) {
166 /* Grab the state GL state and put into registers */
167 _mesa_load_state_parameters(ctx,
168 ctx->VertexProgram.Current->Base.Parameters);
169 }
170 }
171 }
172
173
174
175 /**
176 * For debugging. Dump the current vertex program machine registers.
177 */
178 void
179 _mesa_dump_vp_state( const struct gl_vertex_program_state *state,
180 const struct vp_machine *machine)
181 {
182 int i;
183 _mesa_printf("VertexIn:\n");
184 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_INPUTS; i++) {
185 _mesa_printf("%d: %f %f %f %f ", i,
186 machine->Inputs[i][0],
187 machine->Inputs[i][1],
188 machine->Inputs[i][2],
189 machine->Inputs[i][3]);
190 }
191 _mesa_printf("\n");
192
193 _mesa_printf("VertexOut:\n");
194 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_OUTPUTS; i++) {
195 _mesa_printf("%d: %f %f %f %f ", i,
196 machine->Outputs[i][0],
197 machine->Outputs[i][1],
198 machine->Outputs[i][2],
199 machine->Outputs[i][3]);
200 }
201 _mesa_printf("\n");
202
203 _mesa_printf("Registers:\n");
204 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_TEMPS; i++) {
205 _mesa_printf("%d: %f %f %f %f ", i,
206 machine->Temporaries[i][0],
207 machine->Temporaries[i][1],
208 machine->Temporaries[i][2],
209 machine->Temporaries[i][3]);
210 }
211 _mesa_printf("\n");
212
213 _mesa_printf("Parameters:\n");
214 for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS; i++) {
215 _mesa_printf("%d: %f %f %f %f ", i,
216 state->Parameters[i][0],
217 state->Parameters[i][1],
218 state->Parameters[i][2],
219 state->Parameters[i][3]);
220 }
221 _mesa_printf("\n");
222 }
223
224
225
226 /**
227 * Return a pointer to the 4-element float vector specified by the given
228 * source register.
229 */
230 static INLINE const GLfloat *
231 get_register_pointer( GLcontext *ctx,
232 const struct prog_src_register *source,
233 struct vp_machine *machine,
234 const struct gl_vertex_program *program )
235 {
236 if (source->RelAddr) {
237 const GLint reg = source->Index + machine->AddressReg[0];
238 ASSERT( (source->File == PROGRAM_ENV_PARAM) ||
239 (source->File == PROGRAM_STATE_VAR) );
240 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
241 return ZeroVec;
242 else if (source->File == PROGRAM_ENV_PARAM)
243 return ctx->VertexProgram.Parameters[reg];
244 else {
245 ASSERT(source->File == PROGRAM_LOCAL_PARAM);
246 return program->Base.Parameters->ParameterValues[reg];
247 }
248 }
249 else {
250 switch (source->File) {
251 case PROGRAM_TEMPORARY:
252 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_TEMPS);
253 return machine->Temporaries[source->Index];
254 case PROGRAM_INPUT:
255 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_INPUTS);
256 return machine->Inputs[source->Index];
257 case PROGRAM_OUTPUT:
258 /* This is only needed for the PRINT instruction */
259 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_OUTPUTS);
260 return machine->Outputs[source->Index];
261 case PROGRAM_LOCAL_PARAM:
262 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
263 return program->Base.LocalParams[source->Index];
264 case PROGRAM_ENV_PARAM:
265 ASSERT(source->Index < MAX_NV_VERTEX_PROGRAM_PARAMS);
266 return ctx->VertexProgram.Parameters[source->Index];
267 case PROGRAM_STATE_VAR:
268 ASSERT(source->Index < program->Base.Parameters->NumParameters);
269 return program->Base.Parameters->ParameterValues[source->Index];
270 default:
271 _mesa_problem(NULL,
272 "Bad source register file in get_register_pointer");
273 return NULL;
274 }
275 }
276 return NULL;
277 }
278
279
280 /**
281 * Fetch a 4-element float vector from the given source register.
282 * Apply swizzling and negating as needed.
283 */
284 static INLINE void
285 fetch_vector4( GLcontext *ctx,
286 const struct prog_src_register *source,
287 struct vp_machine *machine,
288 const struct gl_vertex_program *program,
289 GLfloat result[4] )
290 {
291 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
292 ASSERT(src);
293 result[0] = src[GET_SWZ(source->Swizzle, 0)];
294 result[1] = src[GET_SWZ(source->Swizzle, 1)];
295 result[2] = src[GET_SWZ(source->Swizzle, 2)];
296 result[3] = src[GET_SWZ(source->Swizzle, 3)];
297 if (source->NegateBase) {
298 result[0] = -result[0];
299 result[1] = -result[1];
300 result[2] = -result[2];
301 result[3] = -result[3];
302 }
303 }
304
305
306
307 /**
308 * As above, but only return result[0] element.
309 */
310 static INLINE void
311 fetch_vector1( GLcontext *ctx,
312 const struct prog_src_register *source,
313 struct vp_machine *machine,
314 const struct gl_vertex_program *program,
315 GLfloat result[4] )
316 {
317 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
318 ASSERT(src);
319 result[0] = src[GET_SWZ(source->Swizzle, 0)];
320 if (source->NegateBase) {
321 result[0] = -result[0];
322 }
323 }
324
325
326 /**
327 * Store 4 floats into a register.
328 */
329 static void
330 store_vector4( const struct prog_instruction *inst,
331 struct vp_machine *machine,
332 const GLfloat value[4] )
333 {
334 const struct prog_dst_register *dest = &(inst->DstReg);
335 GLfloat *dst;
336 switch (dest->File) {
337 case PROGRAM_OUTPUT:
338 dst = machine->Outputs[dest->Index];
339 break;
340 case PROGRAM_TEMPORARY:
341 dst = machine->Temporaries[dest->Index];
342 break;
343 case PROGRAM_ENV_PARAM:
344 /* Only for VP state programs */
345 {
346 /* a slight hack */
347 GET_CURRENT_CONTEXT(ctx);
348 dst = ctx->VertexProgram.Parameters[dest->Index];
349 }
350 break;
351 default:
352 _mesa_problem(NULL, "Invalid register file in store_vector4(file=%d)",
353 dest->File);
354 return;
355 }
356
357 if (dest->WriteMask & WRITEMASK_X)
358 dst[0] = value[0];
359 if (dest->WriteMask & WRITEMASK_Y)
360 dst[1] = value[1];
361 if (dest->WriteMask & WRITEMASK_Z)
362 dst[2] = value[2];
363 if (dest->WriteMask & WRITEMASK_W)
364 dst[3] = value[3];
365 }
366
367
/**
 * Set x to positive or negative infinity.
 *
 * With USE_IEEE (or on _WIN32) the IEEE-754 single precision bit patterns
 * are written directly.  On VMS __MAXFLOAT stands in for infinity.
 * Otherwise HUGE_VAL is used.
 *
 * Arguments and expansions are fully parenthesized so the macros behave
 * as ordinary expressions wherever they are used.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bits of float lvalue x with the integer value 'bits'. */
#define SET_FLOAT_BITS(x, bits) ( ((fi_type *) (void *) &(x))->i = (bits) )
383
384
385 /**
386 * Execute the given vertex program
387 */
388 void
389 _mesa_exec_vertex_program(GLcontext *ctx,
390 struct vp_machine *machine,
391 const struct gl_vertex_program *program)
392 {
393 const struct prog_instruction *inst;
394
395 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
396
397 /* If the program is position invariant, multiply the input position
398 * by the MVP matrix and store in the vertex position result register.
399 */
400 if (ctx->VertexProgram.Current->IsPositionInvariant) {
401 TRANSFORM_POINT( machine->Outputs[VERT_RESULT_HPOS],
402 ctx->_ModelProjectMatrix.m,
403 machine->Inputs[VERT_ATTRIB_POS]);
404
405 /* XXX: This could go elsewhere */
406 ctx->VertexProgram.Current->Base.OutputsWritten |= VERT_BIT_POS;
407 }
408
409 for (inst = program->Base.Instructions; ; inst++) {
410
411 if (ctx->VertexProgram.CallbackEnabled &&
412 ctx->VertexProgram.Callback) {
413 ctx->VertexProgram.CurrentPosition = inst->StringPos;
414 ctx->VertexProgram.Callback(program->Base.Target,
415 ctx->VertexProgram.CallbackData);
416 }
417
418 switch (inst->Opcode) {
419 case OPCODE_MOV:
420 {
421 GLfloat t[4];
422 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
423 store_vector4( inst, machine, t );
424 }
425 break;
426 case OPCODE_LIT:
427 {
428 const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
429 GLfloat t[4], lit[4];
430 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
431 t[0] = MAX2(t[0], 0.0F);
432 t[1] = MAX2(t[1], 0.0F);
433 t[3] = CLAMP(t[3], -(128.0F - epsilon), (128.0F - epsilon));
434 lit[0] = 1.0;
435 lit[1] = t[0];
436 lit[2] = (t[0] > 0.0) ? (GLfloat) _mesa_pow(t[1], t[3]) : 0.0F;
437 lit[3] = 1.0;
438 store_vector4( inst, machine, lit );
439 }
440 break;
441 case OPCODE_RCP:
442 {
443 GLfloat t[4];
444 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
445 if (t[0] != 1.0F)
446 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
447 t[1] = t[2] = t[3] = t[0];
448 store_vector4( inst, machine, t );
449 }
450 break;
451 case OPCODE_RSQ:
452 {
453 GLfloat t[4];
454 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
455 t[0] = INV_SQRTF(FABSF(t[0]));
456 t[1] = t[2] = t[3] = t[0];
457 store_vector4( inst, machine, t );
458 }
459 break;
460 case OPCODE_EXP:
461 {
462 GLfloat t[4], q[4], floor_t0;
463 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
464 floor_t0 = FLOORF(t[0]);
465 if (floor_t0 > FLT_MAX_EXP) {
466 SET_POS_INFINITY(q[0]);
467 SET_POS_INFINITY(q[2]);
468 }
469 else if (floor_t0 < FLT_MIN_EXP) {
470 q[0] = 0.0F;
471 q[2] = 0.0F;
472 }
473 else {
474 #ifdef USE_IEEE
475 GLint ii = (GLint) floor_t0;
476 ii = (ii < 23) + 0x3f800000;
477 SET_FLOAT_BITS(q[0], ii);
478 q[0] = *((GLfloat *) (void *)&ii);
479 #else
480 q[0] = (GLfloat) pow(2.0, floor_t0);
481 #endif
482 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
483 }
484 q[1] = t[0] - floor_t0;
485 q[3] = 1.0F;
486 store_vector4( inst, machine, q );
487 }
488 break;
489 case OPCODE_LOG:
490 {
491 GLfloat t[4], q[4], abs_t0;
492 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
493 abs_t0 = FABSF(t[0]);
494 if (abs_t0 != 0.0F) {
495 /* Since we really can't handle infinite values on VMS
496 * like other OSes we'll use __MAXFLOAT to represent
497 * infinity. This may need some tweaking.
498 */
499 #ifdef VMS
500 if (abs_t0 == __MAXFLOAT)
501 #else
502 if (IS_INF_OR_NAN(abs_t0))
503 #endif
504 {
505 SET_POS_INFINITY(q[0]);
506 q[1] = 1.0F;
507 SET_POS_INFINITY(q[2]);
508 }
509 else {
510 int exponent;
511 GLfloat mantissa = FREXPF(t[0], &exponent);
512 q[0] = (GLfloat) (exponent - 1);
513 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
514 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
515 }
516 }
517 else {
518 SET_NEG_INFINITY(q[0]);
519 q[1] = 1.0F;
520 SET_NEG_INFINITY(q[2]);
521 }
522 q[3] = 1.0;
523 store_vector4( inst, machine, q );
524 }
525 break;
526 case OPCODE_MUL:
527 {
528 GLfloat t[4], u[4], prod[4];
529 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
530 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
531 prod[0] = t[0] * u[0];
532 prod[1] = t[1] * u[1];
533 prod[2] = t[2] * u[2];
534 prod[3] = t[3] * u[3];
535 store_vector4( inst, machine, prod );
536 }
537 break;
538 case OPCODE_ADD:
539 {
540 GLfloat t[4], u[4], sum[4];
541 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
542 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
543 sum[0] = t[0] + u[0];
544 sum[1] = t[1] + u[1];
545 sum[2] = t[2] + u[2];
546 sum[3] = t[3] + u[3];
547 store_vector4( inst, machine, sum );
548 }
549 break;
550 case OPCODE_DP3:
551 {
552 GLfloat t[4], u[4], dot[4];
553 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
554 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
555 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
556 dot[1] = dot[2] = dot[3] = dot[0];
557 store_vector4( inst, machine, dot );
558 }
559 break;
560 case OPCODE_DP4:
561 {
562 GLfloat t[4], u[4], dot[4];
563 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
564 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
565 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
566 dot[1] = dot[2] = dot[3] = dot[0];
567 store_vector4( inst, machine, dot );
568 }
569 break;
570 case OPCODE_DST:
571 {
572 GLfloat t[4], u[4], dst[4];
573 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
574 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
575 dst[0] = 1.0F;
576 dst[1] = t[1] * u[1];
577 dst[2] = t[2];
578 dst[3] = u[3];
579 store_vector4( inst, machine, dst );
580 }
581 break;
582 case OPCODE_MIN:
583 {
584 GLfloat t[4], u[4], min[4];
585 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
586 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
587 min[0] = (t[0] < u[0]) ? t[0] : u[0];
588 min[1] = (t[1] < u[1]) ? t[1] : u[1];
589 min[2] = (t[2] < u[2]) ? t[2] : u[2];
590 min[3] = (t[3] < u[3]) ? t[3] : u[3];
591 store_vector4( inst, machine, min );
592 }
593 break;
594 case OPCODE_MAX:
595 {
596 GLfloat t[4], u[4], max[4];
597 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
598 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
599 max[0] = (t[0] > u[0]) ? t[0] : u[0];
600 max[1] = (t[1] > u[1]) ? t[1] : u[1];
601 max[2] = (t[2] > u[2]) ? t[2] : u[2];
602 max[3] = (t[3] > u[3]) ? t[3] : u[3];
603 store_vector4( inst, machine, max );
604 }
605 break;
606 case OPCODE_SLT:
607 {
608 GLfloat t[4], u[4], slt[4];
609 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
610 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
611 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
612 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
613 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
614 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
615 store_vector4( inst, machine, slt );
616 }
617 break;
618 case OPCODE_SGE:
619 {
620 GLfloat t[4], u[4], sge[4];
621 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
622 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
623 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
624 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
625 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
626 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
627 store_vector4( inst, machine, sge );
628 }
629 break;
630 case OPCODE_MAD:
631 {
632 GLfloat t[4], u[4], v[4], sum[4];
633 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
634 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
635 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, v );
636 sum[0] = t[0] * u[0] + v[0];
637 sum[1] = t[1] * u[1] + v[1];
638 sum[2] = t[2] * u[2] + v[2];
639 sum[3] = t[3] * u[3] + v[3];
640 store_vector4( inst, machine, sum );
641 }
642 break;
643 case OPCODE_ARL:
644 {
645 GLfloat t[4];
646 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
647 machine->AddressReg[0] = (GLint) FLOORF(t[0]);
648 }
649 break;
650 case OPCODE_DPH:
651 {
652 GLfloat t[4], u[4], dot[4];
653 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
654 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
655 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
656 dot[1] = dot[2] = dot[3] = dot[0];
657 store_vector4( inst, machine, dot );
658 }
659 break;
660 case OPCODE_RCC:
661 {
662 GLfloat t[4], u;
663 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
664 if (t[0] == 1.0F)
665 u = 1.0F;
666 else
667 u = 1.0F / t[0];
668 if (u > 0.0F) {
669 if (u > 1.884467e+019F) {
670 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
671 }
672 else if (u < 5.42101e-020F) {
673 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
674 }
675 }
676 else {
677 if (u < -1.884467e+019F) {
678 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
679 }
680 else if (u > -5.42101e-020F) {
681 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
682 }
683 }
684 t[0] = t[1] = t[2] = t[3] = u;
685 store_vector4( inst, machine, t );
686 }
687 break;
688 case OPCODE_SUB: /* GL_NV_vertex_program1_1 */
689 {
690 GLfloat t[4], u[4], sum[4];
691 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
692 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
693 sum[0] = t[0] - u[0];
694 sum[1] = t[1] - u[1];
695 sum[2] = t[2] - u[2];
696 sum[3] = t[3] - u[3];
697 store_vector4( inst, machine, sum );
698 }
699 break;
700 case OPCODE_ABS: /* GL_NV_vertex_program1_1 */
701 {
702 GLfloat t[4];
703 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
704 if (t[0] < 0.0) t[0] = -t[0];
705 if (t[1] < 0.0) t[1] = -t[1];
706 if (t[2] < 0.0) t[2] = -t[2];
707 if (t[3] < 0.0) t[3] = -t[3];
708 store_vector4( inst, machine, t );
709 }
710 break;
711 case OPCODE_FLR: /* GL_ARB_vertex_program */
712 {
713 GLfloat t[4];
714 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
715 t[0] = FLOORF(t[0]);
716 t[1] = FLOORF(t[1]);
717 t[2] = FLOORF(t[2]);
718 t[3] = FLOORF(t[3]);
719 store_vector4( inst, machine, t );
720 }
721 break;
722 case OPCODE_FRC: /* GL_ARB_vertex_program */
723 {
724 GLfloat t[4];
725 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
726 t[0] = t[0] - FLOORF(t[0]);
727 t[1] = t[1] - FLOORF(t[1]);
728 t[2] = t[2] - FLOORF(t[2]);
729 t[3] = t[3] - FLOORF(t[3]);
730 store_vector4( inst, machine, t );
731 }
732 break;
733 case OPCODE_EX2: /* GL_ARB_vertex_program */
734 {
735 GLfloat t[4];
736 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
737 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
738 store_vector4( inst, machine, t );
739 }
740 break;
741 case OPCODE_LG2: /* GL_ARB_vertex_program */
742 {
743 GLfloat t[4];
744 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
745 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
746 store_vector4( inst, machine, t );
747 }
748 break;
749 case OPCODE_POW: /* GL_ARB_vertex_program */
750 {
751 GLfloat t[4], u[4];
752 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, t );
753 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, u );
754 t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
755 store_vector4( inst, machine, t );
756 }
757 break;
758 case OPCODE_XPD: /* GL_ARB_vertex_program */
759 {
760 GLfloat t[4], u[4], cross[4];
761 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
762 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, u );
763 cross[0] = t[1] * u[2] - t[2] * u[1];
764 cross[1] = t[2] * u[0] - t[0] * u[2];
765 cross[2] = t[0] * u[1] - t[1] * u[0];
766 store_vector4( inst, machine, cross );
767 }
768 break;
769 case OPCODE_SWZ: /* GL_ARB_vertex_program */
770 {
771 const struct prog_src_register *source = &inst->SrcReg[0];
772 const GLfloat *src = get_register_pointer(ctx, source,
773 machine, program);
774 GLfloat result[4];
775 GLuint i;
776
777 /* do extended swizzling here */
778 for (i = 0; i < 4; i++) {
779 const GLuint swz = GET_SWZ(source->Swizzle, i);
780 if (swz == SWIZZLE_ZERO)
781 result[i] = 0.0;
782 else if (swz == SWIZZLE_ONE)
783 result[i] = 1.0;
784 else {
785 ASSERT(swz >= 0);
786 ASSERT(swz <= 3);
787 result[i] = src[swz];
788 }
789 if (source->NegateBase & (1 << i))
790 result[i] = -result[i];
791 }
792 store_vector4( inst, machine, result );
793 }
794 break;
795 case OPCODE_PRINT:
796 if (inst->SrcReg[0].File) {
797 GLfloat t[4];
798 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, t );
799 _mesa_printf("%s%g, %g, %g, %g\n",
800 (char *) inst->Data, t[0], t[1], t[2], t[3]);
801 }
802 else {
803 _mesa_printf("%s\n", (char *) inst->Data);
804 }
805 break;
806 case OPCODE_END:
807 ctx->_CurrentProgram = 0;
808 return;
809 default:
810 /* bad instruction opcode */
811 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
812 ctx->_CurrentProgram = 0;
813 return;
814 } /* switch */
815 } /* for */
816
817 ctx->_CurrentProgram = 0;
818 }
819
820
821 /**
822 * Execute a vertex state program.
823 * \sa _mesa_ExecuteProgramNV
824 */
825 void
826 _mesa_exec_vertex_state_program(GLcontext *ctx,
827 struct gl_vertex_program *vprog,
828 const GLfloat *params)
829 {
830 struct vp_machine machine;
831 _mesa_init_vp_per_vertex_registers(ctx, &machine);
832 _mesa_init_vp_per_primitive_registers(ctx);
833 COPY_4V(machine.Inputs[VERT_ATTRIB_POS], params);
834 _mesa_exec_vertex_program(ctx, &machine, vprog);
835 }