06f54ca909b5410fd829dee8c28a81650958a285
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "math/m_matrix.h"
39
40
41 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
42
43
44 /**
45 * Load/initialize the vertex program registers.
46 * This needs to be done per vertex.
47 */
48 void
49 _mesa_init_vp_registers(GLcontext *ctx)
50 {
51 struct vp_machine *machine = &(ctx->VertexProgram.Machine);
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(machine->Registers[VP_INPUT_REG_START],
56 ctx->Current.Attrib,
57 16 * 4 * sizeof(GLfloat));
58
59 /* Output and temp regs are initialized to [0,0,0,1] */
60 for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
61 machine->Registers[i][0] = 0.0F;
62 machine->Registers[i][1] = 0.0F;
63 machine->Registers[i][2] = 0.0F;
64 machine->Registers[i][3] = 1.0F;
65 }
66 for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
67 machine->Registers[i][0] = 0.0F;
68 machine->Registers[i][1] = 0.0F;
69 machine->Registers[i][2] = 0.0F;
70 machine->Registers[i][3] = 1.0F;
71 }
72
73 /* The program regs aren't touched */
74 }
75
76
77
78 /**
79 * Copy the 16 elements of a matrix into four consecutive program
80 * registers starting at 'pos'.
81 */
82 static void
83 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
84 {
85 GLuint i;
86 pos += VP_PROG_REG_START;
87 for (i = 0; i < 4; i++) {
88 registers[pos + i][0] = mat[0 + i];
89 registers[pos + i][1] = mat[4 + i];
90 registers[pos + i][2] = mat[8 + i];
91 registers[pos + i][3] = mat[12 + i];
92 }
93 }
94
95
96 /**
97 * As above, but transpose the matrix.
98 */
99 static void
100 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
101 const GLfloat mat[16])
102 {
103 pos += VP_PROG_REG_START;
104 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
105 }
106
107
108 /**
109 * Load all currently tracked matrices into the program registers.
110 * This needs to be done per glBegin/glEnd.
111 */
112 void
113 _mesa_init_tracked_matrices(GLcontext *ctx)
114 {
115 GLuint i;
116
117 for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
118 /* point 'mat' at source matrix */
119 GLmatrix *mat;
120 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
121 mat = ctx->ModelviewMatrixStack.Top;
122 }
123 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
124 mat = ctx->ProjectionMatrixStack.Top;
125 }
126 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
127 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
128 }
129 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
130 mat = ctx->ColorMatrixStack.Top;
131 }
132 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
133 /* XXX verify the combined matrix is up to date */
134 mat = &ctx->_ModelProjectMatrix;
135 }
136 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
137 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
138 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
139 ASSERT(n < MAX_PROGRAM_MATRICES);
140 mat = ctx->ProgramMatrixStack[n].Top;
141 }
142 else {
143 /* no matrix is tracked, but we leave the register values as-is */
144 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
145 continue;
146 }
147
148 /* load the matrix */
149 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
150 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
151 }
152 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
153 _math_matrix_analyse(mat); /* update the inverse */
154 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
155 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
156 }
157 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
158 load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
159 }
160 else {
161 assert(ctx->VertexProgram.TrackMatrixTransform[i]
162 == GL_INVERSE_TRANSPOSE_NV);
163 _math_matrix_analyse(mat); /* update the inverse */
164 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
165 load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
166 i*4, mat->inv);
167 }
168 }
169 }
170
171
172
173 /**
174 * For debugging. Dump the current vertex program machine registers.
175 */
176 void
177 _mesa_dump_vp_machine( const struct vp_machine *machine )
178 {
179 int i;
180 _mesa_printf("VertexIn:\n");
181 for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
182 _mesa_printf("%d: %f %f %f %f ", i,
183 machine->Registers[i + VP_INPUT_REG_START][0],
184 machine->Registers[i + VP_INPUT_REG_START][1],
185 machine->Registers[i + VP_INPUT_REG_START][2],
186 machine->Registers[i + VP_INPUT_REG_START][3]);
187 }
188 _mesa_printf("\n");
189
190 _mesa_printf("VertexOut:\n");
191 for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
192 _mesa_printf("%d: %f %f %f %f ", i,
193 machine->Registers[i + VP_OUTPUT_REG_START][0],
194 machine->Registers[i + VP_OUTPUT_REG_START][1],
195 machine->Registers[i + VP_OUTPUT_REG_START][2],
196 machine->Registers[i + VP_OUTPUT_REG_START][3]);
197 }
198 _mesa_printf("\n");
199
200 _mesa_printf("Registers:\n");
201 for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
202 _mesa_printf("%d: %f %f %f %f ", i,
203 machine->Registers[i + VP_TEMP_REG_START][0],
204 machine->Registers[i + VP_TEMP_REG_START][1],
205 machine->Registers[i + VP_TEMP_REG_START][2],
206 machine->Registers[i + VP_TEMP_REG_START][3]);
207 }
208 _mesa_printf("\n");
209
210 _mesa_printf("Parameters:\n");
211 for (i = 0; i < VP_NUM_PROG_REGS; i++) {
212 _mesa_printf("%d: %f %f %f %f ", i,
213 machine->Registers[i + VP_PROG_REG_START][0],
214 machine->Registers[i + VP_PROG_REG_START][1],
215 machine->Registers[i + VP_PROG_REG_START][2],
216 machine->Registers[i + VP_PROG_REG_START][3]);
217 }
218 _mesa_printf("\n");
219 }
220
221
222 /**
223 * Fetch a 4-element float vector from the given source register.
224 * Apply swizzling and negating as needed.
225 */
226 static void
227 fetch_vector4( const struct vp_src_register *source,
228 const struct vp_machine *machine,
229 GLfloat result[4] )
230 {
231 const GLfloat *src;
232
233 if (source->RelAddr) {
234 const GLint reg = source->Register + machine->AddressReg;
235 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
236 src = zeroVec;
237 else
238 src = machine->Registers[VP_PROG_REG_START + reg];
239 }
240 else {
241 src = machine->Registers[source->Register];
242 }
243
244 if (source->Negate) {
245 result[0] = -src[source->Swizzle[0]];
246 result[1] = -src[source->Swizzle[1]];
247 result[2] = -src[source->Swizzle[2]];
248 result[3] = -src[source->Swizzle[3]];
249 }
250 else {
251 result[0] = src[source->Swizzle[0]];
252 result[1] = src[source->Swizzle[1]];
253 result[2] = src[source->Swizzle[2]];
254 result[3] = src[source->Swizzle[3]];
255 }
256 }
257
258
259 /**
260 * As above, but only return result[0] element.
261 */
262 static void
263 fetch_vector1( const struct vp_src_register *source,
264 const struct vp_machine *machine,
265 GLfloat result[4] )
266 {
267 const GLfloat *src;
268
269 if (source->RelAddr) {
270 const GLint reg = source->Register + machine->AddressReg;
271 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
272 src = zeroVec;
273 else
274 src = machine->Registers[VP_PROG_REG_START + reg];
275 }
276 else {
277 src = machine->Registers[source->Register];
278 }
279
280 if (source->Negate) {
281 result[0] = -src[source->Swizzle[0]];
282 }
283 else {
284 result[0] = src[source->Swizzle[0]];
285 }
286 }
287
288
289 /**
290 * Store 4 floats into a register.
291 */
292 static void
293 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
294 const GLfloat value[4] )
295 {
296 GLfloat *dst = machine->Registers[dest->Register];
297
298 if (dest->WriteMask[0])
299 dst[0] = value[0];
300 if (dest->WriteMask[1])
301 dst[1] = value[1];
302 if (dest->WriteMask[2])
303 dst[2] = value[2];
304 if (dest->WriteMask[3])
305 dst[3] = value[3];
306 }
307
308
/**
 * Set x to positive or negative infinity.
 * x must be a GLfloat lvalue.  On VMS the largest finite float stands in
 * for infinity.  Arguments and expansions are fully parenthesized so the
 * macros behave as single expressions for any lvalue argument.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bit pattern of the float lvalue x with the integer bits. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
324
325
326 /**
327 * Execute the given vertex program
328 */
329 void
330 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
331 {
332 struct vp_machine *machine = &ctx->VertexProgram.Machine;
333 const struct vp_instruction *inst;
334
335 ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */
336
337 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
338
339 if (ctx->VertexProgram.CallbackEnabled &&
340 ctx->VertexProgram.Callback) {
341 ctx->VertexProgram.CurrentPosition = inst->StringPos;
342 ctx->VertexProgram.Callback(program->Base.Target,
343 ctx->VertexProgram.CallbackData);
344 }
345
346 switch (inst->Opcode) {
347 case VP_OPCODE_MOV:
348 {
349 GLfloat t[4];
350 fetch_vector4( &inst->SrcReg[0], machine, t );
351 store_vector4( &inst->DstReg, machine, t );
352 }
353 break;
354 case VP_OPCODE_LIT:
355 {
356 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
357 GLfloat t[4], lit[4];
358 fetch_vector4( &inst->SrcReg[0], machine, t );
359 if (t[3] < -(128.0F - epsilon))
360 t[3] = - (128.0F - epsilon);
361 else if (t[3] > 128.0F - epsilon)
362 t[3] = 128.0F - epsilon;
363 if (t[0] < 0.0)
364 t[0] = 0.0;
365 if (t[1] < 0.0)
366 t[1] = 0.0;
367 lit[0] = 1.0;
368 lit[1] = t[0];
369 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
370 lit[3] = 1.0;
371 store_vector4( &inst->DstReg, machine, lit );
372 }
373 break;
374 case VP_OPCODE_RCP:
375 {
376 GLfloat t[4];
377 fetch_vector1( &inst->SrcReg[0], machine, t );
378 if (t[0] != 1.0F)
379 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
380 t[1] = t[2] = t[3] = t[0];
381 store_vector4( &inst->DstReg, machine, t );
382 }
383 break;
384 case VP_OPCODE_RSQ:
385 {
386 GLfloat t[4];
387 fetch_vector1( &inst->SrcReg[0], machine, t );
388 t[0] = INV_SQRTF(FABSF(t[0]));
389 t[1] = t[2] = t[3] = t[0];
390 store_vector4( &inst->DstReg, machine, t );
391 }
392 break;
393 case VP_OPCODE_EXP:
394 {
395 GLfloat t[4], q[4], floor_t0;
396 fetch_vector1( &inst->SrcReg[0], machine, t );
397 floor_t0 = (float) floor(t[0]);
398 if (floor_t0 > FLT_MAX_EXP) {
399 SET_POS_INFINITY(q[0]);
400 SET_POS_INFINITY(q[2]);
401 }
402 else if (floor_t0 < FLT_MIN_EXP) {
403 q[0] = 0.0F;
404 q[2] = 0.0F;
405 }
406 else {
407 #ifdef USE_IEEE
408 GLint ii = (GLint) floor_t0;
409 ii = (ii < 23) + 0x3f800000;
410 SET_FLOAT_BITS(q[0], ii);
411 q[0] = *((GLfloat *) &ii);
412 #else
413 q[0] = (GLfloat) pow(2.0, floor_t0);
414 #endif
415 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
416 }
417 q[1] = t[0] - floor_t0;
418 q[3] = 1.0F;
419 store_vector4( &inst->DstReg, machine, q );
420 }
421 break;
422 case VP_OPCODE_LOG:
423 {
424 GLfloat t[4], q[4], abs_t0;
425 fetch_vector1( &inst->SrcReg[0], machine, t );
426 abs_t0 = (GLfloat) fabs(t[0]);
427 if (abs_t0 != 0.0F) {
428 /* Since we really can't handle infinite values on VMS
429 * like other OSes we'll use __MAXFLOAT to represent
430 * infinity. This may need some tweaking.
431 */
432 #ifdef VMS
433 if (abs_t0 == __MAXFLOAT)
434 #else
435 if (IS_INF_OR_NAN(abs_t0))
436 #endif
437 {
438 SET_POS_INFINITY(q[0]);
439 q[1] = 1.0F;
440 SET_POS_INFINITY(q[2]);
441 }
442 else {
443 int exponent;
444 double mantissa = frexp(t[0], &exponent);
445 q[0] = (GLfloat) (exponent - 1);
446 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
447 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
448 }
449 }
450 else {
451 SET_NEG_INFINITY(q[0]);
452 q[1] = 1.0F;
453 SET_NEG_INFINITY(q[2]);
454 }
455 q[3] = 1.0;
456 store_vector4( &inst->DstReg, machine, q );
457 }
458 break;
459 case VP_OPCODE_MUL:
460 {
461 GLfloat t[4], u[4], prod[4];
462 fetch_vector4( &inst->SrcReg[0], machine, t );
463 fetch_vector4( &inst->SrcReg[1], machine, u );
464 prod[0] = t[0] * u[0];
465 prod[1] = t[1] * u[1];
466 prod[2] = t[2] * u[2];
467 prod[3] = t[3] * u[3];
468 store_vector4( &inst->DstReg, machine, prod );
469 }
470 break;
471 case VP_OPCODE_ADD:
472 {
473 GLfloat t[4], u[4], sum[4];
474 fetch_vector4( &inst->SrcReg[0], machine, t );
475 fetch_vector4( &inst->SrcReg[1], machine, u );
476 sum[0] = t[0] + u[0];
477 sum[1] = t[1] + u[1];
478 sum[2] = t[2] + u[2];
479 sum[3] = t[3] + u[3];
480 store_vector4( &inst->DstReg, machine, sum );
481 }
482 break;
483 case VP_OPCODE_DP3:
484 {
485 GLfloat t[4], u[4], dot[4];
486 fetch_vector4( &inst->SrcReg[0], machine, t );
487 fetch_vector4( &inst->SrcReg[1], machine, u );
488 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
489 dot[1] = dot[2] = dot[3] = dot[0];
490 store_vector4( &inst->DstReg, machine, dot );
491 }
492 break;
493 case VP_OPCODE_DP4:
494 {
495 GLfloat t[4], u[4], dot[4];
496 fetch_vector4( &inst->SrcReg[0], machine, t );
497 fetch_vector4( &inst->SrcReg[1], machine, u );
498 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
499 dot[1] = dot[2] = dot[3] = dot[0];
500 store_vector4( &inst->DstReg, machine, dot );
501 }
502 break;
503 case VP_OPCODE_DST:
504 {
505 GLfloat t[4], u[4], dst[4];
506 fetch_vector4( &inst->SrcReg[0], machine, t );
507 fetch_vector4( &inst->SrcReg[1], machine, u );
508 dst[0] = 1.0F;
509 dst[1] = t[1] * u[1];
510 dst[2] = t[2];
511 dst[3] = u[3];
512 store_vector4( &inst->DstReg, machine, dst );
513 }
514 break;
515 case VP_OPCODE_MIN:
516 {
517 GLfloat t[4], u[4], min[4];
518 fetch_vector4( &inst->SrcReg[0], machine, t );
519 fetch_vector4( &inst->SrcReg[1], machine, u );
520 min[0] = (t[0] < u[0]) ? t[0] : u[0];
521 min[1] = (t[1] < u[1]) ? t[1] : u[1];
522 min[2] = (t[2] < u[2]) ? t[2] : u[2];
523 min[3] = (t[3] < u[3]) ? t[3] : u[3];
524 store_vector4( &inst->DstReg, machine, min );
525 }
526 break;
527 case VP_OPCODE_MAX:
528 {
529 GLfloat t[4], u[4], max[4];
530 fetch_vector4( &inst->SrcReg[0], machine, t );
531 fetch_vector4( &inst->SrcReg[1], machine, u );
532 max[0] = (t[0] > u[0]) ? t[0] : u[0];
533 max[1] = (t[1] > u[1]) ? t[1] : u[1];
534 max[2] = (t[2] > u[2]) ? t[2] : u[2];
535 max[3] = (t[3] > u[3]) ? t[3] : u[3];
536 store_vector4( &inst->DstReg, machine, max );
537 }
538 break;
539 case VP_OPCODE_SLT:
540 {
541 GLfloat t[4], u[4], slt[4];
542 fetch_vector4( &inst->SrcReg[0], machine, t );
543 fetch_vector4( &inst->SrcReg[1], machine, u );
544 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
545 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
546 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
547 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
548 store_vector4( &inst->DstReg, machine, slt );
549 }
550 break;
551 case VP_OPCODE_SGE:
552 {
553 GLfloat t[4], u[4], sge[4];
554 fetch_vector4( &inst->SrcReg[0], machine, t );
555 fetch_vector4( &inst->SrcReg[1], machine, u );
556 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
557 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
558 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
559 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
560 store_vector4( &inst->DstReg, machine, sge );
561 }
562 break;
563 case VP_OPCODE_MAD:
564 {
565 GLfloat t[4], u[4], v[4], sum[4];
566 fetch_vector4( &inst->SrcReg[0], machine, t );
567 fetch_vector4( &inst->SrcReg[1], machine, u );
568 fetch_vector4( &inst->SrcReg[2], machine, v );
569 sum[0] = t[0] * u[0] + v[0];
570 sum[1] = t[1] * u[1] + v[1];
571 sum[2] = t[2] * u[2] + v[2];
572 sum[3] = t[3] * u[3] + v[3];
573 store_vector4( &inst->DstReg, machine, sum );
574 }
575 break;
576 case VP_OPCODE_ARL:
577 {
578 GLfloat t[4];
579 fetch_vector4( &inst->SrcReg[0], machine, t );
580 machine->AddressReg = (GLint) floor(t[0]);
581 }
582 break;
583 case VP_OPCODE_DPH:
584 {
585 GLfloat t[4], u[4], dot[4];
586 fetch_vector4( &inst->SrcReg[0], machine, t );
587 fetch_vector4( &inst->SrcReg[1], machine, u );
588 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
589 dot[1] = dot[2] = dot[3] = dot[0];
590 store_vector4( &inst->DstReg, machine, dot );
591 }
592 break;
593 case VP_OPCODE_RCC:
594 {
595 GLfloat t[4], u;
596 fetch_vector1( &inst->SrcReg[0], machine, t );
597 if (t[0] == 1.0F)
598 u = 1.0F;
599 else
600 u = 1.0F / t[0];
601 if (u > 0.0F) {
602 if (u > 1.884467e+019F) {
603 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
604 }
605 else if (u < 5.42101e-020F) {
606 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
607 }
608 }
609 else {
610 if (u < -1.884467e+019F) {
611 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
612 }
613 else if (u > -5.42101e-020F) {
614 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
615 }
616 }
617 t[0] = t[1] = t[2] = t[3] = u;
618 store_vector4( &inst->DstReg, machine, t );
619 }
620 break;
621 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
622 {
623 GLfloat t[4], u[4], sum[4];
624 fetch_vector4( &inst->SrcReg[0], machine, t );
625 fetch_vector4( &inst->SrcReg[1], machine, u );
626 sum[0] = t[0] - u[0];
627 sum[1] = t[1] - u[1];
628 sum[2] = t[2] - u[2];
629 sum[3] = t[3] - u[3];
630 store_vector4( &inst->DstReg, machine, sum );
631 }
632 break;
633 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
634 {
635 GLfloat t[4];
636 fetch_vector4( &inst->SrcReg[0], machine, t );
637 if (t[0] < 0.0) t[0] = -t[0];
638 if (t[1] < 0.0) t[1] = -t[1];
639 if (t[2] < 0.0) t[2] = -t[2];
640 if (t[3] < 0.0) t[3] = -t[3];
641 store_vector4( &inst->DstReg, machine, t );
642 }
643 break;
644 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
645 {
646 GLfloat t[4];
647 fetch_vector4( &inst->SrcReg[0], machine, t );
648 t[0] = FLOORF(t[0]);
649 t[1] = FLOORF(t[1]);
650 t[2] = FLOORF(t[2]);
651 t[3] = FLOORF(t[3]);
652 store_vector4( &inst->DstReg, machine, t );
653 }
654 break;
655 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
656 {
657 GLfloat t[4];
658 fetch_vector4( &inst->SrcReg[0], machine, t );
659 t[0] = t[0] - FLOORF(t[0]);
660 t[1] = t[1] - FLOORF(t[1]);
661 t[2] = t[2] - FLOORF(t[2]);
662 t[3] = t[3] - FLOORF(t[3]);
663 store_vector4( &inst->DstReg, machine, t );
664 }
665 break;
666 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
667 {
668 GLfloat t[4];
669 fetch_vector1( &inst->SrcReg[0], machine, t );
670 t[0] = t[1] = t[2] = t[3] = _mesa_pow(2.0, t[0]);
671 store_vector4( &inst->DstReg, machine, t );
672 }
673 break;
674 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
675 {
676 GLfloat t[4];
677 fetch_vector1( &inst->SrcReg[0], machine, t );
678 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
679 store_vector4( &inst->DstReg, machine, t );
680 }
681 break;
682 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
683 {
684 GLfloat t[4], u[4];
685 fetch_vector1( &inst->SrcReg[0], machine, t );
686 fetch_vector1( &inst->SrcReg[1], machine, u );
687 t[0] = t[1] = t[2] = t[3] = _mesa_pow(t[0], u[0]);
688 store_vector4( &inst->DstReg, machine, t );
689 }
690 break;
691 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
692 {
693 GLfloat t[4], u[4], cross[4];
694 fetch_vector4( &inst->SrcReg[0], machine, t );
695 fetch_vector4( &inst->SrcReg[1], machine, u );
696 cross[0] = t[1] * u[2] - t[2] * u[1];
697 cross[1] = t[2] * u[0] - t[0] * u[2];
698 cross[2] = t[0] * u[1] - t[1] * u[0];
699 store_vector4( &inst->DstReg, machine, cross );
700 }
701 break;
702 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
703 {
704 const struct vp_src_register *source = &inst->SrcReg[0];
705 const GLfloat *src;
706 GLfloat result[4];
707 GLuint i;
708
709 /* Code similar to fetch_vector4() */
710 if (source->RelAddr) {
711 const GLint reg = source->Register + machine->AddressReg;
712 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
713 src = zeroVec;
714 else
715 src = machine->Registers[VP_PROG_REG_START + reg];
716 }
717 else {
718 src = machine->Registers[source->Register];
719 }
720
721 /* extended swizzling here */
722 for (i = 0; i < 3; i++) {
723 if (source->Swizzle[i] == SWIZZLE_ZERO)
724 result[i] = 0.0;
725 else if (source->Swizzle[i] == SWIZZLE_ONE)
726 result[i] = -1.0;
727 else
728 result[i] = -src[source->Swizzle[i]];
729 if (source->Negate)
730 result[i] = -result[i];
731 }
732 store_vector4( &inst->DstReg, machine, result );
733 }
734 break;
735
736 case VP_OPCODE_END:
737 ctx->_CurrentProgram = 0;
738 return;
739 default:
740 /* bad instruction opcode */
741 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
742 ctx->_CurrentProgram = 0;
743 return;
744 } /* switch */
745 } /* for */
746
747 ctx->_CurrentProgram = 0;
748 }
749
750
751
752 /**
753 Thoughts on vertex program optimization:
754
755 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
756 assembly code. That will probably be a lot of work.
757
758 Another approach might be to replace the vp_instruction->Opcode field with
759 a pointer to a specialized C function which executes the instruction.
760 In particular we can write functions which skip swizzling, negating,
761 masking, relative addressing, etc. when they're not needed.
762
763 For example:
764
765 void simple_add( struct vp_instruction *inst )
766 {
767 GLfloat *sum = machine->Registers[inst->DstReg.Register];
768 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
769 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
770 sum[0] = a[0] + b[0];
771 sum[1] = a[1] + b[1];
772 sum[2] = a[2] + b[2];
773 sum[3] = a[3] + b[3];
774 }
775
776 */
777
778 /*
779
780 KW:
781
782 A first step would be to 'vectorize' the programs in the same way as
783 the normal transformation code in the tnl module. Thus each opcode
784 takes zero or more input vectors (registers) and produces one or more
785 output vectors.
786
787 These operations would initially be coded in C, with machine-specific
788 assembly following, as is currently the case for matrix
789 transformations in the math/ directory. The preprocessing scheme for
790 selecting simpler operations Brian describes above would also work
791 here.
792
793 This should give reasonable performance without excessive effort.
794
795 */