94507aba8808cdbe8f63032f20817c140ccff6f8
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * \brief Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "math/m_matrix.h"
39
40
41 /**
42 * Load/initialize the vertex program registers.
43 * This needs to be done per vertex.
44 */
45 void
46 _mesa_init_vp_registers(GLcontext *ctx)
47 {
48 struct vp_machine *machine = &(ctx->VertexProgram.Machine);
49 GLuint i;
50
51 /* Input registers get initialized from the current vertex attribs */
52 MEMCPY(machine->Registers[VP_INPUT_REG_START],
53 ctx->Current.Attrib,
54 16 * 4 * sizeof(GLfloat));
55
56 /* Output and temp regs are initialized to [0,0,0,1] */
57 for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
58 machine->Registers[i][0] = 0.0F;
59 machine->Registers[i][1] = 0.0F;
60 machine->Registers[i][2] = 0.0F;
61 machine->Registers[i][3] = 1.0F;
62 }
63 for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
64 machine->Registers[i][0] = 0.0F;
65 machine->Registers[i][1] = 0.0F;
66 machine->Registers[i][2] = 0.0F;
67 machine->Registers[i][3] = 1.0F;
68 }
69
70 /* The program regs aren't touched */
71 }
72
73
74
75 /**
76 * Copy the 16 elements of a matrix into four consecutive program
77 * registers starting at 'pos'.
78 */
79 static void
80 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
81 {
82 GLuint i;
83 pos += VP_PROG_REG_START;
84 for (i = 0; i < 4; i++) {
85 registers[pos + i][0] = mat[0 + i];
86 registers[pos + i][1] = mat[4 + i];
87 registers[pos + i][2] = mat[8 + i];
88 registers[pos + i][3] = mat[12 + i];
89 }
90 }
91
92
93 /**
94 * As above, but transpose the matrix.
95 */
96 static void
97 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
98 const GLfloat mat[16])
99 {
100 pos += VP_PROG_REG_START;
101 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
102 }
103
104
105 /**
106 * Load all currently tracked matrices into the program registers.
107 * This needs to be done per glBegin/glEnd.
108 */
109 void
110 _mesa_init_tracked_matrices(GLcontext *ctx)
111 {
112 GLuint i;
113
114 for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
115 /* point 'mat' at source matrix */
116 GLmatrix *mat;
117 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
118 mat = ctx->ModelviewMatrixStack.Top;
119 }
120 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
121 mat = ctx->ProjectionMatrixStack.Top;
122 }
123 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
124 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
125 }
126 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
127 mat = ctx->ColorMatrixStack.Top;
128 }
129 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
130 /* XXX verify the combined matrix is up to date */
131 mat = &ctx->_ModelProjectMatrix;
132 }
133 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
134 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
135 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
136 ASSERT(n < MAX_PROGRAM_MATRICES);
137 mat = ctx->ProgramMatrixStack[n].Top;
138 }
139 else {
140 /* no matrix is tracked, but we leave the register values as-is */
141 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
142 continue;
143 }
144
145 /* load the matrix */
146 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
147 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
148 }
149 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
150 _math_matrix_analyse(mat); /* update the inverse */
151 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
152 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
153 }
154 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
155 load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
156 }
157 else {
158 assert(ctx->VertexProgram.TrackMatrixTransform[i]
159 == GL_INVERSE_TRANSPOSE_NV);
160 _math_matrix_analyse(mat); /* update the inverse */
161 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
162 load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
163 i*4, mat->inv);
164 }
165 }
166 }
167
168
169
170 /**
171 * For debugging. Dump the current vertex program machine registers.
172 */
173 void
174 _mesa_dump_vp_machine( const struct vp_machine *machine )
175 {
176 int i;
177 _mesa_printf("VertexIn:\n");
178 for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
179 _mesa_printf("%d: %f %f %f %f ", i,
180 machine->Registers[i + VP_INPUT_REG_START][0],
181 machine->Registers[i + VP_INPUT_REG_START][1],
182 machine->Registers[i + VP_INPUT_REG_START][2],
183 machine->Registers[i + VP_INPUT_REG_START][3]);
184 }
185 _mesa_printf("\n");
186
187 _mesa_printf("VertexOut:\n");
188 for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
189 _mesa_printf("%d: %f %f %f %f ", i,
190 machine->Registers[i + VP_OUTPUT_REG_START][0],
191 machine->Registers[i + VP_OUTPUT_REG_START][1],
192 machine->Registers[i + VP_OUTPUT_REG_START][2],
193 machine->Registers[i + VP_OUTPUT_REG_START][3]);
194 }
195 _mesa_printf("\n");
196
197 _mesa_printf("Registers:\n");
198 for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
199 _mesa_printf("%d: %f %f %f %f ", i,
200 machine->Registers[i + VP_TEMP_REG_START][0],
201 machine->Registers[i + VP_TEMP_REG_START][1],
202 machine->Registers[i + VP_TEMP_REG_START][2],
203 machine->Registers[i + VP_TEMP_REG_START][3]);
204 }
205 _mesa_printf("\n");
206
207 _mesa_printf("Parameters:\n");
208 for (i = 0; i < VP_NUM_PROG_REGS; i++) {
209 _mesa_printf("%d: %f %f %f %f ", i,
210 machine->Registers[i + VP_PROG_REG_START][0],
211 machine->Registers[i + VP_PROG_REG_START][1],
212 machine->Registers[i + VP_PROG_REG_START][2],
213 machine->Registers[i + VP_PROG_REG_START][3]);
214 }
215 _mesa_printf("\n");
216 }
217
218
219 /**
220 * Fetch a 4-element float vector from the given source register.
221 * Apply swizzling and negating as needed.
222 */
223 static void
224 fetch_vector4( const struct vp_src_register *source,
225 const struct vp_machine *machine,
226 GLfloat result[4] )
227 {
228 static const GLfloat zero[4] = { 0, 0, 0, 0 };
229 const GLfloat *src;
230
231 if (source->RelAddr) {
232 const GLint reg = source->Register + machine->AddressReg;
233 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
234 src = zero;
235 else
236 src = machine->Registers[VP_PROG_REG_START + reg];
237 }
238 else {
239 src = machine->Registers[source->Register];
240 }
241
242 if (source->Negate) {
243 result[0] = -src[source->Swizzle[0]];
244 result[1] = -src[source->Swizzle[1]];
245 result[2] = -src[source->Swizzle[2]];
246 result[3] = -src[source->Swizzle[3]];
247 }
248 else {
249 result[0] = src[source->Swizzle[0]];
250 result[1] = src[source->Swizzle[1]];
251 result[2] = src[source->Swizzle[2]];
252 result[3] = src[source->Swizzle[3]];
253 }
254 }
255
256
257 /**
258 * As above, but only return result[0] element.
259 */
260 static void
261 fetch_vector1( const struct vp_src_register *source,
262 const struct vp_machine *machine,
263 GLfloat result[4] )
264 {
265 static const GLfloat zero[4] = { 0, 0, 0, 0 };
266 const GLfloat *src;
267
268 if (source->RelAddr) {
269 const GLint reg = source->Register + machine->AddressReg;
270 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
271 src = zero;
272 else
273 src = machine->Registers[VP_PROG_REG_START + reg];
274 }
275 else {
276 src = machine->Registers[source->Register];
277 }
278
279 if (source->Negate) {
280 result[0] = -src[source->Swizzle[0]];
281 }
282 else {
283 result[0] = src[source->Swizzle[0]];
284 }
285 }
286
287
288 /**
289 * Store 4 floats into a register.
290 */
291 static void
292 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
293 const GLfloat value[4] )
294 {
295 GLfloat *dst = machine->Registers[dest->Register];
296
297 if (dest->WriteMask[0])
298 dst[0] = value[0];
299 if (dest->WriteMask[1])
300 dst[1] = value[1];
301 if (dest->WriteMask[2])
302 dst[2] = value[2];
303 if (dest->WriteMask[3])
304 dst[3] = value[3];
305 }
306
307
/**
 * Set x to positive or negative infinity.
 * 'x' must be a GLfloat lvalue.  With USE_IEEE the IEEE-754 single
 * precision bit pattern is stored directly; on VMS __MAXFLOAT stands in
 * for infinity; otherwise HUGE_VAL is used.
 * Arguments and expansions are fully parenthesized so the macros behave
 * like ordinary expressions wherever they are used.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Store the integer bit pattern 'bits' into the float lvalue 'x' by way
 * of the fi_type union's integer member.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
323
324
325 /**
326 * Execute the given vertex program
327 */
328 void
329 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
330 {
331 struct vp_machine *machine = &ctx->VertexProgram.Machine;
332 const struct vp_instruction *inst;
333
334 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
335 switch (inst->Opcode) {
336 case VP_OPCODE_MOV:
337 {
338 GLfloat t[4];
339 fetch_vector4( &inst->SrcReg[0], machine, t );
340 store_vector4( &inst->DstReg, machine, t );
341 }
342 break;
343 case VP_OPCODE_LIT:
344 {
345 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
346 GLfloat t[4], lit[4];
347 fetch_vector4( &inst->SrcReg[0], machine, t );
348 if (t[3] < -(128.0F - epsilon))
349 t[3] = - (128.0F - epsilon);
350 else if (t[3] > 128.0F - epsilon)
351 t[3] = 128.0F - epsilon;
352 if (t[0] < 0.0)
353 t[0] = 0.0;
354 if (t[1] < 0.0)
355 t[1] = 0.0;
356 lit[0] = 1.0;
357 lit[1] = t[0];
358 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
359 lit[3] = 1.0;
360 store_vector4( &inst->DstReg, machine, lit );
361 }
362 break;
363 case VP_OPCODE_RCP:
364 {
365 GLfloat t[4];
366 fetch_vector1( &inst->SrcReg[0], machine, t );
367 if (t[0] != 1.0F)
368 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
369 t[1] = t[2] = t[3] = t[0];
370 store_vector4( &inst->DstReg, machine, t );
371 }
372 break;
373 case VP_OPCODE_RSQ:
374 {
375 GLfloat t[4];
376 fetch_vector1( &inst->SrcReg[0], machine, t );
377 t[0] = INV_SQRTF(FABSF(t[0]));
378 t[1] = t[2] = t[3] = t[0];
379 store_vector4( &inst->DstReg, machine, t );
380 }
381 break;
382 case VP_OPCODE_EXP:
383 {
384 GLfloat t[4], q[4], floor_t0;
385 fetch_vector1( &inst->SrcReg[0], machine, t );
386 floor_t0 = (float) floor(t[0]);
387 if (floor_t0 > FLT_MAX_EXP) {
388 SET_POS_INFINITY(q[0]);
389 SET_POS_INFINITY(q[2]);
390 }
391 else if (floor_t0 < FLT_MIN_EXP) {
392 q[0] = 0.0F;
393 q[2] = 0.0F;
394 }
395 else {
396 #ifdef USE_IEEE
397 GLint ii = (GLint) floor_t0;
398 ii = (ii < 23) + 0x3f800000;
399 SET_FLOAT_BITS(q[0], ii);
400 q[0] = *((GLfloat *) &ii);
401 #else
402 q[0] = (GLfloat) pow(2.0, floor_t0);
403 #endif
404 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
405 }
406 q[1] = t[0] - floor_t0;
407 q[3] = 1.0F;
408 store_vector4( &inst->DstReg, machine, q );
409 }
410 break;
411 case VP_OPCODE_LOG:
412 {
413 GLfloat t[4], q[4], abs_t0;
414 fetch_vector1( &inst->SrcReg[0], machine, t );
415 abs_t0 = (GLfloat) fabs(t[0]);
416 if (abs_t0 != 0.0F) {
417 /* Since we really can't handle infinite values on VMS
418 * like other OSes we'll use __MAXFLOAT to represent
419 * infinity. This may need some tweaking.
420 */
421 #ifdef VMS
422 if (abs_t0 == __MAXFLOAT) {
423 #else
424 if (IS_INF_OR_NAN(abs_t0)) {
425 #endif
426 SET_POS_INFINITY(q[0]);
427 q[1] = 1.0F;
428 SET_POS_INFINITY(q[2]);
429 }
430 else {
431 int exponent;
432 double mantissa = frexp(t[0], &exponent);
433 q[0] = (GLfloat) (exponent - 1);
434 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
435 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
436 }
437 }
438 else {
439 SET_NEG_INFINITY(q[0]);
440 q[1] = 1.0F;
441 SET_NEG_INFINITY(q[2]);
442 }
443 q[3] = 1.0;
444 store_vector4( &inst->DstReg, machine, q );
445 }
446 break;
447 case VP_OPCODE_MUL:
448 {
449 GLfloat t[4], u[4], prod[4];
450 fetch_vector4( &inst->SrcReg[0], machine, t );
451 fetch_vector4( &inst->SrcReg[1], machine, u );
452 prod[0] = t[0] * u[0];
453 prod[1] = t[1] * u[1];
454 prod[2] = t[2] * u[2];
455 prod[3] = t[3] * u[3];
456 store_vector4( &inst->DstReg, machine, prod );
457 }
458 break;
459 case VP_OPCODE_ADD:
460 {
461 GLfloat t[4], u[4], sum[4];
462 fetch_vector4( &inst->SrcReg[0], machine, t );
463 fetch_vector4( &inst->SrcReg[1], machine, u );
464 sum[0] = t[0] + u[0];
465 sum[1] = t[1] + u[1];
466 sum[2] = t[2] + u[2];
467 sum[3] = t[3] + u[3];
468 store_vector4( &inst->DstReg, machine, sum );
469 }
470 break;
471 case VP_OPCODE_DP3:
472 {
473 GLfloat t[4], u[4], dot[4];
474 fetch_vector4( &inst->SrcReg[0], machine, t );
475 fetch_vector4( &inst->SrcReg[1], machine, u );
476 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
477 dot[1] = dot[2] = dot[3] = dot[0];
478 store_vector4( &inst->DstReg, machine, dot );
479 }
480 break;
481 case VP_OPCODE_DP4:
482 {
483 GLfloat t[4], u[4], dot[4];
484 fetch_vector4( &inst->SrcReg[0], machine, t );
485 fetch_vector4( &inst->SrcReg[1], machine, u );
486 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
487 dot[1] = dot[2] = dot[3] = dot[0];
488 store_vector4( &inst->DstReg, machine, dot );
489 }
490 break;
491 case VP_OPCODE_DST:
492 {
493 GLfloat t[4], u[4], dst[4];
494 fetch_vector4( &inst->SrcReg[0], machine, t );
495 fetch_vector4( &inst->SrcReg[1], machine, u );
496 dst[0] = 1.0F;
497 dst[1] = t[1] * u[1];
498 dst[2] = t[2];
499 dst[3] = u[3];
500 store_vector4( &inst->DstReg, machine, dst );
501 }
502 break;
503 case VP_OPCODE_MIN:
504 {
505 GLfloat t[4], u[4], min[4];
506 fetch_vector4( &inst->SrcReg[0], machine, t );
507 fetch_vector4( &inst->SrcReg[1], machine, u );
508 min[0] = (t[0] < u[0]) ? t[0] : u[0];
509 min[1] = (t[1] < u[1]) ? t[1] : u[1];
510 min[2] = (t[2] < u[2]) ? t[2] : u[2];
511 min[3] = (t[3] < u[3]) ? t[3] : u[3];
512 store_vector4( &inst->DstReg, machine, min );
513 }
514 break;
515 case VP_OPCODE_MAX:
516 {
517 GLfloat t[4], u[4], max[4];
518 fetch_vector4( &inst->SrcReg[0], machine, t );
519 fetch_vector4( &inst->SrcReg[1], machine, u );
520 max[0] = (t[0] > u[0]) ? t[0] : u[0];
521 max[1] = (t[1] > u[1]) ? t[1] : u[1];
522 max[2] = (t[2] > u[2]) ? t[2] : u[2];
523 max[3] = (t[3] > u[3]) ? t[3] : u[3];
524 store_vector4( &inst->DstReg, machine, max );
525 }
526 break;
527 case VP_OPCODE_SLT:
528 {
529 GLfloat t[4], u[4], slt[4];
530 fetch_vector4( &inst->SrcReg[0], machine, t );
531 fetch_vector4( &inst->SrcReg[1], machine, u );
532 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
533 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
534 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
535 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
536 store_vector4( &inst->DstReg, machine, slt );
537 }
538 break;
539 case VP_OPCODE_SGE:
540 {
541 GLfloat t[4], u[4], sge[4];
542 fetch_vector4( &inst->SrcReg[0], machine, t );
543 fetch_vector4( &inst->SrcReg[1], machine, u );
544 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
545 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
546 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
547 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
548 store_vector4( &inst->DstReg, machine, sge );
549 }
550 break;
551 case VP_OPCODE_MAD:
552 {
553 GLfloat t[4], u[4], v[4], sum[4];
554 fetch_vector4( &inst->SrcReg[0], machine, t );
555 fetch_vector4( &inst->SrcReg[1], machine, u );
556 fetch_vector4( &inst->SrcReg[2], machine, v );
557 sum[0] = t[0] * u[0] + v[0];
558 sum[1] = t[1] * u[1] + v[1];
559 sum[2] = t[2] * u[2] + v[2];
560 sum[3] = t[3] * u[3] + v[3];
561 store_vector4( &inst->DstReg, machine, sum );
562 }
563 break;
564 case VP_OPCODE_ARL:
565 {
566 GLfloat t[4];
567 fetch_vector4( &inst->SrcReg[0], machine, t );
568 machine->AddressReg = (GLint) floor(t[0]);
569 }
570 break;
571 case VP_OPCODE_DPH:
572 {
573 GLfloat t[4], u[4], dot[4];
574 fetch_vector4( &inst->SrcReg[0], machine, t );
575 fetch_vector4( &inst->SrcReg[1], machine, u );
576 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
577 dot[1] = dot[2] = dot[3] = dot[0];
578 store_vector4( &inst->DstReg, machine, dot );
579 }
580 break;
581 case VP_OPCODE_RCC:
582 {
583 GLfloat t[4], u;
584 fetch_vector1( &inst->SrcReg[0], machine, t );
585 if (t[0] == 1.0F)
586 u = 1.0F;
587 else
588 u = 1.0F / t[0];
589 if (u > 0.0F) {
590 if (u > 1.884467e+019F) {
591 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
592 }
593 else if (u < 5.42101e-020F) {
594 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
595 }
596 }
597 else {
598 if (u < -1.884467e+019F) {
599 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
600 }
601 else if (u > -5.42101e-020F) {
602 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
603 }
604 }
605 t[0] = t[1] = t[2] = t[3] = u;
606 store_vector4( &inst->DstReg, machine, t );
607 }
608 break;
609 case VP_OPCODE_SUB:
610 {
611 GLfloat t[4], u[4], sum[4];
612 fetch_vector4( &inst->SrcReg[0], machine, t );
613 fetch_vector4( &inst->SrcReg[1], machine, u );
614 sum[0] = t[0] - u[0];
615 sum[1] = t[1] - u[1];
616 sum[2] = t[2] - u[2];
617 sum[3] = t[3] - u[3];
618 store_vector4( &inst->DstReg, machine, sum );
619 }
620 break;
621 case VP_OPCODE_ABS:
622 {
623 GLfloat t[4];
624 fetch_vector4( &inst->SrcReg[0], machine, t );
625 if (t[0] < 0.0) t[0] = -t[0];
626 if (t[1] < 0.0) t[1] = -t[1];
627 if (t[2] < 0.0) t[2] = -t[2];
628 if (t[3] < 0.0) t[3] = -t[3];
629 store_vector4( &inst->DstReg, machine, t );
630 }
631 break;
632
633 case VP_OPCODE_END:
634 return;
635 default:
636 /* bad instruction opcode */
637 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
638 return;
639 }
640 }
641 }
642
643
644
645 /**
646 Thoughts on vertex program optimization:
647
648 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
649 assembly code. That will probably be a lot of work.
650
651 Another approach might be to replace the vp_instruction->Opcode field with
652 a pointer to a specialized C function which executes the instruction.
653 In particular we can write functions which skip swizzling, negating,
654 masking, relative addressing, etc. when they're not needed.
655
656 For example:
657
658 void simple_add( struct vp_instruction *inst )
659 {
660 GLfloat *sum = machine->Registers[inst->DstReg.Register];
661 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
662 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
663 sum[0] = a[0] + b[0];
664 sum[1] = a[1] + b[1];
665 sum[2] = a[2] + b[2];
666 sum[3] = a[3] + b[3];
667 }
668
669 */
670
671 /*
672
673 KW:
674
675 A first step would be to 'vectorize' the programs in the same way as
676 the normal transformation code in the tnl module. Thus each opcode
677 takes zero or more input vectors (registers) and produces one or more
678 output vectors.
679
680 These operations would initially be coded in C, with machine-specific
681 assembly following, as is currently the case for matrix
682 transformations in the math/ directory. The preprocessing scheme for
683 selecting simpler operations Brian describes above would also work
684 here.
685
686 This should give reasonable performance without excessive effort.
687
688 */