dcf385d0e396eff05f4fa1077797883b7c110dda
[mesa.git] / src / mesa / main / nvvertexec.c
1 /* $Id: nvvertexec.c,v 1.5 2003/03/29 16:04:31 brianp Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file nvvertexec.c
29 * \brief Code to execute vertex programs.
30 * \author Brian Paul
31 */
32
33 #include "glheader.h"
34 #include "context.h"
35 #include "imports.h"
36 #include "macros.h"
37 #include "mtypes.h"
38 #include "nvvertexec.h"
39 #include "nvvertprog.h"
40 #include "math/m_matrix.h"
41
42
43 /**
44 * Load/initialize the vertex program registers.
45 * This needs to be done per vertex.
46 */
47 void
48 _mesa_init_vp_registers(GLcontext *ctx)
49 {
50 struct vp_machine *machine = &(ctx->VertexProgram.Machine);
51 GLuint i;
52
53 /* Input registers get initialized from the current vertex attribs */
54 MEMCPY(machine->Registers[VP_INPUT_REG_START],
55 ctx->Current.Attrib,
56 16 * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
60 machine->Registers[i][0] = 0.0F;
61 machine->Registers[i][1] = 0.0F;
62 machine->Registers[i][2] = 0.0F;
63 machine->Registers[i][3] = 1.0F;
64 }
65 for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
66 machine->Registers[i][0] = 0.0F;
67 machine->Registers[i][1] = 0.0F;
68 machine->Registers[i][2] = 0.0F;
69 machine->Registers[i][3] = 1.0F;
70 }
71
72 /* The program regs aren't touched */
73 }
74
75
76
77 /**
78 * Copy the 16 elements of a matrix into four consecutive program
79 * registers starting at 'pos'.
80 */
81 static void
82 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
83 {
84 GLuint i;
85 pos += VP_PROG_REG_START;
86 for (i = 0; i < 4; i++) {
87 registers[pos + i][0] = mat[0 + i];
88 registers[pos + i][1] = mat[4 + i];
89 registers[pos + i][2] = mat[8 + i];
90 registers[pos + i][3] = mat[12 + i];
91 }
92 }
93
94
95 /**
96 * As above, but transpose the matrix.
97 */
98 static void
99 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
100 const GLfloat mat[16])
101 {
102 pos += VP_PROG_REG_START;
103 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
104 }
105
106
107 /**
108 * Load all currently tracked matrices into the program registers.
109 * This needs to be done per glBegin/glEnd.
110 */
111 void
112 _mesa_init_tracked_matrices(GLcontext *ctx)
113 {
114 GLuint i;
115
116 for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
117 /* point 'mat' at source matrix */
118 GLmatrix *mat;
119 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
120 mat = ctx->ModelviewMatrixStack.Top;
121 }
122 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
123 mat = ctx->ProjectionMatrixStack.Top;
124 }
125 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
126 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
127 }
128 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
129 mat = ctx->ColorMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
132 /* XXX verify the combined matrix is up to date */
133 mat = &ctx->_ModelProjectMatrix;
134 }
135 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
136 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
137 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
138 ASSERT(n < MAX_PROGRAM_MATRICES);
139 mat = ctx->ProgramMatrixStack[n].Top;
140 }
141 else {
142 /* no matrix is tracked, but we leave the register values as-is */
143 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
144 continue;
145 }
146
147 /* load the matrix */
148 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
149 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
150 }
151 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
152 _math_matrix_analyse(mat); /* update the inverse */
153 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
154 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
155 }
156 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
157 load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
158 }
159 else {
160 assert(ctx->VertexProgram.TrackMatrixTransform[i]
161 == GL_INVERSE_TRANSPOSE_NV);
162 _math_matrix_analyse(mat); /* update the inverse */
163 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
164 load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
165 i*4, mat->inv);
166 }
167 }
168 }
169
170
171
172 /**
173 * For debugging. Dump the current vertex program machine registers.
174 */
175 void
176 _mesa_dump_vp_machine( const struct vp_machine *machine )
177 {
178 int i;
179 _mesa_printf("VertexIn:\n");
180 for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
181 _mesa_printf("%d: %f %f %f %f ", i,
182 machine->Registers[i + VP_INPUT_REG_START][0],
183 machine->Registers[i + VP_INPUT_REG_START][1],
184 machine->Registers[i + VP_INPUT_REG_START][2],
185 machine->Registers[i + VP_INPUT_REG_START][3]);
186 }
187 _mesa_printf("\n");
188
189 _mesa_printf("VertexOut:\n");
190 for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
191 _mesa_printf("%d: %f %f %f %f ", i,
192 machine->Registers[i + VP_OUTPUT_REG_START][0],
193 machine->Registers[i + VP_OUTPUT_REG_START][1],
194 machine->Registers[i + VP_OUTPUT_REG_START][2],
195 machine->Registers[i + VP_OUTPUT_REG_START][3]);
196 }
197 _mesa_printf("\n");
198
199 _mesa_printf("Registers:\n");
200 for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
201 _mesa_printf("%d: %f %f %f %f ", i,
202 machine->Registers[i + VP_TEMP_REG_START][0],
203 machine->Registers[i + VP_TEMP_REG_START][1],
204 machine->Registers[i + VP_TEMP_REG_START][2],
205 machine->Registers[i + VP_TEMP_REG_START][3]);
206 }
207 _mesa_printf("\n");
208
209 _mesa_printf("Parameters:\n");
210 for (i = 0; i < VP_NUM_PROG_REGS; i++) {
211 _mesa_printf("%d: %f %f %f %f ", i,
212 machine->Registers[i + VP_PROG_REG_START][0],
213 machine->Registers[i + VP_PROG_REG_START][1],
214 machine->Registers[i + VP_PROG_REG_START][2],
215 machine->Registers[i + VP_PROG_REG_START][3]);
216 }
217 _mesa_printf("\n");
218 }
219
220
221 /**
222 * Fetch a 4-element float vector from the given source register.
223 * Apply swizzling and negating as needed.
224 */
225 static void
226 fetch_vector4( const struct vp_src_register *source,
227 const struct vp_machine *machine,
228 GLfloat result[4] )
229 {
230 static const GLfloat zero[4] = { 0, 0, 0, 0 };
231 const GLfloat *src;
232
233 if (source->RelAddr) {
234 const GLint reg = source->Register + machine->AddressReg;
235 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
236 src = zero;
237 else
238 src = machine->Registers[VP_PROG_REG_START + reg];
239 }
240 else {
241 src = machine->Registers[source->Register];
242 }
243
244 if (source->Negate) {
245 result[0] = -src[source->Swizzle[0]];
246 result[1] = -src[source->Swizzle[1]];
247 result[2] = -src[source->Swizzle[2]];
248 result[3] = -src[source->Swizzle[3]];
249 }
250 else {
251 result[0] = src[source->Swizzle[0]];
252 result[1] = src[source->Swizzle[1]];
253 result[2] = src[source->Swizzle[2]];
254 result[3] = src[source->Swizzle[3]];
255 }
256 }
257
258
259 /**
260 * As above, but only return result[0] element.
261 */
262 static void
263 fetch_vector1( const struct vp_src_register *source,
264 const struct vp_machine *machine,
265 GLfloat result[4] )
266 {
267 static const GLfloat zero[4] = { 0, 0, 0, 0 };
268 const GLfloat *src;
269
270 if (source->RelAddr) {
271 const GLint reg = source->Register + machine->AddressReg;
272 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
273 src = zero;
274 else
275 src = machine->Registers[VP_PROG_REG_START + reg];
276 }
277 else {
278 src = machine->Registers[source->Register];
279 }
280
281 if (source->Negate) {
282 result[0] = -src[source->Swizzle[0]];
283 }
284 else {
285 result[0] = src[source->Swizzle[0]];
286 }
287 }
288
289
290 /**
291 * Store 4 floats into a register.
292 */
293 static void
294 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
295 const GLfloat value[4] )
296 {
297 GLfloat *dst = machine->Registers[dest->Register];
298
299 if (dest->WriteMask[0])
300 dst[0] = value[0];
301 if (dest->WriteMask[1])
302 dst[1] = value[1];
303 if (dest->WriteMask[2])
304 dst[2] = value[2];
305 if (dest->WriteMask[3])
306 dst[3] = value[3];
307 }
308
309
/**
 * Set x to positive or negative infinity.
 * With USE_IEEE the bit pattern of a 32-bit IEEE infinity is written
 * directly; on VMS the largest float stands in for infinity; otherwise
 * HUGE_VAL is used.
 *
 * The macro arguments and the expansions are fully parenthesized so that
 * the macros behave like ordinary expressions wherever they are used.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bits of the float x with the integer value 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
325
326
327 /**
328 * Execute the given vertex program
329 */
330 void
331 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
332 {
333 struct vp_machine *machine = &ctx->VertexProgram.Machine;
334 const struct vp_instruction *inst;
335
336 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
337 switch (inst->Opcode) {
338 case VP_OPCODE_MOV:
339 {
340 GLfloat t[4];
341 fetch_vector4( &inst->SrcReg[0], machine, t );
342 store_vector4( &inst->DstReg, machine, t );
343 }
344 break;
345 case VP_OPCODE_LIT:
346 {
347 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
348 GLfloat t[4], lit[4];
349 fetch_vector4( &inst->SrcReg[0], machine, t );
350 if (t[3] < -(128.0F - epsilon))
351 t[3] = - (128.0F - epsilon);
352 else if (t[3] > 128.0F - epsilon)
353 t[3] = 128.0F - epsilon;
354 if (t[0] < 0.0)
355 t[0] = 0.0;
356 if (t[1] < 0.0)
357 t[1] = 0.0;
358 lit[0] = 1.0;
359 lit[1] = t[0];
360 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
361 lit[3] = 1.0;
362 store_vector4( &inst->DstReg, machine, lit );
363 }
364 break;
365 case VP_OPCODE_RCP:
366 {
367 GLfloat t[4];
368 fetch_vector1( &inst->SrcReg[0], machine, t );
369 if (t[0] != 1.0F)
370 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
371 t[1] = t[2] = t[3] = t[0];
372 store_vector4( &inst->DstReg, machine, t );
373 }
374 break;
375 case VP_OPCODE_RSQ:
376 {
377 GLfloat t[4];
378 fetch_vector1( &inst->SrcReg[0], machine, t );
379 t[0] = INV_SQRTF(FABSF(t[0]));
380 t[1] = t[2] = t[3] = t[0];
381 store_vector4( &inst->DstReg, machine, t );
382 }
383 break;
384 case VP_OPCODE_EXP:
385 {
386 GLfloat t[4], q[4], floor_t0;
387 fetch_vector1( &inst->SrcReg[0], machine, t );
388 floor_t0 = (float) floor(t[0]);
389 if (floor_t0 > FLT_MAX_EXP) {
390 SET_POS_INFINITY(q[0]);
391 SET_POS_INFINITY(q[2]);
392 }
393 else if (floor_t0 < FLT_MIN_EXP) {
394 q[0] = 0.0F;
395 q[2] = 0.0F;
396 }
397 else {
398 #ifdef USE_IEEE
399 GLint ii = (GLint) floor_t0;
400 ii = (ii < 23) + 0x3f800000;
401 SET_FLOAT_BITS(q[0], ii);
402 q[0] = *((GLfloat *) &ii);
403 #else
404 q[0] = (GLfloat) pow(2.0, floor_t0);
405 #endif
406 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
407 }
408 q[1] = t[0] - floor_t0;
409 q[3] = 1.0F;
410 store_vector4( &inst->DstReg, machine, q );
411 }
412 break;
413 case VP_OPCODE_LOG:
414 {
415 GLfloat t[4], q[4], abs_t0;
416 fetch_vector1( &inst->SrcReg[0], machine, t );
417 abs_t0 = (GLfloat) fabs(t[0]);
418 if (abs_t0 != 0.0F) {
419 /* Since we really can't handle infinite values on VMS
420 * like other OSes we'll use __MAXFLOAT to represent
421 * infinity. This may need some tweaking.
422 */
423 #ifdef VMS
424 if (abs_t0 == __MAXFLOAT) {
425 #else
426 if (IS_INF_OR_NAN(abs_t0)) {
427 #endif
428 SET_POS_INFINITY(q[0]);
429 q[1] = 1.0F;
430 SET_POS_INFINITY(q[2]);
431 }
432 else {
433 int exponent;
434 double mantissa = frexp(t[0], &exponent);
435 q[0] = (GLfloat) (exponent - 1);
436 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
437 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
438 }
439 }
440 else {
441 SET_NEG_INFINITY(q[0]);
442 q[1] = 1.0F;
443 SET_NEG_INFINITY(q[2]);
444 }
445 q[3] = 1.0;
446 store_vector4( &inst->DstReg, machine, q );
447 }
448 break;
449 case VP_OPCODE_MUL:
450 {
451 GLfloat t[4], u[4], prod[4];
452 fetch_vector4( &inst->SrcReg[0], machine, t );
453 fetch_vector4( &inst->SrcReg[1], machine, u );
454 prod[0] = t[0] * u[0];
455 prod[1] = t[1] * u[1];
456 prod[2] = t[2] * u[2];
457 prod[3] = t[3] * u[3];
458 store_vector4( &inst->DstReg, machine, prod );
459 }
460 break;
461 case VP_OPCODE_ADD:
462 {
463 GLfloat t[4], u[4], sum[4];
464 fetch_vector4( &inst->SrcReg[0], machine, t );
465 fetch_vector4( &inst->SrcReg[1], machine, u );
466 sum[0] = t[0] + u[0];
467 sum[1] = t[1] + u[1];
468 sum[2] = t[2] + u[2];
469 sum[3] = t[3] + u[3];
470 store_vector4( &inst->DstReg, machine, sum );
471 }
472 break;
473 case VP_OPCODE_DP3:
474 {
475 GLfloat t[4], u[4], dot[4];
476 fetch_vector4( &inst->SrcReg[0], machine, t );
477 fetch_vector4( &inst->SrcReg[1], machine, u );
478 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
479 dot[1] = dot[2] = dot[3] = dot[0];
480 store_vector4( &inst->DstReg, machine, dot );
481 }
482 break;
483 case VP_OPCODE_DP4:
484 {
485 GLfloat t[4], u[4], dot[4];
486 fetch_vector4( &inst->SrcReg[0], machine, t );
487 fetch_vector4( &inst->SrcReg[1], machine, u );
488 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
489 dot[1] = dot[2] = dot[3] = dot[0];
490 store_vector4( &inst->DstReg, machine, dot );
491 }
492 break;
493 case VP_OPCODE_DST:
494 {
495 GLfloat t[4], u[4], dst[4];
496 fetch_vector4( &inst->SrcReg[0], machine, t );
497 fetch_vector4( &inst->SrcReg[1], machine, u );
498 dst[0] = 1.0F;
499 dst[1] = t[1] * u[1];
500 dst[2] = t[2];
501 dst[3] = u[3];
502 store_vector4( &inst->DstReg, machine, dst );
503 }
504 break;
505 case VP_OPCODE_MIN:
506 {
507 GLfloat t[4], u[4], min[4];
508 fetch_vector4( &inst->SrcReg[0], machine, t );
509 fetch_vector4( &inst->SrcReg[1], machine, u );
510 min[0] = (t[0] < u[0]) ? t[0] : u[0];
511 min[1] = (t[1] < u[1]) ? t[1] : u[1];
512 min[2] = (t[2] < u[2]) ? t[2] : u[2];
513 min[3] = (t[3] < u[3]) ? t[3] : u[3];
514 store_vector4( &inst->DstReg, machine, min );
515 }
516 break;
517 case VP_OPCODE_MAX:
518 {
519 GLfloat t[4], u[4], max[4];
520 fetch_vector4( &inst->SrcReg[0], machine, t );
521 fetch_vector4( &inst->SrcReg[1], machine, u );
522 max[0] = (t[0] > u[0]) ? t[0] : u[0];
523 max[1] = (t[1] > u[1]) ? t[1] : u[1];
524 max[2] = (t[2] > u[2]) ? t[2] : u[2];
525 max[3] = (t[3] > u[3]) ? t[3] : u[3];
526 store_vector4( &inst->DstReg, machine, max );
527 }
528 break;
529 case VP_OPCODE_SLT:
530 {
531 GLfloat t[4], u[4], slt[4];
532 fetch_vector4( &inst->SrcReg[0], machine, t );
533 fetch_vector4( &inst->SrcReg[1], machine, u );
534 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
535 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
536 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
537 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
538 store_vector4( &inst->DstReg, machine, slt );
539 }
540 break;
541 case VP_OPCODE_SGE:
542 {
543 GLfloat t[4], u[4], sge[4];
544 fetch_vector4( &inst->SrcReg[0], machine, t );
545 fetch_vector4( &inst->SrcReg[1], machine, u );
546 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
547 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
548 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
549 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
550 store_vector4( &inst->DstReg, machine, sge );
551 }
552 break;
553 case VP_OPCODE_MAD:
554 {
555 GLfloat t[4], u[4], v[4], sum[4];
556 fetch_vector4( &inst->SrcReg[0], machine, t );
557 fetch_vector4( &inst->SrcReg[1], machine, u );
558 fetch_vector4( &inst->SrcReg[2], machine, v );
559 sum[0] = t[0] * u[0] + v[0];
560 sum[1] = t[1] * u[1] + v[1];
561 sum[2] = t[2] * u[2] + v[2];
562 sum[3] = t[3] * u[3] + v[3];
563 store_vector4( &inst->DstReg, machine, sum );
564 }
565 break;
566 case VP_OPCODE_ARL:
567 {
568 GLfloat t[4];
569 fetch_vector4( &inst->SrcReg[0], machine, t );
570 machine->AddressReg = (GLint) floor(t[0]);
571 }
572 break;
573 case VP_OPCODE_DPH:
574 {
575 GLfloat t[4], u[4], dot[4];
576 fetch_vector4( &inst->SrcReg[0], machine, t );
577 fetch_vector4( &inst->SrcReg[1], machine, u );
578 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
579 dot[1] = dot[2] = dot[3] = dot[0];
580 store_vector4( &inst->DstReg, machine, dot );
581 }
582 break;
583 case VP_OPCODE_RCC:
584 {
585 GLfloat t[4], u;
586 fetch_vector1( &inst->SrcReg[0], machine, t );
587 if (t[0] == 1.0F)
588 u = 1.0F;
589 else
590 u = 1.0F / t[0];
591 if (u > 0.0F) {
592 if (u > 1.884467e+019F) {
593 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
594 }
595 else if (u < 5.42101e-020F) {
596 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
597 }
598 }
599 else {
600 if (u < -1.884467e+019F) {
601 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
602 }
603 else if (u > -5.42101e-020F) {
604 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
605 }
606 }
607 t[0] = t[1] = t[2] = t[3] = u;
608 store_vector4( &inst->DstReg, machine, t );
609 }
610 break;
611 case VP_OPCODE_SUB:
612 {
613 GLfloat t[4], u[4], sum[4];
614 fetch_vector4( &inst->SrcReg[0], machine, t );
615 fetch_vector4( &inst->SrcReg[1], machine, u );
616 sum[0] = t[0] - u[0];
617 sum[1] = t[1] - u[1];
618 sum[2] = t[2] - u[2];
619 sum[3] = t[3] - u[3];
620 store_vector4( &inst->DstReg, machine, sum );
621 }
622 break;
623 case VP_OPCODE_ABS:
624 {
625 GLfloat t[4];
626 fetch_vector4( &inst->SrcReg[0], machine, t );
627 if (t[0] < 0.0) t[0] = -t[0];
628 if (t[1] < 0.0) t[1] = -t[1];
629 if (t[2] < 0.0) t[2] = -t[2];
630 if (t[3] < 0.0) t[3] = -t[3];
631 store_vector4( &inst->DstReg, machine, t );
632 }
633 break;
634
635 case VP_OPCODE_END:
636 return;
637 default:
638 /* bad instruction opcode */
639 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
640 return;
641 }
642 }
643 }
644
645
646
647 /**
648 Thoughts on vertex program optimization:
649
650 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
651 assembly code. That will probably be a lot of work.
652
653 Another approach might be to replace the vp_instruction->Opcode field with
654 a pointer to a specialized C function which executes the instruction.
655 In particular we can write functions which skip swizzling, negating,
656 masking, relative addressing, etc. when they're not needed.
657
658 For example:
659
660 void simple_add( struct vp_instruction *inst )
661 {
662 GLfloat *sum = machine->Registers[inst->DstReg.Register];
663 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
664 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
665 sum[0] = a[0] + b[0];
666 sum[1] = a[1] + b[1];
667 sum[2] = a[2] + b[2];
668 sum[3] = a[3] + b[3];
669 }
670
671 */
672
673 /*
674
675 KW:
676
677 A first step would be to 'vectorize' the programs in the same way as
678 the normal transformation code in the tnl module. Thus each opcode
679 takes zero or more input vectors (registers) and produces one or more
680 output vectors.
681
682 These operations would initially be coded in C, with machine-specific
683 assembly following, as is currently the case for matrix
684 transformations in the math/ directory. The preprocessing scheme for
685 selecting simpler operations Brian describes above would also work
686 here.
687
688 This should give reasonable performance without excessive effort.
689
690 */