6ef6cce1d776d9b12972ce99ef2fff7c0b1374c1
[mesa.git] / src / mesa / main / nvvertexec.c
1 /* $Id: nvvertexec.c,v 1.4 2003/03/25 00:00:29 brianp Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file nvvertexec.c
29 * \brief Code to execute vertex programs.
30 * \author Brian Paul
31 */
32
33 #include "glheader.h"
34 #include "context.h"
35 #include "imports.h"
36 #include "macros.h"
37 #include "mtypes.h"
38 #include "nvvertexec.h"
39 #include "nvvertprog.h"
40 #include "math/m_matrix.h"
41
42
43 /**
44 * Load/initialize the vertex program registers.
45 * This needs to be done per vertex.
46 */
47 void
48 _mesa_init_vp_registers(GLcontext *ctx)
49 {
50 struct vp_machine *machine = &(ctx->VertexProgram.Machine);
51 GLuint i;
52
53 /* Input registers get initialized from the current vertex attribs */
54 MEMCPY(machine->Registers[VP_INPUT_REG_START],
55 ctx->Current.Attrib,
56 16 * 4 * sizeof(GLfloat));
57
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
60 machine->Registers[i][0] = 0.0F;
61 machine->Registers[i][1] = 0.0F;
62 machine->Registers[i][2] = 0.0F;
63 machine->Registers[i][3] = 1.0F;
64 }
65 for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
66 machine->Registers[i][0] = 0.0F;
67 machine->Registers[i][1] = 0.0F;
68 machine->Registers[i][2] = 0.0F;
69 machine->Registers[i][3] = 1.0F;
70 }
71
72 /* The program regs aren't touched */
73 }
74
75
76
77 /**
78 * Copy the 16 elements of a matrix into four consecutive program
79 * registers starting at 'pos'.
80 */
81 static void
82 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
83 {
84 GLuint i;
85 pos += VP_PROG_REG_START;
86 for (i = 0; i < 4; i++) {
87 registers[pos + i][0] = mat[0 + i];
88 registers[pos + i][1] = mat[4 + i];
89 registers[pos + i][2] = mat[8 + i];
90 registers[pos + i][3] = mat[12 + i];
91 }
92 }
93
94
95 /**
96 * As above, but transpose the matrix.
97 */
98 static void
99 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
100 const GLfloat mat[16])
101 {
102 pos += VP_PROG_REG_START;
103 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
104 }
105
106
107 /**
108 * Load all currently tracked matrices into the program registers.
109 * This needs to be done per glBegin/glEnd.
110 */
111 void
112 _mesa_init_tracked_matrices(GLcontext *ctx)
113 {
114 GLuint i;
115
116 for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
117 /* point 'mat' at source matrix */
118 GLmatrix *mat;
119 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
120 mat = ctx->ModelviewMatrixStack.Top;
121 }
122 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
123 mat = ctx->ProjectionMatrixStack.Top;
124 }
125 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
126 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
127 }
128 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
129 mat = ctx->ColorMatrixStack.Top;
130 }
131 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
132 /* XXX verify the combined matrix is up to date */
133 mat = &ctx->_ModelProjectMatrix;
134 }
135 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
136 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
137 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
138 ASSERT(n < MAX_PROGRAM_MATRICES);
139 mat = ctx->ProgramMatrixStack[n].Top;
140 }
141 else {
142 /* no matrix is tracked, but we leave the register values as-is */
143 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
144 continue;
145 }
146
147 /* load the matrix */
148 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
149 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
150 }
151 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
152 _math_matrix_analyse(mat); /* update the inverse */
153 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
154 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
155 }
156 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
157 load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
158 }
159 else {
160 assert(ctx->VertexProgram.TrackMatrixTransform[i]
161 == GL_INVERSE_TRANSPOSE_NV);
162 _math_matrix_analyse(mat); /* update the inverse */
163 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
164 load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
165 i*4, mat->inv);
166 }
167 }
168 }
169
170
171
172 /**
173 * For debugging. Dump the current vertex program machine registers.
174 */
175 void
176 _mesa_dump_vp_machine( const struct vp_machine *machine )
177 {
178 int i;
179 _mesa_printf("VertexIn:\n");
180 for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
181 _mesa_printf("%d: %f %f %f %f ", i,
182 machine->Registers[i + VP_INPUT_REG_START][0],
183 machine->Registers[i + VP_INPUT_REG_START][1],
184 machine->Registers[i + VP_INPUT_REG_START][2],
185 machine->Registers[i + VP_INPUT_REG_START][3]);
186 }
187 _mesa_printf("\n");
188
189 _mesa_printf("VertexOut:\n");
190 for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
191 _mesa_printf("%d: %f %f %f %f ", i,
192 machine->Registers[i + VP_OUTPUT_REG_START][0],
193 machine->Registers[i + VP_OUTPUT_REG_START][1],
194 machine->Registers[i + VP_OUTPUT_REG_START][2],
195 machine->Registers[i + VP_OUTPUT_REG_START][3]);
196 }
197 _mesa_printf("\n");
198
199 _mesa_printf("Registers:\n");
200 for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
201 _mesa_printf("%d: %f %f %f %f ", i,
202 machine->Registers[i + VP_TEMP_REG_START][0],
203 machine->Registers[i + VP_TEMP_REG_START][1],
204 machine->Registers[i + VP_TEMP_REG_START][2],
205 machine->Registers[i + VP_TEMP_REG_START][3]);
206 }
207 _mesa_printf("\n");
208
209 _mesa_printf("Parameters:\n");
210 for (i = 0; i < VP_NUM_PROG_REGS; i++) {
211 _mesa_printf("%d: %f %f %f %f ", i,
212 machine->Registers[i + VP_PROG_REG_START][0],
213 machine->Registers[i + VP_PROG_REG_START][1],
214 machine->Registers[i + VP_PROG_REG_START][2],
215 machine->Registers[i + VP_PROG_REG_START][3]);
216 }
217 _mesa_printf("\n");
218 }
219
220
221 /**
222 * Fetch a 4-element float vector from the given source register.
223 * Apply swizzling and negating as needed.
224 */
225 static void
226 fetch_vector4( const struct vp_src_register *source,
227 const struct vp_machine *machine,
228 GLfloat result[4] )
229 {
230 static const GLfloat zero[4] = { 0, 0, 0, 0 };
231 const GLfloat *src;
232
233 if (source->RelAddr) {
234 GLint reg = source->Register + machine->AddressReg;
235 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
236 src = zero;
237 else
238 src = machine->Registers[reg];
239 }
240 else {
241 src = machine->Registers[source->Register];
242 }
243
244 if (source->Negate) {
245 result[0] = -src[source->Swizzle[0]];
246 result[1] = -src[source->Swizzle[1]];
247 result[2] = -src[source->Swizzle[2]];
248 result[3] = -src[source->Swizzle[3]];
249 }
250 else {
251 result[0] = src[source->Swizzle[0]];
252 result[1] = src[source->Swizzle[1]];
253 result[2] = src[source->Swizzle[2]];
254 result[3] = src[source->Swizzle[3]];
255 }
256 }
257
258
259 /**
260 * As above, but only return result[0] element.
261 */
262 static void
263 fetch_vector1( const struct vp_src_register *source,
264 const struct vp_machine *machine,
265 GLfloat result[4] )
266 {
267 static const GLfloat zero[4] = { 0, 0, 0, 0 };
268 const GLfloat *src;
269
270 if (source->RelAddr) {
271 GLint reg = source->Register + machine->AddressReg;
272 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
273 src = zero;
274 else
275 src = machine->Registers[reg];
276 }
277 else {
278 src = machine->Registers[source->Register];
279 }
280
281 if (source->Negate) {
282 result[0] = -src[source->Swizzle[0]];
283 }
284 else {
285 result[0] = src[source->Swizzle[0]];
286 }
287 }
288
289
290 /**
291 * Store 4 floats into a register.
292 */
293 static void
294 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
295 const GLfloat value[4] )
296 {
297 GLfloat *dst = machine->Registers[dest->Register];
298
299 if (dest->WriteMask[0])
300 dst[0] = value[0];
301 if (dest->WriteMask[1])
302 dst[1] = value[1];
303 if (dest->WriteMask[2])
304 dst[2] = value[2];
305 if (dest->WriteMask[3])
306 dst[3] = value[3];
307 }
308
309
/**
 * Set x to positive or negative infinity.
 *
 * x must be a modifiable GLfloat lvalue.  Three variants exist:
 *  - USE_IEEE: the infinity bit pattern is stored directly,
 *  - VMS:      __MAXFLOAT stands in for infinity,
 *  - default:  HUGE_VAL converted to GLfloat.
 *
 * The macro argument and the whole expansion are parenthesized so that
 * the macros behave like ordinary expressions for any lvalue argument.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Store the integer 'bits' as the raw bit pattern of the float lvalue x
 * (via the integer member of fi_type).
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
325
326
327 /**
328 * Execute the given vertex program
329 */
330 void
331 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
332 {
333 struct vp_machine *machine = &ctx->VertexProgram.Machine;
334 const struct vp_instruction *inst;
335
336 /* XXX load vertex fields into input registers */
337 /* and do other initialization */
338
339
340 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
341 switch (inst->Opcode) {
342 case VP_OPCODE_MOV:
343 {
344 GLfloat t[4];
345 fetch_vector4( &inst->SrcReg[0], machine, t );
346 store_vector4( &inst->DstReg, machine, t );
347 }
348 break;
349 case VP_OPCODE_LIT:
350 {
351 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
352 GLfloat t[4], lit[4];
353 fetch_vector4( &inst->SrcReg[0], machine, t );
354 if (t[3] < -(128.0F - epsilon))
355 t[3] = - (128.0F - epsilon);
356 else if (t[3] > 128.0F - epsilon)
357 t[3] = 128.0F - epsilon;
358 if (t[0] < 0.0)
359 t[0] = 0.0;
360 if (t[1] < 0.0)
361 t[1] = 0.0;
362 lit[0] = 1.0;
363 lit[1] = t[0];
364 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
365 lit[3] = 1.0;
366 store_vector4( &inst->DstReg, machine, lit );
367 }
368 break;
369 case VP_OPCODE_RCP:
370 {
371 GLfloat t[4];
372 fetch_vector1( &inst->SrcReg[0], machine, t );
373 if (t[0] != 1.0F)
374 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
375 t[1] = t[2] = t[3] = t[0];
376 store_vector4( &inst->DstReg, machine, t );
377 }
378 break;
379 case VP_OPCODE_RSQ:
380 {
381 GLfloat t[4];
382 fetch_vector1( &inst->SrcReg[0], machine, t );
383 t[0] = INV_SQRTF(FABSF(t[0]));
384 t[1] = t[2] = t[3] = t[0];
385 store_vector4( &inst->DstReg, machine, t );
386 }
387 break;
388 case VP_OPCODE_EXP:
389 {
390 GLfloat t[4], q[4], floor_t0;
391 fetch_vector1( &inst->SrcReg[0], machine, t );
392 floor_t0 = (float) floor(t[0]);
393 if (floor_t0 > FLT_MAX_EXP) {
394 SET_POS_INFINITY(q[0]);
395 q[1] = 0.0F;
396 SET_POS_INFINITY(q[2]);
397 q[3] = 1.0F;
398 }
399 else if (floor_t0 < FLT_MIN_EXP) {
400 q[0] = 0.0F;
401 q[1] = 0.0F;
402 q[2] = 0.0F;
403 q[3] = 0.0F;
404 }
405 else {
406 #ifdef USE_IEEE
407 GLint ii = (GLint) floor_t0;
408 ii = (ii < 23) + 0x3f800000;
409 SET_FLOAT_BITS(q[0], ii);
410 q[0] = *((GLfloat *) &ii);
411 #else
412 q[0] = (GLfloat) pow(2.0, floor_t0);
413 #endif
414 q[1] = t[0] - floor_t0;
415 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
416 q[3] = 1.0F;
417 }
418 store_vector4( &inst->DstReg, machine, q );
419 }
420 break;
421 case VP_OPCODE_LOG:
422 {
423 GLfloat t[4], q[4], abs_t0;
424 fetch_vector1( &inst->SrcReg[0], machine, t );
425 abs_t0 = (GLfloat) fabs(t[0]);
426 if (abs_t0 != 0.0F) {
427 /* Since we really can't handle infinite values on VMS
428 * like other OSes we'll use __MAXFLOAT to represent
429 * infinity. This may need some tweaking.
430 */
431 #ifdef VMS
432 if (abs_t0 == __MAXFLOAT) {
433 #else
434 if (IS_INF_OR_NAN(abs_t0)) {
435 #endif
436 SET_POS_INFINITY(q[0]);
437 q[1] = 1.0F;
438 SET_POS_INFINITY(q[2]);
439 }
440 else {
441 int exponent;
442 double mantissa = frexp(t[0], &exponent);
443 q[0] = (GLfloat) (exponent - 1);
444 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
445 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
446 }
447 }
448 else {
449 SET_NEG_INFINITY(q[0]);
450 q[1] = 1.0F;
451 SET_NEG_INFINITY(q[2]);
452 }
453 q[3] = 1.0;
454 store_vector4( &inst->DstReg, machine, q );
455 }
456 break;
457 case VP_OPCODE_MUL:
458 {
459 GLfloat t[4], u[4], prod[4];
460 fetch_vector4( &inst->SrcReg[0], machine, t );
461 fetch_vector4( &inst->SrcReg[1], machine, u );
462 prod[0] = t[0] * u[0];
463 prod[1] = t[1] * u[1];
464 prod[2] = t[2] * u[2];
465 prod[3] = t[3] * u[3];
466 store_vector4( &inst->DstReg, machine, prod );
467 }
468 break;
469 case VP_OPCODE_ADD:
470 {
471 GLfloat t[4], u[4], sum[4];
472 fetch_vector4( &inst->SrcReg[0], machine, t );
473 fetch_vector4( &inst->SrcReg[1], machine, u );
474 sum[0] = t[0] + u[0];
475 sum[1] = t[1] + u[1];
476 sum[2] = t[2] + u[2];
477 sum[3] = t[3] + u[3];
478 store_vector4( &inst->DstReg, machine, sum );
479 }
480 break;
481 case VP_OPCODE_DP3:
482 {
483 GLfloat t[4], u[4], dot[4];
484 fetch_vector4( &inst->SrcReg[0], machine, t );
485 fetch_vector4( &inst->SrcReg[1], machine, u );
486 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
487 dot[1] = dot[2] = dot[3] = dot[0];
488 store_vector4( &inst->DstReg, machine, dot );
489 }
490 break;
491 case VP_OPCODE_DP4:
492 {
493 GLfloat t[4], u[4], dot[4];
494 fetch_vector4( &inst->SrcReg[0], machine, t );
495 fetch_vector4( &inst->SrcReg[1], machine, u );
496 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
497 dot[1] = dot[2] = dot[3] = dot[0];
498 store_vector4( &inst->DstReg, machine, dot );
499 }
500 break;
501 case VP_OPCODE_DST:
502 {
503 GLfloat t[4], u[4], dst[4];
504 fetch_vector4( &inst->SrcReg[0], machine, t );
505 fetch_vector4( &inst->SrcReg[1], machine, u );
506 dst[0] = 1.0F;
507 dst[1] = t[1] * u[1];
508 dst[2] = t[2];
509 dst[3] = u[3];
510 store_vector4( &inst->DstReg, machine, dst );
511 }
512 break;
513 case VP_OPCODE_MIN:
514 {
515 GLfloat t[4], u[4], min[4];
516 fetch_vector4( &inst->SrcReg[0], machine, t );
517 fetch_vector4( &inst->SrcReg[1], machine, u );
518 min[0] = (t[0] < u[0]) ? t[0] : u[0];
519 min[1] = (t[1] < u[1]) ? t[1] : u[1];
520 min[2] = (t[2] < u[2]) ? t[2] : u[2];
521 min[3] = (t[3] < u[3]) ? t[3] : u[3];
522 store_vector4( &inst->DstReg, machine, min );
523 }
524 break;
525 case VP_OPCODE_MAX:
526 {
527 GLfloat t[4], u[4], max[4];
528 fetch_vector4( &inst->SrcReg[0], machine, t );
529 fetch_vector4( &inst->SrcReg[1], machine, u );
530 max[0] = (t[0] > u[0]) ? t[0] : u[0];
531 max[1] = (t[1] > u[1]) ? t[1] : u[1];
532 max[2] = (t[2] > u[2]) ? t[2] : u[2];
533 max[3] = (t[3] > u[3]) ? t[3] : u[3];
534 store_vector4( &inst->DstReg, machine, max );
535 }
536 break;
537 case VP_OPCODE_SLT:
538 {
539 GLfloat t[4], u[4], slt[4];
540 fetch_vector4( &inst->SrcReg[0], machine, t );
541 fetch_vector4( &inst->SrcReg[1], machine, u );
542 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
543 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
544 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
545 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
546 store_vector4( &inst->DstReg, machine, slt );
547 }
548 break;
549 case VP_OPCODE_SGE:
550 {
551 GLfloat t[4], u[4], sge[4];
552 fetch_vector4( &inst->SrcReg[0], machine, t );
553 fetch_vector4( &inst->SrcReg[1], machine, u );
554 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
555 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
556 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
557 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
558 store_vector4( &inst->DstReg, machine, sge );
559 }
560 break;
561 case VP_OPCODE_MAD:
562 {
563 GLfloat t[4], u[4], v[4], sum[4];
564 fetch_vector4( &inst->SrcReg[0], machine, t );
565 fetch_vector4( &inst->SrcReg[1], machine, u );
566 fetch_vector4( &inst->SrcReg[2], machine, v );
567 sum[0] = t[0] * u[0] + v[0];
568 sum[1] = t[1] * u[1] + v[1];
569 sum[2] = t[2] * u[2] + v[2];
570 sum[3] = t[3] * u[3] + v[3];
571 store_vector4( &inst->DstReg, machine, sum );
572 }
573 break;
574 case VP_OPCODE_ARL:
575 {
576 GLfloat t[4];
577 fetch_vector4( &inst->SrcReg[0], machine, t );
578 machine->AddressReg = (GLint) floor(t[0]);
579 }
580 break;
581 case VP_OPCODE_DPH:
582 {
583 GLfloat t[4], u[4], dot[4];
584 fetch_vector4( &inst->SrcReg[0], machine, t );
585 fetch_vector4( &inst->SrcReg[1], machine, u );
586 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
587 dot[1] = dot[2] = dot[3] = dot[0];
588 store_vector4( &inst->DstReg, machine, dot );
589 }
590 break;
591 case VP_OPCODE_RCC:
592 {
593 GLfloat t[4], u;
594 fetch_vector1( &inst->SrcReg[0], machine, t );
595 if (t[0] == 1.0F)
596 u = 1.0F;
597 else
598 u = 1.0F / t[0];
599 if (u > 0.0F) {
600 if (u > 1.884467e+019F) {
601 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
602 }
603 else if (u < 5.42101e-020F) {
604 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
605 }
606 }
607 else {
608 if (u < -1.884467e+019F) {
609 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
610 }
611 else if (u > -5.42101e-020F) {
612 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
613 }
614 }
615 t[0] = t[1] = t[2] = t[3] = u;
616 store_vector4( &inst->DstReg, machine, t );
617 }
618 break;
619 case VP_OPCODE_SUB:
620 {
621 GLfloat t[4], u[4], sum[4];
622 fetch_vector4( &inst->SrcReg[0], machine, t );
623 fetch_vector4( &inst->SrcReg[1], machine, u );
624 sum[0] = t[0] - u[0];
625 sum[1] = t[1] - u[1];
626 sum[2] = t[2] - u[2];
627 sum[3] = t[3] - u[3];
628 store_vector4( &inst->DstReg, machine, sum );
629 }
630 break;
631 case VP_OPCODE_ABS:
632 {
633 GLfloat t[4];
634 fetch_vector4( &inst->SrcReg[0], machine, t );
635 if (t[0] < 0.0) t[0] = -t[0];
636 if (t[1] < 0.0) t[1] = -t[1];
637 if (t[2] < 0.0) t[2] = -t[2];
638 if (t[3] < 0.0) t[3] = -t[3];
639 store_vector4( &inst->DstReg, machine, t );
640 }
641 break;
642
643 case VP_OPCODE_END:
644 return;
645 default:
646 /* bad instruction opcode */
647 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
648 return;
649 }
650 }
651 }
652
653
654
655 /**
656 Thoughts on vertex program optimization:
657
658 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
659 assembly code. That will probably be a lot of work.
660
661 Another approach might be to replace the vp_instruction->Opcode field with
662 a pointer to a specialized C function which executes the instruction.
663 In particular we can write functions which skip swizzling, negating,
664 masking, relative addressing, etc. when they're not needed.
665
666 For example:
667
668 void simple_add( struct vp_instruction *inst )
669 {
670 GLfloat *sum = machine->Registers[inst->DstReg.Register];
671 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
672 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
673 sum[0] = a[0] + b[0];
674 sum[1] = a[1] + b[1];
675 sum[2] = a[2] + b[2];
676 sum[3] = a[3] + b[3];
677 }
678
679 */
680
681 /*
682
683 KW:
684
685 A first step would be to 'vectorize' the programs in the same way as
686 the normal transformation code in the tnl module. Thus each opcode
687 takes zero or more input vectors (registers) and produces one or more
688 output vectors.
689
690 These operations would initially be coded in C, with machine-specific
691 assembly following, as is currently the case for matrix
692 transformations in the math/ directory. The preprocessing scheme for
693 selecting simpler operations Brian describes above would also work
694 here.
695
696 This should give reasonable performance without excessive effort.
697
698 */