bacbea8343c5438a30c9e33995abfa35075e8e58
[mesa.git] / src / mesa / main / nvvertexec.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file nvvertexec.c
27 * \brief Code to execute vertex programs.
28 * \author Brian Paul
29 */
30
31 #include "glheader.h"
32 #include "context.h"
33 #include "imports.h"
34 #include "macros.h"
35 #include "mtypes.h"
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
38 #include "math/m_matrix.h"
39
40
41 static const GLfloat zeroVec[4] = { 0, 0, 0, 0 };
42
43
44 /**
45 * Load/initialize the vertex program registers.
46 * This needs to be done per vertex.
47 */
48 void
49 _mesa_init_vp_registers(GLcontext *ctx)
50 {
51 struct vp_machine *machine = &(ctx->VertexProgram.Machine);
52 GLuint i;
53
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(machine->Registers[VP_INPUT_REG_START],
56 ctx->Current.Attrib,
57 16 * 4 * sizeof(GLfloat));
58
59 /* Output and temp regs are initialized to [0,0,0,1] */
60 for (i = VP_OUTPUT_REG_START; i <= VP_OUTPUT_REG_END; i++) {
61 machine->Registers[i][0] = 0.0F;
62 machine->Registers[i][1] = 0.0F;
63 machine->Registers[i][2] = 0.0F;
64 machine->Registers[i][3] = 1.0F;
65 }
66 for (i = VP_TEMP_REG_START; i <= VP_TEMP_REG_END; i++) {
67 machine->Registers[i][0] = 0.0F;
68 machine->Registers[i][1] = 0.0F;
69 machine->Registers[i][2] = 0.0F;
70 machine->Registers[i][3] = 1.0F;
71 }
72
73 /* The program regs aren't touched */
74 }
75
76
77
78 /**
79 * Copy the 16 elements of a matrix into four consecutive program
80 * registers starting at 'pos'.
81 */
82 static void
83 load_matrix(GLfloat registers[][4], GLuint pos, const GLfloat mat[16])
84 {
85 GLuint i;
86 pos += VP_PROG_REG_START;
87 for (i = 0; i < 4; i++) {
88 registers[pos + i][0] = mat[0 + i];
89 registers[pos + i][1] = mat[4 + i];
90 registers[pos + i][2] = mat[8 + i];
91 registers[pos + i][3] = mat[12 + i];
92 }
93 }
94
95
96 /**
97 * As above, but transpose the matrix.
98 */
99 static void
100 load_transpose_matrix(GLfloat registers[][4], GLuint pos,
101 const GLfloat mat[16])
102 {
103 pos += VP_PROG_REG_START;
104 MEMCPY(registers[pos], mat, 16 * sizeof(GLfloat));
105 }
106
107
108 /**
109 * Load all currently tracked matrices into the program registers.
110 * This needs to be done per glBegin/glEnd.
111 */
112 void
113 _mesa_init_tracked_matrices(GLcontext *ctx)
114 {
115 GLuint i;
116
117 for (i = 0; i < VP_NUM_PROG_REGS / 4; i++) {
118 /* point 'mat' at source matrix */
119 GLmatrix *mat;
120 if (ctx->VertexProgram.TrackMatrix[i] == GL_MODELVIEW) {
121 mat = ctx->ModelviewMatrixStack.Top;
122 }
123 else if (ctx->VertexProgram.TrackMatrix[i] == GL_PROJECTION) {
124 mat = ctx->ProjectionMatrixStack.Top;
125 }
126 else if (ctx->VertexProgram.TrackMatrix[i] == GL_TEXTURE) {
127 mat = ctx->TextureMatrixStack[ctx->Texture.CurrentUnit].Top;
128 }
129 else if (ctx->VertexProgram.TrackMatrix[i] == GL_COLOR) {
130 mat = ctx->ColorMatrixStack.Top;
131 }
132 else if (ctx->VertexProgram.TrackMatrix[i]==GL_MODELVIEW_PROJECTION_NV) {
133 /* XXX verify the combined matrix is up to date */
134 mat = &ctx->_ModelProjectMatrix;
135 }
136 else if (ctx->VertexProgram.TrackMatrix[i] >= GL_MATRIX0_NV &&
137 ctx->VertexProgram.TrackMatrix[i] <= GL_MATRIX7_NV) {
138 GLuint n = ctx->VertexProgram.TrackMatrix[i] - GL_MATRIX0_NV;
139 ASSERT(n < MAX_PROGRAM_MATRICES);
140 mat = ctx->ProgramMatrixStack[n].Top;
141 }
142 else {
143 /* no matrix is tracked, but we leave the register values as-is */
144 assert(ctx->VertexProgram.TrackMatrix[i] == GL_NONE);
145 continue;
146 }
147
148 /* load the matrix */
149 if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_IDENTITY_NV) {
150 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
151 }
152 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_INVERSE_NV) {
153 _math_matrix_analyse(mat); /* update the inverse */
154 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
155 load_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->inv);
156 }
157 else if (ctx->VertexProgram.TrackMatrixTransform[i] == GL_TRANSPOSE_NV) {
158 load_transpose_matrix(ctx->VertexProgram.Machine.Registers, i*4, mat->m);
159 }
160 else {
161 assert(ctx->VertexProgram.TrackMatrixTransform[i]
162 == GL_INVERSE_TRANSPOSE_NV);
163 _math_matrix_analyse(mat); /* update the inverse */
164 assert((mat->flags & MAT_DIRTY_INVERSE) == 0);
165 load_transpose_matrix(ctx->VertexProgram.Machine.Registers,
166 i*4, mat->inv);
167 }
168 }
169 }
170
171
172
173 /**
174 * For debugging. Dump the current vertex program machine registers.
175 */
176 void
177 _mesa_dump_vp_machine( const struct vp_machine *machine )
178 {
179 int i;
180 _mesa_printf("VertexIn:\n");
181 for (i = 0; i < VP_NUM_INPUT_REGS; i++) {
182 _mesa_printf("%d: %f %f %f %f ", i,
183 machine->Registers[i + VP_INPUT_REG_START][0],
184 machine->Registers[i + VP_INPUT_REG_START][1],
185 machine->Registers[i + VP_INPUT_REG_START][2],
186 machine->Registers[i + VP_INPUT_REG_START][3]);
187 }
188 _mesa_printf("\n");
189
190 _mesa_printf("VertexOut:\n");
191 for (i = 0; i < VP_NUM_OUTPUT_REGS; i++) {
192 _mesa_printf("%d: %f %f %f %f ", i,
193 machine->Registers[i + VP_OUTPUT_REG_START][0],
194 machine->Registers[i + VP_OUTPUT_REG_START][1],
195 machine->Registers[i + VP_OUTPUT_REG_START][2],
196 machine->Registers[i + VP_OUTPUT_REG_START][3]);
197 }
198 _mesa_printf("\n");
199
200 _mesa_printf("Registers:\n");
201 for (i = 0; i < VP_NUM_TEMP_REGS; i++) {
202 _mesa_printf("%d: %f %f %f %f ", i,
203 machine->Registers[i + VP_TEMP_REG_START][0],
204 machine->Registers[i + VP_TEMP_REG_START][1],
205 machine->Registers[i + VP_TEMP_REG_START][2],
206 machine->Registers[i + VP_TEMP_REG_START][3]);
207 }
208 _mesa_printf("\n");
209
210 _mesa_printf("Parameters:\n");
211 for (i = 0; i < VP_NUM_PROG_REGS; i++) {
212 _mesa_printf("%d: %f %f %f %f ", i,
213 machine->Registers[i + VP_PROG_REG_START][0],
214 machine->Registers[i + VP_PROG_REG_START][1],
215 machine->Registers[i + VP_PROG_REG_START][2],
216 machine->Registers[i + VP_PROG_REG_START][3]);
217 }
218 _mesa_printf("\n");
219 }
220
221
222 /**
223 * Fetch a 4-element float vector from the given source register.
224 * Apply swizzling and negating as needed.
225 */
226 static void
227 fetch_vector4( const struct vp_src_register *source,
228 const struct vp_machine *machine,
229 GLfloat result[4] )
230 {
231 const GLfloat *src;
232
233 if (source->RelAddr) {
234 const GLint reg = source->Register + machine->AddressReg;
235 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
236 src = zeroVec;
237 else
238 src = machine->Registers[VP_PROG_REG_START + reg];
239 }
240 else {
241 src = machine->Registers[source->Register];
242 }
243
244 if (source->Negate) {
245 result[0] = -src[source->Swizzle[0]];
246 result[1] = -src[source->Swizzle[1]];
247 result[2] = -src[source->Swizzle[2]];
248 result[3] = -src[source->Swizzle[3]];
249 }
250 else {
251 result[0] = src[source->Swizzle[0]];
252 result[1] = src[source->Swizzle[1]];
253 result[2] = src[source->Swizzle[2]];
254 result[3] = src[source->Swizzle[3]];
255 }
256 }
257
258
259 /**
260 * As above, but only return result[0] element.
261 */
262 static void
263 fetch_vector1( const struct vp_src_register *source,
264 const struct vp_machine *machine,
265 GLfloat result[4] )
266 {
267 const GLfloat *src;
268
269 if (source->RelAddr) {
270 const GLint reg = source->Register + machine->AddressReg;
271 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
272 src = zeroVec;
273 else
274 src = machine->Registers[VP_PROG_REG_START + reg];
275 }
276 else {
277 src = machine->Registers[source->Register];
278 }
279
280 if (source->Negate) {
281 result[0] = -src[source->Swizzle[0]];
282 }
283 else {
284 result[0] = src[source->Swizzle[0]];
285 }
286 }
287
288
289 /**
290 * Store 4 floats into a register.
291 */
292 static void
293 store_vector4( const struct vp_dst_register *dest, struct vp_machine *machine,
294 const GLfloat value[4] )
295 {
296 GLfloat *dst = machine->Registers[dest->Register];
297
298 if (dest->WriteMask[0])
299 dst[0] = value[0];
300 if (dest->WriteMask[1])
301 dst[1] = value[1];
302 if (dest->WriteMask[2])
303 dst[2] = value[2];
304 if (dest->WriteMask[3])
305 dst[3] = value[3];
306 }
307
308
/**
 * Set x to positive or negative infinity.
 *
 * With USE_IEEE the IEEE single-precision bit patterns for +/-infinity
 * are stored directly into x.  On VMS, __MAXFLOAT stands in for infinity.
 * Otherwise HUGE_VAL is used.
 *
 * Arguments and expansions are fully parenthesized so the macros behave
 * as ordinary expressions regardless of what is passed as x.
 */
#ifdef USE_IEEE
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Store the integer 'bits' into x through the 'i' member of fi_type
 * (fi_type is declared elsewhere in the project).
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
324
325
326 /**
327 * Execute the given vertex program
328 */
329 void
330 _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
331 {
332 struct vp_machine *machine = &ctx->VertexProgram.Machine;
333 const struct vp_instruction *inst;
334
335 for (inst = program->Instructions; inst->Opcode != VP_OPCODE_END; inst++) {
336 switch (inst->Opcode) {
337 case VP_OPCODE_MOV:
338 {
339 GLfloat t[4];
340 fetch_vector4( &inst->SrcReg[0], machine, t );
341 store_vector4( &inst->DstReg, machine, t );
342 }
343 break;
344 case VP_OPCODE_LIT:
345 {
346 const GLfloat epsilon = 1.0e-5F; /* XXX fix? */
347 GLfloat t[4], lit[4];
348 fetch_vector4( &inst->SrcReg[0], machine, t );
349 if (t[3] < -(128.0F - epsilon))
350 t[3] = - (128.0F - epsilon);
351 else if (t[3] > 128.0F - epsilon)
352 t[3] = 128.0F - epsilon;
353 if (t[0] < 0.0)
354 t[0] = 0.0;
355 if (t[1] < 0.0)
356 t[1] = 0.0;
357 lit[0] = 1.0;
358 lit[1] = t[0];
359 lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F;
360 lit[3] = 1.0;
361 store_vector4( &inst->DstReg, machine, lit );
362 }
363 break;
364 case VP_OPCODE_RCP:
365 {
366 GLfloat t[4];
367 fetch_vector1( &inst->SrcReg[0], machine, t );
368 if (t[0] != 1.0F)
369 t[0] = 1.0F / t[0]; /* div by zero is infinity! */
370 t[1] = t[2] = t[3] = t[0];
371 store_vector4( &inst->DstReg, machine, t );
372 }
373 break;
374 case VP_OPCODE_RSQ:
375 {
376 GLfloat t[4];
377 fetch_vector1( &inst->SrcReg[0], machine, t );
378 t[0] = INV_SQRTF(FABSF(t[0]));
379 t[1] = t[2] = t[3] = t[0];
380 store_vector4( &inst->DstReg, machine, t );
381 }
382 break;
383 case VP_OPCODE_EXP:
384 {
385 GLfloat t[4], q[4], floor_t0;
386 fetch_vector1( &inst->SrcReg[0], machine, t );
387 floor_t0 = (float) floor(t[0]);
388 if (floor_t0 > FLT_MAX_EXP) {
389 SET_POS_INFINITY(q[0]);
390 SET_POS_INFINITY(q[2]);
391 }
392 else if (floor_t0 < FLT_MIN_EXP) {
393 q[0] = 0.0F;
394 q[2] = 0.0F;
395 }
396 else {
397 #ifdef USE_IEEE
398 GLint ii = (GLint) floor_t0;
399 ii = (ii < 23) + 0x3f800000;
400 SET_FLOAT_BITS(q[0], ii);
401 q[0] = *((GLfloat *) &ii);
402 #else
403 q[0] = (GLfloat) pow(2.0, floor_t0);
404 #endif
405 q[2] = (GLfloat) (q[0] * LOG2(q[1]));
406 }
407 q[1] = t[0] - floor_t0;
408 q[3] = 1.0F;
409 store_vector4( &inst->DstReg, machine, q );
410 }
411 break;
412 case VP_OPCODE_LOG:
413 {
414 GLfloat t[4], q[4], abs_t0;
415 fetch_vector1( &inst->SrcReg[0], machine, t );
416 abs_t0 = (GLfloat) fabs(t[0]);
417 if (abs_t0 != 0.0F) {
418 /* Since we really can't handle infinite values on VMS
419 * like other OSes we'll use __MAXFLOAT to represent
420 * infinity. This may need some tweaking.
421 */
422 #ifdef VMS
423 if (abs_t0 == __MAXFLOAT) {
424 #else
425 if (IS_INF_OR_NAN(abs_t0)) {
426 #endif
427 SET_POS_INFINITY(q[0]);
428 q[1] = 1.0F;
429 SET_POS_INFINITY(q[2]);
430 }
431 else {
432 int exponent;
433 double mantissa = frexp(t[0], &exponent);
434 q[0] = (GLfloat) (exponent - 1);
435 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
436 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
437 }
438 }
439 else {
440 SET_NEG_INFINITY(q[0]);
441 q[1] = 1.0F;
442 SET_NEG_INFINITY(q[2]);
443 }
444 q[3] = 1.0;
445 store_vector4( &inst->DstReg, machine, q );
446 }
447 break;
448 case VP_OPCODE_MUL:
449 {
450 GLfloat t[4], u[4], prod[4];
451 fetch_vector4( &inst->SrcReg[0], machine, t );
452 fetch_vector4( &inst->SrcReg[1], machine, u );
453 prod[0] = t[0] * u[0];
454 prod[1] = t[1] * u[1];
455 prod[2] = t[2] * u[2];
456 prod[3] = t[3] * u[3];
457 store_vector4( &inst->DstReg, machine, prod );
458 }
459 break;
460 case VP_OPCODE_ADD:
461 {
462 GLfloat t[4], u[4], sum[4];
463 fetch_vector4( &inst->SrcReg[0], machine, t );
464 fetch_vector4( &inst->SrcReg[1], machine, u );
465 sum[0] = t[0] + u[0];
466 sum[1] = t[1] + u[1];
467 sum[2] = t[2] + u[2];
468 sum[3] = t[3] + u[3];
469 store_vector4( &inst->DstReg, machine, sum );
470 }
471 break;
472 case VP_OPCODE_DP3:
473 {
474 GLfloat t[4], u[4], dot[4];
475 fetch_vector4( &inst->SrcReg[0], machine, t );
476 fetch_vector4( &inst->SrcReg[1], machine, u );
477 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2];
478 dot[1] = dot[2] = dot[3] = dot[0];
479 store_vector4( &inst->DstReg, machine, dot );
480 }
481 break;
482 case VP_OPCODE_DP4:
483 {
484 GLfloat t[4], u[4], dot[4];
485 fetch_vector4( &inst->SrcReg[0], machine, t );
486 fetch_vector4( &inst->SrcReg[1], machine, u );
487 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3];
488 dot[1] = dot[2] = dot[3] = dot[0];
489 store_vector4( &inst->DstReg, machine, dot );
490 }
491 break;
492 case VP_OPCODE_DST:
493 {
494 GLfloat t[4], u[4], dst[4];
495 fetch_vector4( &inst->SrcReg[0], machine, t );
496 fetch_vector4( &inst->SrcReg[1], machine, u );
497 dst[0] = 1.0F;
498 dst[1] = t[1] * u[1];
499 dst[2] = t[2];
500 dst[3] = u[3];
501 store_vector4( &inst->DstReg, machine, dst );
502 }
503 break;
504 case VP_OPCODE_MIN:
505 {
506 GLfloat t[4], u[4], min[4];
507 fetch_vector4( &inst->SrcReg[0], machine, t );
508 fetch_vector4( &inst->SrcReg[1], machine, u );
509 min[0] = (t[0] < u[0]) ? t[0] : u[0];
510 min[1] = (t[1] < u[1]) ? t[1] : u[1];
511 min[2] = (t[2] < u[2]) ? t[2] : u[2];
512 min[3] = (t[3] < u[3]) ? t[3] : u[3];
513 store_vector4( &inst->DstReg, machine, min );
514 }
515 break;
516 case VP_OPCODE_MAX:
517 {
518 GLfloat t[4], u[4], max[4];
519 fetch_vector4( &inst->SrcReg[0], machine, t );
520 fetch_vector4( &inst->SrcReg[1], machine, u );
521 max[0] = (t[0] > u[0]) ? t[0] : u[0];
522 max[1] = (t[1] > u[1]) ? t[1] : u[1];
523 max[2] = (t[2] > u[2]) ? t[2] : u[2];
524 max[3] = (t[3] > u[3]) ? t[3] : u[3];
525 store_vector4( &inst->DstReg, machine, max );
526 }
527 break;
528 case VP_OPCODE_SLT:
529 {
530 GLfloat t[4], u[4], slt[4];
531 fetch_vector4( &inst->SrcReg[0], machine, t );
532 fetch_vector4( &inst->SrcReg[1], machine, u );
533 slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
534 slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
535 slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
536 slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
537 store_vector4( &inst->DstReg, machine, slt );
538 }
539 break;
540 case VP_OPCODE_SGE:
541 {
542 GLfloat t[4], u[4], sge[4];
543 fetch_vector4( &inst->SrcReg[0], machine, t );
544 fetch_vector4( &inst->SrcReg[1], machine, u );
545 sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
546 sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
547 sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
548 sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
549 store_vector4( &inst->DstReg, machine, sge );
550 }
551 break;
552 case VP_OPCODE_MAD:
553 {
554 GLfloat t[4], u[4], v[4], sum[4];
555 fetch_vector4( &inst->SrcReg[0], machine, t );
556 fetch_vector4( &inst->SrcReg[1], machine, u );
557 fetch_vector4( &inst->SrcReg[2], machine, v );
558 sum[0] = t[0] * u[0] + v[0];
559 sum[1] = t[1] * u[1] + v[1];
560 sum[2] = t[2] * u[2] + v[2];
561 sum[3] = t[3] * u[3] + v[3];
562 store_vector4( &inst->DstReg, machine, sum );
563 }
564 break;
565 case VP_OPCODE_ARL:
566 {
567 GLfloat t[4];
568 fetch_vector4( &inst->SrcReg[0], machine, t );
569 machine->AddressReg = (GLint) floor(t[0]);
570 }
571 break;
572 case VP_OPCODE_DPH:
573 {
574 GLfloat t[4], u[4], dot[4];
575 fetch_vector4( &inst->SrcReg[0], machine, t );
576 fetch_vector4( &inst->SrcReg[1], machine, u );
577 dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
578 dot[1] = dot[2] = dot[3] = dot[0];
579 store_vector4( &inst->DstReg, machine, dot );
580 }
581 break;
582 case VP_OPCODE_RCC:
583 {
584 GLfloat t[4], u;
585 fetch_vector1( &inst->SrcReg[0], machine, t );
586 if (t[0] == 1.0F)
587 u = 1.0F;
588 else
589 u = 1.0F / t[0];
590 if (u > 0.0F) {
591 if (u > 1.884467e+019F) {
592 u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
593 }
594 else if (u < 5.42101e-020F) {
595 u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
596 }
597 }
598 else {
599 if (u < -1.884467e+019F) {
600 u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
601 }
602 else if (u > -5.42101e-020F) {
603 u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
604 }
605 }
606 t[0] = t[1] = t[2] = t[3] = u;
607 store_vector4( &inst->DstReg, machine, t );
608 }
609 break;
610 case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
611 {
612 GLfloat t[4], u[4], sum[4];
613 fetch_vector4( &inst->SrcReg[0], machine, t );
614 fetch_vector4( &inst->SrcReg[1], machine, u );
615 sum[0] = t[0] - u[0];
616 sum[1] = t[1] - u[1];
617 sum[2] = t[2] - u[2];
618 sum[3] = t[3] - u[3];
619 store_vector4( &inst->DstReg, machine, sum );
620 }
621 break;
622 case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
623 {
624 GLfloat t[4];
625 fetch_vector4( &inst->SrcReg[0], machine, t );
626 if (t[0] < 0.0) t[0] = -t[0];
627 if (t[1] < 0.0) t[1] = -t[1];
628 if (t[2] < 0.0) t[2] = -t[2];
629 if (t[3] < 0.0) t[3] = -t[3];
630 store_vector4( &inst->DstReg, machine, t );
631 }
632 break;
633 case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
634 {
635 GLfloat t[4];
636 fetch_vector4( &inst->SrcReg[0], machine, t );
637 t[0] = FLOORF(t[0]);
638 t[1] = FLOORF(t[1]);
639 t[2] = FLOORF(t[2]);
640 t[3] = FLOORF(t[3]);
641 store_vector4( &inst->DstReg, machine, t );
642 }
643 break;
644 case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
645 {
646 GLfloat t[4];
647 fetch_vector4( &inst->SrcReg[0], machine, t );
648 t[0] = t[0] - FLOORF(t[0]);
649 t[1] = t[1] - FLOORF(t[1]);
650 t[2] = t[2] - FLOORF(t[2]);
651 t[3] = t[3] - FLOORF(t[3]);
652 store_vector4( &inst->DstReg, machine, t );
653 }
654 break;
655 case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
656 {
657 GLfloat t[4];
658 fetch_vector1( &inst->SrcReg[0], machine, t );
659 t[0] = t[1] = t[2] = t[3] = _mesa_pow(2.0, t[0]);
660 store_vector4( &inst->DstReg, machine, t );
661 }
662 break;
663 case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
664 {
665 GLfloat t[4];
666 fetch_vector1( &inst->SrcReg[0], machine, t );
667 t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
668 store_vector4( &inst->DstReg, machine, t );
669 }
670 break;
671 case VP_OPCODE_POW: /* GL_ARB_vertex_program */
672 {
673 GLfloat t[4], u[4];
674 fetch_vector1( &inst->SrcReg[0], machine, t );
675 fetch_vector1( &inst->SrcReg[1], machine, u );
676 t[0] = t[1] = t[2] = t[3] = _mesa_pow(t[0], u[0]);
677 store_vector4( &inst->DstReg, machine, t );
678 }
679 break;
680 case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
681 {
682 GLfloat t[4], u[4], cross[4];
683 fetch_vector4( &inst->SrcReg[0], machine, t );
684 fetch_vector4( &inst->SrcReg[1], machine, u );
685 cross[0] = t[1] * u[2] - t[2] * u[1];
686 cross[1] = t[2] * u[0] - t[0] * u[2];
687 cross[2] = t[0] * u[1] - t[1] * u[0];
688 store_vector4( &inst->DstReg, machine, cross );
689 }
690 break;
691 case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
692 {
693 const struct vp_src_register *source = &inst->SrcReg[0];
694 const GLfloat *src;
695 GLfloat result[4];
696 GLuint i;
697
698 /* Code similar to fetch_vector4() */
699 if (source->RelAddr) {
700 const GLint reg = source->Register + machine->AddressReg;
701 if (reg < 0 || reg > MAX_NV_VERTEX_PROGRAM_PARAMS)
702 src = zeroVec;
703 else
704 src = machine->Registers[VP_PROG_REG_START + reg];
705 }
706 else {
707 src = machine->Registers[source->Register];
708 }
709
710 /* extended swizzling here */
711 for (i = 0; i < 3; i++) {
712 if (source->Swizzle[i] == SWIZZLE_ZERO)
713 result[i] = 0.0;
714 else if (source->Swizzle[i] == SWIZZLE_ONE)
715 result[i] = -1.0;
716 else
717 result[i] = -src[source->Swizzle[i]];
718 if (source->Negate)
719 result[i] = -result[i];
720 }
721 store_vector4( &inst->DstReg, machine, result );
722 }
723 break;
724
725 case VP_OPCODE_END:
726 return;
727 default:
728 /* bad instruction opcode */
729 _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
730 return;
731 }
732 }
733 }
734
735
736
737 /**
738 Thoughts on vertex program optimization:
739
740 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
741 assembly code. That will probably be a lot of work.
742
743 Another approach might be to replace the vp_instruction->Opcode field with
744 a pointer to a specialized C function which executes the instruction.
745 In particular we can write functions which skip swizzling, negating,
746 masking, relative addressing, etc. when they're not needed.
747
748 For example:
749
750 void simple_add( struct vp_instruction *inst )
751 {
752 GLfloat *sum = machine->Registers[inst->DstReg.Register];
753 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
754 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
755 sum[0] = a[0] + b[0];
756 sum[1] = a[1] + b[1];
757 sum[2] = a[2] + b[2];
758 sum[3] = a[3] + b[3];
759 }
760
761 */
762
763 /*
764
765 KW:
766
767 A first step would be to 'vectorize' the programs in the same way as
768 the normal transformation code in the tnl module. Thus each opcode
769 takes zero or more input vectors (registers) and produces one or more
770 output vectors.
771
772 These operations would initially be coded in C, with machine-specific
773 assembly following, as is currently the case for matrix
774 transformations in the math/ directory. The preprocessing scheme for
775 selecting simpler operations Brian describes above would also work
776 here.
777
778 This should give reasonable performance without excessive effort.
779
780 */