2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat zeroVec
[4] = { 0, 0, 0, 0 };
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
50 _mesa_init_vp_registers(GLcontext
*ctx
)
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
56 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
63 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
70 if (ctx
->VertexProgram
.Current
->Parameters
) {
72 _mesa_load_state_parameters(ctx
, ctx
->VertexProgram
.Current
->Parameters
);
74 /* And copy it into the program state */
75 for (i
=0; i
<ctx
->VertexProgram
.Current
->Parameters
->NumParameters
; i
++) {
76 MEMCPY(ctx
->VertexProgram
.Parameters
[i
],
77 &ctx
->VertexProgram
.Current
->Parameters
->Parameters
[i
].Values
,
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
90 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
93 for (i
= 0; i
< 4; i
++) {
94 registers
[pos
+ i
][0] = mat
[0 + i
];
95 registers
[pos
+ i
][1] = mat
[4 + i
];
96 registers
[pos
+ i
][2] = mat
[8 + i
];
97 registers
[pos
+ i
][3] = mat
[12 + i
];
103 * As above, but transpose the matrix.
106 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
107 const GLfloat mat
[16])
109 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
118 _mesa_init_tracked_matrices(GLcontext
*ctx
)
122 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
123 /* point 'mat' at source matrix */
125 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
126 mat
= ctx
->ModelviewMatrixStack
.Top
;
128 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
129 mat
= ctx
->ProjectionMatrixStack
.Top
;
131 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
132 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
134 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
135 mat
= ctx
->ColorMatrixStack
.Top
;
137 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
138 /* XXX verify the combined matrix is up to date */
139 mat
= &ctx
->_ModelProjectMatrix
;
141 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
142 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
143 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
144 ASSERT(n
< MAX_PROGRAM_MATRICES
);
145 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
153 /* load the matrix */
154 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
155 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
157 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
158 _math_matrix_analyse(mat
); /* update the inverse */
159 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
160 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
162 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
163 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
166 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
167 == GL_INVERSE_TRANSPOSE_NV
);
168 _math_matrix_analyse(mat
); /* update the inverse */
169 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
170 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
178 * For debugging. Dump the current vertex program machine registers.
181 _mesa_dump_vp_state( const struct vertex_program_state
*state
)
184 _mesa_printf("VertexIn:\n");
185 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
186 _mesa_printf("%d: %f %f %f %f ", i
,
190 state
->Inputs
[i
][3]);
194 _mesa_printf("VertexOut:\n");
195 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
196 _mesa_printf("%d: %f %f %f %f ", i
,
197 state
->Outputs
[i
][0],
198 state
->Outputs
[i
][1],
199 state
->Outputs
[i
][2],
200 state
->Outputs
[i
][3]);
204 _mesa_printf("Registers:\n");
205 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
206 _mesa_printf("%d: %f %f %f %f ", i
,
207 state
->Temporaries
[i
][0],
208 state
->Temporaries
[i
][1],
209 state
->Temporaries
[i
][2],
210 state
->Temporaries
[i
][3]);
214 _mesa_printf("Parameters:\n");
215 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
216 _mesa_printf("%d: %f %f %f %f ", i
,
217 state
->Parameters
[i
][0],
218 state
->Parameters
[i
][1],
219 state
->Parameters
[i
][2],
220 state
->Parameters
[i
][3]);
228 * Return a pointer to the 4-element float vector specified by the given
231 static INLINE
const GLfloat
*
232 get_register_pointer( const struct vp_src_register
*source
,
233 const struct vertex_program_state
*state
)
235 if (source
->RelAddr
) {
236 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
237 ASSERT( (source
->File
== PROGRAM_ENV_PARAM
) ||
238 (source
->File
== PROGRAM_STATE_VAR
) );
239 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
242 return state
->Parameters
[reg
];
245 switch (source
->File
) {
246 case PROGRAM_TEMPORARY
:
247 return state
->Temporaries
[source
->Index
];
249 return state
->Inputs
[source
->Index
];
250 case PROGRAM_LOCAL_PARAM
:
252 return state
->Temporaries
[source
->Index
];
253 case PROGRAM_ENV_PARAM
:
254 return state
->Parameters
[source
->Index
];
255 case PROGRAM_STATE_VAR
:
256 return state
->Parameters
[source
->Index
];
259 "Bad source register file in fetch_vector4(vp)");
268 * Fetch a 4-element float vector from the given source register.
269 * Apply swizzling and negating as needed.
272 fetch_vector4( const struct vp_src_register
*source
,
273 const struct vertex_program_state
*state
,
276 const GLfloat
*src
= get_register_pointer(source
, state
);
278 if (source
->Negate
) {
279 result
[0] = -src
[source
->Swizzle
[0]];
280 result
[1] = -src
[source
->Swizzle
[1]];
281 result
[2] = -src
[source
->Swizzle
[2]];
282 result
[3] = -src
[source
->Swizzle
[3]];
285 result
[0] = src
[source
->Swizzle
[0]];
286 result
[1] = src
[source
->Swizzle
[1]];
287 result
[2] = src
[source
->Swizzle
[2]];
288 result
[3] = src
[source
->Swizzle
[3]];
295 * As above, but only return result[0] element.
298 fetch_vector1( const struct vp_src_register
*source
,
299 const struct vertex_program_state
*state
,
302 const GLfloat
*src
= get_register_pointer(source
, state
);
304 if (source
->Negate
) {
305 result
[0] = -src
[source
->Swizzle
[0]];
308 result
[0] = src
[source
->Swizzle
[0]];
314 * Store 4 floats into a register.
317 store_vector4( const struct vp_dst_register
*dest
,
318 struct vertex_program_state
*state
,
319 const GLfloat value
[4] )
322 switch (dest
->File
) {
323 case PROGRAM_TEMPORARY
:
324 dst
= state
->Temporaries
[dest
->Index
];
327 dst
= state
->Outputs
[dest
->Index
];
330 _mesa_problem(NULL
, "Invalid register file in fetch_vector1(vp)");
334 if (dest
->WriteMask
[0])
336 if (dest
->WriteMask
[1])
338 if (dest
->WriteMask
[2])
340 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 *
 * The first variant writes the bit patterns 0x7F800000 / 0xFF800000
 * directly into x; the others assign __MAXFLOAT or HUGE_VAL.  Arguments
 * and expansions are fully parenthesized so the macros can be used inside
 * larger expressions and with arbitrary lvalue expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite x through the integer member of fi_type with 'bits'. */
#define SET_FLOAT_BITS(x, bits) ( ((fi_type *) &(x))->i = (bits) )
363 * Execute the given vertex program
366 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
368 struct vertex_program_state
*state
= &ctx
->VertexProgram
;
369 const struct vp_instruction
*inst
;
371 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
373 /* If the program is position invariant, multiply the input
374 * position and the MVP matrix and stick it into the output pos slot
376 if (ctx
->VertexProgram
.Current
->IsPositionInvariant
) {
377 TRANSFORM_POINT( ctx
->VertexProgram
.Outputs
[0],
378 ctx
->_ModelProjectMatrix
.m
,
379 ctx
->VertexProgram
.Inputs
[0]);
381 /* XXX: This could go elsewhere */
382 ctx
->VertexProgram
.Current
->OutputsWritten
|= 0x1;
385 for (inst
= program
->Instructions
; /*inst->Opcode != VP_OPCODE_END*/; inst
++) {
387 if (ctx
->VertexProgram
.CallbackEnabled
&&
388 ctx
->VertexProgram
.Callback
) {
389 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
390 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
391 ctx
->VertexProgram
.CallbackData
);
394 switch (inst
->Opcode
) {
398 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
399 store_vector4( &inst
->DstReg
, state
, t
);
404 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
405 GLfloat t
[4], lit
[4];
406 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
407 if (t
[3] < -(128.0F
- epsilon
))
408 t
[3] = - (128.0F
- epsilon
);
409 else if (t
[3] > 128.0F
- epsilon
)
410 t
[3] = 128.0F
- epsilon
;
417 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
419 store_vector4( &inst
->DstReg
, state
, lit
);
425 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
427 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
428 t
[1] = t
[2] = t
[3] = t
[0];
429 store_vector4( &inst
->DstReg
, state
, t
);
435 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
436 t
[0] = INV_SQRTF(FABSF(t
[0]));
437 t
[1] = t
[2] = t
[3] = t
[0];
438 store_vector4( &inst
->DstReg
, state
, t
);
443 GLfloat t
[4], q
[4], floor_t0
;
444 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
445 floor_t0
= (float) floor(t
[0]);
446 if (floor_t0
> FLT_MAX_EXP
) {
447 SET_POS_INFINITY(q
[0]);
448 SET_POS_INFINITY(q
[2]);
450 else if (floor_t0
< FLT_MIN_EXP
) {
456 GLint ii
= (GLint
) floor_t0
;
457 ii
= (ii
< 23) + 0x3f800000;
458 SET_FLOAT_BITS(q
[0], ii
);
459 q
[0] = *((GLfloat
*) &ii
);
461 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
463 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
465 q
[1] = t
[0] - floor_t0
;
467 store_vector4( &inst
->DstReg
, state
, q
);
472 GLfloat t
[4], q
[4], abs_t0
;
473 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
474 abs_t0
= (GLfloat
) fabs(t
[0]);
475 if (abs_t0
!= 0.0F
) {
476 /* Since we really can't handle infinite values on VMS
477 * like other OSes we'll use __MAXFLOAT to represent
478 * infinity. This may need some tweaking.
481 if (abs_t0
== __MAXFLOAT
)
483 if (IS_INF_OR_NAN(abs_t0
))
486 SET_POS_INFINITY(q
[0]);
488 SET_POS_INFINITY(q
[2]);
492 double mantissa
= frexp(t
[0], &exponent
);
493 q
[0] = (GLfloat
) (exponent
- 1);
494 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
495 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
499 SET_NEG_INFINITY(q
[0]);
501 SET_NEG_INFINITY(q
[2]);
504 store_vector4( &inst
->DstReg
, state
, q
);
509 GLfloat t
[4], u
[4], prod
[4];
510 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
511 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
512 prod
[0] = t
[0] * u
[0];
513 prod
[1] = t
[1] * u
[1];
514 prod
[2] = t
[2] * u
[2];
515 prod
[3] = t
[3] * u
[3];
516 store_vector4( &inst
->DstReg
, state
, prod
);
521 GLfloat t
[4], u
[4], sum
[4];
522 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
523 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
524 sum
[0] = t
[0] + u
[0];
525 sum
[1] = t
[1] + u
[1];
526 sum
[2] = t
[2] + u
[2];
527 sum
[3] = t
[3] + u
[3];
528 store_vector4( &inst
->DstReg
, state
, sum
);
533 GLfloat t
[4], u
[4], dot
[4];
534 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
535 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
536 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
537 dot
[1] = dot
[2] = dot
[3] = dot
[0];
538 store_vector4( &inst
->DstReg
, state
, dot
);
543 GLfloat t
[4], u
[4], dot
[4];
544 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
545 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
546 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
547 dot
[1] = dot
[2] = dot
[3] = dot
[0];
548 store_vector4( &inst
->DstReg
, state
, dot
);
553 GLfloat t
[4], u
[4], dst
[4];
554 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
555 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
557 dst
[1] = t
[1] * u
[1];
560 store_vector4( &inst
->DstReg
, state
, dst
);
565 GLfloat t
[4], u
[4], min
[4];
566 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
567 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
568 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
569 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
570 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
571 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
572 store_vector4( &inst
->DstReg
, state
, min
);
577 GLfloat t
[4], u
[4], max
[4];
578 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
579 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
580 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
581 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
582 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
583 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
584 store_vector4( &inst
->DstReg
, state
, max
);
589 GLfloat t
[4], u
[4], slt
[4];
590 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
591 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
592 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
593 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
594 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
595 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
596 store_vector4( &inst
->DstReg
, state
, slt
);
601 GLfloat t
[4], u
[4], sge
[4];
602 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
603 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
604 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
605 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
606 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
607 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
608 store_vector4( &inst
->DstReg
, state
, sge
);
613 GLfloat t
[4], u
[4], v
[4], sum
[4];
614 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
615 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
616 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
617 sum
[0] = t
[0] * u
[0] + v
[0];
618 sum
[1] = t
[1] * u
[1] + v
[1];
619 sum
[2] = t
[2] * u
[2] + v
[2];
620 sum
[3] = t
[3] * u
[3] + v
[3];
621 store_vector4( &inst
->DstReg
, state
, sum
);
627 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
628 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
633 GLfloat t
[4], u
[4], dot
[4];
634 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
635 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
636 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
637 dot
[1] = dot
[2] = dot
[3] = dot
[0];
638 store_vector4( &inst
->DstReg
, state
, dot
);
644 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
650 if (u
> 1.884467e+019F
) {
651 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
653 else if (u
< 5.42101e-020F
) {
654 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
658 if (u
< -1.884467e+019F
) {
659 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
661 else if (u
> -5.42101e-020F
) {
662 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
665 t
[0] = t
[1] = t
[2] = t
[3] = u
;
666 store_vector4( &inst
->DstReg
, state
, t
);
669 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
671 GLfloat t
[4], u
[4], sum
[4];
672 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
673 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
674 sum
[0] = t
[0] - u
[0];
675 sum
[1] = t
[1] - u
[1];
676 sum
[2] = t
[2] - u
[2];
677 sum
[3] = t
[3] - u
[3];
678 store_vector4( &inst
->DstReg
, state
, sum
);
681 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
684 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
685 if (t
[0] < 0.0) t
[0] = -t
[0];
686 if (t
[1] < 0.0) t
[1] = -t
[1];
687 if (t
[2] < 0.0) t
[2] = -t
[2];
688 if (t
[3] < 0.0) t
[3] = -t
[3];
689 store_vector4( &inst
->DstReg
, state
, t
);
692 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
695 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
700 store_vector4( &inst
->DstReg
, state
, t
);
703 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
706 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
707 t
[0] = t
[0] - FLOORF(t
[0]);
708 t
[1] = t
[1] - FLOORF(t
[1]);
709 t
[2] = t
[2] - FLOORF(t
[2]);
710 t
[3] = t
[3] - FLOORF(t
[3]);
711 store_vector4( &inst
->DstReg
, state
, t
);
714 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
717 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
718 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
719 store_vector4( &inst
->DstReg
, state
, t
);
722 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
725 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
726 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
727 store_vector4( &inst
->DstReg
, state
, t
);
730 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
733 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
734 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
735 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
736 store_vector4( &inst
->DstReg
, state
, t
);
739 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
741 GLfloat t
[4], u
[4], cross
[4];
742 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
743 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
744 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
745 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
746 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
747 store_vector4( &inst
->DstReg
, state
, cross
);
750 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
752 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
753 const GLfloat
*src
= get_register_pointer(source
, state
);
757 /* do extended swizzling here */
758 for (i
= 0; i
< 3; i
++) {
759 if (source
->Swizzle
[i
] == SWIZZLE_ZERO
)
761 else if (source
->Swizzle
[i
] == SWIZZLE_ONE
)
764 result
[i
] = -src
[source
->Swizzle
[i
]];
766 result
[i
] = -result
[i
];
768 store_vector4( &inst
->DstReg
, state
, result
);
773 ctx
->_CurrentProgram
= 0;
776 /* bad instruction opcode */
777 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
778 ctx
->_CurrentProgram
= 0;
783 ctx
->_CurrentProgram
= 0;
789 Thoughts on vertex program optimization:
791 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
792 assembly code. That will probably be a lot of work.
794 Another approach might be to replace the vp_instruction->Opcode field with
795 a pointer to a specialized C function which executes the instruction.
796 In particular we can write functions which skip swizzling, negating,
797 masking, relative addressing, etc. when they're not needed.
801 void simple_add( struct vp_instruction *inst )
803 GLfloat *sum = machine->Registers[inst->DstReg.Register];
804 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
805 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
806 sum[0] = a[0] + b[0];
807 sum[1] = a[1] + b[1];
808 sum[2] = a[2] + b[2];
809 sum[3] = a[3] + b[3];
818 A first step would be to 'vectorize' the programs in the same way as
819 the normal transformation code in the tnl module. Thus each opcode
820 takes zero or more input vectors (registers) and produces one or more
823 These operations would initially be coded in C, with machine-specific
824 assembly following, as is currently the case for matrix
825 transformations in the math/ directory. The preprocessing scheme for
826 selecting simpler operations Brian describes above would also work
829 This should give reasonable performance without excessive effort.