2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat zeroVec
[4] = { 0, 0, 0, 0 };
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
50 _mesa_init_vp_registers(GLcontext
*ctx
)
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
56 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
63 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
70 if (ctx
->VertexProgram
.Current
->Parameters
) {
73 _mesa_load_state_parameters(ctx
, ctx
->VertexProgram
.Current
->Parameters
);
75 /* And copy it into the program state */
76 for (i
=0; i
<ctx
->VertexProgram
.Current
->Parameters
->NumParameters
; i
++) {
77 MEMCPY(ctx
->VertexProgram
.Parameters
[i
],
78 &ctx
->VertexProgram
.Current
->Parameters
->Parameters
[i
].Values
,
89 * Copy the 16 elements of a matrix into four consecutive program
90 * registers starting at 'pos'.
93 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
96 for (i
= 0; i
< 4; i
++) {
97 registers
[pos
+ i
][0] = mat
[0 + i
];
98 registers
[pos
+ i
][1] = mat
[4 + i
];
99 registers
[pos
+ i
][2] = mat
[8 + i
];
100 registers
[pos
+ i
][3] = mat
[12 + i
];
106 * As above, but transpose the matrix.
109 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
110 const GLfloat mat
[16])
112 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
117 * Load all currently tracked matrices into the program registers.
118 * This needs to be done per glBegin/glEnd.
121 _mesa_init_tracked_matrices(GLcontext
*ctx
)
125 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
126 /* point 'mat' at source matrix */
128 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
129 mat
= ctx
->ModelviewMatrixStack
.Top
;
131 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
132 mat
= ctx
->ProjectionMatrixStack
.Top
;
134 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
135 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
137 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
138 mat
= ctx
->ColorMatrixStack
.Top
;
140 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
141 /* XXX verify the combined matrix is up to date */
142 mat
= &ctx
->_ModelProjectMatrix
;
144 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
145 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
146 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
147 ASSERT(n
< MAX_PROGRAM_MATRICES
);
148 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
151 /* no matrix is tracked, but we leave the register values as-is */
152 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
156 /* load the matrix */
157 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
158 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
160 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
161 _math_matrix_analyse(mat
); /* update the inverse */
162 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
163 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
165 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
166 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
169 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
170 == GL_INVERSE_TRANSPOSE_NV
);
171 _math_matrix_analyse(mat
); /* update the inverse */
172 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
173 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
181 * For debugging. Dump the current vertex program machine registers.
184 _mesa_dump_vp_state( const struct vertex_program_state
*state
)
187 _mesa_printf("VertexIn:\n");
188 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
189 _mesa_printf("%d: %f %f %f %f ", i
,
193 state
->Inputs
[i
][3]);
197 _mesa_printf("VertexOut:\n");
198 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
199 _mesa_printf("%d: %f %f %f %f ", i
,
200 state
->Outputs
[i
][0],
201 state
->Outputs
[i
][1],
202 state
->Outputs
[i
][2],
203 state
->Outputs
[i
][3]);
207 _mesa_printf("Registers:\n");
208 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
209 _mesa_printf("%d: %f %f %f %f ", i
,
210 state
->Temporaries
[i
][0],
211 state
->Temporaries
[i
][1],
212 state
->Temporaries
[i
][2],
213 state
->Temporaries
[i
][3]);
217 _mesa_printf("Parameters:\n");
218 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
219 _mesa_printf("%d: %f %f %f %f ", i
,
220 state
->Parameters
[i
][0],
221 state
->Parameters
[i
][1],
222 state
->Parameters
[i
][2],
223 state
->Parameters
[i
][3]);
231 * Return a pointer to the 4-element float vector specified by the given
234 static INLINE
const GLfloat
*
235 get_register_pointer( const struct vp_src_register
*source
,
236 const struct vertex_program_state
*state
)
238 if (source
->RelAddr
) {
239 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
240 ASSERT(source
->File
== PROGRAM_ENV_PARAM
);
241 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
244 return state
->Parameters
[reg
];
247 switch (source
->File
) {
248 case PROGRAM_TEMPORARY
:
249 return state
->Temporaries
[source
->Index
];
251 return state
->Inputs
[source
->Index
];
252 case PROGRAM_LOCAL_PARAM
:
254 return state
->Temporaries
[source
->Index
];
255 case PROGRAM_ENV_PARAM
:
256 return state
->Parameters
[source
->Index
];
257 case PROGRAM_STATE_VAR
:
258 return state
->Parameters
[source
->Index
];
261 "Bad source register file in fetch_vector4(vp)");
270 * Fetch a 4-element float vector from the given source register.
271 * Apply swizzling and negating as needed.
274 fetch_vector4( const struct vp_src_register
*source
,
275 const struct vertex_program_state
*state
,
278 const GLfloat
*src
= get_register_pointer(source
, state
);
280 if (source
->Negate
) {
281 result
[0] = -src
[source
->Swizzle
[0]];
282 result
[1] = -src
[source
->Swizzle
[1]];
283 result
[2] = -src
[source
->Swizzle
[2]];
284 result
[3] = -src
[source
->Swizzle
[3]];
287 result
[0] = src
[source
->Swizzle
[0]];
288 result
[1] = src
[source
->Swizzle
[1]];
289 result
[2] = src
[source
->Swizzle
[2]];
290 result
[3] = src
[source
->Swizzle
[3]];
297 * As above, but only return result[0] element.
300 fetch_vector1( const struct vp_src_register
*source
,
301 const struct vertex_program_state
*state
,
304 const GLfloat
*src
= get_register_pointer(source
, state
);
306 if (source
->Negate
) {
307 result
[0] = -src
[source
->Swizzle
[0]];
310 result
[0] = src
[source
->Swizzle
[0]];
316 * Store 4 floats into a register.
319 store_vector4( const struct vp_dst_register
*dest
,
320 struct vertex_program_state
*state
,
321 const GLfloat value
[4] )
324 switch (dest
->File
) {
325 case PROGRAM_TEMPORARY
:
326 dst
= state
->Temporaries
[dest
->Index
];
329 dst
= state
->Outputs
[dest
->Index
];
332 _mesa_problem(NULL
, "Invalid register file in fetch_vector1(vp)");
336 if (dest
->WriteMask
[0])
338 if (dest
->WriteMask
[1])
340 if (dest
->WriteMask
[2])
342 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 *
 * Three variants are selected at compile time: writing the IEEE-754
 * single-precision bit patterns directly, using __MAXFLOAT as a stand-in
 * on VMS, or converting HUGE_VAL to GLfloat.  'x' must be a GLfloat
 * lvalue.  Arguments and expansions are fully parenthesized so the macros
 * behave as single expressions wherever they are used.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Overwrite the storage of the float lvalue x with the integer 'bits',
 * going through the 'i' member of fi_type.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
365 * Execute the given vertex program
368 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
370 struct vertex_program_state
*state
= &ctx
->VertexProgram
;
371 const struct vp_instruction
*inst
;
373 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
375 for (inst
= program
->Instructions
; inst
->Opcode
!= VP_OPCODE_END
; inst
++) {
377 if (ctx
->VertexProgram
.CallbackEnabled
&&
378 ctx
->VertexProgram
.Callback
) {
379 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
380 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
381 ctx
->VertexProgram
.CallbackData
);
384 switch (inst
->Opcode
) {
388 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
389 store_vector4( &inst
->DstReg
, state
, t
);
394 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
395 GLfloat t
[4], lit
[4];
396 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
397 if (t
[3] < -(128.0F
- epsilon
))
398 t
[3] = - (128.0F
- epsilon
);
399 else if (t
[3] > 128.0F
- epsilon
)
400 t
[3] = 128.0F
- epsilon
;
407 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
409 store_vector4( &inst
->DstReg
, state
, lit
);
415 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
417 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
418 t
[1] = t
[2] = t
[3] = t
[0];
419 store_vector4( &inst
->DstReg
, state
, t
);
425 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
426 t
[0] = INV_SQRTF(FABSF(t
[0]));
427 t
[1] = t
[2] = t
[3] = t
[0];
428 store_vector4( &inst
->DstReg
, state
, t
);
433 GLfloat t
[4], q
[4], floor_t0
;
434 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
435 floor_t0
= (float) floor(t
[0]);
436 if (floor_t0
> FLT_MAX_EXP
) {
437 SET_POS_INFINITY(q
[0]);
438 SET_POS_INFINITY(q
[2]);
440 else if (floor_t0
< FLT_MIN_EXP
) {
446 GLint ii
= (GLint
) floor_t0
;
447 ii
= (ii
< 23) + 0x3f800000;
448 SET_FLOAT_BITS(q
[0], ii
);
449 q
[0] = *((GLfloat
*) &ii
);
451 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
453 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
455 q
[1] = t
[0] - floor_t0
;
457 store_vector4( &inst
->DstReg
, state
, q
);
462 GLfloat t
[4], q
[4], abs_t0
;
463 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
464 abs_t0
= (GLfloat
) fabs(t
[0]);
465 if (abs_t0
!= 0.0F
) {
466 /* Since we really can't handle infinite values on VMS
467 * like other OSes we'll use __MAXFLOAT to represent
468 * infinity. This may need some tweaking.
471 if (abs_t0
== __MAXFLOAT
)
473 if (IS_INF_OR_NAN(abs_t0
))
476 SET_POS_INFINITY(q
[0]);
478 SET_POS_INFINITY(q
[2]);
482 double mantissa
= frexp(t
[0], &exponent
);
483 q
[0] = (GLfloat
) (exponent
- 1);
484 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
485 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
489 SET_NEG_INFINITY(q
[0]);
491 SET_NEG_INFINITY(q
[2]);
494 store_vector4( &inst
->DstReg
, state
, q
);
499 GLfloat t
[4], u
[4], prod
[4];
500 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
501 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
502 prod
[0] = t
[0] * u
[0];
503 prod
[1] = t
[1] * u
[1];
504 prod
[2] = t
[2] * u
[2];
505 prod
[3] = t
[3] * u
[3];
506 store_vector4( &inst
->DstReg
, state
, prod
);
511 GLfloat t
[4], u
[4], sum
[4];
512 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
513 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
514 sum
[0] = t
[0] + u
[0];
515 sum
[1] = t
[1] + u
[1];
516 sum
[2] = t
[2] + u
[2];
517 sum
[3] = t
[3] + u
[3];
518 store_vector4( &inst
->DstReg
, state
, sum
);
523 GLfloat t
[4], u
[4], dot
[4];
524 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
525 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
526 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
527 dot
[1] = dot
[2] = dot
[3] = dot
[0];
528 store_vector4( &inst
->DstReg
, state
, dot
);
533 GLfloat t
[4], u
[4], dot
[4];
534 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
535 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
536 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
537 dot
[1] = dot
[2] = dot
[3] = dot
[0];
538 store_vector4( &inst
->DstReg
, state
, dot
);
543 GLfloat t
[4], u
[4], dst
[4];
544 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
545 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
547 dst
[1] = t
[1] * u
[1];
550 store_vector4( &inst
->DstReg
, state
, dst
);
555 GLfloat t
[4], u
[4], min
[4];
556 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
557 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
558 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
559 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
560 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
561 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
562 store_vector4( &inst
->DstReg
, state
, min
);
567 GLfloat t
[4], u
[4], max
[4];
568 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
569 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
570 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
571 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
572 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
573 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
574 store_vector4( &inst
->DstReg
, state
, max
);
579 GLfloat t
[4], u
[4], slt
[4];
580 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
581 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
582 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
583 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
584 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
585 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
586 store_vector4( &inst
->DstReg
, state
, slt
);
591 GLfloat t
[4], u
[4], sge
[4];
592 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
593 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
594 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
595 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
596 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
597 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
598 store_vector4( &inst
->DstReg
, state
, sge
);
603 GLfloat t
[4], u
[4], v
[4], sum
[4];
604 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
605 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
606 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
607 sum
[0] = t
[0] * u
[0] + v
[0];
608 sum
[1] = t
[1] * u
[1] + v
[1];
609 sum
[2] = t
[2] * u
[2] + v
[2];
610 sum
[3] = t
[3] * u
[3] + v
[3];
611 store_vector4( &inst
->DstReg
, state
, sum
);
617 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
618 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
623 GLfloat t
[4], u
[4], dot
[4];
624 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
625 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
626 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
627 dot
[1] = dot
[2] = dot
[3] = dot
[0];
628 store_vector4( &inst
->DstReg
, state
, dot
);
634 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
640 if (u
> 1.884467e+019F
) {
641 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
643 else if (u
< 5.42101e-020F
) {
644 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
648 if (u
< -1.884467e+019F
) {
649 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
651 else if (u
> -5.42101e-020F
) {
652 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
655 t
[0] = t
[1] = t
[2] = t
[3] = u
;
656 store_vector4( &inst
->DstReg
, state
, t
);
659 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
661 GLfloat t
[4], u
[4], sum
[4];
662 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
663 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
664 sum
[0] = t
[0] - u
[0];
665 sum
[1] = t
[1] - u
[1];
666 sum
[2] = t
[2] - u
[2];
667 sum
[3] = t
[3] - u
[3];
668 store_vector4( &inst
->DstReg
, state
, sum
);
671 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
674 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
675 if (t
[0] < 0.0) t
[0] = -t
[0];
676 if (t
[1] < 0.0) t
[1] = -t
[1];
677 if (t
[2] < 0.0) t
[2] = -t
[2];
678 if (t
[3] < 0.0) t
[3] = -t
[3];
679 store_vector4( &inst
->DstReg
, state
, t
);
682 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
685 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
690 store_vector4( &inst
->DstReg
, state
, t
);
693 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
696 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
697 t
[0] = t
[0] - FLOORF(t
[0]);
698 t
[1] = t
[1] - FLOORF(t
[1]);
699 t
[2] = t
[2] - FLOORF(t
[2]);
700 t
[3] = t
[3] - FLOORF(t
[3]);
701 store_vector4( &inst
->DstReg
, state
, t
);
704 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
707 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
708 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
709 store_vector4( &inst
->DstReg
, state
, t
);
712 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
715 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
716 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
717 store_vector4( &inst
->DstReg
, state
, t
);
720 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
723 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
724 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
725 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
726 store_vector4( &inst
->DstReg
, state
, t
);
729 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
731 GLfloat t
[4], u
[4], cross
[4];
732 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
733 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
734 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
735 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
736 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
737 store_vector4( &inst
->DstReg
, state
, cross
);
740 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
742 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
743 const GLfloat
*src
= get_register_pointer(source
, state
);
747 /* do extended swizzling here */
748 for (i
= 0; i
< 3; i
++) {
749 if (source
->Swizzle
[i
] == SWIZZLE_ZERO
)
751 else if (source
->Swizzle
[i
] == SWIZZLE_ONE
)
754 result
[i
] = -src
[source
->Swizzle
[i
]];
756 result
[i
] = -result
[i
];
758 store_vector4( &inst
->DstReg
, state
, result
);
763 ctx
->_CurrentProgram
= 0;
766 /* bad instruction opcode */
767 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
768 ctx
->_CurrentProgram
= 0;
773 ctx
->_CurrentProgram
= 0;
779 Thoughts on vertex program optimization:
781 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
782 assembly code. That will probably be a lot of work.
784 Another approach might be to replace the vp_instruction->Opcode field with
785 a pointer to a specialized C function which executes the instruction.
786 In particular we can write functions which skip swizzling, negating,
787 masking, relative addressing, etc. when they're not needed.
791 void simple_add( struct vp_instruction *inst )
793 GLfloat *sum = machine->Registers[inst->DstReg.Register];
794 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
795 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
796 sum[0] = a[0] + b[0];
797 sum[1] = a[1] + b[1];
798 sum[2] = a[2] + b[2];
799 sum[3] = a[3] + b[3];
808 A first step would be to 'vectorize' the programs in the same way as
809 the normal transformation code in the tnl module. Thus each opcode
810 takes zero or more input vectors (registers) and produces one or more
813 These operations would initially be coded in C, with machine-specific
814 assembly following, as is currently the case for matrix
815 transformations in the math/ directory. The preprocessing scheme for
816 selecting simpler operations Brian describes above would also work
819 This should give reasonable performance without excessive effort.