2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat zeroVec
[4] = { 0, 0, 0, 0 };
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
50 _mesa_init_vp_registers(GLcontext
*ctx
)
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
56 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
63 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
70 if (ctx
->VertexProgram
.Current
->Parameters
) {
72 _mesa_load_state_parameters(ctx
, ctx
->VertexProgram
.Current
->Parameters
);
74 /* And copy it into the program state */
75 for (i
=0; i
<ctx
->VertexProgram
.Current
->Parameters
->NumParameters
; i
++) {
76 MEMCPY(ctx
->VertexProgram
.Parameters
[i
],
77 &ctx
->VertexProgram
.Current
->Parameters
->Parameters
[i
].Values
,
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
90 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
93 for (i
= 0; i
< 4; i
++) {
94 registers
[pos
+ i
][0] = mat
[0 + i
];
95 registers
[pos
+ i
][1] = mat
[4 + i
];
96 registers
[pos
+ i
][2] = mat
[8 + i
];
97 registers
[pos
+ i
][3] = mat
[12 + i
];
103 * As above, but transpose the matrix.
106 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
107 const GLfloat mat
[16])
109 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
118 _mesa_init_tracked_matrices(GLcontext
*ctx
)
122 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
123 /* point 'mat' at source matrix */
125 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
126 mat
= ctx
->ModelviewMatrixStack
.Top
;
128 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
129 mat
= ctx
->ProjectionMatrixStack
.Top
;
131 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
132 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
134 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
135 mat
= ctx
->ColorMatrixStack
.Top
;
137 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
138 /* XXX verify the combined matrix is up to date */
139 mat
= &ctx
->_ModelProjectMatrix
;
141 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
142 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
143 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
144 ASSERT(n
< MAX_PROGRAM_MATRICES
);
145 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
153 /* load the matrix */
154 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
155 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
157 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
158 _math_matrix_analyse(mat
); /* update the inverse */
159 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
160 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
162 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
163 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
166 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
167 == GL_INVERSE_TRANSPOSE_NV
);
168 _math_matrix_analyse(mat
); /* update the inverse */
169 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
170 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
178 * For debugging. Dump the current vertex program machine registers.
181 _mesa_dump_vp_state( const struct vertex_program_state
*state
)
184 _mesa_printf("VertexIn:\n");
185 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
186 _mesa_printf("%d: %f %f %f %f ", i
,
190 state
->Inputs
[i
][3]);
194 _mesa_printf("VertexOut:\n");
195 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
196 _mesa_printf("%d: %f %f %f %f ", i
,
197 state
->Outputs
[i
][0],
198 state
->Outputs
[i
][1],
199 state
->Outputs
[i
][2],
200 state
->Outputs
[i
][3]);
204 _mesa_printf("Registers:\n");
205 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
206 _mesa_printf("%d: %f %f %f %f ", i
,
207 state
->Temporaries
[i
][0],
208 state
->Temporaries
[i
][1],
209 state
->Temporaries
[i
][2],
210 state
->Temporaries
[i
][3]);
214 _mesa_printf("Parameters:\n");
215 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
216 _mesa_printf("%d: %f %f %f %f ", i
,
217 state
->Parameters
[i
][0],
218 state
->Parameters
[i
][1],
219 state
->Parameters
[i
][2],
220 state
->Parameters
[i
][3]);
228 * Return a pointer to the 4-element float vector specified by the given
231 static INLINE
const GLfloat
*
232 get_register_pointer( const struct vp_src_register
*source
,
233 const struct vertex_program_state
*state
)
235 if (source
->RelAddr
) {
236 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
237 ASSERT(source
->File
== PROGRAM_ENV_PARAM
);
238 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
241 return state
->Parameters
[reg
];
244 switch (source
->File
) {
245 case PROGRAM_TEMPORARY
:
246 return state
->Temporaries
[source
->Index
];
248 return state
->Inputs
[source
->Index
];
249 case PROGRAM_LOCAL_PARAM
:
251 return state
->Temporaries
[source
->Index
];
252 case PROGRAM_ENV_PARAM
:
253 return state
->Parameters
[source
->Index
];
254 case PROGRAM_STATE_VAR
:
255 return state
->Parameters
[source
->Index
];
258 "Bad source register file in fetch_vector4(vp)");
267 * Fetch a 4-element float vector from the given source register.
268 * Apply swizzling and negating as needed.
271 fetch_vector4( const struct vp_src_register
*source
,
272 const struct vertex_program_state
*state
,
275 const GLfloat
*src
= get_register_pointer(source
, state
);
277 if (source
->Negate
) {
278 result
[0] = -src
[source
->Swizzle
[0]];
279 result
[1] = -src
[source
->Swizzle
[1]];
280 result
[2] = -src
[source
->Swizzle
[2]];
281 result
[3] = -src
[source
->Swizzle
[3]];
284 result
[0] = src
[source
->Swizzle
[0]];
285 result
[1] = src
[source
->Swizzle
[1]];
286 result
[2] = src
[source
->Swizzle
[2]];
287 result
[3] = src
[source
->Swizzle
[3]];
294 * As above, but only return result[0] element.
297 fetch_vector1( const struct vp_src_register
*source
,
298 const struct vertex_program_state
*state
,
301 const GLfloat
*src
= get_register_pointer(source
, state
);
303 if (source
->Negate
) {
304 result
[0] = -src
[source
->Swizzle
[0]];
307 result
[0] = src
[source
->Swizzle
[0]];
313 * Store 4 floats into a register.
316 store_vector4( const struct vp_dst_register
*dest
,
317 struct vertex_program_state
*state
,
318 const GLfloat value
[4] )
321 switch (dest
->File
) {
322 case PROGRAM_TEMPORARY
:
323 dst
= state
->Temporaries
[dest
->Index
];
326 dst
= state
->Outputs
[dest
->Index
];
329 _mesa_problem(NULL
, "Invalid register file in fetch_vector1(vp)");
333 if (dest
->WriteMask
[0])
335 if (dest
->WriteMask
[1])
337 if (dest
->WriteMask
[2])
339 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 *
 * Three variants: writing the IEEE-754 single-precision bit pattern
 * directly, using __MAXFLOAT as a stand-in for infinity (presumably the
 * VMS build, going by the VMS note in the LOG opcode -- confirm the guard),
 * and using HUGE_VAL.
 *
 * Every macro argument and every expansion is parenthesized so the macros
 * behave as single expressions for any lvalue argument.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bits of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
362 * Execute the given vertex program
365 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
367 struct vertex_program_state
*state
= &ctx
->VertexProgram
;
368 const struct vp_instruction
*inst
;
370 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
372 for (inst
= program
->Instructions
; inst
->Opcode
!= VP_OPCODE_END
; inst
++) {
374 if (ctx
->VertexProgram
.CallbackEnabled
&&
375 ctx
->VertexProgram
.Callback
) {
376 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
377 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
378 ctx
->VertexProgram
.CallbackData
);
381 switch (inst
->Opcode
) {
385 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
386 store_vector4( &inst
->DstReg
, state
, t
);
391 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
392 GLfloat t
[4], lit
[4];
393 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
394 if (t
[3] < -(128.0F
- epsilon
))
395 t
[3] = - (128.0F
- epsilon
);
396 else if (t
[3] > 128.0F
- epsilon
)
397 t
[3] = 128.0F
- epsilon
;
404 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
406 store_vector4( &inst
->DstReg
, state
, lit
);
412 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
414 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
415 t
[1] = t
[2] = t
[3] = t
[0];
416 store_vector4( &inst
->DstReg
, state
, t
);
422 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
423 t
[0] = INV_SQRTF(FABSF(t
[0]));
424 t
[1] = t
[2] = t
[3] = t
[0];
425 store_vector4( &inst
->DstReg
, state
, t
);
430 GLfloat t
[4], q
[4], floor_t0
;
431 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
432 floor_t0
= (float) floor(t
[0]);
433 if (floor_t0
> FLT_MAX_EXP
) {
434 SET_POS_INFINITY(q
[0]);
435 SET_POS_INFINITY(q
[2]);
437 else if (floor_t0
< FLT_MIN_EXP
) {
443 GLint ii
= (GLint
) floor_t0
;
444 ii
= (ii
< 23) + 0x3f800000;
445 SET_FLOAT_BITS(q
[0], ii
);
446 q
[0] = *((GLfloat
*) &ii
);
448 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
450 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
452 q
[1] = t
[0] - floor_t0
;
454 store_vector4( &inst
->DstReg
, state
, q
);
459 GLfloat t
[4], q
[4], abs_t0
;
460 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
461 abs_t0
= (GLfloat
) fabs(t
[0]);
462 if (abs_t0
!= 0.0F
) {
463 /* Since we really can't handle infinite values on VMS
464 * like other OSes we'll use __MAXFLOAT to represent
465 * infinity. This may need some tweaking.
468 if (abs_t0
== __MAXFLOAT
)
470 if (IS_INF_OR_NAN(abs_t0
))
473 SET_POS_INFINITY(q
[0]);
475 SET_POS_INFINITY(q
[2]);
479 double mantissa
= frexp(t
[0], &exponent
);
480 q
[0] = (GLfloat
) (exponent
- 1);
481 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
482 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
486 SET_NEG_INFINITY(q
[0]);
488 SET_NEG_INFINITY(q
[2]);
491 store_vector4( &inst
->DstReg
, state
, q
);
496 GLfloat t
[4], u
[4], prod
[4];
497 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
498 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
499 prod
[0] = t
[0] * u
[0];
500 prod
[1] = t
[1] * u
[1];
501 prod
[2] = t
[2] * u
[2];
502 prod
[3] = t
[3] * u
[3];
503 store_vector4( &inst
->DstReg
, state
, prod
);
508 GLfloat t
[4], u
[4], sum
[4];
509 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
510 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
511 sum
[0] = t
[0] + u
[0];
512 sum
[1] = t
[1] + u
[1];
513 sum
[2] = t
[2] + u
[2];
514 sum
[3] = t
[3] + u
[3];
515 store_vector4( &inst
->DstReg
, state
, sum
);
520 GLfloat t
[4], u
[4], dot
[4];
521 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
522 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
523 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
524 dot
[1] = dot
[2] = dot
[3] = dot
[0];
525 store_vector4( &inst
->DstReg
, state
, dot
);
530 GLfloat t
[4], u
[4], dot
[4];
531 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
532 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
533 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
534 dot
[1] = dot
[2] = dot
[3] = dot
[0];
535 store_vector4( &inst
->DstReg
, state
, dot
);
540 GLfloat t
[4], u
[4], dst
[4];
541 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
542 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
544 dst
[1] = t
[1] * u
[1];
547 store_vector4( &inst
->DstReg
, state
, dst
);
552 GLfloat t
[4], u
[4], min
[4];
553 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
554 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
555 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
556 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
557 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
558 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
559 store_vector4( &inst
->DstReg
, state
, min
);
564 GLfloat t
[4], u
[4], max
[4];
565 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
566 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
567 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
568 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
569 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
570 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
571 store_vector4( &inst
->DstReg
, state
, max
);
576 GLfloat t
[4], u
[4], slt
[4];
577 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
578 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
579 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
580 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
581 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
582 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
583 store_vector4( &inst
->DstReg
, state
, slt
);
588 GLfloat t
[4], u
[4], sge
[4];
589 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
590 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
591 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
592 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
593 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
594 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
595 store_vector4( &inst
->DstReg
, state
, sge
);
600 GLfloat t
[4], u
[4], v
[4], sum
[4];
601 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
602 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
603 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
604 sum
[0] = t
[0] * u
[0] + v
[0];
605 sum
[1] = t
[1] * u
[1] + v
[1];
606 sum
[2] = t
[2] * u
[2] + v
[2];
607 sum
[3] = t
[3] * u
[3] + v
[3];
608 store_vector4( &inst
->DstReg
, state
, sum
);
614 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
615 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
620 GLfloat t
[4], u
[4], dot
[4];
621 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
622 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
623 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
624 dot
[1] = dot
[2] = dot
[3] = dot
[0];
625 store_vector4( &inst
->DstReg
, state
, dot
);
631 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
637 if (u
> 1.884467e+019F
) {
638 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
640 else if (u
< 5.42101e-020F
) {
641 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
645 if (u
< -1.884467e+019F
) {
646 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
648 else if (u
> -5.42101e-020F
) {
649 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
652 t
[0] = t
[1] = t
[2] = t
[3] = u
;
653 store_vector4( &inst
->DstReg
, state
, t
);
656 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
658 GLfloat t
[4], u
[4], sum
[4];
659 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
660 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
661 sum
[0] = t
[0] - u
[0];
662 sum
[1] = t
[1] - u
[1];
663 sum
[2] = t
[2] - u
[2];
664 sum
[3] = t
[3] - u
[3];
665 store_vector4( &inst
->DstReg
, state
, sum
);
668 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
671 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
672 if (t
[0] < 0.0) t
[0] = -t
[0];
673 if (t
[1] < 0.0) t
[1] = -t
[1];
674 if (t
[2] < 0.0) t
[2] = -t
[2];
675 if (t
[3] < 0.0) t
[3] = -t
[3];
676 store_vector4( &inst
->DstReg
, state
, t
);
679 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
682 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
687 store_vector4( &inst
->DstReg
, state
, t
);
690 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
693 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
694 t
[0] = t
[0] - FLOORF(t
[0]);
695 t
[1] = t
[1] - FLOORF(t
[1]);
696 t
[2] = t
[2] - FLOORF(t
[2]);
697 t
[3] = t
[3] - FLOORF(t
[3]);
698 store_vector4( &inst
->DstReg
, state
, t
);
701 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
704 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
705 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
706 store_vector4( &inst
->DstReg
, state
, t
);
709 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
712 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
713 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
714 store_vector4( &inst
->DstReg
, state
, t
);
717 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
720 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
721 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
722 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
723 store_vector4( &inst
->DstReg
, state
, t
);
726 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
728 GLfloat t
[4], u
[4], cross
[4];
729 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
730 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
731 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
732 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
733 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
734 store_vector4( &inst
->DstReg
, state
, cross
);
737 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
739 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
740 const GLfloat
*src
= get_register_pointer(source
, state
);
744 /* do extended swizzling here */
745 for (i
= 0; i
< 3; i
++) {
746 if (source
->Swizzle
[i
] == SWIZZLE_ZERO
)
748 else if (source
->Swizzle
[i
] == SWIZZLE_ONE
)
751 result
[i
] = -src
[source
->Swizzle
[i
]];
753 result
[i
] = -result
[i
];
755 store_vector4( &inst
->DstReg
, state
, result
);
760 ctx
->_CurrentProgram
= 0;
763 /* bad instruction opcode */
764 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
765 ctx
->_CurrentProgram
= 0;
770 ctx
->_CurrentProgram
= 0;
776 Thoughts on vertex program optimization:
778 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
779 assembly code. That will probably be a lot of work.
781 Another approach might be to replace the vp_instruction->Opcode field with
782 a pointer to a specialized C function which executes the instruction.
783 In particular we can write functions which skip swizzling, negating,
784 masking, relative addressing, etc. when they're not needed.
788 void simple_add( struct vp_instruction *inst )
790 GLfloat *sum = machine->Registers[inst->DstReg.Register];
791 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
792 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
793 sum[0] = a[0] + b[0];
794 sum[1] = a[1] + b[1];
795 sum[2] = a[2] + b[2];
796 sum[3] = a[3] + b[3];
805 A first step would be to 'vectorize' the programs in the same way as
806 the normal transformation code in the tnl module. Thus each opcode
807 takes zero or more input vectors (registers) and produces one or more
810 These operations would initially be coded in C, with machine-specific
811 assembly following, as is currently the case for matrix
812 transformations in the math/ directory. The preprocessing scheme for
813 selecting simpler operations Brian describes above would also work
816 This should give reasonable performance without excessive effort.