2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat zeroVec
[4] = { 0, 0, 0, 0 };
46 * Load/initialize the vertex program registers.
47 * This needs to be done per vertex.
50 _mesa_init_vp_registers(GLcontext
*ctx
)
54 /* Input registers get initialized from the current vertex attribs */
55 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
56 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
58 /* Output and temp regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
63 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
66 /* The program parameters aren't touched */
67 /* XXX: This should be moved to glBegin() time, but its safe (and slow!)
70 if (ctx
->VertexProgram
.Current
->Parameters
) {
72 _mesa_load_state_parameters(ctx
, ctx
->VertexProgram
.Current
->Parameters
);
74 /* And copy it into the program state */
75 for (i
=0; i
<ctx
->VertexProgram
.Current
->Parameters
->NumParameters
; i
++) {
76 MEMCPY(ctx
->VertexProgram
.Parameters
[i
],
77 &ctx
->VertexProgram
.Current
->Parameters
->Parameters
[i
].Values
,
86 * Copy the 16 elements of a matrix into four consecutive program
87 * registers starting at 'pos'.
90 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
93 for (i
= 0; i
< 4; i
++) {
94 registers
[pos
+ i
][0] = mat
[0 + i
];
95 registers
[pos
+ i
][1] = mat
[4 + i
];
96 registers
[pos
+ i
][2] = mat
[8 + i
];
97 registers
[pos
+ i
][3] = mat
[12 + i
];
103 * As above, but transpose the matrix.
106 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
107 const GLfloat mat
[16])
109 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
114 * Load all currently tracked matrices into the program registers.
115 * This needs to be done per glBegin/glEnd.
118 _mesa_init_tracked_matrices(GLcontext
*ctx
)
122 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
123 /* point 'mat' at source matrix */
125 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
126 mat
= ctx
->ModelviewMatrixStack
.Top
;
128 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
129 mat
= ctx
->ProjectionMatrixStack
.Top
;
131 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
132 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
134 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
135 mat
= ctx
->ColorMatrixStack
.Top
;
137 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
138 /* XXX verify the combined matrix is up to date */
139 mat
= &ctx
->_ModelProjectMatrix
;
141 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
142 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
143 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
144 ASSERT(n
< MAX_PROGRAM_MATRICES
);
145 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
148 /* no matrix is tracked, but we leave the register values as-is */
149 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
153 /* load the matrix */
154 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
155 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
157 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
158 _math_matrix_analyse(mat
); /* update the inverse */
159 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
160 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
162 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
163 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
166 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
167 == GL_INVERSE_TRANSPOSE_NV
);
168 _math_matrix_analyse(mat
); /* update the inverse */
169 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
170 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
178 * For debugging. Dump the current vertex program machine registers.
181 _mesa_dump_vp_state( const struct vertex_program_state
*state
)
184 _mesa_printf("VertexIn:\n");
185 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
186 _mesa_printf("%d: %f %f %f %f ", i
,
190 state
->Inputs
[i
][3]);
194 _mesa_printf("VertexOut:\n");
195 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
196 _mesa_printf("%d: %f %f %f %f ", i
,
197 state
->Outputs
[i
][0],
198 state
->Outputs
[i
][1],
199 state
->Outputs
[i
][2],
200 state
->Outputs
[i
][3]);
204 _mesa_printf("Registers:\n");
205 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
206 _mesa_printf("%d: %f %f %f %f ", i
,
207 state
->Temporaries
[i
][0],
208 state
->Temporaries
[i
][1],
209 state
->Temporaries
[i
][2],
210 state
->Temporaries
[i
][3]);
214 _mesa_printf("Parameters:\n");
215 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
216 _mesa_printf("%d: %f %f %f %f ", i
,
217 state
->Parameters
[i
][0],
218 state
->Parameters
[i
][1],
219 state
->Parameters
[i
][2],
220 state
->Parameters
[i
][3]);
228 * Return a pointer to the 4-element float vector specified by the given
231 static INLINE
const GLfloat
*
232 get_register_pointer( const struct vp_src_register
*source
,
233 const struct vertex_program_state
*state
)
235 if (source
->RelAddr
) {
236 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
237 ASSERT( (source
->File
== PROGRAM_ENV_PARAM
) ||
238 (source
->File
== PROGRAM_STATE_VAR
) );
239 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
242 return state
->Parameters
[reg
];
245 switch (source
->File
) {
246 case PROGRAM_TEMPORARY
:
247 return state
->Temporaries
[source
->Index
];
249 return state
->Inputs
[source
->Index
];
250 case PROGRAM_LOCAL_PARAM
:
252 return state
->Temporaries
[source
->Index
];
253 case PROGRAM_ENV_PARAM
:
254 return state
->Parameters
[source
->Index
];
255 case PROGRAM_STATE_VAR
:
256 return state
->Parameters
[source
->Index
];
259 "Bad source register file in fetch_vector4(vp)");
268 * Fetch a 4-element float vector from the given source register.
269 * Apply swizzling and negating as needed.
272 fetch_vector4( const struct vp_src_register
*source
,
273 const struct vertex_program_state
*state
,
276 const GLfloat
*src
= get_register_pointer(source
, state
);
278 if (source
->Negate
) {
279 result
[0] = -src
[source
->Swizzle
[0]];
280 result
[1] = -src
[source
->Swizzle
[1]];
281 result
[2] = -src
[source
->Swizzle
[2]];
282 result
[3] = -src
[source
->Swizzle
[3]];
285 result
[0] = src
[source
->Swizzle
[0]];
286 result
[1] = src
[source
->Swizzle
[1]];
287 result
[2] = src
[source
->Swizzle
[2]];
288 result
[3] = src
[source
->Swizzle
[3]];
295 * As above, but only return result[0] element.
298 fetch_vector1( const struct vp_src_register
*source
,
299 const struct vertex_program_state
*state
,
302 const GLfloat
*src
= get_register_pointer(source
, state
);
304 if (source
->Negate
) {
305 result
[0] = -src
[source
->Swizzle
[0]];
308 result
[0] = src
[source
->Swizzle
[0]];
314 * Store 4 floats into a register.
317 store_vector4( const struct vp_dst_register
*dest
,
318 struct vertex_program_state
*state
,
319 const GLfloat value
[4] )
322 switch (dest
->File
) {
323 case PROGRAM_TEMPORARY
:
324 dst
= state
->Temporaries
[dest
->Index
];
327 dst
= state
->Outputs
[dest
->Index
];
330 _mesa_problem(NULL
, "Invalid register file in fetch_vector1(vp)");
334 if (dest
->WriteMask
[0])
336 if (dest
->WriteMask
[1])
338 if (dest
->WriteMask
[2])
340 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 * With USE_IEEE or _WIN32 the IEEE single-precision bit pattern is written
 * directly; on VMS __MAXFLOAT stands in for infinity; otherwise HUGE_VAL
 * is used.  'x' must be a GLfloat lvalue.  The argument and the whole
 * expansion are parenthesized so the macros are safe inside expressions.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bit pattern of float lvalue x with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) &(x))->i = (bits) )
363 * Execute the given vertex program
366 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
368 struct vertex_program_state
*state
= &ctx
->VertexProgram
;
369 const struct vp_instruction
*inst
;
371 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
373 /* If the program is position invariant, multiply the input
374 * position and the MVP matrix and stick it into the output pos slot
376 if (ctx
->VertexProgram
.Current
->IsPositionInvariant
) {
377 TRANSFORM_POINT( ctx
->VertexProgram
.Outputs
[0],
378 ctx
->_ModelProjectMatrix
.m
,
379 ctx
->VertexProgram
.Inputs
[0]);
381 /* XXX: This could go elsewhere */
382 ctx
->VertexProgram
.Current
->OutputsWritten
|= 0x1;
387 for (inst
= program
->Instructions
; inst
->Opcode
!= VP_OPCODE_END
; inst
++) {
389 if (ctx
->VertexProgram
.CallbackEnabled
&&
390 ctx
->VertexProgram
.Callback
) {
391 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
392 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
393 ctx
->VertexProgram
.CallbackData
);
396 switch (inst
->Opcode
) {
400 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
401 store_vector4( &inst
->DstReg
, state
, t
);
406 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
407 GLfloat t
[4], lit
[4];
408 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
409 if (t
[3] < -(128.0F
- epsilon
))
410 t
[3] = - (128.0F
- epsilon
);
411 else if (t
[3] > 128.0F
- epsilon
)
412 t
[3] = 128.0F
- epsilon
;
419 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
421 store_vector4( &inst
->DstReg
, state
, lit
);
427 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
429 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
430 t
[1] = t
[2] = t
[3] = t
[0];
431 store_vector4( &inst
->DstReg
, state
, t
);
437 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
438 t
[0] = INV_SQRTF(FABSF(t
[0]));
439 t
[1] = t
[2] = t
[3] = t
[0];
440 store_vector4( &inst
->DstReg
, state
, t
);
445 GLfloat t
[4], q
[4], floor_t0
;
446 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
447 floor_t0
= (float) floor(t
[0]);
448 if (floor_t0
> FLT_MAX_EXP
) {
449 SET_POS_INFINITY(q
[0]);
450 SET_POS_INFINITY(q
[2]);
452 else if (floor_t0
< FLT_MIN_EXP
) {
458 GLint ii
= (GLint
) floor_t0
;
459 ii
= (ii
< 23) + 0x3f800000;
460 SET_FLOAT_BITS(q
[0], ii
);
461 q
[0] = *((GLfloat
*) &ii
);
463 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
465 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
467 q
[1] = t
[0] - floor_t0
;
469 store_vector4( &inst
->DstReg
, state
, q
);
474 GLfloat t
[4], q
[4], abs_t0
;
475 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
476 abs_t0
= (GLfloat
) fabs(t
[0]);
477 if (abs_t0
!= 0.0F
) {
478 /* Since we really can't handle infinite values on VMS
479 * like other OSes we'll use __MAXFLOAT to represent
480 * infinity. This may need some tweaking.
483 if (abs_t0
== __MAXFLOAT
)
485 if (IS_INF_OR_NAN(abs_t0
))
488 SET_POS_INFINITY(q
[0]);
490 SET_POS_INFINITY(q
[2]);
494 double mantissa
= frexp(t
[0], &exponent
);
495 q
[0] = (GLfloat
) (exponent
- 1);
496 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
497 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
501 SET_NEG_INFINITY(q
[0]);
503 SET_NEG_INFINITY(q
[2]);
506 store_vector4( &inst
->DstReg
, state
, q
);
511 GLfloat t
[4], u
[4], prod
[4];
512 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
513 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
514 prod
[0] = t
[0] * u
[0];
515 prod
[1] = t
[1] * u
[1];
516 prod
[2] = t
[2] * u
[2];
517 prod
[3] = t
[3] * u
[3];
518 store_vector4( &inst
->DstReg
, state
, prod
);
523 GLfloat t
[4], u
[4], sum
[4];
524 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
525 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
526 sum
[0] = t
[0] + u
[0];
527 sum
[1] = t
[1] + u
[1];
528 sum
[2] = t
[2] + u
[2];
529 sum
[3] = t
[3] + u
[3];
530 store_vector4( &inst
->DstReg
, state
, sum
);
535 GLfloat t
[4], u
[4], dot
[4];
536 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
537 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
538 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
539 dot
[1] = dot
[2] = dot
[3] = dot
[0];
540 store_vector4( &inst
->DstReg
, state
, dot
);
545 GLfloat t
[4], u
[4], dot
[4];
546 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
547 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
548 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
549 dot
[1] = dot
[2] = dot
[3] = dot
[0];
550 store_vector4( &inst
->DstReg
, state
, dot
);
555 GLfloat t
[4], u
[4], dst
[4];
556 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
557 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
559 dst
[1] = t
[1] * u
[1];
562 store_vector4( &inst
->DstReg
, state
, dst
);
567 GLfloat t
[4], u
[4], min
[4];
568 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
569 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
570 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
571 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
572 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
573 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
574 store_vector4( &inst
->DstReg
, state
, min
);
579 GLfloat t
[4], u
[4], max
[4];
580 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
581 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
582 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
583 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
584 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
585 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
586 store_vector4( &inst
->DstReg
, state
, max
);
591 GLfloat t
[4], u
[4], slt
[4];
592 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
593 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
594 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
595 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
596 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
597 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
598 store_vector4( &inst
->DstReg
, state
, slt
);
603 GLfloat t
[4], u
[4], sge
[4];
604 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
605 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
606 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
607 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
608 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
609 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
610 store_vector4( &inst
->DstReg
, state
, sge
);
615 GLfloat t
[4], u
[4], v
[4], sum
[4];
616 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
617 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
618 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
619 sum
[0] = t
[0] * u
[0] + v
[0];
620 sum
[1] = t
[1] * u
[1] + v
[1];
621 sum
[2] = t
[2] * u
[2] + v
[2];
622 sum
[3] = t
[3] * u
[3] + v
[3];
623 store_vector4( &inst
->DstReg
, state
, sum
);
629 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
630 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
635 GLfloat t
[4], u
[4], dot
[4];
636 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
637 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
638 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
639 dot
[1] = dot
[2] = dot
[3] = dot
[0];
640 store_vector4( &inst
->DstReg
, state
, dot
);
646 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
652 if (u
> 1.884467e+019F
) {
653 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
655 else if (u
< 5.42101e-020F
) {
656 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
660 if (u
< -1.884467e+019F
) {
661 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
663 else if (u
> -5.42101e-020F
) {
664 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
667 t
[0] = t
[1] = t
[2] = t
[3] = u
;
668 store_vector4( &inst
->DstReg
, state
, t
);
671 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
673 GLfloat t
[4], u
[4], sum
[4];
674 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
675 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
676 sum
[0] = t
[0] - u
[0];
677 sum
[1] = t
[1] - u
[1];
678 sum
[2] = t
[2] - u
[2];
679 sum
[3] = t
[3] - u
[3];
680 store_vector4( &inst
->DstReg
, state
, sum
);
683 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
686 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
687 if (t
[0] < 0.0) t
[0] = -t
[0];
688 if (t
[1] < 0.0) t
[1] = -t
[1];
689 if (t
[2] < 0.0) t
[2] = -t
[2];
690 if (t
[3] < 0.0) t
[3] = -t
[3];
691 store_vector4( &inst
->DstReg
, state
, t
);
694 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
697 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
702 store_vector4( &inst
->DstReg
, state
, t
);
705 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
708 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
709 t
[0] = t
[0] - FLOORF(t
[0]);
710 t
[1] = t
[1] - FLOORF(t
[1]);
711 t
[2] = t
[2] - FLOORF(t
[2]);
712 t
[3] = t
[3] - FLOORF(t
[3]);
713 store_vector4( &inst
->DstReg
, state
, t
);
716 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
719 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
720 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
721 store_vector4( &inst
->DstReg
, state
, t
);
724 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
727 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
728 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
729 store_vector4( &inst
->DstReg
, state
, t
);
732 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
735 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
736 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
737 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
738 store_vector4( &inst
->DstReg
, state
, t
);
741 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
743 GLfloat t
[4], u
[4], cross
[4];
744 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
745 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
746 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
747 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
748 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
749 store_vector4( &inst
->DstReg
, state
, cross
);
752 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
754 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
755 const GLfloat
*src
= get_register_pointer(source
, state
);
759 /* do extended swizzling here */
760 for (i
= 0; i
< 3; i
++) {
761 if (source
->Swizzle
[i
] == SWIZZLE_ZERO
)
763 else if (source
->Swizzle
[i
] == SWIZZLE_ONE
)
766 result
[i
] = -src
[source
->Swizzle
[i
]];
768 result
[i
] = -result
[i
];
770 store_vector4( &inst
->DstReg
, state
, result
);
775 ctx
->_CurrentProgram
= 0;
778 /* bad instruction opcode */
779 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
780 ctx
->_CurrentProgram
= 0;
785 ctx
->_CurrentProgram
= 0;
791 Thoughts on vertex program optimization:
793 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
794 assembly code. That will probably be a lot of work.
796 Another approach might be to replace the vp_instruction->Opcode field with
797 a pointer to a specialized C function which executes the instruction.
798 In particular we can write functions which skip swizzling, negating,
799 masking, relative addressing, etc. when they're not needed.
803 void simple_add( struct vp_instruction *inst )
805 GLfloat *sum = machine->Registers[inst->DstReg.Register];
806 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
807 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
808 sum[0] = a[0] + b[0];
809 sum[1] = a[1] + b[1];
810 sum[2] = a[2] + b[2];
811 sum[3] = a[3] + b[3];
820 A first step would be to 'vectorize' the programs in the same way as
821 the normal transformation code in the tnl module. Thus each opcode
822 takes zero or more input vectors (registers) and produces one or more
825 These operations would initially be coded in C, with machine-specific
826 assembly following, as is currently the case for matrix
827 transformations in the math/ directory. The preprocessing scheme for
828 selecting simpler operations Brian describes above would also work
831 This should give reasonable performance without excessive effort.