2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat ZeroVec
[4] = { 0.0F
, 0.0F
, 0.0F
, 0.0F
};
46 * Load/initialize the vertex program registers which need to be set
50 _mesa_init_vp_per_vertex_registers(GLcontext
*ctx
)
52 /* Input registers get initialized from the current vertex attribs */
53 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
54 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
56 if (ctx
->VertexProgram
.Current
->IsNVProgram
) {
58 /* Output/result regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 /* Temp regs are initialized to [0,0,0,0] */
63 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
64 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 0.0F
);
66 ASSIGN_4V(ctx
->VertexProgram
.AddressReg
, 0, 0, 0, 0);
73 * Copy the 16 elements of a matrix into four consecutive program
74 * registers starting at 'pos'.
77 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
80 for (i
= 0; i
< 4; i
++) {
81 registers
[pos
+ i
][0] = mat
[0 + i
];
82 registers
[pos
+ i
][1] = mat
[4 + i
];
83 registers
[pos
+ i
][2] = mat
[8 + i
];
84 registers
[pos
+ i
][3] = mat
[12 + i
];
90 * As above, but transpose the matrix.
93 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
94 const GLfloat mat
[16])
96 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
101 * Load program parameter registers with tracked matrices (if NV program)
102 * or GL state values (if ARB program).
103 * This needs to be done per glBegin/glEnd, not per-vertex.
106 _mesa_init_vp_per_primitive_registers(GLcontext
*ctx
)
108 if (ctx
->VertexProgram
.Current
->IsNVProgram
) {
111 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
112 /* point 'mat' at source matrix */
114 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
115 mat
= ctx
->ModelviewMatrixStack
.Top
;
117 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
118 mat
= ctx
->ProjectionMatrixStack
.Top
;
120 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
121 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
123 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
124 mat
= ctx
->ColorMatrixStack
.Top
;
126 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
127 /* XXX verify the combined matrix is up to date */
128 mat
= &ctx
->_ModelProjectMatrix
;
130 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
131 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
132 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
133 ASSERT(n
< MAX_PROGRAM_MATRICES
);
134 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
137 /* no matrix is tracked, but we leave the register values as-is */
138 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
142 /* load the matrix */
143 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
144 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
146 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
147 _math_matrix_analyse(mat
); /* update the inverse */
148 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
149 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
151 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
152 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
155 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
156 == GL_INVERSE_TRANSPOSE_NV
);
157 _math_matrix_analyse(mat
); /* update the inverse */
158 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
159 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
164 /* Using and ARB vertex program */
165 if (ctx
->VertexProgram
.Current
->Parameters
) {
166 /* Grab the state GL state and put into registers */
167 _mesa_load_state_parameters(ctx
,
168 ctx
->VertexProgram
.Current
->Parameters
);
176 * For debugging. Dump the current vertex program machine registers.
179 _mesa_dump_vp_state( const struct vertex_program_state
*state
)
182 _mesa_printf("VertexIn:\n");
183 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
184 _mesa_printf("%d: %f %f %f %f ", i
,
188 state
->Inputs
[i
][3]);
192 _mesa_printf("VertexOut:\n");
193 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
194 _mesa_printf("%d: %f %f %f %f ", i
,
195 state
->Outputs
[i
][0],
196 state
->Outputs
[i
][1],
197 state
->Outputs
[i
][2],
198 state
->Outputs
[i
][3]);
202 _mesa_printf("Registers:\n");
203 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
204 _mesa_printf("%d: %f %f %f %f ", i
,
205 state
->Temporaries
[i
][0],
206 state
->Temporaries
[i
][1],
207 state
->Temporaries
[i
][2],
208 state
->Temporaries
[i
][3]);
212 _mesa_printf("Parameters:\n");
213 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
214 _mesa_printf("%d: %f %f %f %f ", i
,
215 state
->Parameters
[i
][0],
216 state
->Parameters
[i
][1],
217 state
->Parameters
[i
][2],
218 state
->Parameters
[i
][3]);
226 * Return a pointer to the 4-element float vector specified by the given
229 static INLINE
const GLfloat
*
230 get_register_pointer( const struct vp_src_register
*source
,
231 const struct vertex_program_state
*state
)
233 if (source
->RelAddr
) {
234 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
235 ASSERT( (source
->File
== PROGRAM_ENV_PARAM
) ||
236 (source
->File
== PROGRAM_STATE_VAR
) );
237 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
239 else if (source
->File
== PROGRAM_ENV_PARAM
)
240 return state
->Parameters
[reg
];
242 return state
->Current
->Parameters
->Parameters
[reg
].Values
;
245 switch (source
->File
) {
246 case PROGRAM_TEMPORARY
:
247 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_TEMPS
);
248 return state
->Temporaries
[source
->Index
];
250 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_INPUTS
);
251 return state
->Inputs
[source
->Index
];
252 case PROGRAM_LOCAL_PARAM
:
253 ASSERT(source
->Index
< MAX_PROGRAM_LOCAL_PARAMS
);
254 return state
->Current
->Base
.LocalParams
[source
->Index
];
255 case PROGRAM_ENV_PARAM
:
256 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_PARAMS
);
257 return state
->Parameters
[source
->Index
];
258 case PROGRAM_STATE_VAR
:
259 ASSERT(source
->Index
< state
->Current
->Parameters
->NumParameters
);
260 return state
->Current
->Parameters
->Parameters
[source
->Index
].Values
;
263 "Bad source register file in get_register_pointer");
272 * Fetch a 4-element float vector from the given source register.
273 * Apply swizzling and negating as needed.
276 fetch_vector4( const struct vp_src_register
*source
,
277 const struct vertex_program_state
*state
,
280 const GLfloat
*src
= get_register_pointer(source
, state
);
282 if (source
->Negate
) {
283 result
[0] = -src
[source
->Swizzle
[0]];
284 result
[1] = -src
[source
->Swizzle
[1]];
285 result
[2] = -src
[source
->Swizzle
[2]];
286 result
[3] = -src
[source
->Swizzle
[3]];
289 result
[0] = src
[source
->Swizzle
[0]];
290 result
[1] = src
[source
->Swizzle
[1]];
291 result
[2] = src
[source
->Swizzle
[2]];
292 result
[3] = src
[source
->Swizzle
[3]];
299 * As above, but only return result[0] element.
302 fetch_vector1( const struct vp_src_register
*source
,
303 const struct vertex_program_state
*state
,
306 const GLfloat
*src
= get_register_pointer(source
, state
);
308 if (source
->Negate
) {
309 result
[0] = -src
[source
->Swizzle
[0]];
312 result
[0] = src
[source
->Swizzle
[0]];
318 * Store 4 floats into a register.
321 store_vector4( const struct vp_dst_register
*dest
,
322 struct vertex_program_state
*state
,
323 const GLfloat value
[4] )
326 switch (dest
->File
) {
327 case PROGRAM_TEMPORARY
:
328 dst
= state
->Temporaries
[dest
->Index
];
331 dst
= state
->Outputs
[dest
->Index
];
333 case PROGRAM_ENV_PARAM
:
336 GET_CURRENT_CONTEXT(ctx
);
337 dst
= ctx
->VertexProgram
.Parameters
[dest
->Index
];
341 _mesa_problem(NULL
, "Invalid register file in store_vector4(file=%d)",
346 if (dest
->WriteMask
[0])
348 if (dest
->WriteMask
[1])
350 if (dest
->WriteMask
[2])
352 if (dest
->WriteMask
[3])
/**
 * Set x to positive or negative infinity.
 *
 * With USE_IEEE (or on _WIN32) the infinity bit pattern is written
 * directly into the float's storage.  On VMS __MAXFLOAT stands in for
 * infinity.  Otherwise HUGE_VAL is used.
 *
 * The argument and the whole expansion are parenthesized so the macros
 * behave as single expressions for any lvalue argument.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the storage of float lvalue x with the integer pattern 'bits'. */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
375 * Execute the given vertex program
378 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
380 struct vertex_program_state
*state
= &ctx
->VertexProgram
;
381 const struct vp_instruction
*inst
;
383 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
385 /* If the program is position invariant, multiply the input
386 * position and the MVP matrix and stick it into the output pos slot
388 if (ctx
->VertexProgram
.Current
->IsPositionInvariant
) {
389 TRANSFORM_POINT( ctx
->VertexProgram
.Outputs
[0],
390 ctx
->_ModelProjectMatrix
.m
,
391 ctx
->VertexProgram
.Inputs
[0]);
393 /* XXX: This could go elsewhere */
394 ctx
->VertexProgram
.Current
->OutputsWritten
|= 0x1;
396 for (inst
= program
->Instructions
; ; inst
++) {
398 if (ctx
->VertexProgram
.CallbackEnabled
&&
399 ctx
->VertexProgram
.Callback
) {
400 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
401 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
402 ctx
->VertexProgram
.CallbackData
);
405 switch (inst
->Opcode
) {
409 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
410 store_vector4( &inst
->DstReg
, state
, t
);
415 const GLfloat epsilon
= 1.0e-5F
; /* XXX fix? */
416 GLfloat t
[4], lit
[4];
417 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
418 if (t
[3] < -(128.0F
- epsilon
))
419 t
[3] = - (128.0F
- epsilon
);
420 else if (t
[3] > 128.0F
- epsilon
)
421 t
[3] = 128.0F
- epsilon
;
428 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) exp(t
[3] * log(t
[1])) : 0.0F
;
430 store_vector4( &inst
->DstReg
, state
, lit
);
436 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
438 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
439 t
[1] = t
[2] = t
[3] = t
[0];
440 store_vector4( &inst
->DstReg
, state
, t
);
446 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
447 t
[0] = INV_SQRTF(FABSF(t
[0]));
448 t
[1] = t
[2] = t
[3] = t
[0];
449 store_vector4( &inst
->DstReg
, state
, t
);
454 GLfloat t
[4], q
[4], floor_t0
;
455 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
456 floor_t0
= (float) floor(t
[0]);
457 if (floor_t0
> FLT_MAX_EXP
) {
458 SET_POS_INFINITY(q
[0]);
459 SET_POS_INFINITY(q
[2]);
461 else if (floor_t0
< FLT_MIN_EXP
) {
467 GLint ii
= (GLint
) floor_t0
;
468 ii
= (ii
< 23) + 0x3f800000;
469 SET_FLOAT_BITS(q
[0], ii
);
470 q
[0] = *((GLfloat
*) (void *)&ii
);
472 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
474 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
476 q
[1] = t
[0] - floor_t0
;
478 store_vector4( &inst
->DstReg
, state
, q
);
483 GLfloat t
[4], q
[4], abs_t0
;
484 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
485 abs_t0
= (GLfloat
) fabs(t
[0]);
486 if (abs_t0
!= 0.0F
) {
487 /* Since we really can't handle infinite values on VMS
488 * like other OSes we'll use __MAXFLOAT to represent
489 * infinity. This may need some tweaking.
492 if (abs_t0
== __MAXFLOAT
)
494 if (IS_INF_OR_NAN(abs_t0
))
497 SET_POS_INFINITY(q
[0]);
499 SET_POS_INFINITY(q
[2]);
503 double mantissa
= frexp(t
[0], &exponent
);
504 q
[0] = (GLfloat
) (exponent
- 1);
505 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
506 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
510 SET_NEG_INFINITY(q
[0]);
512 SET_NEG_INFINITY(q
[2]);
515 store_vector4( &inst
->DstReg
, state
, q
);
520 GLfloat t
[4], u
[4], prod
[4];
521 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
522 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
523 prod
[0] = t
[0] * u
[0];
524 prod
[1] = t
[1] * u
[1];
525 prod
[2] = t
[2] * u
[2];
526 prod
[3] = t
[3] * u
[3];
527 store_vector4( &inst
->DstReg
, state
, prod
);
532 GLfloat t
[4], u
[4], sum
[4];
533 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
534 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
535 sum
[0] = t
[0] + u
[0];
536 sum
[1] = t
[1] + u
[1];
537 sum
[2] = t
[2] + u
[2];
538 sum
[3] = t
[3] + u
[3];
539 store_vector4( &inst
->DstReg
, state
, sum
);
544 GLfloat t
[4], u
[4], dot
[4];
545 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
546 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
547 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
548 dot
[1] = dot
[2] = dot
[3] = dot
[0];
549 store_vector4( &inst
->DstReg
, state
, dot
);
554 GLfloat t
[4], u
[4], dot
[4];
555 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
556 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
557 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
558 dot
[1] = dot
[2] = dot
[3] = dot
[0];
559 store_vector4( &inst
->DstReg
, state
, dot
);
564 GLfloat t
[4], u
[4], dst
[4];
565 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
566 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
568 dst
[1] = t
[1] * u
[1];
571 store_vector4( &inst
->DstReg
, state
, dst
);
576 GLfloat t
[4], u
[4], min
[4];
577 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
578 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
579 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
580 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
581 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
582 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
583 store_vector4( &inst
->DstReg
, state
, min
);
588 GLfloat t
[4], u
[4], max
[4];
589 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
590 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
591 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
592 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
593 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
594 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
595 store_vector4( &inst
->DstReg
, state
, max
);
600 GLfloat t
[4], u
[4], slt
[4];
601 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
602 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
603 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
604 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
605 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
606 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
607 store_vector4( &inst
->DstReg
, state
, slt
);
612 GLfloat t
[4], u
[4], sge
[4];
613 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
614 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
615 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
616 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
617 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
618 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
619 store_vector4( &inst
->DstReg
, state
, sge
);
624 GLfloat t
[4], u
[4], v
[4], sum
[4];
625 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
626 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
627 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
628 sum
[0] = t
[0] * u
[0] + v
[0];
629 sum
[1] = t
[1] * u
[1] + v
[1];
630 sum
[2] = t
[2] * u
[2] + v
[2];
631 sum
[3] = t
[3] * u
[3] + v
[3];
632 store_vector4( &inst
->DstReg
, state
, sum
);
638 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
639 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
644 GLfloat t
[4], u
[4], dot
[4];
645 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
646 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
647 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
648 dot
[1] = dot
[2] = dot
[3] = dot
[0];
649 store_vector4( &inst
->DstReg
, state
, dot
);
655 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
661 if (u
> 1.884467e+019F
) {
662 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
664 else if (u
< 5.42101e-020F
) {
665 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
669 if (u
< -1.884467e+019F
) {
670 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
672 else if (u
> -5.42101e-020F
) {
673 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
676 t
[0] = t
[1] = t
[2] = t
[3] = u
;
677 store_vector4( &inst
->DstReg
, state
, t
);
680 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
682 GLfloat t
[4], u
[4], sum
[4];
683 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
684 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
685 sum
[0] = t
[0] - u
[0];
686 sum
[1] = t
[1] - u
[1];
687 sum
[2] = t
[2] - u
[2];
688 sum
[3] = t
[3] - u
[3];
689 store_vector4( &inst
->DstReg
, state
, sum
);
692 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
695 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
696 if (t
[0] < 0.0) t
[0] = -t
[0];
697 if (t
[1] < 0.0) t
[1] = -t
[1];
698 if (t
[2] < 0.0) t
[2] = -t
[2];
699 if (t
[3] < 0.0) t
[3] = -t
[3];
700 store_vector4( &inst
->DstReg
, state
, t
);
703 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
706 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
711 store_vector4( &inst
->DstReg
, state
, t
);
714 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
717 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
718 t
[0] = t
[0] - FLOORF(t
[0]);
719 t
[1] = t
[1] - FLOORF(t
[1]);
720 t
[2] = t
[2] - FLOORF(t
[2]);
721 t
[3] = t
[3] - FLOORF(t
[3]);
722 store_vector4( &inst
->DstReg
, state
, t
);
725 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
728 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
729 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
730 store_vector4( &inst
->DstReg
, state
, t
);
733 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
736 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
737 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
738 store_vector4( &inst
->DstReg
, state
, t
);
741 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
744 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
745 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
746 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
747 store_vector4( &inst
->DstReg
, state
, t
);
750 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
752 GLfloat t
[4], u
[4], cross
[4];
753 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
754 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
755 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
756 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
757 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
758 store_vector4( &inst
->DstReg
, state
, cross
);
761 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
763 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
764 const GLfloat
*src
= get_register_pointer(source
, state
);
768 /* do extended swizzling here */
769 for (i
= 0; i
< 3; i
++) {
770 if (source
->Swizzle
[i
] == SWIZZLE_ZERO
)
772 else if (source
->Swizzle
[i
] == SWIZZLE_ONE
)
775 result
[i
] = -src
[source
->Swizzle
[i
]];
777 result
[i
] = -result
[i
];
779 store_vector4( &inst
->DstReg
, state
, result
);
784 ctx
->_CurrentProgram
= 0;
787 /* bad instruction opcode */
788 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
789 ctx
->_CurrentProgram
= 0;
794 ctx
->_CurrentProgram
= 0;
800 Thoughts on vertex program optimization:
802 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
803 assembly code. That will probably be a lot of work.
805 Another approach might be to replace the vp_instruction->Opcode field with
806 a pointer to a specialized C function which executes the instruction.
807 In particular we can write functions which skip swizzling, negating,
808 masking, relative addressing, etc. when they're not needed.
812 void simple_add( struct vp_instruction *inst )
814 GLfloat *sum = machine->Registers[inst->DstReg.Register];
815 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
816 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
817 sum[0] = a[0] + b[0];
818 sum[1] = a[1] + b[1];
819 sum[2] = a[2] + b[2];
820 sum[3] = a[3] + b[3];
829 A first step would be to 'vectorize' the programs in the same way as
830 the normal transformation code in the tnl module. Thus each opcode
831 takes zero or more input vectors (registers) and produces one or more
834 These operations would initially be coded in C, with machine-specific
835 assembly following, as is currently the case for matrix
836 transformations in the math/ directory. The preprocessing scheme for
837 selecting simpler operations Brian describes above would also work
840 This should give reasonable performance without excessive effort.