2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Code to execute vertex programs.
36 #include "nvvertexec.h"
37 #include "nvvertprog.h"
39 #include "math/m_matrix.h"
42 static const GLfloat ZeroVec
[4] = { 0.0F
, 0.0F
, 0.0F
, 0.0F
};
46 * Load/initialize the vertex program registers which need to be set
50 _mesa_init_vp_per_vertex_registers(GLcontext
*ctx
)
52 /* Input registers get initialized from the current vertex attribs */
53 MEMCPY(ctx
->VertexProgram
.Inputs
, ctx
->Current
.Attrib
,
54 VERT_ATTRIB_MAX
* 4 * sizeof(GLfloat
));
56 if (ctx
->VertexProgram
.Current
->IsNVProgram
) {
58 /* Output/result regs are initialized to [0,0,0,1] */
59 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
60 ASSIGN_4V(ctx
->VertexProgram
.Outputs
[i
], 0.0F
, 0.0F
, 0.0F
, 1.0F
);
62 /* Temp regs are initialized to [0,0,0,0] */
63 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
64 ASSIGN_4V(ctx
->VertexProgram
.Temporaries
[i
], 0.0F
, 0.0F
, 0.0F
, 0.0F
);
66 ASSIGN_4V(ctx
->VertexProgram
.AddressReg
, 0, 0, 0, 0);
73 * Copy the 16 elements of a matrix into four consecutive program
74 * registers starting at 'pos'.
77 load_matrix(GLfloat registers
[][4], GLuint pos
, const GLfloat mat
[16])
80 for (i
= 0; i
< 4; i
++) {
81 registers
[pos
+ i
][0] = mat
[0 + i
];
82 registers
[pos
+ i
][1] = mat
[4 + i
];
83 registers
[pos
+ i
][2] = mat
[8 + i
];
84 registers
[pos
+ i
][3] = mat
[12 + i
];
90 * As above, but transpose the matrix.
93 load_transpose_matrix(GLfloat registers
[][4], GLuint pos
,
94 const GLfloat mat
[16])
96 MEMCPY(registers
[pos
], mat
, 16 * sizeof(GLfloat
));
101 * Load program parameter registers with tracked matrices (if NV program)
102 * or GL state values (if ARB program).
103 * This needs to be done per glBegin/glEnd, not per-vertex.
106 _mesa_init_vp_per_primitive_registers(GLcontext
*ctx
)
108 if (ctx
->VertexProgram
.Current
->IsNVProgram
) {
111 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
/ 4; i
++) {
112 /* point 'mat' at source matrix */
114 if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_MODELVIEW
) {
115 mat
= ctx
->ModelviewMatrixStack
.Top
;
117 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_PROJECTION
) {
118 mat
= ctx
->ProjectionMatrixStack
.Top
;
120 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_TEXTURE
) {
121 mat
= ctx
->TextureMatrixStack
[ctx
->Texture
.CurrentUnit
].Top
;
123 else if (ctx
->VertexProgram
.TrackMatrix
[i
] == GL_COLOR
) {
124 mat
= ctx
->ColorMatrixStack
.Top
;
126 else if (ctx
->VertexProgram
.TrackMatrix
[i
]==GL_MODELVIEW_PROJECTION_NV
) {
127 /* XXX verify the combined matrix is up to date */
128 mat
= &ctx
->_ModelProjectMatrix
;
130 else if (ctx
->VertexProgram
.TrackMatrix
[i
] >= GL_MATRIX0_NV
&&
131 ctx
->VertexProgram
.TrackMatrix
[i
] <= GL_MATRIX7_NV
) {
132 GLuint n
= ctx
->VertexProgram
.TrackMatrix
[i
] - GL_MATRIX0_NV
;
133 ASSERT(n
< MAX_PROGRAM_MATRICES
);
134 mat
= ctx
->ProgramMatrixStack
[n
].Top
;
137 /* no matrix is tracked, but we leave the register values as-is */
138 assert(ctx
->VertexProgram
.TrackMatrix
[i
] == GL_NONE
);
142 /* load the matrix */
143 if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_IDENTITY_NV
) {
144 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
146 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_INVERSE_NV
) {
147 _math_matrix_analyse(mat
); /* update the inverse */
148 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
149 load_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
151 else if (ctx
->VertexProgram
.TrackMatrixTransform
[i
] == GL_TRANSPOSE_NV
) {
152 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->m
);
155 assert(ctx
->VertexProgram
.TrackMatrixTransform
[i
]
156 == GL_INVERSE_TRANSPOSE_NV
);
157 _math_matrix_analyse(mat
); /* update the inverse */
158 assert((mat
->flags
& MAT_DIRTY_INVERSE
) == 0);
159 load_transpose_matrix(ctx
->VertexProgram
.Parameters
, i
*4, mat
->inv
);
164 /* Using and ARB vertex program */
165 if (ctx
->VertexProgram
.Current
->Parameters
) {
166 /* Grab the state GL state and put into registers */
167 _mesa_load_state_parameters(ctx
,
168 ctx
->VertexProgram
.Current
->Parameters
);
176 * For debugging. Dump the current vertex program machine registers.
179 _mesa_dump_vp_state( const struct gl_vertex_program_state
*state
)
182 _mesa_printf("VertexIn:\n");
183 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_INPUTS
; i
++) {
184 _mesa_printf("%d: %f %f %f %f ", i
,
188 state
->Inputs
[i
][3]);
192 _mesa_printf("VertexOut:\n");
193 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
; i
++) {
194 _mesa_printf("%d: %f %f %f %f ", i
,
195 state
->Outputs
[i
][0],
196 state
->Outputs
[i
][1],
197 state
->Outputs
[i
][2],
198 state
->Outputs
[i
][3]);
202 _mesa_printf("Registers:\n");
203 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_TEMPS
; i
++) {
204 _mesa_printf("%d: %f %f %f %f ", i
,
205 state
->Temporaries
[i
][0],
206 state
->Temporaries
[i
][1],
207 state
->Temporaries
[i
][2],
208 state
->Temporaries
[i
][3]);
212 _mesa_printf("Parameters:\n");
213 for (i
= 0; i
< MAX_NV_VERTEX_PROGRAM_PARAMS
; i
++) {
214 _mesa_printf("%d: %f %f %f %f ", i
,
215 state
->Parameters
[i
][0],
216 state
->Parameters
[i
][1],
217 state
->Parameters
[i
][2],
218 state
->Parameters
[i
][3]);
226 * Return a pointer to the 4-element float vector specified by the given
229 static INLINE
const GLfloat
*
230 get_register_pointer( const struct vp_src_register
*source
,
231 const struct gl_vertex_program_state
*state
)
233 if (source
->RelAddr
) {
234 const GLint reg
= source
->Index
+ state
->AddressReg
[0];
235 ASSERT( (source
->File
== PROGRAM_ENV_PARAM
) ||
236 (source
->File
== PROGRAM_STATE_VAR
) );
237 if (reg
< 0 || reg
> MAX_NV_VERTEX_PROGRAM_PARAMS
)
239 else if (source
->File
== PROGRAM_ENV_PARAM
)
240 return state
->Parameters
[reg
];
242 return state
->Current
->Parameters
->ParameterValues
[reg
];
245 switch (source
->File
) {
246 case PROGRAM_TEMPORARY
:
247 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_TEMPS
);
248 return state
->Temporaries
[source
->Index
];
250 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_INPUTS
);
251 return state
->Inputs
[source
->Index
];
253 /* This is only needed for the PRINT instruction */
254 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_OUTPUTS
);
255 return state
->Outputs
[source
->Index
];
256 case PROGRAM_LOCAL_PARAM
:
257 ASSERT(source
->Index
< MAX_PROGRAM_LOCAL_PARAMS
);
258 return state
->Current
->Base
.LocalParams
[source
->Index
];
259 case PROGRAM_ENV_PARAM
:
260 ASSERT(source
->Index
< MAX_NV_VERTEX_PROGRAM_PARAMS
);
261 return state
->Parameters
[source
->Index
];
262 case PROGRAM_STATE_VAR
:
263 ASSERT(source
->Index
< state
->Current
->Parameters
->NumParameters
);
264 return state
->Current
->Parameters
->ParameterValues
[source
->Index
];
267 "Bad source register file in get_register_pointer");
276 * Fetch a 4-element float vector from the given source register.
277 * Apply swizzling and negating as needed.
280 fetch_vector4( const struct vp_src_register
*source
,
281 const struct gl_vertex_program_state
*state
,
284 const GLfloat
*src
= get_register_pointer(source
, state
);
286 if (source
->Negate
) {
287 result
[0] = -src
[GET_SWZ(source
->Swizzle
, 0)];
288 result
[1] = -src
[GET_SWZ(source
->Swizzle
, 1)];
289 result
[2] = -src
[GET_SWZ(source
->Swizzle
, 2)];
290 result
[3] = -src
[GET_SWZ(source
->Swizzle
, 3)];
293 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
294 result
[1] = src
[GET_SWZ(source
->Swizzle
, 1)];
295 result
[2] = src
[GET_SWZ(source
->Swizzle
, 2)];
296 result
[3] = src
[GET_SWZ(source
->Swizzle
, 3)];
303 * As above, but only return result[0] element.
306 fetch_vector1( const struct vp_src_register
*source
,
307 const struct gl_vertex_program_state
*state
,
310 const GLfloat
*src
= get_register_pointer(source
, state
);
312 if (source
->Negate
) {
313 result
[0] = -src
[GET_SWZ(source
->Swizzle
, 0)];
316 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
322 * Store 4 floats into a register.
325 store_vector4( const struct vp_dst_register
*dest
,
326 struct gl_vertex_program_state
*state
,
327 const GLfloat value
[4] )
330 switch (dest
->File
) {
331 case PROGRAM_TEMPORARY
:
332 dst
= state
->Temporaries
[dest
->Index
];
335 dst
= state
->Outputs
[dest
->Index
];
337 case PROGRAM_ENV_PARAM
:
340 GET_CURRENT_CONTEXT(ctx
);
341 dst
= ctx
->VertexProgram
.Parameters
[dest
->Index
];
345 _mesa_problem(NULL
, "Invalid register file in store_vector4(file=%d)",
350 if (dest
->WriteMask
& WRITEMASK_X
)
352 if (dest
->WriteMask
& WRITEMASK_Y
)
354 if (dest
->WriteMask
& WRITEMASK_Z
)
356 if (dest
->WriteMask
& WRITEMASK_W
)
/**
 * Set x to positive or negative infinity.
 *
 * 'x' must be a float lvalue.  Where IEEE bit patterns are not used, the
 * largest available float constant stands in for infinity.
 * Arguments and expansions are fully parenthesized so the macros behave
 * as single expressions regardless of the argument's form.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/** Overwrite the bit pattern of float lvalue 'x' with the integer 'bits'. */
#define SET_FLOAT_BITS(x, bits) ( ((fi_type *) (void *) &(x))->i = (bits) )
379 * Execute the given vertex program
382 _mesa_exec_vertex_program(GLcontext
*ctx
, const struct vertex_program
*program
)
384 struct gl_vertex_program_state
*state
= &ctx
->VertexProgram
;
385 const struct vp_instruction
*inst
;
387 ctx
->_CurrentProgram
= GL_VERTEX_PROGRAM_ARB
; /* or NV, doesn't matter */
389 /* If the program is position invariant, multiply the input
390 * position and the MVP matrix and stick it into the output pos slot
392 if (ctx
->VertexProgram
.Current
->IsPositionInvariant
) {
393 TRANSFORM_POINT( ctx
->VertexProgram
.Outputs
[0],
394 ctx
->_ModelProjectMatrix
.m
,
395 ctx
->VertexProgram
.Inputs
[0]);
397 /* XXX: This could go elsewhere */
398 ctx
->VertexProgram
.Current
->OutputsWritten
|= 0x1;
400 for (inst
= program
->Instructions
; ; inst
++) {
402 if (ctx
->VertexProgram
.CallbackEnabled
&&
403 ctx
->VertexProgram
.Callback
) {
404 ctx
->VertexProgram
.CurrentPosition
= inst
->StringPos
;
405 ctx
->VertexProgram
.Callback(program
->Base
.Target
,
406 ctx
->VertexProgram
.CallbackData
);
409 switch (inst
->Opcode
) {
413 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
414 store_vector4( &inst
->DstReg
, state
, t
);
419 const GLfloat epsilon
= 1.0F
/ 256.0F
; /* per NV spec */
420 GLfloat t
[4], lit
[4];
421 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
422 t
[0] = MAX2(t
[0], 0.0F
);
423 t
[1] = MAX2(t
[1], 0.0F
);
424 t
[3] = CLAMP(t
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
427 lit
[2] = (t
[0] > 0.0) ? (GLfloat
) _mesa_pow(t
[1], t
[3]) : 0.0F
;
429 store_vector4( &inst
->DstReg
, state
, lit
);
435 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
437 t
[0] = 1.0F
/ t
[0]; /* div by zero is infinity! */
438 t
[1] = t
[2] = t
[3] = t
[0];
439 store_vector4( &inst
->DstReg
, state
, t
);
445 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
446 t
[0] = INV_SQRTF(FABSF(t
[0]));
447 t
[1] = t
[2] = t
[3] = t
[0];
448 store_vector4( &inst
->DstReg
, state
, t
);
453 GLfloat t
[4], q
[4], floor_t0
;
454 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
455 floor_t0
= (float) floor(t
[0]);
456 if (floor_t0
> FLT_MAX_EXP
) {
457 SET_POS_INFINITY(q
[0]);
458 SET_POS_INFINITY(q
[2]);
460 else if (floor_t0
< FLT_MIN_EXP
) {
466 GLint ii
= (GLint
) floor_t0
;
467 ii
= (ii
< 23) + 0x3f800000;
468 SET_FLOAT_BITS(q
[0], ii
);
469 q
[0] = *((GLfloat
*) (void *)&ii
);
471 q
[0] = (GLfloat
) pow(2.0, floor_t0
);
473 q
[2] = (GLfloat
) (q
[0] * LOG2(q
[1]));
475 q
[1] = t
[0] - floor_t0
;
477 store_vector4( &inst
->DstReg
, state
, q
);
482 GLfloat t
[4], q
[4], abs_t0
;
483 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
484 abs_t0
= (GLfloat
) fabs(t
[0]);
485 if (abs_t0
!= 0.0F
) {
486 /* Since we really can't handle infinite values on VMS
487 * like other OSes we'll use __MAXFLOAT to represent
488 * infinity. This may need some tweaking.
491 if (abs_t0
== __MAXFLOAT
)
493 if (IS_INF_OR_NAN(abs_t0
))
496 SET_POS_INFINITY(q
[0]);
498 SET_POS_INFINITY(q
[2]);
502 double mantissa
= frexp(t
[0], &exponent
);
503 q
[0] = (GLfloat
) (exponent
- 1);
504 q
[1] = (GLfloat
) (2.0 * mantissa
); /* map [.5, 1) -> [1, 2) */
505 q
[2] = (GLfloat
) (q
[0] + LOG2(q
[1]));
509 SET_NEG_INFINITY(q
[0]);
511 SET_NEG_INFINITY(q
[2]);
514 store_vector4( &inst
->DstReg
, state
, q
);
519 GLfloat t
[4], u
[4], prod
[4];
520 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
521 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
522 prod
[0] = t
[0] * u
[0];
523 prod
[1] = t
[1] * u
[1];
524 prod
[2] = t
[2] * u
[2];
525 prod
[3] = t
[3] * u
[3];
526 store_vector4( &inst
->DstReg
, state
, prod
);
531 GLfloat t
[4], u
[4], sum
[4];
532 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
533 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
534 sum
[0] = t
[0] + u
[0];
535 sum
[1] = t
[1] + u
[1];
536 sum
[2] = t
[2] + u
[2];
537 sum
[3] = t
[3] + u
[3];
538 store_vector4( &inst
->DstReg
, state
, sum
);
543 GLfloat t
[4], u
[4], dot
[4];
544 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
545 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
546 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2];
547 dot
[1] = dot
[2] = dot
[3] = dot
[0];
548 store_vector4( &inst
->DstReg
, state
, dot
);
553 GLfloat t
[4], u
[4], dot
[4];
554 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
555 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
556 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + t
[3] * u
[3];
557 dot
[1] = dot
[2] = dot
[3] = dot
[0];
558 store_vector4( &inst
->DstReg
, state
, dot
);
563 GLfloat t
[4], u
[4], dst
[4];
564 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
565 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
567 dst
[1] = t
[1] * u
[1];
570 store_vector4( &inst
->DstReg
, state
, dst
);
575 GLfloat t
[4], u
[4], min
[4];
576 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
577 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
578 min
[0] = (t
[0] < u
[0]) ? t
[0] : u
[0];
579 min
[1] = (t
[1] < u
[1]) ? t
[1] : u
[1];
580 min
[2] = (t
[2] < u
[2]) ? t
[2] : u
[2];
581 min
[3] = (t
[3] < u
[3]) ? t
[3] : u
[3];
582 store_vector4( &inst
->DstReg
, state
, min
);
587 GLfloat t
[4], u
[4], max
[4];
588 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
589 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
590 max
[0] = (t
[0] > u
[0]) ? t
[0] : u
[0];
591 max
[1] = (t
[1] > u
[1]) ? t
[1] : u
[1];
592 max
[2] = (t
[2] > u
[2]) ? t
[2] : u
[2];
593 max
[3] = (t
[3] > u
[3]) ? t
[3] : u
[3];
594 store_vector4( &inst
->DstReg
, state
, max
);
599 GLfloat t
[4], u
[4], slt
[4];
600 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
601 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
602 slt
[0] = (t
[0] < u
[0]) ? 1.0F
: 0.0F
;
603 slt
[1] = (t
[1] < u
[1]) ? 1.0F
: 0.0F
;
604 slt
[2] = (t
[2] < u
[2]) ? 1.0F
: 0.0F
;
605 slt
[3] = (t
[3] < u
[3]) ? 1.0F
: 0.0F
;
606 store_vector4( &inst
->DstReg
, state
, slt
);
611 GLfloat t
[4], u
[4], sge
[4];
612 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
613 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
614 sge
[0] = (t
[0] >= u
[0]) ? 1.0F
: 0.0F
;
615 sge
[1] = (t
[1] >= u
[1]) ? 1.0F
: 0.0F
;
616 sge
[2] = (t
[2] >= u
[2]) ? 1.0F
: 0.0F
;
617 sge
[3] = (t
[3] >= u
[3]) ? 1.0F
: 0.0F
;
618 store_vector4( &inst
->DstReg
, state
, sge
);
623 GLfloat t
[4], u
[4], v
[4], sum
[4];
624 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
625 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
626 fetch_vector4( &inst
->SrcReg
[2], state
, v
);
627 sum
[0] = t
[0] * u
[0] + v
[0];
628 sum
[1] = t
[1] * u
[1] + v
[1];
629 sum
[2] = t
[2] * u
[2] + v
[2];
630 sum
[3] = t
[3] * u
[3] + v
[3];
631 store_vector4( &inst
->DstReg
, state
, sum
);
637 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
638 state
->AddressReg
[0] = (GLint
) floor(t
[0]);
643 GLfloat t
[4], u
[4], dot
[4];
644 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
645 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
646 dot
[0] = t
[0] * u
[0] + t
[1] * u
[1] + t
[2] * u
[2] + u
[3];
647 dot
[1] = dot
[2] = dot
[3] = dot
[0];
648 store_vector4( &inst
->DstReg
, state
, dot
);
654 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
660 if (u
> 1.884467e+019F
) {
661 u
= 1.884467e+019F
; /* IEEE 32-bit binary value 0x5F800000 */
663 else if (u
< 5.42101e-020F
) {
664 u
= 5.42101e-020F
; /* IEEE 32-bit binary value 0x1F800000 */
668 if (u
< -1.884467e+019F
) {
669 u
= -1.884467e+019F
; /* IEEE 32-bit binary value 0xDF800000 */
671 else if (u
> -5.42101e-020F
) {
672 u
= -5.42101e-020F
; /* IEEE 32-bit binary value 0x9F800000 */
675 t
[0] = t
[1] = t
[2] = t
[3] = u
;
676 store_vector4( &inst
->DstReg
, state
, t
);
679 case VP_OPCODE_SUB
: /* GL_NV_vertex_program1_1 */
681 GLfloat t
[4], u
[4], sum
[4];
682 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
683 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
684 sum
[0] = t
[0] - u
[0];
685 sum
[1] = t
[1] - u
[1];
686 sum
[2] = t
[2] - u
[2];
687 sum
[3] = t
[3] - u
[3];
688 store_vector4( &inst
->DstReg
, state
, sum
);
691 case VP_OPCODE_ABS
: /* GL_NV_vertex_program1_1 */
694 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
695 if (t
[0] < 0.0) t
[0] = -t
[0];
696 if (t
[1] < 0.0) t
[1] = -t
[1];
697 if (t
[2] < 0.0) t
[2] = -t
[2];
698 if (t
[3] < 0.0) t
[3] = -t
[3];
699 store_vector4( &inst
->DstReg
, state
, t
);
702 case VP_OPCODE_FLR
: /* GL_ARB_vertex_program */
705 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
710 store_vector4( &inst
->DstReg
, state
, t
);
713 case VP_OPCODE_FRC
: /* GL_ARB_vertex_program */
716 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
717 t
[0] = t
[0] - FLOORF(t
[0]);
718 t
[1] = t
[1] - FLOORF(t
[1]);
719 t
[2] = t
[2] - FLOORF(t
[2]);
720 t
[3] = t
[3] - FLOORF(t
[3]);
721 store_vector4( &inst
->DstReg
, state
, t
);
724 case VP_OPCODE_EX2
: /* GL_ARB_vertex_program */
727 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
728 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(2.0, t
[0]);
729 store_vector4( &inst
->DstReg
, state
, t
);
732 case VP_OPCODE_LG2
: /* GL_ARB_vertex_program */
735 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
736 t
[0] = t
[1] = t
[2] = t
[3] = LOG2(t
[0]);
737 store_vector4( &inst
->DstReg
, state
, t
);
740 case VP_OPCODE_POW
: /* GL_ARB_vertex_program */
743 fetch_vector1( &inst
->SrcReg
[0], state
, t
);
744 fetch_vector1( &inst
->SrcReg
[1], state
, u
);
745 t
[0] = t
[1] = t
[2] = t
[3] = (GLfloat
)_mesa_pow(t
[0], u
[0]);
746 store_vector4( &inst
->DstReg
, state
, t
);
749 case VP_OPCODE_XPD
: /* GL_ARB_vertex_program */
751 GLfloat t
[4], u
[4], cross
[4];
752 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
753 fetch_vector4( &inst
->SrcReg
[1], state
, u
);
754 cross
[0] = t
[1] * u
[2] - t
[2] * u
[1];
755 cross
[1] = t
[2] * u
[0] - t
[0] * u
[2];
756 cross
[2] = t
[0] * u
[1] - t
[1] * u
[0];
757 store_vector4( &inst
->DstReg
, state
, cross
);
760 case VP_OPCODE_SWZ
: /* GL_ARB_vertex_program */
762 const struct vp_src_register
*source
= &inst
->SrcReg
[0];
763 const GLfloat
*src
= get_register_pointer(source
, state
);
767 /* do extended swizzling here */
768 for (i
= 0; i
< 3; i
++) {
769 if (GET_SWZ(source
->Swizzle
, i
) == SWIZZLE_ZERO
)
771 else if (GET_SWZ(source
->Swizzle
, i
) == SWIZZLE_ONE
)
774 result
[i
] = -src
[GET_SWZ(source
->Swizzle
, i
)];
776 result
[i
] = -result
[i
];
778 store_vector4( &inst
->DstReg
, state
, result
);
781 case VP_OPCODE_PRINT
:
782 if (inst
->SrcReg
[0].File
) {
784 fetch_vector4( &inst
->SrcReg
[0], state
, t
);
785 _mesa_printf("%s%g, %g, %g, %g\n",
786 (char *) inst
->Data
, t
[0], t
[1], t
[2], t
[3]);
789 _mesa_printf("%s\n", (char *) inst
->Data
);
793 ctx
->_CurrentProgram
= 0;
796 /* bad instruction opcode */
797 _mesa_problem(ctx
, "Bad VP Opcode in _mesa_exec_vertex_program");
798 ctx
->_CurrentProgram
= 0;
803 ctx
->_CurrentProgram
= 0;
809 Thoughts on vertex program optimization:
811 The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
812 assembly code. That will probably be a lot of work.
814 Another approach might be to replace the vp_instruction->Opcode field with
815 a pointer to a specialized C function which executes the instruction.
816 In particular we can write functions which skip swizzling, negating,
817 masking, relative addressing, etc. when they're not needed.
821 void simple_add( struct vp_instruction *inst )
823 GLfloat *sum = machine->Registers[inst->DstReg.Register];
824 GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
825 GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
826 sum[0] = a[0] + b[0];
827 sum[1] = a[1] + b[1];
828 sum[2] = a[2] + b[2];
829 sum[3] = a[3] + b[3];
838 A first step would be to 'vectorize' the programs in the same way as
839 the normal transformation code in the tnl module. Thus each opcode
840 takes zero or more input vectors (registers) and produces one or more
843 These operations would initially be coded in C, with machine-specific
844 assembly following, as is currently the case for matrix
845 transformations in the math/ directory. The preprocessing scheme for
846 selecting simpler operations Brian describes above would also work
849 This should give reasonable performance without excessive effort.