From 77865f81c9ce422b6f23bb105c632c2c6fb9bd67 Mon Sep 17 00:00:00 2001 From: Daniel Borca Date: Fri, 2 Apr 2004 06:42:04 +0000 Subject: [PATCH] added codegen'ed choosers --- src/mesa/tnl/t_save_api.c | 10 +++ src/mesa/tnl/t_vtx_api.c | 4 +- src/mesa/tnl/t_vtx_api.h | 4 ++ src/mesa/tnl/t_vtx_generic.c | 14 +++- src/mesa/tnl/t_vtx_x86.c | 90 +++++++++++++++++++++---- src/mesa/tnl/t_vtx_x86_gcc.S | 124 ++++++++++++++++++----------------- 6 files changed, 171 insertions(+), 75 deletions(-) diff --git a/src/mesa/tnl/t_save_api.c b/src/mesa/tnl/t_save_api.c index db338bbef6c..18fa46951d0 100644 --- a/src/mesa/tnl/t_save_api.c +++ b/src/mesa/tnl/t_save_api.c @@ -712,6 +712,15 @@ do { \ #define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) +#ifdef USE_X86_ASM +/* Naughty cheat: + */ +#define DISPATCH_ATTR2F( ATTR, S,T ) DISPATCH_ATTRFV( ATTR, 2, &(S) ) +#define DISPATCH_ATTR3F( ATTR, S,T,R ) DISPATCH_ATTRFV( ATTR, 3, &(S) ) +#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) DISPATCH_ATTRFV( ATTR, 4, &(S) ) +#else +/* Safe: + */ #define DISPATCH_ATTR2F( ATTR, S,T ) \ do { \ GLfloat v[2]; \ @@ -730,6 +739,7 @@ do { \ v[0] = S; v[1] = T; v[2] = R; v[3] = Q; \ DISPATCH_ATTR4FV( ATTR, v ); \ } while (0) +#endif static void enum_error( void ) diff --git a/src/mesa/tnl/t_vtx_api.c b/src/mesa/tnl/t_vtx_api.c index 807d99952f0..f47114cf623 100644 --- a/src/mesa/tnl/t_vtx_api.c +++ b/src/mesa/tnl/t_vtx_api.c @@ -883,6 +883,8 @@ void _tnl_vtx_init( GLcontext *ctx ) choose[ERROR_ATTRIB][2] = error_attrib; choose[ERROR_ATTRIB][3] = error_attrib; + _tnl_x86choosers(choose, do_choose); /* x86 INIT_CHOOSERS */ + _tnl_generic_attr_table_init( generic_attr_func ); } @@ -901,7 +903,7 @@ void _tnl_vtx_init( GLcontext *ctx ) _tnl_current_init( ctx ); _tnl_exec_vtxfmt_init( ctx ); _tnl_generic_exec_vtxfmt_init( ctx ); - _tnl_x86_exec_vtxfmt_init( ctx ); /* [dBorca] x86 DISPATCH_ATTRFV */ + _tnl_x86_exec_vtxfmt_init( ctx ); /* x86 DISPATCH_ATTRFV */ _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt ); diff --git a/src/mesa/tnl/t_vtx_api.h b/src/mesa/tnl/t_vtx_api.h index 46700fcd0ae..f58461332ec 100644 --- a/src/mesa/tnl/t_vtx_api.h +++ b/src/mesa/tnl/t_vtx_api.h @@ -80,6 +80,10 @@ extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ); extern void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ); +extern void _tnl_x86choosers( attrfv_func (*choose)[4], + attrfv_func (*do_choose)( GLuint attr, + GLuint sz )); + diff --git a/src/mesa/tnl/t_vtx_generic.c b/src/mesa/tnl/t_vtx_generic.c index 00dd2e89075..ea03dff05bd 100644 --- a/src/mesa/tnl/t_vtx_generic.c +++ b/src/mesa/tnl/t_vtx_generic.c @@ -151,6 +151,15 @@ do { \ #define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) +#ifdef USE_X86_ASM +/* Naughty cheat: + */ +#define DISPATCH_ATTR2F( ATTR, S,T ) DISPATCH_ATTRFV( ATTR, 2, &(S) ) +#define DISPATCH_ATTR3F( ATTR, S,T,R ) DISPATCH_ATTRFV( ATTR, 3, &(S) ) +#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) DISPATCH_ATTRFV( ATTR, 4, &(S) ) +#else +/* Safe: + */ #define DISPATCH_ATTR2F( ATTR, S,T ) \ do { \ GLfloat v[2]; \ @@ -169,6 +178,7 @@ do { \ v[0] = S; v[1] = T; v[2] = R; v[3] = Q; \ DISPATCH_ATTR4FV( ATTR, v ); \ } while (0) +#endif static void GLAPIENTRY _tnl_Vertex2f( GLfloat x, GLfloat y ) @@ -408,8 +418,8 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index, } -/* Install the generic versions of the 2nd level dispatch functions. - * [dBorca] Some of these have a codegen alternative. +/* Install the generic versions of the 2nd level dispatch + * functions. Some of these have a codegen alternative. */ void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx ) { diff --git a/src/mesa/tnl/t_vtx_x86.c b/src/mesa/tnl/t_vtx_x86.c index 4f04a4efbb3..66950e70e19 100644 --- a/src/mesa/tnl/t_vtx_x86.c +++ b/src/mesa/tnl/t_vtx_x86.c @@ -28,6 +28,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Authors: * Keith Whitwell + * Daniel Borca */ @@ -66,6 +67,8 @@ EXTERN( _tnl_x86_dispatch_multitexcoordfv ); EXTERN( _tnl_x86_dispatch_vertexattribf ); EXTERN( _tnl_x86_dispatch_vertexattribfv ); +EXTERN( _tnl_x86_choose_fv ); + static void notify( void ) { @@ -77,7 +80,7 @@ static void notify( void ) #define DFN( FUNC, CACHE, KEY ) \ - struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn ); \ + struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn );\ char *start = (char *)&FUNC; \ char *end = (char *)&FUNC##_end; \ int offset = 0; \ @@ -254,20 +257,23 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ) gen->Attribute[3] = makeX86Attribute4fv; } -void _do_choose( void ) + +static attrfv_func +_do_choose( GLuint attr, GLuint sz ) { + return NULL; } -/* [dBorca] I purposely avoided one single macro, since they might need to - * be handled in different ways. Ohwell, once things get much clearer, they - * could collapse... +/* I purposely avoided one single macro, since they might need to be + * handled in different ways. Ohwell, once things get much clearer, + * they could collapse... */ #define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR) \ do { \ char *code; \ char *start = (char *)&_tnl_x86_dispatch_attr##TYPE; \ - char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end; \ + char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end; \ int offset = 0; \ code = ALIGN_MALLOC( end - start, 16 ); \ memcpy (code, start, end - start); \ @@ -279,7 +285,7 @@ do { \ #define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR) \ do { \ char *code; \ - char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE; \ + char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE; \ char *end = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE##_end; \ int offset = 0; \ code = ALIGN_MALLOC( end - start, 16 ); \ @@ -293,7 +299,7 @@ do { \ do { \ char *code; \ char *start = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE; \ - char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end; \ + char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end; \ int offset = 0; \ code = ALIGN_MALLOC( end - start, 16 ); \ memcpy (code, start, end - start); \ @@ -301,7 +307,8 @@ do { \ vfmt->FUNC##SIZE##TYPE##NV = code; \ } while (0) -/* [dBorca] Install the codegen'ed versions of the 2nd level dispatch + +/* Install the codegen'ed versions of the 2nd level dispatch * functions. We should keep a list and free them in the end... */ void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ) @@ -312,21 +319,70 @@ void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ) MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0); MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0); MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0); +/* vfmt->FogCoordfEXT = _tnl_FogCoordfEXT; + vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;*/ MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL); MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL); +/* vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT; + vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; */ + MAKE_DISPATCH_ATTR(TexCoord,1,f, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,1,fv, _TNL_ATTRIB_TEX0); MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0); MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,3,f, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,3,fv, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,4,f, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(TexCoord,4,fv, _TNL_ATTRIB_TEX0); + MAKE_DISPATCH_ATTR(Vertex,2,f, _TNL_ATTRIB_POS); + MAKE_DISPATCH_ATTR(Vertex,2,fv, _TNL_ATTRIB_POS); MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS); MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS); - /* just add more */ + MAKE_DISPATCH_ATTR(Vertex,4,f, _TNL_ATTRIB_POS); + MAKE_DISPATCH_ATTR(Vertex,4,fv, _TNL_ATTRIB_POS); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,f, 0); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,fv, 0); MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0); MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0); - /* just add more */ + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,f, 0); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,fv, 0); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,f, 0); + MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,fv, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,f, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,fv, 0); MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0); MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0); - /* just add more */ + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,f, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,fv, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,f, 0); + MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,fv, 0); +} + + +/* Install the codegen'ed choosers. + * We should keep a list and free them in the end... + */ +void _tnl_x86choosers( attrfv_func (*choose)[4], + attrfv_func (*do_choose)( GLuint attr, + GLuint sz )) +{ + int attr, size; + + for (attr = 0; attr < _TNL_MAX_ATTR_CODEGEN; attr++) { + for (size = 0; size < 4; size++) { + char *code; + char *start = (char *)&_tnl_x86_choose_fv; + char *end = (char *)&_tnl_x86_choose_fv_end; + int offset = 0; + code = ALIGN_MALLOC( end - start, 16 ); + memcpy (code, start, end - start); + FIXUP(code, 0, 0, attr); + FIXUP(code, 0, 1, size + 1); + FIXUPREL(code, 0, 2, do_choose); + choose[attr][size] = code; + } + } } #else @@ -336,9 +392,19 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen ) (void) gen; } + void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx ) { (void) ctx; } + +void _tnl_x86choosers( attrfv_func (*choose)[4], + attrfv_func (*do_choose)( GLuint attr, + GLuint sz )) +{ + (void) choose; + (void) do_choose; +} + #endif diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S index 2a2e933f971..5a1adc0f33d 100644 --- a/src/mesa/tnl/t_vtx_x86_gcc.S +++ b/src/mesa/tnl/t_vtx_x86_gcc.S @@ -36,16 +36,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. .globl x; \ x: -#define EXTRN( x ) x - #else /* defined(__DJGPP__) */ #define GLOBL( x ) \ .globl _##x; \ _##x: -#define EXTRN( x ) _##x - #endif /* defined(__DJGPP__) */ .data @@ -55,17 +51,22 @@ _##x: // macro to note current offsets, etc in a special region of the // object file & just make everything work out neat. I don't know // enough to do that... - -#define SUBST( x ) (0x10101010 + x) - + +#define SUBST( x ) (0x10101010 + x) + // [dBorca] TODO // Unfold functions for each vertex size? // Build super-specialized MMX/SSE versions? +// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY): +// need separate routine for the non "fv" case, +// to clean up the stack (I guess we could codegen +// 'ret nn' insn)! Also we need to call notify, then +// return, instead of jump! GLOBL ( _tnl_x86_Vertex1fv ) - movl 4(%esp), %ecx + movl 4(%esp), %ecx push %edi push %esi movl SUBST(0), %edi // 0x0 --> tnl->vtx.vbptr @@ -75,7 +76,7 @@ GLOBL ( _tnl_x86_Vertex1fv ) movl $SUBST(1), %ecx // 0x1 --> (tnl->vtx.vertex_size - 1) movl $SUBST(2), %esi // 0x2 --> (tnl->vtx.vertex + 1) repz - movsl %ds:(%esi), %es:(%edi) + movsl %ds:(%esi), %es:(%edi) movl %edi, SUBST(0) // 0x0 --> tnl->vtx.vbptr movl SUBST(3), %edx // 0x3 --> counter pop %esi @@ -90,7 +91,7 @@ GLOBL ( _tnl_x86_Vertex1fv_end ) .align 4 GLOBL ( _tnl_x86_Vertex2fv ) - movl 4(%esp), %ecx + movl 4(%esp), %ecx push %edi push %esi movl SUBST(0), %edi // load tnl->vtx.vbptr @@ -102,7 +103,7 @@ GLOBL ( _tnl_x86_Vertex2fv ) movl $SUBST(1), %ecx // vertex_size - 2 movl $SUBST(2), %esi // tnl->vtx.vertex + 2 repz - movsl %ds:(%esi), %es:(%edi) + movsl %ds:(%esi), %es:(%edi) movl %edi, SUBST(0) // save tnl->vtx.vbptr movl SUBST(3), %edx // load counter pop %esi @@ -116,7 +117,7 @@ GLOBL ( _tnl_x86_Vertex2fv_end ) .align 4 GLOBL ( _tnl_x86_Vertex3fv ) - movl 4(%esp), %ecx + movl 4(%esp), %ecx push %edi push %esi movl SUBST(0), %edi // load tnl->vtx.vbptr @@ -130,7 +131,7 @@ GLOBL ( _tnl_x86_Vertex3fv ) movl $SUBST(1), %ecx // vertex_size - 3 movl $SUBST(2), %esi // tnl->vtx.vertex + 3 repz - movsl %ds:(%esi), %es:(%edi) + movsl %ds:(%esi), %es:(%edi) movl %edi, SUBST(0) // save tnl->vtx.vbptr movl SUBST(3), %edx // load counter pop %esi @@ -142,10 +143,10 @@ GLOBL ( _tnl_x86_Vertex3fv ) ret // return GLOBL ( _tnl_x86_Vertex3fv_end ) - + .align 4 GLOBL ( _tnl_x86_Vertex4fv ) - movl 4(%esp), %ecx + movl 4(%esp), %ecx push %edi push %esi movl SUBST(0), %edi // load tnl->vtx.vbptr @@ -161,7 +162,7 @@ GLOBL ( _tnl_x86_Vertex4fv ) movl $SUBST(1), %ecx // vertex_size - 4 movl $SUBST(2), %esi // tnl->vtx.vertex + 3 repz - movsl %ds:(%esi), %es:(%edi) + movsl %ds:(%esi), %es:(%edi) movl %edi, SUBST(0) // save tnl->vtx.vbptr movl SUBST(3), %edx // load counter pop %esi @@ -174,49 +175,49 @@ GLOBL ( _tnl_x86_Vertex4fv ) GLOBL ( _tnl_x86_Vertex4fv_end ) - + /** * Generic handlers for vector format data. */ GLOBL( _tnl_x86_Attribute1fv) - movl 4(%esp), %ecx - movl (%ecx), %eax /* load v[0] */ - movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ ret GLOBL ( _tnl_x86_Attribute1fv_end ) GLOBL( _tnl_x86_Attribute2fv) - movl 4(%esp), %ecx - movl (%ecx), %eax /* load v[0] */ - movl 4(%ecx), %edx /* load v[1] */ - movl %eax, SUBST(0) /* store v[0] to current vertex */ - movl %edx, SUBST(1) /* store v[1] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ ret GLOBL ( _tnl_x86_Attribute2fv_end ) GLOBL( _tnl_x86_Attribute3fv) - movl 4(%esp), %ecx - movl (%ecx), %eax /* load v[0] */ - movl 4(%ecx), %edx /* load v[1] */ - movl 8(%ecx), %ecx /* load v[2] */ - movl %eax, SUBST(0) /* store v[0] to current vertex */ - movl %edx, SUBST(1) /* store v[1] to current vertex */ - movl %ecx, SUBST(2) /* store v[2] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl 8(%ecx), %ecx /* load v[2] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ + movl %ecx, SUBST(2) /* store v[2] to current vertex */ ret GLOBL ( _tnl_x86_Attribute3fv_end ) GLOBL( _tnl_x86_Attribute4fv) - movl 4(%esp), %ecx - movl (%ecx), %eax /* load v[0] */ - movl 4(%ecx), %edx /* load v[1] */ - movl %eax, SUBST(0) /* store v[0] to current vertex */ - movl %edx, SUBST(1) /* store v[1] to current vertex */ - movl 8(%ecx), %eax /* load v[2] */ - movl 12(%ecx), %edx /* load v[3] */ - movl %eax, SUBST(2) /* store v[2] to current vertex */ - movl %edx, SUBST(3) /* store v[3] to current vertex */ + movl 4(%esp), %ecx + movl (%ecx), %eax /* load v[0] */ + movl 4(%ecx), %edx /* load v[1] */ + movl %eax, SUBST(0) /* store v[0] to current vertex */ + movl %edx, SUBST(1) /* store v[1] to current vertex */ + movl 8(%ecx), %eax /* load v[2] */ + movl 12(%ecx), %edx /* load v[3] */ + movl %eax, SUBST(2) /* store v[2] to current vertex */ + movl %edx, SUBST(3) /* store v[3] to current vertex */ ret GLOBL ( _tnl_x86_Attribute4fv_end ) @@ -225,29 +226,24 @@ GLOBL ( _tnl_x86_Attribute4fv_end ) // Must generate all of these ahead of first usage. Generate at // compile-time? - -// NOT CURRENTLY USED GLOBL( _tnl_x86_choose_fv) subl $12, %esp // gcc does 16 byte alignment of stack frames? movl $SUBST(0), (%esp) // arg 0 - attrib movl $SUBST(1), 4(%esp) // arg 1 - N - call EXTRN(_do_choose) // new function returned in %eax - add $12, %esp // tear down stack frame - jmp *%eax // jump to new func -GLOBL ( _tnl_x86_choosefv_end ) - - + .byte 0xe8 // call ... + .long SUBST(2) // ... do_choose + add $12, %esp // tear down stack frame + jmp *%eax // jump to new func +GLOBL ( _tnl_x86_choose_fv_end ) + // FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch. - -// NOT CURRENTLY USED - - + // In the 1st level dispatch functions, switch to a different // calling convention -- (const GLfloat *v) in %ecx. // @@ -256,7 +252,7 @@ GLOBL ( _tnl_x86_choosefv_end ) // back to the original caller. - + // Vertex/Normal/Color, etc: the address of the function pointer // is known at codegen time. @@ -282,6 +278,13 @@ GLOBL( _tnl_x86_dispatch_attrfv_end ) // MultiTexcoord: the address of the function pointer must be // calculated, but can use the index argument slot to hold 'v', and // avoid setting up a new stack frame. +// +// [dBorca] +// right, this would be the preferred approach, but gcc does not +// clean up the stack after each function call when optimizing (-fdefer-pop); +// can it make assumptions about what's already on the stack? I dunno, +// but in this case, we can't mess with the caller's stack frame, and +// we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor! // Also, will only need a maximum of four of each of these per context: // @@ -302,15 +305,16 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv ) sall $4, %ecx jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n] GLOBL( _tnl_x86_dispatch_multitexcoordfv_end ) - + // VertexAttrib: the address of the function pointer must be // calculated. GLOBL( _tnl_x86_dispatch_vertexattribf ) - movl $16, %ecx movl 4(%esp), %eax cmpl $16, %eax - cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported + jb .0 // "cmovge" is not supported on all CPUs + movl $16, %eax +.0: leal 8(%esp), %ecx // calculate 'v' movl %ecx, 4(%esp) // save in 1st arg slot sall $4, %eax @@ -318,13 +322,13 @@ GLOBL( _tnl_x86_dispatch_vertexattribf ) GLOBL( _tnl_x86_dispatch_vertexattribf_end ) GLOBL( _tnl_x86_dispatch_vertexattribfv ) - movl $16, %ecx movl 4(%esp), %eax cmpl $16, %eax - cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported + jb .1 // "cmovge" is not supported on all CPUs + movl $16, %eax +.1: movl 8(%esp), %ecx // load 'v' movl %ecx, 4(%esp) // save in 1st arg slot sall $4, %eax jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n] GLOBL( _tnl_x86_dispatch_vertexattribfv_end ) - -- 2.30.2