EXTERN( _tnl_x86_Vertex3fv );
EXTERN( _tnl_x86_Vertex4fv );
-EXTERN( _tnl_x86_dispatch_attrf );
+EXTERN( _tnl_x86_dispatch_attrf1 );
+EXTERN( _tnl_x86_dispatch_attrf2 );
+EXTERN( _tnl_x86_dispatch_attrf3 );
+EXTERN( _tnl_x86_dispatch_attrf4 );
EXTERN( _tnl_x86_dispatch_attrfv );
-EXTERN( _tnl_x86_dispatch_multitexcoordf );
+EXTERN( _tnl_x86_dispatch_multitexcoordf1 );
+EXTERN( _tnl_x86_dispatch_multitexcoordf2 );
+EXTERN( _tnl_x86_dispatch_multitexcoordf3 );
+EXTERN( _tnl_x86_dispatch_multitexcoordf4 );
EXTERN( _tnl_x86_dispatch_multitexcoordfv );
-EXTERN( _tnl_x86_dispatch_vertexattribf );
+EXTERN( _tnl_x86_dispatch_vertexattribf1 );
+EXTERN( _tnl_x86_dispatch_vertexattribf2 );
+EXTERN( _tnl_x86_dispatch_vertexattribf3 );
+EXTERN( _tnl_x86_dispatch_vertexattribf4 );
EXTERN( _tnl_x86_dispatch_vertexattribfv );
EXTERN( _tnl_x86_choose_fv );
-static void notify( void )
-{
- GET_CURRENT_CONTEXT( ctx );
- _tnl_wrap_filled_vertex( ctx );
-}
-
#define DONT_KNOW_OFFSETS 1
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL ) \
do { \
- GLuint subst = 0x10101010 + CHECKVAL; \
+ GLint subst = 0x10101010 + CHECKVAL; \
\
if (DONT_KNOW_OFFSETS) { \
while (*(int *)(CODE+offset) != subst) offset++; \
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )\
do { \
- GLuint subst = 0x10101010 + CHECKVAL; \
+ GLint subst = 0x10101010 + CHECKVAL; \
\
if (DONT_KNOW_OFFSETS) { \
while (*(int *)(CODE+offset) != subst) offset++; \
}
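+
+/* A minimal sketch of the FIXUP flow in the DONT_KNOW_OFFSETS case,
+ * assuming the `code' buffer and running `offset' from MKDISP below:
+ * the template assembly marks each patch site with the 32-bit
+ * literal 0x10101010 + CHECKVAL, so we scan the copied machine code
+ * for that literal and overwrite it in place, e.g. (the counter
+ * field name is taken from the SUBST(3) comments in the assembly):
+ *
+ *    GLint subst = 0x10101010 + 3;       // SUBST(3): the counter slot
+ *    while (*(int *)(code + offset) != subst)
+ *       offset++;                        // locate the placeholder
+ *    *(int *)(code + offset) = (int)&tnl->vtx.counter;  // patch it
+ *    offset += 4;                        // continue past this site
+ */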
-static attrfv_func
-_do_choose( GLuint attr, GLuint sz )
-{
- return NULL;
-}
-
-
-/* I purposely avoided one single macro, since they might need to be
- * handled in different ways. Ohwell, once things get much clearer,
- * they could collapse...
- */
-#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR) \
+#define MKDISP(FUNC, SIZE, ATTR, WARP) \
do { \
char *code; \
- char *start = (char *)&_tnl_x86_dispatch_attr##TYPE; \
- char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end; \
+ char *start = (char *)&WARP; \
+ char *end = (char *)&WARP##_end; \
int offset = 0; \
code = ALIGN_MALLOC( end - start, 16 ); \
memcpy (code, start, end - start); \
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\
- vfmt->FUNC##SIZE##TYPE = code; \
-} while (0)
-
-
-#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR) \
-do { \
- char *code; \
- char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE; \
- char *end = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE##_end; \
- int offset = 0; \
- code = ALIGN_MALLOC( end - start, 16 ); \
- memcpy (code, start, end - start); \
- FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1]));\
- vfmt->FUNC##SIZE##TYPE##ARB = code; \
-} while (0)
-
-
-#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR) \
-do { \
- char *code; \
- char *start = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE; \
- char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end; \
- int offset = 0; \
- code = ALIGN_MALLOC( end - start, 16 ); \
- memcpy (code, start, end - start); \
- FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1])); \
- vfmt->FUNC##SIZE##TYPE##NV = code; \
+ *(void **)&vfmt->FUNC = code; \
} while (0)
{
GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
- MAKE_DISPATCH_ATTR(Color,3,f, _TNL_ATTRIB_COLOR0);
- MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0);
- MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0);
- MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0);
-/* vfmt->FogCoordfEXT = _tnl_FogCoordfEXT;
- vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;*/
- MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL);
- MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL);
-/* vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT;
- vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; */
- MAKE_DISPATCH_ATTR(TexCoord,1,f, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,1,fv, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,3,f, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,3,fv, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,4,f, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(TexCoord,4,fv, _TNL_ATTRIB_TEX0);
- MAKE_DISPATCH_ATTR(Vertex,2,f, _TNL_ATTRIB_POS);
- MAKE_DISPATCH_ATTR(Vertex,2,fv, _TNL_ATTRIB_POS);
- MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS);
- MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS);
- MAKE_DISPATCH_ATTR(Vertex,4,f, _TNL_ATTRIB_POS);
- MAKE_DISPATCH_ATTR(Vertex,4,fv, _TNL_ATTRIB_POS);
-
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,f, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,fv, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,f, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,fv, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,f, 0);
- MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,fv, 0);
-
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,f, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,fv, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,f, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,fv, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,f, 0);
- MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,fv, 0);
+ MKDISP(Color3f, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf3);
+ MKDISP(Color3fv, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
+ MKDISP(Color4f, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf4);
+ MKDISP(Color4fv, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
+ MKDISP(FogCoordfEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrf1);
+ MKDISP(FogCoordfvEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrfv);
+ MKDISP(Normal3f, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrf3);
+ MKDISP(Normal3fv, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrfv);
+ MKDISP(SecondaryColor3fEXT, 3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrf3);
+ MKDISP(SecondaryColor3fvEXT,3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrfv);
+ MKDISP(TexCoord1f, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf1);
+ MKDISP(TexCoord1fv, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
+ MKDISP(TexCoord2f, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf2);
+ MKDISP(TexCoord2fv, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
+ MKDISP(TexCoord3f, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf3);
+ MKDISP(TexCoord3fv, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
+ MKDISP(TexCoord4f, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf4);
+ MKDISP(TexCoord4fv, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
+ MKDISP(Vertex2f, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf2);
+ MKDISP(Vertex2fv, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
+ MKDISP(Vertex3f, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf3);
+ MKDISP(Vertex3fv, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
+ MKDISP(Vertex4f, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf4);
+ MKDISP(Vertex4fv, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
+
+ MKDISP(MultiTexCoord1fARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf1);
+ MKDISP(MultiTexCoord1fvARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
+ MKDISP(MultiTexCoord2fARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf2);
+ MKDISP(MultiTexCoord2fvARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
+ MKDISP(MultiTexCoord3fARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf3);
+ MKDISP(MultiTexCoord3fvARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
+ MKDISP(MultiTexCoord4fARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf4);
+ MKDISP(MultiTexCoord4fvARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
+
+ MKDISP(VertexAttrib1fNV, 1, 0, _tnl_x86_dispatch_vertexattribf1);
+ MKDISP(VertexAttrib1fvNV, 1, 0, _tnl_x86_dispatch_vertexattribfv);
+ MKDISP(VertexAttrib2fNV, 2, 0, _tnl_x86_dispatch_vertexattribf2);
+ MKDISP(VertexAttrib2fvNV, 2, 0, _tnl_x86_dispatch_vertexattribfv);
+ MKDISP(VertexAttrib3fNV, 3, 0, _tnl_x86_dispatch_vertexattribf3);
+ MKDISP(VertexAttrib3fvNV, 3, 0, _tnl_x86_dispatch_vertexattribfv);
+ MKDISP(VertexAttrib4fNV, 4, 0, _tnl_x86_dispatch_vertexattribf4);
+ MKDISP(VertexAttrib4fvNV, 4, 0, _tnl_x86_dispatch_vertexattribfv);
}
FIXUP(code, 0, 0, attr);
FIXUP(code, 0, 1, size + 1);
FIXUPREL(code, 0, 2, do_choose);
- choose[attr][size] = code;
+ choose[attr][size] = (attrfv_func)code;
}
}
}
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
+ * Daniel Borca <dborca@yahoo.com>
*/
-#if !defined (__DJGPP__) && !defined (__MINGW32__)
-
+#if defined (__DJGPP__) || defined (__MINGW32__)
+#define GLOBL( x ) \
+.globl _##x; \
+_##x:
+#else /* !defined (__DJGPP__) && !defined (__MINGW32__) */
#define GLOBL( x ) \
.globl x; \
x:
+#endif /* !defined (__DJGPP__) && !defined (__MINGW32__) */
-#else /* defined(__DJGPP__) || defined (__MINGW32__) */
-#define GLOBL( x ) \
-.globl _##x; \
-_##x:
+#if !defined (STDCALL_API)
+#define RETCLEAN( x ) ret
+#else
+#define RETCLEAN( x ) ret $x
+#endif
-#endif /* defined(__DJGPP__) || defined (__MINGW32__) */
-.data
-.align 4
+#define _JMP(x) \
+.byte 0xe9; \
+.long x
+
+#define _CALL(x) \
+.byte 0xe8; \
+.long x
-// Someone who knew a lot about this sort of thing would use this
-// macro to note current offsets, etc in a special region of the
-// object file & just make everything work out neat. I do not know
-// enough to do that...
+
+/* Someone who knew a lot about this sort of thing would use this
+ * macro to note current offsets, etc in a special region of the
+ * object file & just make everything work out neat. I don't know
+ * enough to do that...
+ */
#define SUBST( x ) (0x10101010 + x)
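+
+/* For example, `movl $SUBST(1), %ecx' below assembles with the
+ * immediate 0x10101011; the C-side FIXUP loop scans the copied code
+ * for exactly that literal and overwrites it with the real value,
+ * here (tnl->vtx.vertex_size - 1), before the code ever runs.
+ */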
+.data
-// [dBorca] TODO
-// Unfold functions for each vertex size?
-// Build super-specialized SSE versions?
-// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY):
-// need separate routine for the non "fv" case,
-// to clean up the stack!
+/* [dBorca] TODO
+ * Unfold functions for each vertex size?
+ * Build super-specialized SSE versions?
+ *
+ * There is a trick in Vertex*fv: under certain conditions,
+ * we tail to _tnl_wrap_filled_vertex(ctx). This means that
+ * if Vertex*fv is STDCALL, then _tnl_wrap_filled_vertex must
+ * be STDCALL as well, because (GLcontext *) and (GLfloat *)
+ * have the same size.
+ */
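+
+/* In C terms, each generated Vertex*fv behaves roughly like this
+ * sketch (names follow the SUBST() comments; `wrap' stands for
+ * _tnl_wrap_filled_vertex). The jump at the end is in tail position,
+ * which is what ties wrap's calling convention to Vertex*fv's:
+ *
+ *    void Vertex3fv( const GLfloat *v )
+ *    {
+ *       vbptr[0] = v[0];  vbptr[1] = v[1];  vbptr[2] = v[2];
+ *       memcpy( vbptr + 3, vertex + 3, (vertex_size - 3) * 4 );
+ *       vbptr += vertex_size;
+ *       if (--counter)
+ *          return;
+ *       wrap( ctx );   // its `ret' returns straight to our caller
+ *    }
+ */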
+.align 4
GLOBL ( _tnl_x86_Vertex1fv )
movl 4(%esp), %ecx
push %edi
push %esi
- movl SUBST(0), %edi # 0x0 --> tnl->vtx.vbptr
- movl (%ecx), %edx # load v[0]
- movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
- addl $4, %edi # tnl->vtx.vbptr += 1
- movl $SUBST(1), %ecx # 0x1 --> (tnl->vtx.vertex_size - 1)
- movl $SUBST(2), %esi # 0x2 --> (tnl->vtx.vertex + 1)
+ movl SUBST(0), %edi /* 0x0 --> tnl->vtx.vbptr */
+ movl (%ecx), %edx /* load v[0] */
+ movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
+ addl $4, %edi /* tnl->vtx.vbptr += 1 */
+ movl $SUBST(1), %ecx /* 0x1 --> (tnl->vtx.vertex_size - 1) */
+ movl $SUBST(2), %esi /* 0x2 --> (tnl->vtx.vertex + 1) */
repz
movsl %ds:(%esi), %es:(%edi)
- movl %edi, SUBST(0) # 0x0 --> tnl->vtx.vbptr
- movl SUBST(3), %edx # 0x3 --> counter
+ movl %edi, SUBST(0) /* 0x0 --> tnl->vtx.vbptr */
+ movl SUBST(3), %edx /* 0x3 --> counter */
pop %esi
pop %edi
- dec %edx # counter--
- movl %edx, SUBST(3) # 0x3 --> counter
- jne .0 # if (counter != 0) return
- pushl $SUBST(4) # 0x4 --> ctx
- .byte 0xe8 # call ...
- .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
- pop %eax
+ dec %edx /* counter-- */
+ movl %edx, SUBST(3) /* 0x3 --> counter */
+ je .0 /* if (counter == 0) goto .0 */
+ RETCLEAN(4) /* return */
+ .balign 16
.0:
- ret # return
+ movl $SUBST(4), %eax /* load ctx */
+ movl %eax, 4(%esp) /* push ctx */
+ _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex1fv_end )
-
.align 4
GLOBL ( _tnl_x86_Vertex2fv )
movl 4(%esp), %ecx
push %edi
push %esi
- movl SUBST(0), %edi # load tnl->vtx.vbptr
- movl (%ecx), %edx # load v[0]
- movl 4(%ecx), %eax # load v[1]
- movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
- movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
- addl $8, %edi # tnl->vtx.vbptr += 2
- movl $SUBST(1), %ecx # vertex_size - 2
- movl $SUBST(2), %esi # tnl->vtx.vertex + 2
+ movl SUBST(0), %edi /* load tnl->vtx.vbptr */
+ movl (%ecx), %edx /* load v[0] */
+ movl 4(%ecx), %eax /* load v[1] */
+ movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
+ movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
+ addl $8, %edi /* tnl->vtx.vbptr += 2 */
+ movl $SUBST(1), %ecx /* vertex_size - 2 */
+ movl $SUBST(2), %esi /* tnl->vtx.vertex + 2 */
repz
movsl %ds:(%esi), %es:(%edi)
- movl %edi, SUBST(0) # save tnl->vtx.vbptr
- movl SUBST(3), %edx # load counter
+ movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
+ movl SUBST(3), %edx /* load counter */
pop %esi
pop %edi
- dec %edx # counter--
- movl %edx, SUBST(3) # save counter
- jne .1 # if (counter != 0) return
- pushl $SUBST(4) # load ctx
- .byte 0xe8 # call ...
- .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
- pop %eax
+ dec %edx /* counter-- */
+ movl %edx, SUBST(3) /* save counter */
+ je .1 /* if (counter == 0) goto .1 */
+ RETCLEAN(4) /* return */
+ .balign 16
.1:
- ret # return
+ movl $SUBST(4), %eax /* load ctx */
+ movl %eax, 4(%esp) /* push ctx */
+ _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex2fv_end )
.align 4
GLOBL ( _tnl_x86_Vertex3fv )
movl 4(%esp), %ecx
push %edi
push %esi
- movl SUBST(0), %edi # load tnl->vtx.vbptr
- movl (%ecx), %edx # load v[0]
- movl 4(%ecx), %eax # load v[1]
- movl 8(%ecx), %esi # load v[2]
- movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
- movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
- movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2]
- addl $12, %edi # tnl->vtx.vbptr += 3
- movl $SUBST(1), %ecx # vertex_size - 3
- movl $SUBST(2), %esi # tnl->vtx.vertex + 3
+ movl SUBST(0), %edi /* load tnl->vtx.vbptr */
+ movl (%ecx), %edx /* load v[0] */
+ movl 4(%ecx), %eax /* load v[1] */
+ movl 8(%ecx), %esi /* load v[2] */
+ movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
+ movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
+ movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
+ addl $12, %edi /* tnl->vtx.vbptr += 3 */
+ movl $SUBST(1), %ecx /* vertex_size - 3 */
+ movl $SUBST(2), %esi /* tnl->vtx.vertex + 3 */
repz
movsl %ds:(%esi), %es:(%edi)
- movl %edi, SUBST(0) # save tnl->vtx.vbptr
- movl SUBST(3), %edx # load counter
+ movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
+ movl SUBST(3), %edx /* load counter */
pop %esi
pop %edi
- dec %edx # counter--
- movl %edx, SUBST(3) # save counter
- jne .2 # if (counter != 0) return
- pushl $SUBST(4) # load ctx
- .byte 0xe8 # call ...
- .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
- pop %eax
+ dec %edx /* counter-- */
+ movl %edx, SUBST(3) /* save counter */
+ je .2 /* if (counter == 0) goto .2 */
+ RETCLEAN(4) /* return */
+ .balign 16
.2:
- ret # return
+ movl $SUBST(4), %eax /* load ctx */
+ movl %eax, 4(%esp) /* push ctx */
+ _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex3fv_end )
-
.align 4
GLOBL ( _tnl_x86_Vertex4fv )
movl 4(%esp), %ecx
push %edi
push %esi
- movl SUBST(0), %edi # load tnl->vtx.vbptr
- movl (%ecx), %edx # load v[0]
- movl 4(%ecx), %eax # load v[1]
- movl 8(%ecx), %esi # load v[2]
- movl 12(%ecx), %ecx # load v[3]
- movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
- movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
- movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2]
- movl %ecx, 12(%edi) # tnl->vtx.vbptr[3] = v[3]
- addl $16, %edi # tnl->vtx.vbptr += 4
- movl $SUBST(1), %ecx # vertex_size - 4
- movl $SUBST(2), %esi # tnl->vtx.vertex + 3
+ movl SUBST(0), %edi /* load tnl->vtx.vbptr */
+ movl (%ecx), %edx /* load v[0] */
+ movl 4(%ecx), %eax /* load v[1] */
+ movl 8(%ecx), %esi /* load v[2] */
+ movl 12(%ecx), %ecx /* load v[3] */
+ movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
+ movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
+ movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
+ movl %ecx, 12(%edi) /* tnl->vtx.vbptr[3] = v[3] */
+ addl $16, %edi /* tnl->vtx.vbptr += 4 */
+ movl $SUBST(1), %ecx /* vertex_size - 4 */
+ movl $SUBST(2), %esi /* tnl->vtx.vertex + 4 */
repz
movsl %ds:(%esi), %es:(%edi)
- movl %edi, SUBST(0) # save tnl->vtx.vbptr
- movl SUBST(3), %edx # load counter
+ movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
+ movl SUBST(3), %edx /* load counter */
pop %esi
pop %edi
- dec %edx # counter--
- movl %edx, SUBST(3) # save counter
- jne .3 # if (counter != 0) return
- pushl $SUBST(4) # load ctx
- .byte 0xe8 # call ...
- .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
- pop %eax
+ dec %edx /* counter-- */
+ movl %edx, SUBST(3) /* save counter */
+ je .3 /* if (counter == 0) goto .3 */
+ RETCLEAN(4) /* return */
+ .balign 16
.3:
- ret # return
+ movl $SUBST(4), %eax /* load ctx */
+ movl %eax, 4(%esp) /* push ctx */
+ _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex4fv_end )
-
/**
* Generic handlers for vector format data.
*/
-
-GLOBL( _tnl_x86_Attribute1fv)
+GLOBL( _tnl_x86_Attribute1fv )
movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */
movl %eax, SUBST(0) /* store v[0] to current vertex */
- ret
+ RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute1fv_end )
-GLOBL( _tnl_x86_Attribute2fv)
+GLOBL( _tnl_x86_Attribute2fv )
movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */
movl 4(%ecx), %edx /* load v[1] */
movl %eax, SUBST(0) /* store v[0] to current vertex */
movl %edx, SUBST(1) /* store v[1] to current vertex */
- ret
+ RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute2fv_end )
-
-GLOBL( _tnl_x86_Attribute3fv)
+GLOBL( _tnl_x86_Attribute3fv )
movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */
	movl 4(%ecx), %edx	/* load v[1] */
	movl 8(%ecx), %ecx	/* load v[2] */
movl %eax, SUBST(0) /* store v[0] to current vertex */
movl %edx, SUBST(1) /* store v[1] to current vertex */
movl %ecx, SUBST(2) /* store v[2] to current vertex */
- ret
+ RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute3fv_end )
-GLOBL( _tnl_x86_Attribute4fv)
+GLOBL( _tnl_x86_Attribute4fv )
movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */
movl 4(%ecx), %edx /* load v[1] */
	movl %eax, SUBST(0)	/* store v[0] to current vertex */
	movl %edx, SUBST(1)	/* store v[1] to current vertex */
	movl 8(%ecx), %eax	/* load v[2] */
	movl 12(%ecx), %edx	/* load v[3] */
	movl %eax, SUBST(2)	/* store v[2] to current vertex */
movl %edx, SUBST(3) /* store v[3] to current vertex */
- ret
+ RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute4fv_end )
-// Choosers:
-
-// Must generate all of these ahead of first usage. Generate at
-// compile-time?
-
-
-GLOBL( _tnl_x86_choose_fv)
- subl $12, %esp # gcc does 16 byte alignment of stack frames?
- movl $SUBST(0), (%esp) # arg 0 - attrib
- movl $SUBST(1), 4(%esp) # arg 1 - N
- .byte 0xe8 # call ...
- .long SUBST(2) # ... do_choose
- add $12, %esp # tear down stack frame
- jmp *%eax # jump to new func
+/* Choosers:
+ *
+ * Must generate all of these ahead of first usage. Generate at
+ * compile-time?
+ */
+GLOBL( _tnl_x86_choose_fv )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl $SUBST(0), (%esp) /* arg 0 - attrib */
+ movl $SUBST(1), 4(%esp) /* arg 1 - N */
+ _CALL (SUBST(2)) /* call do_choose */
+ add $12, %esp /* tear down stack frame */
+ jmp *%eax /* jump to new func */
GLOBL ( _tnl_x86_choose_fv_end )
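+
+/* In C terms, the stub amounts to a lazy installer (attr and N are
+ * patched in by the FIXUP calls on the C side; do_choose installs
+ * the right function in the table and returns it):
+ *
+ *    void choose_fv( const GLfloat *v )
+ *    {
+ *       attrfv_func f = do_choose( attr, N );
+ *       f( v );   // jmp *%eax: v is still in place at 4(%esp)
+ *    }
+ */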
+/* FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
+ *
+ * In the 1st level dispatch functions, switch to a different
+ * calling convention -- (const GLfloat *v) in %ecx.
+ *
+ * As with regular (x86) dispatch, don't create a new stack frame -
+ * just let the 'ret' in the dispatched function return straight
+ * back to the original caller.
+ *
+ * Vertex/Normal/Color, etc: the address of the function pointer
+ * is known at codegen time.
+ */
-// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
-
-
-
-// In the 1st level dispatch functions, switch to a different
-// calling convention -- (const GLfloat *v) in %ecx.
-//
-// As with regular (x86) dispatch, do not create a new stack frame -
-// just let the 'ret' in the dispatched function return straight
-// back to the original caller.
-
-
-
-// Vertex/Normal/Color, etc: the address of the function pointer
-// is known at codegen time.
-
-
-// Unfortunately, have to play with the stack in the non-fv case:
-//
-GLOBL( _tnl_x86_dispatch_attrf )
- subl $12, %esp # gcc does 16 byte alignment of stack frames?
- leal 16(%esp), %edx # address of first float on stack
- movl %edx, (%esp) # save as 'v'
- call *SUBST(0) # 0x0 --> tabfv[attr][n]
- addl $12, %esp # tear down frame
- ret # return
-GLOBL( _tnl_x86_dispatch_attrf_end )
-
-// The fv case is simpler:
-//
+/* Unfortunately, have to play with the stack in the non-fv case:
+ */
+#if !defined (STDCALL_API)
+GLOBL( _tnl_x86_dispatch_attrf1 )
+GLOBL( _tnl_x86_dispatch_attrf2 )
+GLOBL( _tnl_x86_dispatch_attrf3 )
+GLOBL( _tnl_x86_dispatch_attrf4 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ leal 16(%esp), %edx /* address of first float on stack */
+ movl %edx, (%esp) /* save as 'v' */
+ call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
+ addl $12, %esp /* tear down frame */
+ ret /* return */
+GLOBL( _tnl_x86_dispatch_attrf4_end )
+GLOBL( _tnl_x86_dispatch_attrf3_end )
+GLOBL( _tnl_x86_dispatch_attrf2_end )
+GLOBL( _tnl_x86_dispatch_attrf1_end )
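+
+/* Stack sketch for the block above, assuming a plain cdecl caller:
+ * on entry 0(%esp) holds the return address and the GLfloat
+ * arguments start at 4(%esp). After `subl $12', the first float is
+ * at 16(%esp), which is what the leal captures as `v'; the floats
+ * are contiguous, so they can be handed on as an array.
+ */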
+
+#else /* defined(STDCALL_API) */
+
+GLOBL( _tnl_x86_dispatch_attrf1 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ leal 16(%esp), %edx /* address of first float on stack */
+ movl %edx, (%esp) /* save as 'v' */
+ call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $4 /* return */
+GLOBL( _tnl_x86_dispatch_attrf1_end )
+
+GLOBL( _tnl_x86_dispatch_attrf2 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ leal 16(%esp), %edx /* address of first float on stack */
+ movl %edx, (%esp) /* save as 'v' */
+ call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $8 /* return */
+GLOBL( _tnl_x86_dispatch_attrf2_end )
+
+GLOBL( _tnl_x86_dispatch_attrf3 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ leal 16(%esp), %edx /* address of first float on stack */
+ movl %edx, (%esp) /* save as 'v' */
+ call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $12 /* return */
+GLOBL( _tnl_x86_dispatch_attrf3_end )
+
+GLOBL( _tnl_x86_dispatch_attrf4 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ leal 16(%esp), %edx /* address of first float on stack */
+ movl %edx, (%esp) /* save as 'v' */
+ call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $16 /* return */
+GLOBL( _tnl_x86_dispatch_attrf4_end )
+#endif /* defined(STDCALL_API) */
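+
+/* Frame arithmetic for the STDCALL variants above: we reserve 12
+ * bytes and store the 4-byte `v' pointer at the bottom; the callee's
+ * RETCLEAN(4) pops that slot itself, so only 8 of the 12 bytes are
+ * left for `addl $8, %esp'. The final `ret $N' then pops the
+ * caller's N bytes of float arguments, as STDCALL requires.
+ */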
+
+/* The fv case is simpler:
+ */
GLOBL( _tnl_x86_dispatch_attrfv )
- jmp *SUBST(0) # 0x0 --> tabfv[attr][n]
+ jmp *SUBST(0) /* 0x0 --> tabfv[attr][n] */
GLOBL( _tnl_x86_dispatch_attrfv_end )
-// MultiTexcoord: the address of the function pointer must be
-// calculated, but can use the index argument slot to hold 'v', and
-// avoid setting up a new stack frame.
-//
-// [dBorca]
-// right, this would be the preferred approach, but gcc does not
-// clean up the stack after each function call when optimizing (-fdefer-pop);
-// can it make assumptions about what is already on the stack? I dunno,
-// but in this case, we can't mess with the caller's stack frame, and
-// we must use a model like '_x86_dispatch_attrfv' above. Caveat emptor!
-
-// Also, will only need a maximum of four of each of these per context:
-//
-GLOBL( _tnl_x86_dispatch_multitexcoordf )
+/* MultiTexcoord: the address of the function pointer must be
+ * calculated, but can use the index argument slot to hold 'v', and
+ * avoid setting up a new stack frame.
+ *
+ * [dBorca]
+ * right, this would be the preferred approach, but gcc does not
+ * clean up the stack after each function call when optimizing (-fdefer-pop);
+ * can it make assumptions about what's already on the stack? I dunno,
+ * but in this case, we can't mess with the caller's stack frame, and
+ * we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
+ */
+
+/* Also, will only need a maximum of four of each of these per context:
+ */
+#if !defined (STDCALL_API)
+GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
+GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
+GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
+GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
movl 4(%esp), %ecx
leal 8(%esp), %edx
andl $7, %ecx
movl %edx, 4(%esp)
sall $4, %ecx
- jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n]
-GLOBL( _tnl_x86_dispatch_multitexcoordf_end )
+ jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
+GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
+GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
+GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
GLOBL( _tnl_x86_dispatch_multitexcoordfv )
	movl 4(%esp), %ecx
	movl 8(%esp), %edx	/* load 'v' */
andl $7, %ecx
movl %edx, 4(%esp)
sall $4, %ecx
- jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n]
+ jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
-// VertexAttrib: the address of the function pointer must be
-// calculated.
+#else /* defined (STDCALL_API) */
+
+GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %ecx
+ leal 20(%esp), %edx
+ andl $7, %ecx
+ movl %edx, (%esp)
+ sall $4, %ecx
+ call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $8 /* return */
+GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
+
+GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %ecx
+ leal 20(%esp), %edx
+ andl $7, %ecx
+ movl %edx, (%esp)
+ sall $4, %ecx
+ call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $12 /* return */
+GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
+
+GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %ecx
+ leal 20(%esp), %edx
+ andl $7, %ecx
+ movl %edx, (%esp)
+ sall $4, %ecx
+ call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $16 /* return */
+GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
+
+GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %ecx
+ leal 20(%esp), %edx
+ andl $7, %ecx
+ movl %edx, (%esp)
+ sall $4, %ecx
+ call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $20 /* return */
+GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
-GLOBL( _tnl_x86_dispatch_vertexattribf )
+GLOBL( _tnl_x86_dispatch_multitexcoordfv )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %ecx
+ movl 20(%esp), %edx
+ andl $7, %ecx
+ movl %edx, (%esp)
+ sall $4, %ecx
+ call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $8 /* return */
+GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
+#endif /* defined (STDCALL_API) */
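+
+/* Index math in the multitexcoord dispatchers above: (target & 7)
+ * selects the texture unit and `sall $4' scales it by 16 bytes, one
+ * row of four 4-byte attrfv_func pointers, so the indirect jump or
+ * call lands on tabfv[_TNL_ATTRIB_TEX0 + unit][n-1]; the base was
+ * patched with &tabfv[_TNL_ATTRIB_TEX0][n-1] by the C code.
+ */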
+
+
+/* VertexAttrib: the address of the function pointer must be
+ * calculated.
+ */
+#if !defined (STDCALL_API)
+GLOBL( _tnl_x86_dispatch_vertexattribf1 )
+GLOBL( _tnl_x86_dispatch_vertexattribf2 )
+GLOBL( _tnl_x86_dispatch_vertexattribf3 )
+GLOBL( _tnl_x86_dispatch_vertexattribf4 )
movl 4(%esp), %eax
cmpl $16, %eax
- jb .8 # "cmovge" is not supported on all CPUs
+ jb .8 /* "cmovge" is not supported on all CPUs */
movl $16, %eax
.8:
- leal 8(%esp), %ecx # calculate 'v'
- movl %ecx, 4(%esp) # save in 1st arg slot
+ leal 8(%esp), %ecx /* calculate 'v' */
+ movl %ecx, 4(%esp) /* save in 1st arg slot */
sall $4, %eax
- jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n]
-GLOBL( _tnl_x86_dispatch_vertexattribf_end )
+ jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
+GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
+GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
+GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
GLOBL( _tnl_x86_dispatch_vertexattribfv )
movl 4(%esp), %eax
cmpl $16, %eax
- jb .9 # "cmovge" is not supported on all CPUs
+ jb .9 /* "cmovge" is not supported on all CPUs */
+ movl $16, %eax
+.9:
+ movl 8(%esp), %ecx /* load 'v' */
+ movl %ecx, 4(%esp) /* save in 1st arg slot */
+ sall $4, %eax
+ jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
+
+#else /* defined (STDCALL_API) */
+
+GLOBL( _tnl_x86_dispatch_vertexattribf1 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %eax
+ cmpl $16, %eax
+ jb .81 /* "cmovge" is not supported on all CPUs */
+ movl $16, %eax
+.81:
+ leal 20(%esp), %ecx /* load 'v' */
+ movl %ecx, (%esp) /* save in 1st arg slot */
+ sall $4, %eax
+ call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $8 /* return */
+GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
+
+GLOBL( _tnl_x86_dispatch_vertexattribf2 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %eax
+ cmpl $16, %eax
+ jb .82 /* "cmovge" is not supported on all CPUs */
+ movl $16, %eax
+.82:
+ leal 20(%esp), %ecx /* load 'v' */
+ movl %ecx, (%esp) /* save in 1st arg slot */
+ sall $4, %eax
+ call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $12 /* return */
+GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
+
+GLOBL( _tnl_x86_dispatch_vertexattribf3 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %eax
+ cmpl $16, %eax
+ jb .83 /* "cmovge" is not supported on all CPUs */
+ movl $16, %eax
+.83:
+ leal 20(%esp), %ecx /* load 'v' */
+ movl %ecx, (%esp) /* save in 1st arg slot */
+ sall $4, %eax
+ call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $16 /* return */
+GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
+
+GLOBL( _tnl_x86_dispatch_vertexattribf4 )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %eax
+ cmpl $16, %eax
+ jb .84 /* "cmovge" is not supported on all CPUs */
+ movl $16, %eax
+.84:
+ leal 20(%esp), %ecx /* load 'v' */
+ movl %ecx, (%esp) /* save in 1st arg slot */
+ sall $4, %eax
+ call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $20 /* return */
+GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
+
+GLOBL( _tnl_x86_dispatch_vertexattribfv )
+ subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
+ movl 16(%esp), %eax
+ cmpl $16, %eax
+ jb .9 /* "cmovge" is not supported on all CPUs */
movl $16, %eax
.9:
- movl 8(%esp), %ecx # load 'v'
- movl %ecx, 4(%esp) # save in 1st arg slot
+ movl 20(%esp), %ecx /* load 'v' */
+ movl %ecx, (%esp) /* save in 1st arg slot */
sall $4, %eax
- jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n]
+ call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
+ addl $8, %esp /* tear down frame (4 shaved off by the callee) */
+ ret $8 /* return */
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
+#endif /* defined (STDCALL_API) */