codegen'ed versions of the 2nd level dispatch
author Daniel Borca <dborca@users.sourceforge.net>
Thu, 1 Apr 2004 06:53:22 +0000 (06:53 +0000)
committer Daniel Borca <dborca@users.sourceforge.net>
Thu, 1 Apr 2004 06:53:22 +0000 (06:53 +0000)
src/mesa/tnl/t_vtx_api.c
src/mesa/tnl/t_vtx_api.h
src/mesa/tnl/t_vtx_generic.c
src/mesa/tnl/t_vtx_x86.c
src/mesa/tnl/t_vtx_x86_gcc.S

index 6dcd8b43a0bdf0d304f11dcb49d9958c427bcb35..807d99952f03feaa159274eaae9f34c5bce51b03 100644 (file)
@@ -901,6 +901,7 @@ void _tnl_vtx_init( GLcontext *ctx )
    _tnl_current_init( ctx );
    _tnl_exec_vtxfmt_init( ctx );
    _tnl_generic_exec_vtxfmt_init( ctx );
+   _tnl_x86_exec_vtxfmt_init( ctx ); /* [dBorca] x86 DISPATCH_ATTRFV */
 
    _mesa_install_exec_vtxfmt( ctx, &tnl->exec_vtxfmt );
 
index 85cc33f0718b69ec5933a1ea93f272e3a6dff9de..46700fcd0ae7eae402108c05b26e1a9a42076fca 100644 (file)
@@ -78,6 +78,8 @@ extern void _tnl_generic_attr_table_init( attrfv_func (*tab)[4] );
  */
 extern void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen );
 
+extern void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx );
+
 
 
 
index 25dd07a52c7aec3d7db864369ff68504ea76b50e..00dd2e890758c9eabb40f3c26c9c4ab0b8c446ff 100644 (file)
@@ -409,8 +409,7 @@ static void GLAPIENTRY _tnl_VertexAttrib4fvNV( GLuint index,
 
 
 /* Install the generic versions of the 2nd level dispatch functions.
- * There's currently no codegen alternative to these, though one is in
- * the works.
+ * [dBorca] Some of these have a codegen alternative.
  */
 void _tnl_generic_exec_vtxfmt_init( GLcontext *ctx )
 {
index 6ff1a52743d50a42abad34f2914e74390ffa36ca..3f3a198a2433a85f53870ea816f04a81419950a5 100644 (file)
@@ -59,8 +59,6 @@ EXTERN( _x86_Vertex2fv );
 EXTERN( _x86_Vertex3fv );
 EXTERN( _x86_Vertex4fv );
 
-/* None of these used yet:
- */
 EXTERN( _x86_dispatch_attrf );
 EXTERN( _x86_dispatch_attrfv );
 EXTERN( _x86_dispatch_multitexcoordf );
@@ -260,6 +258,77 @@ void _do_choose( void )
 {
 }
 
+
+/* [dBorca] Each macro clones an _x86_dispatch_* template, hands the clone
+ * and a tabfv slot address to FIXUP, then stores the clone in vfmt.  They
+ * are separate on purpose (may need different handling); could collapse.
+ */
+#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR)                     \
+do {    /* expects `ctx` and `vfmt` to be in scope at the call site */ \
+   char *code;                                                         \
+   char *start = (char *)&_x86_dispatch_attr##TYPE;                    \
+   char *end = (char *)&_x86_dispatch_attr##TYPE##_end;                        \
+   int offset = 0;     /* NOTE(review): presumably used by FIXUP */    \
+   code = ALIGN_MALLOC( end - start, 16 ); /* result is not checked */ \
+   memcpy (code, start, end - start);  /* clone template [start,end) */\
+   FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\
+   vfmt->FUNC##SIZE##TYPE = code;      /* e.g. vfmt->Color3f */        \
+} while (0)
+
+
+#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR)            \
+do {    /* ATTR is not referenced: the row is always _TNL_ATTRIB_TEX0 */ \
+   char *code;                                                         \
+   char *start = (char *)&_x86_dispatch_multitexcoord##TYPE;           \
+   char *end = (char *)&_x86_dispatch_multitexcoord##TYPE##_end;       \
+   int offset = 0;     /* NOTE(review): presumably used by FIXUP */    \
+   code = ALIGN_MALLOC( end - start, 16 ); /* result is not checked */ \
+   memcpy (code, start, end - start);  /* clone template [start,end) */\
+   FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1]));\
+   vfmt->FUNC##SIZE##TYPE##ARB = code; /* e.g. vfmt->MultiTexCoord2fARB */ \
+} while (0)
+
+
+#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR)             \
+do {    /* ATTR is not referenced: the row index is always 0 */        \
+   char *code;                                                         \
+   char *start = (char *)&_x86_dispatch_vertexattrib##TYPE;            \
+   char *end = (char *)&_x86_dispatch_vertexattrib##TYPE##_end;                \
+   int offset = 0;     /* NOTE(review): presumably used by FIXUP */    \
+   code = ALIGN_MALLOC( end - start, 16 ); /* result is not checked */ \
+   memcpy (code, start, end - start);  /* clone template [start,end) */\
+   FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1]));  \
+   vfmt->FUNC##SIZE##TYPE##NV = code;  /* e.g. vfmt->VertexAttrib2fNV */ \
+} while (0)
+
+/* [dBorca] Install the codegen'ed versions of the 2nd level dispatch
+ * functions into ctx's exec_vtxfmt.  TODO: keep a list and free them...
+ */
+void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
+{
+   GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
+
+   MAKE_DISPATCH_ATTR(Color,3,f,     _TNL_ATTRIB_COLOR0);
+   MAKE_DISPATCH_ATTR(Color,3,fv,    _TNL_ATTRIB_COLOR0);
+   MAKE_DISPATCH_ATTR(Color,4,f,     _TNL_ATTRIB_COLOR0);
+   MAKE_DISPATCH_ATTR(Color,4,fv,    _TNL_ATTRIB_COLOR0);
+   MAKE_DISPATCH_ATTR(Normal,3,f,    _TNL_ATTRIB_NORMAL);
+   MAKE_DISPATCH_ATTR(Normal,3,fv,   _TNL_ATTRIB_NORMAL);
+   MAKE_DISPATCH_ATTR(TexCoord,2,f,  _TNL_ATTRIB_TEX0);
+   MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0);
+   MAKE_DISPATCH_ATTR(Vertex,3,f,    _TNL_ATTRIB_POS);
+   MAKE_DISPATCH_ATTR(Vertex,3,fv,   _TNL_ATTRIB_POS);
+   /* more (name, size, f/fv, attrib) entries can be added here */
+
+   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f,  0);
+   MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0);
+   /* the last argument is ignored by the macro; add more sizes here */
+
+   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f,  0);
+   MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0);
+   /* the last argument is ignored by the macro; add more sizes here */
+}
+
 #else 
 
 void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
index 937b53bfd3f2055545da43b8507f7994551df5fd..e932faff757d8fa8a7c78c9a3bdee7b5bdf185cb 100644 (file)
@@ -31,11 +31,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY)
-                       
+
+#if !defined (__DJGPP__)
+
 #define GLOBL( x )     \
 .globl x;              \
 x:
 
+#define EXTRN( x )     x
+
+#else  /* defined(__DJGPP__) */
+
+#define GLOBL( x )     \
+.globl _##x;           \
+_##x:
+
+#define EXTRN( x )     _##x
+
+#endif /* defined(__DJGPP__) */
+
 .data
 .align 4
 
@@ -48,6 +62,10 @@ x:
        
 
 
+// [dBorca] TODO
+// Unfold functions for each vertex size?
+// Build super-specialized MMX/SSE versions?
+
 GLOBL ( _x86_Vertex1fv )
        movl    4(%esp), %ecx   
        push    %edi
@@ -217,9 +235,9 @@ GLOBL( _x86_choose_fv)
        subl    $12, %esp       // gcc does 16 byte alignment of stack frames?
        movl    $SUBST(0), (%esp)       // arg 0 - attrib
        movl    $SUBST(1), 4(%esp)      // arg 1 - N
-       call    _do_choose      // new function returned in %eax
-       add     $12, %esp       // tear down stack frame
-       jmp     *%eax           // jump to new func
+       call    EXTRN(_do_choose)       // new function returned in %eax
+       add     $12, %esp               // tear down stack frame
+       jmp     *%eax                   // jump to new func
 GLOBL ( _x86_choosefv_end )
        
        
@@ -251,7 +269,7 @@ GLOBL( _x86_dispatch_attrf )
        subl    $12, %esp       // gcc does 16 byte alignment of stack frames?
        leal    16(%esp), %edx  // address of first float on stack
        movl    %edx, (%esp)    // save as 'v'
-       call    SUBST(0)        // 0x0 --> tabfv[attr][n]
+       call    *SUBST(0)       // 0x0 --> tabfv[attr][n]
        addl    $12, %esp       // tear down frame
        ret                     // return
 GLOBL( _x86_dispatch_attrf_end )
@@ -259,7 +277,7 @@ GLOBL( _x86_dispatch_attrf_end )
 // The fv case is simpler:
 // 
 GLOBL( _x86_dispatch_attrfv )
-       jmp     SUBST(0)        // 0x0 --> tabfv[attr][n]
+       jmp     *SUBST(0)       // 0x0 --> tabfv[attr][n]
 GLOBL( _x86_dispatch_attrfv_end )
 
 
@@ -294,7 +312,7 @@ GLOBL( _x86_dispatch_vertexattribf )
        movl    $16, %ecx
        movl    4(%esp), %eax
        cmpl    $16, %eax
-       cmovge  %ecx, %eax
+       cmovge  %ecx, %eax      // [dBorca] BADBAD! might not be supported
        leal    8(%esp), %ecx   // calculate 'v'
        movl    %ecx, 4(%esp)   // save in 1st arg slot
        sall    $4, %eax
@@ -305,7 +323,7 @@ GLOBL( _x86_dispatch_vertexattribfv )
        movl    $16, %ecx
        movl    4(%esp), %eax
        cmpl    $16, %eax
-       cmovge  %ecx, %eax
+       cmovge  %ecx, %eax      // [dBorca] BADBAD! might not be supported
        movl    8(%esp), %ecx   // load 'v'
        movl    %ecx, 4(%esp)   // save in 1st arg slot
        sall    $4, %eax