Speedup the venerable mm.[ch] allocator with doubly linked lists and a
[mesa.git] / src / mesa / tnl / t_vtx_x86.c
1 /**************************************************************************
2
3 Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Daniel Borca <dborca@yahoo.com>
32 */
33
34
35 #include "glheader.h"
36 #include "context.h"
37 #include "macros.h"
38 #include "vtxfmt.h"
39 #include "dlist.h"
40 #include "state.h"
41 #include "light.h"
42 #include "api_arrayelt.h"
43 #include "api_noop.h"
44 #include "t_vtx_api.h"
45 #include "simple_list.h"
46
47 #if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY)
48
/* Declare the pair of symbols that bracket one code template: FUNC
 * and FUNC##_end.  Both are declared as char arrays so that
 * (FUNC##_end - FUNC) is the template length in bytes, which is how
 * DFN, MKDISP and _tnl_x86choosers() size their copies.
 * NOTE(review): the symbols are presumably defined in a separate
 * assembly source -- confirm.
 */
#define EXTERN( FUNC ) \
extern const char FUNC[]; \
extern const char FUNC##_end[]
52
/* Templates copied and patched by the makeX86Attribute*fv and
 * makeX86Vertex*fv generators below.
 */
EXTERN( _tnl_x86_Attribute1fv );
EXTERN( _tnl_x86_Attribute2fv );
EXTERN( _tnl_x86_Attribute3fv );
EXTERN( _tnl_x86_Attribute4fv );
EXTERN( _tnl_x86_Vertex1fv );
EXTERN( _tnl_x86_Vertex2fv );
EXTERN( _tnl_x86_Vertex3fv );
EXTERN( _tnl_x86_Vertex4fv );

/* Templates for the dispatch stubs installed by
 * _tnl_x86_exec_vtxfmt_init().
 */
EXTERN( _tnl_x86_dispatch_attrf1 );
EXTERN( _tnl_x86_dispatch_attrf2 );
EXTERN( _tnl_x86_dispatch_attrf3 );
EXTERN( _tnl_x86_dispatch_attrf4 );
EXTERN( _tnl_x86_dispatch_attrfv );
EXTERN( _tnl_x86_dispatch_multitexcoordf1 );
EXTERN( _tnl_x86_dispatch_multitexcoordf2 );
EXTERN( _tnl_x86_dispatch_multitexcoordf3 );
EXTERN( _tnl_x86_dispatch_multitexcoordf4 );
EXTERN( _tnl_x86_dispatch_multitexcoordfv );
EXTERN( _tnl_x86_dispatch_vertexattribf1 );
EXTERN( _tnl_x86_dispatch_vertexattribf2 );
EXTERN( _tnl_x86_dispatch_vertexattribf3 );
EXTERN( _tnl_x86_dispatch_vertexattribf4 );
EXTERN( _tnl_x86_dispatch_vertexattribfv );

/* Template for the chooser stubs built by _tnl_x86choosers(). */
EXTERN( _tnl_x86_choose_fv );
79
80
/* When non-zero, FIXUP and FIXUPREL find each placeholder by scanning
 * the copied code for its marker value and ignore their KNOWN_OFFSET
 * argument.  When zero they patch at KNOWN_OFFSET and assert that the
 * marker is there.
 */
#define DONT_KNOW_OFFSETS 1
82
83
/* Common prologue of the makeX86* generators.  Expands to declarations
 * followed by statements, and leaves these names in the caller's scope:
 *
 *   dfn         new struct _tnl_dynfn, passed to insert_at_head() on
 *               CACHE and given key KEY;
 *   start, end  bounds of the template FUNC .. FUNC##_end;
 *   offset      scan position used by FIXUP/FIXUPREL, starting at 0.
 *
 * dfn->code receives a copy of the template in memory obtained from
 * ALIGN_MALLOC( size, 16 ).
 * NOTE(review): neither allocation result is checked before use.
 */
#define DFN( FUNC, CACHE, KEY ) \
struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn );\
const char *start = FUNC; \
const char *end = FUNC##_end; \
int offset = 0; \
insert_at_head( &CACHE, dfn ); \
dfn->key = KEY; \
dfn->code = ALIGN_MALLOC( end - start, 16 ); \
_mesa_memcpy (dfn->code, start, end - start)
93
94
95
/* Optional tracing for the fixup macros.  DEBUG_VTX is explicitly
 * undefined here, so the printing variants below are compiled out
 * unless this #undef is changed.
 */
#undef DEBUG_VTX

#ifdef DEBUG_VTX
/* Print the patched offset and the absolute value stored there. */
#define FIXUP_PRINTF( offset, NEWVAL ) \
fprintf(stderr, "%s/%d: offset %d, new value: 0x%x\n", __FILE__, __LINE__, offset, (int)(NEWVAL))
/* Print the patched offset and the relative displacement, computed with
 * the same expression FIXUPREL stores.
 */
#define FIXUPREL_PRINTF( offset, NEWVAL, CODE ) \
fprintf(stderr, "%s/%d: offset %d, new value: 0x%x\n", __FILE__, __LINE__, offset, (int)(NEWVAL) - ((int)(CODE)+offset) - 4)
#else
/* Tracing disabled: both macros expand to nothing. */
#define FIXUP_PRINTF( offset, NEWVAL )
#define FIXUPREL_PRINTF( offset, NEWVAL, CODE )
#endif
107
/* Patch one absolute 32-bit placeholder in a copied code template.
 *
 *   CODE          start of the copied code (byte pointer).
 *   KNOWN_OFFSET  byte offset of the placeholder; only used when
 *                 DONT_KNOW_OFFSETS is zero.
 *   CHECKVAL      placeholder number; the marker looked for is
 *                 0x10101010 + CHECKVAL.
 *   NEWVAL        value written over the marker, converted to int.
 *
 * Requires an int variable named `offset' in the caller's scope.  With
 * DONT_KNOW_OFFSETS set, the scan starts at `offset' and leaves it just
 * past the patched word, so successive fixups must be issued in the
 * order their markers occur in the code.  The scan has no upper bound:
 * a marker that is absent makes it run past the end of the buffer.
 *
 * All macro arguments are parenthesised so that expression arguments
 * (e.g. `1 << 1' or `a ? b : c') expand with the intended precedence.
 */
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )            \
do {                                                             \
   GLint subst = 0x10101010 + (CHECKVAL);                        \
                                                                 \
   if (DONT_KNOW_OFFSETS) {                                      \
      while (*(int *)((CODE)+offset) != subst) offset++;         \
      *(int *)((CODE)+offset) = (int)(NEWVAL);                   \
      FIXUP_PRINTF(offset, NEWVAL);                              \
      offset += 4;                                               \
   }                                                             \
   else {                                                        \
      int *icode = (int *)((CODE)+(KNOWN_OFFSET));               \
      assert (*icode == subst);                                  \
      *icode = (int)(NEWVAL);                                    \
   }                                                             \
} while (0)
124
125
126
/* Patch one pc-relative 32-bit placeholder in a copied code template.
 *
 * Arguments are as for FIXUP, except that the value stored is the
 * displacement from the end of the patched word to NEWVAL:
 *
 *     NEWVAL - (address of patched word) - 4
 *
 * Requires an int variable named `offset' in the caller's scope and,
 * with DONT_KNOW_OFFSETS set, advances it past the patched word.  The
 * scan has no upper bound: a marker that is absent makes it run past
 * the end of the buffer.
 *
 * All macro arguments are parenthesised so that expression arguments
 * expand with the intended precedence.
 */
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )                   \
do {                                                                       \
   GLint subst = 0x10101010 + (CHECKVAL);                                  \
                                                                           \
   if (DONT_KNOW_OFFSETS) {                                                \
      while (*(int *)((CODE)+offset) != subst) offset++;                   \
      *(int *)((CODE)+offset) = (int)(NEWVAL) - ((int)(CODE)+offset) - 4;  \
      FIXUPREL_PRINTF(offset, NEWVAL, CODE);                               \
      offset += 4;                                                         \
   }                                                                       \
   else {                                                                  \
      int *icode = (int *)((CODE)+(KNOWN_OFFSET));                         \
      assert (*icode == subst);                                            \
      *icode = (int)(NEWVAL) - (int)(icode) - 4;                           \
   }                                                                       \
} while (0)
143
144
145
146
147 /* Build specialized versions of the immediate calls on the fly for
148 * the current state. Generic x86 versions.
149 */
150
151 static struct _tnl_dynfn *makeX86Vertex1fv( GLcontext *ctx, int vertex_size )
152 {
153 TNLcontext *tnl = TNL_CONTEXT(ctx);
154 DFN ( _tnl_x86_Vertex1fv, tnl->vtx.cache.Vertex[1-1], vertex_size );
155
156 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
157 FIXUP(dfn->code, 0, 1, vertex_size - 1);
158 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[1]);
159 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
160 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
161 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
162 FIXUP(dfn->code, 0, 4, (int)ctx);
163 FIXUPREL(dfn->code, 0, 5, (int)&_tnl_wrap_filled_vertex);
164
165 return dfn;
166 }
167
168 static struct _tnl_dynfn *makeX86Vertex2fv( GLcontext *ctx, int vertex_size )
169 {
170 TNLcontext *tnl = TNL_CONTEXT(ctx);
171 DFN ( _tnl_x86_Vertex2fv, tnl->vtx.cache.Vertex[2-1], vertex_size );
172
173 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
174 FIXUP(dfn->code, 0, 1, vertex_size - 2);
175 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[2]);
176 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
177 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
178 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
179 FIXUP(dfn->code, 0, 4, (int)ctx);
180 FIXUPREL(dfn->code, 0, 5, (int)&_tnl_wrap_filled_vertex);
181
182 return dfn;
183 }
184
185 static struct _tnl_dynfn *makeX86Vertex3fv( GLcontext *ctx, int vertex_size )
186 {
187 TNLcontext *tnl = TNL_CONTEXT(ctx);
188
189 switch (vertex_size) {
190 default: {
191 DFN ( _tnl_x86_Vertex3fv, tnl->vtx.cache.Vertex[3-1], vertex_size );
192
193 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
194 FIXUP(dfn->code, 0, 1, vertex_size - 3);
195 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[3]);
196 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
197 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
198 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
199 FIXUP(dfn->code, 0, 4, (int)ctx);
200 FIXUPREL(dfn->code, 0, 5, (int)&_tnl_wrap_filled_vertex);
201 return dfn;
202 }
203 }
204 }
205
206 static struct _tnl_dynfn *makeX86Vertex4fv( GLcontext *ctx, int vertex_size )
207 {
208 TNLcontext *tnl = TNL_CONTEXT(ctx);
209 DFN ( _tnl_x86_Vertex4fv, tnl->vtx.cache.Vertex[4-1], vertex_size );
210
211 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
212 FIXUP(dfn->code, 0, 1, vertex_size - 4);
213 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[4]);
214 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
215 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
216 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
217 FIXUP(dfn->code, 0, 4, (int)ctx);
218 FIXUPREL(dfn->code, 0, 5, (int)&_tnl_wrap_filled_vertex);
219
220 return dfn;
221 }
222
223
224 static struct _tnl_dynfn *makeX86Attribute1fv( GLcontext *ctx, int dest )
225 {
226 TNLcontext *tnl = TNL_CONTEXT(ctx);
227 DFN ( _tnl_x86_Attribute1fv, tnl->vtx.cache.Attribute[1-1], dest );
228
229 FIXUP(dfn->code, 0, 0, dest);
230
231 return dfn;
232 }
233
234 static struct _tnl_dynfn *makeX86Attribute2fv( GLcontext *ctx, int dest )
235 {
236 TNLcontext *tnl = TNL_CONTEXT(ctx);
237 DFN ( _tnl_x86_Attribute2fv, tnl->vtx.cache.Attribute[2-1], dest );
238
239 FIXUP(dfn->code, 0, 0, dest);
240 FIXUP(dfn->code, 0, 1, 4+dest);
241
242 return dfn;
243 }
244
245 static struct _tnl_dynfn *makeX86Attribute3fv( GLcontext *ctx, int dest )
246 {
247 TNLcontext *tnl = TNL_CONTEXT(ctx);
248 DFN ( _tnl_x86_Attribute3fv, tnl->vtx.cache.Attribute[3-1], dest );
249
250 FIXUP(dfn->code, 0, 0, dest);
251 FIXUP(dfn->code, 0, 1, 4+dest);
252 FIXUP(dfn->code, 0, 2, 8+dest);
253
254 return dfn;
255 }
256
257 static struct _tnl_dynfn *makeX86Attribute4fv( GLcontext *ctx, int dest )
258 {
259 TNLcontext *tnl = TNL_CONTEXT(ctx);
260 DFN ( _tnl_x86_Attribute4fv, tnl->vtx.cache.Attribute[4-1], dest );
261
262 FIXUP(dfn->code, 0, 0, dest);
263 FIXUP(dfn->code, 0, 1, 4+dest);
264 FIXUP(dfn->code, 0, 2, 8+dest);
265 FIXUP(dfn->code, 0, 3, 12+dest);
266
267 return dfn;
268 }
269
270
271 void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
272 {
273 gen->Vertex[0] = makeX86Vertex1fv;
274 gen->Vertex[1] = makeX86Vertex2fv;
275 gen->Vertex[2] = makeX86Vertex3fv;
276 gen->Vertex[3] = makeX86Vertex4fv;
277 gen->Attribute[0] = makeX86Attribute1fv;
278 gen->Attribute[1] = makeX86Attribute2fv;
279 gen->Attribute[2] = makeX86Attribute3fv;
280 gen->Attribute[3] = makeX86Attribute4fv;
281 }
282
283
/* Create one dispatch stub and install it as vfmt->FUNC.
 *
 * The template WARP .. WARP##_end is copied into memory obtained from
 * ALIGN_MALLOC( size, 16 ), its placeholder 0 is patched with the
 * address of tnl->vtx.tabfv[ATTR][SIZE-1], and the copy is stored in
 * the vertex-format slot FUNC.
 *
 * Uses `ctx' and `vfmt' from the caller's scope.  The copy is not
 * recorded anywhere else, so nothing here can free it later.
 */
#define MKDISP(FUNC, SIZE, ATTR, WARP)                                     \
do {                                                                       \
   const char *tmpl_begin = WARP;                                          \
   const char *tmpl_end = WARP##_end;                                      \
   char *stub = ALIGN_MALLOC( tmpl_end - tmpl_begin, 16 );                 \
   int offset = 0;                                                         \
   _mesa_memcpy (stub, tmpl_begin, tmpl_end - tmpl_begin);                 \
   FIXUP(stub, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));   \
   *(void **)&vfmt->FUNC = stub;                                           \
} while (0)
295
296
/* Install the codegen'ed versions of the 2nd level dispatch
 * functions into the context's exec_vtxfmt table.
 *
 * Each MKDISP line names the vertex-format entry point, the component
 * count, the attribute index and the code template to copy; the stub
 * is patched with the address of tnl->vtx.tabfv[attr][size-1].
 *
 * The stubs are allocated here and never recorded: we should keep a
 * list and free them in the end...
 */
void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
{
   GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);

   /* Fixed-function attributes: scalar-argument and vector-argument forms. */
   MKDISP(Color3f, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf3);
   MKDISP(Color3fv, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
   MKDISP(Color4f, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf4);
   MKDISP(Color4fv, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
   MKDISP(FogCoordfEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrf1);
   MKDISP(FogCoordfvEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrfv);
   MKDISP(Normal3f, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrf3);
   MKDISP(Normal3fv, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrfv);
   MKDISP(SecondaryColor3fEXT, 3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrf3);
   MKDISP(SecondaryColor3fvEXT,3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrfv);
   MKDISP(TexCoord1f, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf1);
   MKDISP(TexCoord1fv, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
   MKDISP(TexCoord2f, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf2);
   MKDISP(TexCoord2fv, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
   MKDISP(TexCoord3f, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf3);
   MKDISP(TexCoord3fv, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
   MKDISP(TexCoord4f, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf4);
   MKDISP(TexCoord4fv, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
   MKDISP(Vertex2f, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf2);
   MKDISP(Vertex2fv, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
   MKDISP(Vertex3f, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf3);
   MKDISP(Vertex3fv, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
   MKDISP(Vertex4f, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf4);
   MKDISP(Vertex4fv, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);

   /* Multitexture entry points are all patched relative to
    * _TNL_ATTRIB_TEX0.
    * NOTE(review): presumably the multitexcoord templates add the
    * texture unit at run time -- confirm against the assembly.
    */
   MKDISP(MultiTexCoord1fARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf1);
   MKDISP(MultiTexCoord1fvARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
   MKDISP(MultiTexCoord2fARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf2);
   MKDISP(MultiTexCoord2fvARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
   MKDISP(MultiTexCoord3fARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf3);
   MKDISP(MultiTexCoord3fvARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
   MKDISP(MultiTexCoord4fARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf4);
   MKDISP(MultiTexCoord4fvARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);

   /* Generic vertex attributes are all patched relative to attribute 0.
    * NOTE(review): presumably the vertexattrib templates add the
    * attribute index at run time -- confirm against the assembly.
    */
   MKDISP(VertexAttrib1fNV, 1, 0, _tnl_x86_dispatch_vertexattribf1);
   MKDISP(VertexAttrib1fvNV, 1, 0, _tnl_x86_dispatch_vertexattribfv);
   MKDISP(VertexAttrib2fNV, 2, 0, _tnl_x86_dispatch_vertexattribf2);
   MKDISP(VertexAttrib2fvNV, 2, 0, _tnl_x86_dispatch_vertexattribfv);
   MKDISP(VertexAttrib3fNV, 3, 0, _tnl_x86_dispatch_vertexattribf3);
   MKDISP(VertexAttrib3fvNV, 3, 0, _tnl_x86_dispatch_vertexattribfv);
   MKDISP(VertexAttrib4fNV, 4, 0, _tnl_x86_dispatch_vertexattribf4);
   MKDISP(VertexAttrib4fvNV, 4, 0, _tnl_x86_dispatch_vertexattribfv);
}
347
348
349 /* Install the codegen'ed choosers.
350 * We should keep a list and free them in the end...
351 */
352 void _tnl_x86choosers( tnl_attrfv_func (*choose)[4],
353 tnl_attrfv_func (*do_choose)( GLuint attr,
354 GLuint sz ))
355 {
356 int attr, size;
357
358 for (attr = 0; attr < _TNL_MAX_ATTR_CODEGEN; attr++) {
359 for (size = 0; size < 4; size++) {
360 char *code;
361 const char *start = _tnl_x86_choose_fv;
362 const char *end = _tnl_x86_choose_fv_end;
363 int offset = 0;
364 code = ALIGN_MALLOC( end - start, 16 );
365 _mesa_memcpy (code, start, end - start);
366 FIXUP(code, 0, 0, attr);
367 FIXUP(code, 0, 1, size + 1);
368 FIXUPREL(code, 0, 2, do_choose);
369 choose[attr][size] = (tnl_attrfv_func)code;
370 }
371 }
372 }
373
374 #else
375
/* Fallback used when USE_X86_ASM is not defined or
 * HAVE_NONSTANDARD_GLAPIENTRY is defined: leaves `gen' untouched.
 */
void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
{
   (void) gen;   /* unused */
}
380
381
/* Fallback used when USE_X86_ASM is not defined or
 * HAVE_NONSTANDARD_GLAPIENTRY is defined: installs nothing.
 */
void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
{
   (void) ctx;   /* unused */
}
386
387
/* Fallback used when USE_X86_ASM is not defined or
 * HAVE_NONSTANDARD_GLAPIENTRY is defined: leaves the `choose' table
 * untouched.
 */
void _tnl_x86choosers( tnl_attrfv_func (*choose)[4],
tnl_attrfv_func (*do_choose)( GLuint attr,
GLuint sz ))
{
   (void) choose;      /* unused */
   (void) do_choose;   /* unused */
}
395
396 #endif