/* (extraction residue, preserved as a comment)
 * updated some printfs, added comment about sched_yield
 * [mesa.git] / src / mesa / tnl / t_vtx_x86_gcc.S
 */
1 /**************************************************************************
2
3 Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Daniel Borca <dborca@yahoo.com>
32 */
33
/* GLOBL(x): define label `x' and export it as a global symbol.
 * COFF-style targets (DJGPP / MinGW / Cygwin) decorate C-visible symbols
 * with a leading underscore; ELF targets do not, hence the two variants.
 */
34 #if defined (__DJGPP__) || defined (__MINGW32__) || defined (__CYGWIN__)
35 #define GLOBL( x ) \
36 .globl _##x; \
37 _##x:
38 #else /* !defined (__DJGPP__) && !defined (__MINGW32__) && !defined (__CYGWIN__) */
39 #define GLOBL( x ) \
40 .globl x; \
41 x:
42 #endif /* !defined (__DJGPP__) && !defined (__MINGW32__) && !defined (__CYGWIN__) */
43 
44 
/* RETCLEAN(x): return from a first-level dispatch function.
 * cdecl: plain `ret' (caller pops its own arguments).
 * stdcall: `ret $x' pops x bytes of arguments on behalf of the caller.
 */
45 #if !defined (STDCALL_API)
46 #define RETCLEAN( x ) ret
47 #else
48 #define RETCLEAN( x ) ret $x
49 #endif
50
51
/* _JMP/_CALL(x): hand-assemble `jmp rel32' (0xe9) / `call rel32' (0xe8)
 * so the 32-bit displacement occupies a fixed 4-byte slot that the
 * runtime code generator can locate and patch in the copied template.
 */
52 #define _JMP(x) \
53 .byte 0xe9; \
54 .long x
55 
56 #define _CALL(x) \
57 .byte 0xe8; \
58 .long x
59 
60 
61 /* Someone who knew a lot about this sort of thing would use this
62 * macro to note current offsets, etc in a special region of the
63 * object file & just make everything work out neat. I don't know
64 * enough to do that...
65 */
/* SUBST(x): a distinctive magic constant (0x10101010 + x) embedded in the
 * template code below; presumably the C-side codegen scans the copied
 * bytes for these magics and overwrites them with real addresses/values
 * (see the note above) -- which is why the templates' instruction
 * encodings must never be changed.
 */
66 
67 #define SUBST( x ) (0x10101010 + x)
68
69
70 .data
71
72
73 /* [dBorca] TODO
74 * Unfold functions for each vertex size?
75 * Build super-specialized SSE versions?
76 *
77 * There is a trick in Vertex*fv: under certain conditions,
78 * we tail to _tnl_wrap_filled_vertex(ctx). This means that
79 * if Vertex*fv is STDCALL, then _tnl_wrap_filled_vertex must
80 * be STDCALL as well, because (GLcontext *) and (GLfloat *)
81 * have the same size.
82 */
83 .align 4
/* Codegen template: Vertex1fv( const GLfloat *v ).
 * The bytes between _tnl_x86_Vertex1fv and _tnl_x86_Vertex1fv_end are
 * copied at runtime and every SUBST(n) slot is patched (0: &tnl->vtx.vbptr,
 * 1: vertex_size-1, 2: tnl->vtx.vertex+1, 3: &counter, 4: ctx,
 * 5: _tnl_wrap_filled_vertex) -- do not alter the instruction encoding.
 * Stores v[0] into the vertex buffer, replays the remaining dwords of the
 * current vertex template, decrements the buffer counter, and tail-jumps
 * to _tnl_wrap_filled_vertex(ctx) when the buffer fills up.
 */
84 GLOBL ( _tnl_x86_Vertex1fv )
85 movl 4(%esp), %ecx
86 push %edi
87 push %esi
88 movl SUBST(0), %edi /* 0x0 --> tnl->vtx.vbptr */
89 movl (%ecx), %edx /* load v[0] */
90 movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
91 addl $4, %edi /* tnl->vtx.vbptr += 1 */
92 movl $SUBST(1), %ecx /* 0x1 --> (tnl->vtx.vertex_size - 1) */
93 movl $SUBST(2), %esi /* 0x2 --> (tnl->vtx.vertex + 1) */
94 repz
95 movsl %ds:(%esi), %es:(%edi) /* copy %ecx remaining dwords of the vertex */
96 movl %edi, SUBST(0) /* 0x0 --> tnl->vtx.vbptr */
97 movl SUBST(3), %edx /* 0x3 --> counter */
98 pop %esi
99 pop %edi
100 dec %edx /* counter-- (sets ZF; movl below preserves flags) */
101 movl %edx, SUBST(3) /* 0x3 --> counter */
102 je .0 /* if (counter == 0) goto .0 */
103 RETCLEAN(4) /* return */
104 .balign 16
105 .0:
/* Buffer full: overwrite our own argument slot with ctx and tail-jump,
 * so _tnl_wrap_filled_vertex returns straight to the original caller.
 */
106 movl $SUBST(4), %eax /* load ctx */
107 movl %eax, 4(%esp) /* push ctx */
108 _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
109 GLOBL ( _tnl_x86_Vertex1fv_end )
110
111 .align 4
/* Codegen template: Vertex2fv( const GLfloat *v ).
 * Same runtime-patched template scheme as _tnl_x86_Vertex1fv, but emits
 * two components before replaying the rest of the vertex; encoding and
 * byte length between the start/_end labels must not change.
 */
112 GLOBL ( _tnl_x86_Vertex2fv )
113 movl 4(%esp), %ecx
114 push %edi
115 push %esi
116 movl SUBST(0), %edi /* load tnl->vtx.vbptr */
117 movl (%ecx), %edx /* load v[0] */
118 movl 4(%ecx), %eax /* load v[1] */
119 movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
120 movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
121 addl $8, %edi /* tnl->vtx.vbptr += 2 */
122 movl $SUBST(1), %ecx /* vertex_size - 2 */
123 movl $SUBST(2), %esi /* tnl->vtx.vertex + 2 */
124 repz
125 movsl %ds:(%esi), %es:(%edi) /* copy %ecx remaining dwords of the vertex */
126 movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
127 movl SUBST(3), %edx /* load counter */
128 pop %esi
129 pop %edi
130 dec %edx /* counter-- (sets ZF; movl below preserves flags) */
131 movl %edx, SUBST(3) /* save counter */
132 je .1 /* if (counter == 0) goto .1 */
133 RETCLEAN(4) /* return */
134 .balign 16
135 .1:
/* Buffer full: replace our argument with ctx and tail-jump. */
136 movl $SUBST(4), %eax /* load ctx */
137 movl %eax, 4(%esp) /* push ctx */
138 _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
139 GLOBL ( _tnl_x86_Vertex2fv_end )
140
141 .align 4
/* Codegen template: Vertex3fv( const GLfloat *v ).
 * Same runtime-patched template scheme as _tnl_x86_Vertex1fv, but emits
 * three components before replaying the rest of the vertex; encoding and
 * byte length between the start/_end labels must not change.
 */
142 GLOBL ( _tnl_x86_Vertex3fv )
143 movl 4(%esp), %ecx
144 push %edi
145 push %esi
146 movl SUBST(0), %edi /* load tnl->vtx.vbptr */
147 movl (%ecx), %edx /* load v[0] */
148 movl 4(%ecx), %eax /* load v[1] */
149 movl 8(%ecx), %esi /* load v[2] */
150 movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
151 movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
152 movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
153 addl $12, %edi /* tnl->vtx.vbptr += 3 */
154 movl $SUBST(1), %ecx /* vertex_size - 3 */
155 movl $SUBST(2), %esi /* tnl->vtx.vertex + 3 */
156 repz
157 movsl %ds:(%esi), %es:(%edi) /* copy %ecx remaining dwords of the vertex */
158 movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
159 movl SUBST(3), %edx /* load counter */
160 pop %esi
161 pop %edi
162 dec %edx /* counter-- (sets ZF; movl below preserves flags) */
163 movl %edx, SUBST(3) /* save counter */
164 je .2 /* if (counter == 0) goto .2 */
165 RETCLEAN(4) /* return */
166 .balign 16
167 .2:
/* Buffer full: replace our argument with ctx and tail-jump. */
168 movl $SUBST(4), %eax /* load ctx */
169 movl %eax, 4(%esp) /* push ctx */
170 _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
171 GLOBL ( _tnl_x86_Vertex3fv_end )
172
173 .align 4
/* Codegen template: Vertex4fv( const GLfloat *v ).
 * Same runtime-patched template scheme as _tnl_x86_Vertex1fv, but emits
 * four components before replaying the rest of the vertex; encoding and
 * byte length between the start/_end labels must not change.
 */
174 GLOBL ( _tnl_x86_Vertex4fv )
175 movl 4(%esp), %ecx
176 push %edi
177 push %esi
178 movl SUBST(0), %edi /* load tnl->vtx.vbptr */
179 movl (%ecx), %edx /* load v[0] */
180 movl 4(%ecx), %eax /* load v[1] */
181 movl 8(%ecx), %esi /* load v[2] */
182 movl 12(%ecx), %ecx /* load v[3] (done with the pointer; reuse %ecx) */
183 movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
184 movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
185 movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
186 movl %ecx, 12(%edi) /* tnl->vtx.vbptr[3] = v[3] */
187 addl $16, %edi /* tnl->vtx.vbptr += 4 */
188 movl $SUBST(1), %ecx /* vertex_size - 4 */
189 movl $SUBST(2), %esi /* tnl->vtx.vertex + 4 */
190 repz
191 movsl %ds:(%esi), %es:(%edi) /* copy %ecx remaining dwords of the vertex */
192 movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
193 movl SUBST(3), %edx /* load counter */
194 pop %esi
195 pop %edi
196 dec %edx /* counter-- (sets ZF; movl below preserves flags) */
197 movl %edx, SUBST(3) /* save counter */
198 je .3 /* if (counter == 0) goto .3 */
199 RETCLEAN(4) /* return */
200 .balign 16
201 .3:
/* Buffer full: replace our argument with ctx and tail-jump. */
202 movl $SUBST(4), %eax /* load ctx */
203 movl %eax, 4(%esp) /* push ctx */
204 _JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
205 GLOBL ( _tnl_x86_Vertex4fv_end )
206
207
208 /**
209 * Generic handlers for vector format data.
210 */
/* Codegen template: store a 1-component attribute into the current
 * vertex.  SUBST(0) is patched with the absolute address of the
 * attribute's slot; byte length between start/_end must not change.
 */
211 GLOBL( _tnl_x86_Attribute1fv )
212 movl 4(%esp), %ecx
213 movl (%ecx), %eax /* load v[0] */
214 movl %eax, SUBST(0) /* store v[0] to current vertex */
215 RETCLEAN(4)
216 GLOBL ( _tnl_x86_Attribute1fv_end )
217
/* Codegen template: store a 2-component attribute into the current
 * vertex.  SUBST(0)/SUBST(1) are patched with the absolute addresses of
 * the two dword slots; byte length between start/_end must not change.
 */
218 GLOBL( _tnl_x86_Attribute2fv )
219 movl 4(%esp), %ecx
220 movl (%ecx), %eax /* load v[0] */
221 movl 4(%ecx), %edx /* load v[1] */
222 movl %eax, SUBST(0) /* store v[0] to current vertex */
223 movl %edx, SUBST(1) /* store v[1] to current vertex */
224 RETCLEAN(4)
225 GLOBL ( _tnl_x86_Attribute2fv_end )
226
/* Codegen template: store a 3-component attribute into the current
 * vertex.  SUBST(0..2) are patched with the absolute addresses of the
 * three dword slots; byte length between start/_end must not change.
 */
227 GLOBL( _tnl_x86_Attribute3fv )
228 movl 4(%esp), %ecx
229 movl (%ecx), %eax /* load v[0] */
230 movl 4(%ecx), %edx /* load v[1] */
231 movl 8(%ecx), %ecx /* load v[2] (done with the pointer) */
232 movl %eax, SUBST(0) /* store v[0] to current vertex */
233 movl %edx, SUBST(1) /* store v[1] to current vertex */
234 movl %ecx, SUBST(2) /* store v[2] to current vertex */
235 RETCLEAN(4)
236 GLOBL ( _tnl_x86_Attribute3fv_end )
237
/* Codegen template: store a 4-component attribute into the current
 * vertex.  SUBST(0..3) are patched with the absolute addresses of the
 * four dword slots; only %eax/%edx are used, so the loads/stores are
 * interleaved in two pairs.  Byte length between start/_end must not
 * change.
 */
238 GLOBL( _tnl_x86_Attribute4fv )
239 movl 4(%esp), %ecx
240 movl (%ecx), %eax /* load v[0] */
241 movl 4(%ecx), %edx /* load v[1] */
242 movl %eax, SUBST(0) /* store v[0] to current vertex */
243 movl %edx, SUBST(1) /* store v[1] to current vertex */
244 movl 8(%ecx), %eax /* load v[2] */
245 movl 12(%ecx), %edx /* load v[3] */
246 movl %eax, SUBST(2) /* store v[2] to current vertex */
247 movl %edx, SUBST(3) /* store v[3] to current vertex */
248 RETCLEAN(4)
249 GLOBL ( _tnl_x86_Attribute4fv_end )
250
251
252 /* Choosers:
253 *
254 * Must generate all of these ahead of first usage. Generate at
255 * compile-time?
256 */
/* Codegen template: lazy chooser stub.  Patched with a fixed attrib
 * (SUBST(0)) and component count (SUBST(1)); calls do_choose (SUBST(2)),
 * which returns the real handler in %eax, then jumps to it so the
 * original arguments are still on the caller's stack.
 */
257 GLOBL( _tnl_x86_choose_fv )
258 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
259 movl $SUBST(0), (%esp) /* arg 0 - attrib */
260 movl $SUBST(1), 4(%esp) /* arg 1 - N */
261 _CALL (SUBST(2)) /* call do_choose */
262 add $12, %esp /* tear down stack frame */
263 jmp *%eax /* jump to new func */
264 GLOBL ( _tnl_x86_choose_fv_end )
265
266
267 /* FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
268 *
269 * In the 1st level dispatch functions, switch to a different
270 * calling convention -- (const GLfloat *v) in %ecx.
271 *
272 * As with regular (x86) dispatch, don't create a new stack frame -
273 * just let the 'ret' in the dispatched function return straight
274 * back to the original caller.
275 *
276 * Vertex/Normal/Color, etc: the address of the function pointer
277 * is known at codegen time.
278 */
279
280 /* Unfortunately, have to play with the stack in the non-fv case:
281 */
/* Codegen templates: first-level attrf dispatch (f, not fv, variants).
 * Floats arrive by value on the stack, so we build 'v' as a pointer to
 * the first float argument and call the fv handler through the patched
 * pointer slot SUBST(0) (tabfv[attr][n]).
 * cdecl: one shared template serves all four arities, since the caller
 * cleans its own arguments.  stdcall: one template per arity, because
 * `ret $imm' must pop exactly 4*n argument bytes (the callee pops 4 of
 * our 12 local bytes via its own RETCLEAN(4), hence `addl $8').
 */
282 #if !defined (STDCALL_API)
283 GLOBL( _tnl_x86_dispatch_attrf1 )
284 GLOBL( _tnl_x86_dispatch_attrf2 )
285 GLOBL( _tnl_x86_dispatch_attrf3 )
286 GLOBL( _tnl_x86_dispatch_attrf4 )
287 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
288 leal 16(%esp), %edx /* address of first float on stack */
289 movl %edx, (%esp) /* save as 'v' */
290 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
291 addl $12, %esp /* tear down frame */
292 ret /* return */
293 GLOBL( _tnl_x86_dispatch_attrf4_end )
294 GLOBL( _tnl_x86_dispatch_attrf3_end )
295 GLOBL( _tnl_x86_dispatch_attrf2_end )
296 GLOBL( _tnl_x86_dispatch_attrf1_end )
297 
298 #else /* defined(STDCALL_API) */
299 
300 GLOBL( _tnl_x86_dispatch_attrf1 )
301 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
302 leal 16(%esp), %edx /* address of first float on stack */
303 movl %edx, (%esp) /* save as 'v' */
304 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
305 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
306 ret $4 /* return, popping 1 float arg */
307 GLOBL( _tnl_x86_dispatch_attrf1_end )
308 
309 GLOBL( _tnl_x86_dispatch_attrf2 )
310 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
311 leal 16(%esp), %edx /* address of first float on stack */
312 movl %edx, (%esp) /* save as 'v' */
313 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
314 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
315 ret $8 /* return, popping 2 float args */
316 GLOBL( _tnl_x86_dispatch_attrf2_end )
317 
318 GLOBL( _tnl_x86_dispatch_attrf3 )
319 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
320 leal 16(%esp), %edx /* address of first float on stack */
321 movl %edx, (%esp) /* save as 'v' */
322 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
323 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
324 ret $12 /* return, popping 3 float args */
325 GLOBL( _tnl_x86_dispatch_attrf3_end )
326 
327 GLOBL( _tnl_x86_dispatch_attrf4 )
328 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
329 leal 16(%esp), %edx /* address of first float on stack */
330 movl %edx, (%esp) /* save as 'v' */
331 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
332 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
333 ret $16 /* return, popping 4 float args */
334 GLOBL( _tnl_x86_dispatch_attrf4_end )
335 #endif /* defined(STDCALL_API) */
336
337 /* The fv case is simpler:
338 */
/* Codegen template: first-level attrfv dispatch.  'v' is already the
 * sole argument, so simply tail-jump through the patched handler
 * pointer; the handler's own ret/RETCLEAN returns to the GL caller.
 */
339 GLOBL( _tnl_x86_dispatch_attrfv )
340 jmp *SUBST(0) /* 0x0 --> tabfv[attr][n] */
341 GLOBL( _tnl_x86_dispatch_attrfv_end )
342
343
344 /* MultiTexcoord: the address of the function pointer must be
345 * calculated, but can use the index argument slot to hold 'v', and
346 * avoid setting up a new stack frame.
347 *
348 * [dBorca]
349 * right, this would be the preferred approach, but gcc does not
350 * clean up the stack after each function call when optimizing (-fdefer-pop);
351 * can it make assumptions about what's already on the stack? I dunno,
352 * but in this case, we can't mess with the caller's stack frame, and
353 * we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
354 */
355
356 /* Also, will only need a maximum of four of each of these per context:
357 */
/* Codegen templates: MultiTexCoord dispatch.  The texture-unit enum
 * arrives as arg 0; `andl $7' keeps its low 3 bits (unit 0..7) and
 * `sall $4' scales by 16 -- apparently 16 bytes per unit in the patched
 * table SUBST(0) (tabfv), i.e. one row of handler pointers per unit
 * (TODO confirm row layout against t_vtx_x86.c).  'v' (computed for the
 * f variants, loaded for fv) is stored over arg 0 / into the local arg
 * slot, then control transfers through the selected table entry.
 * cdecl: reuse the caller's unit-arg slot and tail-jump -- no new frame.
 * stdcall: must pop our own args, so build a small frame, call, and
 * `ret $imm' (unit + n floats / unit + pointer); the callee's
 * RETCLEAN(4) already popped 4 of the 12 local bytes, hence `addl $8'.
 */
358 #if !defined (STDCALL_API)
359 GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
360 GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
361 GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
362 GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
363 movl 4(%esp), %ecx
364 leal 8(%esp), %edx
365 andl $7, %ecx
366 movl %edx, 4(%esp)
367 sall $4, %ecx
368 jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
369 GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
370 GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
371 GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
372 GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
373 
374 GLOBL( _tnl_x86_dispatch_multitexcoordfv )
375 movl 4(%esp), %ecx
376 movl 8(%esp), %edx
377 andl $7, %ecx
378 movl %edx, 4(%esp)
379 sall $4, %ecx
380 jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
381 GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
382 
383 #else /* defined (STDCALL_API) */
384 
385 GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
386 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
387 movl 16(%esp), %ecx
388 leal 20(%esp), %edx
389 andl $7, %ecx
390 movl %edx, (%esp)
391 sall $4, %ecx
392 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
393 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
394 ret $8 /* return, popping unit + 1 float */
395 GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
396 
397 GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
398 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
399 movl 16(%esp), %ecx
400 leal 20(%esp), %edx
401 andl $7, %ecx
402 movl %edx, (%esp)
403 sall $4, %ecx
404 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
405 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
406 ret $12 /* return, popping unit + 2 floats */
407 GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
408 
409 GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
410 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
411 movl 16(%esp), %ecx
412 leal 20(%esp), %edx
413 andl $7, %ecx
414 movl %edx, (%esp)
415 sall $4, %ecx
416 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
417 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
418 ret $16 /* return, popping unit + 3 floats */
419 GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
420 
421 GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
422 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
423 movl 16(%esp), %ecx
424 leal 20(%esp), %edx
425 andl $7, %ecx
426 movl %edx, (%esp)
427 sall $4, %ecx
428 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
429 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
430 ret $20 /* return, popping unit + 4 floats */
431 GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
432 
433 GLOBL( _tnl_x86_dispatch_multitexcoordfv )
434 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
435 movl 16(%esp), %ecx
436 movl 20(%esp), %edx
437 andl $7, %ecx
438 movl %edx, (%esp)
439 sall $4, %ecx
440 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
441 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
442 ret $8 /* return, popping unit + pointer */
443 GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
444 #endif /* defined (STDCALL_API) */
445
446
447 /* VertexAttrib: the address of the function pointer must be
448 * calculated.
449 */
/* Codegen templates: VertexAttrib dispatch.  The attrib index arrives as
 * arg 0 and is clamped to at most 16 (indices >= 16 all map to slot 16,
 * presumably an error/fallback row -- confirm against t_vtx_x86.c); a
 * plain jb/mov is used because cmov is not available on all target CPUs.
 * The clamped index is scaled by 16 (`sall $4') into the patched table
 * SUBST(0) (tabfv), 'v' is computed (f variants) or loaded (fv) into the
 * first arg slot, and control transfers through the selected entry.
 * cdecl: reuse the caller's index-arg slot and tail-jump -- no frame.
 * stdcall: build a 12-byte frame, call, and `ret $imm' to pop index +
 * n floats / index + pointer; the callee's RETCLEAN(4) already popped
 * 4 of the local bytes, hence `addl $8'.
 */
450 #if !defined (STDCALL_API)
451 GLOBL( _tnl_x86_dispatch_vertexattribf1 )
452 GLOBL( _tnl_x86_dispatch_vertexattribf2 )
453 GLOBL( _tnl_x86_dispatch_vertexattribf3 )
454 GLOBL( _tnl_x86_dispatch_vertexattribf4 )
455 movl 4(%esp), %eax
456 cmpl $16, %eax
457 jb .8 /* "cmovge" is not supported on all CPUs */
458 movl $16, %eax
459 .8:
460 leal 8(%esp), %ecx /* calculate 'v' */
461 movl %ecx, 4(%esp) /* save in 1st arg slot */
462 sall $4, %eax
463 jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
464 GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
465 GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
466 GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
467 GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
468 
469 GLOBL( _tnl_x86_dispatch_vertexattribfv )
470 movl 4(%esp), %eax
471 cmpl $16, %eax
472 jb .9 /* "cmovge" is not supported on all CPUs */
473 movl $16, %eax
474 .9:
475 movl 8(%esp), %ecx /* load 'v' */
476 movl %ecx, 4(%esp) /* save in 1st arg slot */
477 sall $4, %eax
478 jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
479 GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
480 
481 #else /* defined (STDCALL_API) */
482 
483 GLOBL( _tnl_x86_dispatch_vertexattribf1 )
484 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
485 movl 16(%esp), %eax
486 cmpl $16, %eax
487 jb .81 /* "cmovge" is not supported on all CPUs */
488 movl $16, %eax
489 .81:
490 leal 20(%esp), %ecx /* load 'v' */
491 movl %ecx, (%esp) /* save in 1st arg slot */
492 sall $4, %eax
493 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
494 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
495 ret $8 /* return, popping index + 1 float */
496 GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
497 
498 GLOBL( _tnl_x86_dispatch_vertexattribf2 )
499 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
500 movl 16(%esp), %eax
501 cmpl $16, %eax
502 jb .82 /* "cmovge" is not supported on all CPUs */
503 movl $16, %eax
504 .82:
505 leal 20(%esp), %ecx /* load 'v' */
506 movl %ecx, (%esp) /* save in 1st arg slot */
507 sall $4, %eax
508 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
509 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
510 ret $12 /* return, popping index + 2 floats */
511 GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
512 
513 GLOBL( _tnl_x86_dispatch_vertexattribf3 )
514 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
515 movl 16(%esp), %eax
516 cmpl $16, %eax
517 jb .83 /* "cmovge" is not supported on all CPUs */
518 movl $16, %eax
519 .83:
520 leal 20(%esp), %ecx /* load 'v' */
521 movl %ecx, (%esp) /* save in 1st arg slot */
522 sall $4, %eax
523 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
524 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
525 ret $16 /* return, popping index + 3 floats */
526 GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
527 
528 GLOBL( _tnl_x86_dispatch_vertexattribf4 )
529 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
530 movl 16(%esp), %eax
531 cmpl $16, %eax
532 jb .84 /* "cmovge" is not supported on all CPUs */
533 movl $16, %eax
534 .84:
535 leal 20(%esp), %ecx /* load 'v' */
536 movl %ecx, (%esp) /* save in 1st arg slot */
537 sall $4, %eax
538 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
539 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
540 ret $20 /* return, popping index + 4 floats */
541 GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
542 
543 GLOBL( _tnl_x86_dispatch_vertexattribfv )
544 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
545 movl 16(%esp), %eax
546 cmpl $16, %eax
547 jb .9 /* "cmovge" is not supported on all CPUs */
548 movl $16, %eax
549 .9:
550 movl 20(%esp), %ecx /* load 'v' */
551 movl %ecx, (%esp) /* save in 1st arg slot */
552 sall $4, %eax
553 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
554 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
555 ret $8 /* return, popping index + pointer */
556 GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
557 #endif /* defined (STDCALL_API) */