1 /**************************************************************************
3 Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Daniel Borca <dborca@yahoo.com>
34 #if defined (__DJGPP__) || defined (__MINGW32__) || defined (__CYGWIN__)
38 #else /* !defined (__DJGPP__) && !defined (__MINGW32__) && !defined (__CYGWIN__) */
42 #endif /* !defined (__DJGPP__) && !defined (__MINGW32__) && !defined (__CYGWIN__) */
/* RETCLEAN(x): return from a template.  Without STDCALL_API it is a plain
 * `ret'; the other definition emits `ret $x', which additionally releases
 * x bytes of arguments on return.
 * NOTE(review): the #else/#endif that separate these two definitions are
 * not visible in this excerpt -- confirm against the full file.
 */
45 #if !defined (STDCALL_API)
46 #define RETCLEAN( x ) ret
48 #define RETCLEAN( x ) ret $x
61 /* Someone who knew a lot about this sort of thing would use this
62 * macro to note current offsets, etc in a special region of the
63 * object file & just make everything work out neat. I don't know
64 * enough to do that...
/* SUBST(x) expands to 0x10101010 + x, a recognisable placeholder operand;
 * the notes next to each use say what the slot stands for.
 */
67 #define SUBST( x ) (0x10101010 + x)
74 * Unfold functions for each vertex size?
75 * Build super-specialized SSE versions?
77 * There is a trick in Vertex*fv: under certain conditions,
78 * we tail-call _tnl_wrap_filled_vertex(ctx). This means that
79 * if Vertex*fv is STDCALL, then _tnl_wrap_filled_vertex must
80 * be STDCALL as well, because (GLcontext *) and (GLfloat *)
/* _tnl_x86_Vertex1fv: store v[0] at the current vbptr, copy further vertex
 * data from tnl->vtx.vertex + 1, write the advanced vbptr back, decrement
 * the counter and either return or tail-jump to the wrap function when the
 * counter reaches zero.  SUBST(n) operands are placeholders (see the notes
 * on each line for what slot n stands for).
 * NOTE(review): %ecx is dereferenced as 'v' before any visible load, %ecx
 * is given a count right before movsl but no repeat prefix is visible, and
 * the `.0' label is not shown -- presumably these live on lines missing
 * from this excerpt; confirm against the full file.
 */
84 GLOBL ( _tnl_x86_Vertex1fv )
88 movl SUBST(0), %edi /* 0x0 --> tnl->vtx.vbptr: edi = current write pointer */
89 movl (%ecx), %edx /* edx = v[0] */
90 movl %edx, (%edi) /* vbptr[0] = v[0] */
91 addl $4, %edi /* step vbptr over the one dword just stored */
92 movl $SUBST(1), %ecx /* 0x1 --> (tnl->vtx.vertex_size - 1) */
93 movl $SUBST(2), %esi /* 0x2 --> (tnl->vtx.vertex + 1) */
95 movsl %ds:(%esi), %es:(%edi) /* string move of a dword from (%esi) to (%edi) */
96 movl %edi, SUBST(0) /* 0x0 --> tnl->vtx.vbptr: write the advanced pointer back */
97 movl SUBST(3), %edx /* 0x3 --> counter: edx = counter */
100 dec %edx /* counter--; the zero flag set here feeds the je below */
101 movl %edx, SUBST(3) /* 0x3 --> counter: store it (movl leaves flags untouched) */
102 je .0 /* if (counter == 0) goto .0 */
103 RETCLEAN(4) /* otherwise return */
106 movl $SUBST(4), %eax /* eax = ctx */
107 movl %eax, 4(%esp) /* store ctx at 4(%esp); a plain store, not a push */
108 _JMP (SUBST(5)) /* tail jump to _tnl_wrap_filled_vertex */
109 GLOBL ( _tnl_x86_Vertex1fv_end )
/* _tnl_x86_Vertex2fv: store v[0..1] at the current vbptr, copy further
 * vertex data from tnl->vtx.vertex + 2, write the advanced vbptr back,
 * decrement the counter and either return or tail-jump to the wrap
 * function when the counter reaches zero.
 * NOTE(review): the load of 'v' into %ecx, any repeat prefix for movsl and
 * the `.1' label are not visible in this excerpt -- confirm against the
 * full file.
 */
112 GLOBL ( _tnl_x86_Vertex2fv )
116 movl SUBST(0), %edi /* edi = tnl->vtx.vbptr */
117 movl (%ecx), %edx /* edx = v[0] */
118 movl 4(%ecx), %eax /* eax = v[1] */
119 movl %edx, (%edi) /* vbptr[0] = v[0] */
120 movl %eax, 4(%edi) /* vbptr[1] = v[1] */
121 addl $8, %edi /* step vbptr over the two dwords just stored */
122 movl $SUBST(1), %ecx /* ecx = vertex_size - 2 */
123 movl $SUBST(2), %esi /* esi = tnl->vtx.vertex + 2 */
125 movsl %ds:(%esi), %es:(%edi) /* string move of a dword from (%esi) to (%edi) */
126 movl %edi, SUBST(0) /* write the advanced vbptr back */
127 movl SUBST(3), %edx /* edx = counter */
130 dec %edx /* counter--; the zero flag set here feeds the je below */
131 movl %edx, SUBST(3) /* store counter (movl leaves flags untouched) */
132 je .1 /* if (counter == 0) goto .1 */
133 RETCLEAN(4) /* otherwise return */
136 movl $SUBST(4), %eax /* eax = ctx */
137 movl %eax, 4(%esp) /* store ctx at 4(%esp); a plain store, not a push */
138 _JMP (SUBST(5)) /* tail jump to _tnl_wrap_filled_vertex */
139 GLOBL ( _tnl_x86_Vertex2fv_end )
/* _tnl_x86_Vertex3fv: store v[0..2] at the current vbptr, copy further
 * vertex data from tnl->vtx.vertex + 3, write the advanced vbptr back,
 * decrement the counter and either return or tail-jump to the wrap
 * function when the counter reaches zero.
 * NOTE(review): the load of 'v' into %ecx, any repeat prefix for movsl and
 * the `.2' label are not visible in this excerpt -- confirm against the
 * full file.
 */
142 GLOBL ( _tnl_x86_Vertex3fv )
146 movl SUBST(0), %edi /* edi = tnl->vtx.vbptr */
147 movl (%ecx), %edx /* edx = v[0] */
148 movl 4(%ecx), %eax /* eax = v[1] */
149 movl 8(%ecx), %esi /* esi = v[2] (esi is scratch here; reloaded below) */
150 movl %edx, (%edi) /* vbptr[0] = v[0] */
151 movl %eax, 4(%edi) /* vbptr[1] = v[1] */
152 movl %esi, 8(%edi) /* vbptr[2] = v[2] */
153 addl $12, %edi /* step vbptr over the three dwords just stored */
154 movl $SUBST(1), %ecx /* ecx = vertex_size - 3 */
155 movl $SUBST(2), %esi /* esi = tnl->vtx.vertex + 3 */
157 movsl %ds:(%esi), %es:(%edi) /* string move of a dword from (%esi) to (%edi) */
158 movl %edi, SUBST(0) /* write the advanced vbptr back */
159 movl SUBST(3), %edx /* edx = counter */
162 dec %edx /* counter--; the zero flag set here feeds the je below */
163 movl %edx, SUBST(3) /* store counter (movl leaves flags untouched) */
164 je .2 /* if (counter == 0) goto .2 */
165 RETCLEAN(4) /* otherwise return */
168 movl $SUBST(4), %eax /* eax = ctx */
169 movl %eax, 4(%esp) /* store ctx at 4(%esp); a plain store, not a push */
170 _JMP (SUBST(5)) /* tail jump to _tnl_wrap_filled_vertex */
171 GLOBL ( _tnl_x86_Vertex3fv_end )
/* _tnl_x86_Vertex4fv: store v[0..3] at the current vbptr, copy further
 * vertex data from tnl->vtx.vertex + 4, write the advanced vbptr back,
 * decrement the counter and either return or tail-jump to the wrap
 * function when the counter reaches zero.
 * NOTE(review): the load of 'v' into %ecx, any repeat prefix for movsl and
 * the `.3' label are not visible in this excerpt -- confirm against the
 * full file.
 */
174 GLOBL ( _tnl_x86_Vertex4fv )
178 movl SUBST(0), %edi /* edi = tnl->vtx.vbptr */
179 movl (%ecx), %edx /* edx = v[0] */
180 movl 4(%ecx), %eax /* eax = v[1] */
181 movl 8(%ecx), %esi /* esi = v[2] (esi is scratch here; reloaded below) */
182 movl 12(%ecx), %ecx /* ecx = v[3]; last use of ecx as the 'v' pointer */
183 movl %edx, (%edi) /* vbptr[0] = v[0] */
184 movl %eax, 4(%edi) /* vbptr[1] = v[1] */
185 movl %esi, 8(%edi) /* vbptr[2] = v[2] */
186 movl %ecx, 12(%edi) /* vbptr[3] = v[3] */
187 addl $16, %edi /* step vbptr over the four dwords just stored */
188 movl $SUBST(1), %ecx /* ecx = vertex_size - 4 */
189 movl $SUBST(2), %esi /* esi = tnl->vtx.vertex + 4 */
191 movsl %ds:(%esi), %es:(%edi) /* string move of a dword from (%esi) to (%edi) */
192 movl %edi, SUBST(0) /* write the advanced vbptr back */
193 movl SUBST(3), %edx /* edx = counter */
196 dec %edx /* counter--; the zero flag set here feeds the je below */
197 movl %edx, SUBST(3) /* store counter (movl leaves flags untouched) */
198 je .3 /* if (counter == 0) goto .3 */
199 RETCLEAN(4) /* otherwise return */
202 movl $SUBST(4), %eax /* eax = ctx */
203 movl %eax, 4(%esp) /* store ctx at 4(%esp); a plain store, not a push */
204 _JMP (SUBST(5)) /* tail jump to _tnl_wrap_filled_vertex */
205 GLOBL ( _tnl_x86_Vertex4fv_end )
209 * Generic handlers for vector format data.
/* _tnl_x86_Attribute1fv: copy one dword from v (read through %ecx) to the
 * destination substituted for SUBST(0).
 * NOTE(review): neither the load of %ecx nor the return is visible in this
 * excerpt -- confirm against the full file.
 */
211 GLOBL( _tnl_x86_Attribute1fv )
213 movl (%ecx), %eax /* eax = v[0] */
214 movl %eax, SUBST(0) /* current-vertex slot 0 = v[0] */
216 GLOBL ( _tnl_x86_Attribute1fv_end )
/* _tnl_x86_Attribute2fv: copy two dwords from v (read through %ecx) to the
 * destinations substituted for SUBST(0) and SUBST(1).
 * NOTE(review): neither the load of %ecx nor the return is visible in this
 * excerpt -- confirm against the full file.
 */
218 GLOBL( _tnl_x86_Attribute2fv )
220 movl (%ecx), %eax /* eax = v[0] */
221 movl 4(%ecx), %edx /* edx = v[1] */
222 movl %eax, SUBST(0) /* current-vertex slot 0 = v[0] */
223 movl %edx, SUBST(1) /* current-vertex slot 1 = v[1] */
225 GLOBL ( _tnl_x86_Attribute2fv_end )
/* _tnl_x86_Attribute3fv: copy three dwords from v (read through %ecx) to
 * the destinations substituted for SUBST(0)..SUBST(2).
 * NOTE(review): neither the load of %ecx nor the return is visible in this
 * excerpt -- confirm against the full file.
 */
227 GLOBL( _tnl_x86_Attribute3fv )
229 movl (%ecx), %eax /* eax = v[0] */
230 movl 4(%ecx), %edx /* edx = v[1] */
231 movl 8(%ecx), %ecx /* ecx = v[2]; the pointer is no longer needed */
232 movl %eax, SUBST(0) /* current-vertex slot 0 = v[0] */
233 movl %edx, SUBST(1) /* current-vertex slot 1 = v[1] */
234 movl %ecx, SUBST(2) /* current-vertex slot 2 = v[2] */
236 GLOBL ( _tnl_x86_Attribute3fv_end )
/* _tnl_x86_Attribute4fv: copy four dwords from v (read through %ecx) to
 * the destinations substituted for SUBST(0)..SUBST(3).  The copy is done
 * as two load/store pairs so only %eax and %edx are used as scratch and
 * %ecx keeps pointing at v throughout.
 * NOTE(review): neither the load of %ecx nor the return is visible in this
 * excerpt -- confirm against the full file.
 */
238 GLOBL( _tnl_x86_Attribute4fv )
240 movl (%ecx), %eax /* eax = v[0] */
241 movl 4(%ecx), %edx /* edx = v[1] */
242 movl %eax, SUBST(0) /* current-vertex slot 0 = v[0] */
243 movl %edx, SUBST(1) /* current-vertex slot 1 = v[1] */
244 movl 8(%ecx), %eax /* eax = v[2] (eax reused) */
245 movl 12(%ecx), %edx /* edx = v[3] (edx reused) */
246 movl %eax, SUBST(2) /* current-vertex slot 2 = v[2] */
247 movl %edx, SUBST(3) /* current-vertex slot 3 = v[3] */
249 GLOBL ( _tnl_x86_Attribute4fv_end )
254 * Must generate all of these ahead of first usage. Generate at
/* _tnl_x86_choose_fv: call do_choose(attrib, N) with both arguments given
 * as substituted immediates, then jump to the function pointer returned
 * in %eax.  The temporary frame is removed before the jump, so the jump
 * target runs with %esp exactly as it was on entry to this template.
 */
257 GLOBL( _tnl_x86_choose_fv )
258 subl $12, %esp /* reserve 12 bytes: two argument slots plus 4 spare (gcc does 16 byte alignment of stack frames?) */
259 movl $SUBST(0), (%esp) /* arg 0 - attrib */
260 movl $SUBST(1), 4(%esp) /* arg 1 - N */
261 _CALL (SUBST(2)) /* call do_choose; result comes back in eax */
262 add $12, %esp /* tear down stack frame */
263 jmp *%eax /* jump to the function do_choose returned */
264 GLOBL ( _tnl_x86_choose_fv_end )
267 /* FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
269 * In the 1st level dispatch functions, switch to a different
270 * calling convention -- (const GLfloat *v) in %ecx.
272 * As with regular (x86) dispatch, don't create a new stack frame -
273 * just let the 'ret' in the dispatched function return straight
274 * back to the original caller.
276 * Vertex/Normal/Color, etc: the address of the function pointer
277 * is known at codegen time.
280 /* Unfortunately, have to play with the stack in the non-fv case:
282 #if !defined (STDCALL_API)
/* Non-STDCALL dispatch for the attrf1..attrf4 entry points, which all
 * share this one body: build a small frame holding a pointer to the
 * caller's first float and call the fv handler through the substituted
 * function-pointer slot.
 * NOTE(review): no return instruction is visible between the frame
 * teardown and the _end markers -- presumably on a line missing from this
 * excerpt; confirm against the full file.
 */
283 GLOBL( _tnl_x86_dispatch_attrf1 )
284 GLOBL( _tnl_x86_dispatch_attrf2 )
285 GLOBL( _tnl_x86_dispatch_attrf3 )
286 GLOBL( _tnl_x86_dispatch_attrf4 )
287 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
288 leal 16(%esp), %edx /* address of first float on stack: 12-byte frame + 4-byte return address */
289 movl %edx, (%esp) /* pass that address as the callee's 'v' argument */
290 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
291 addl $12, %esp /* tear down frame */
293 GLOBL( _tnl_x86_dispatch_attrf4_end )
294 GLOBL( _tnl_x86_dispatch_attrf3_end )
295 GLOBL( _tnl_x86_dispatch_attrf2_end )
296 GLOBL( _tnl_x86_dispatch_attrf1_end )
298 #else /* defined(STDCALL_API) */
/* STDCALL dispatch for attrf1: build a frame holding a pointer to the
 * caller's first float and call the fv handler through the substituted
 * function-pointer slot.  Only 8 of the 12 frame bytes are released
 * afterwards because the callee already removed its 4-byte argument.
 * NOTE(review): the return instruction is not visible in this excerpt --
 * confirm against the full file.
 */
300 GLOBL( _tnl_x86_dispatch_attrf1 )
301 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
302 leal 16(%esp), %edx /* address of first float on stack: 12-byte frame + 4-byte return address */
303 movl %edx, (%esp) /* pass that address as the callee's 'v' argument */
304 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
305 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
307 GLOBL( _tnl_x86_dispatch_attrf1_end )
/* STDCALL dispatch for attrf2: build a frame holding a pointer to the
 * caller's first float and call the fv handler through the substituted
 * function-pointer slot.  Only 8 of the 12 frame bytes are released
 * afterwards because the callee already removed its 4-byte argument.
 * NOTE(review): the return instruction is not visible in this excerpt --
 * confirm against the full file.
 */
309 GLOBL( _tnl_x86_dispatch_attrf2 )
310 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
311 leal 16(%esp), %edx /* address of first float on stack: 12-byte frame + 4-byte return address */
312 movl %edx, (%esp) /* pass that address as the callee's 'v' argument */
313 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
314 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
316 GLOBL( _tnl_x86_dispatch_attrf2_end )
/* STDCALL dispatch for attrf3: build a frame holding a pointer to the
 * caller's first float and call the fv handler through the substituted
 * function-pointer slot.  Only 8 of the 12 frame bytes are released
 * afterwards because the callee already removed its 4-byte argument.
 * NOTE(review): the return instruction is not visible in this excerpt --
 * confirm against the full file.
 */
318 GLOBL( _tnl_x86_dispatch_attrf3 )
319 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
320 leal 16(%esp), %edx /* address of first float on stack: 12-byte frame + 4-byte return address */
321 movl %edx, (%esp) /* pass that address as the callee's 'v' argument */
322 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
323 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
325 GLOBL( _tnl_x86_dispatch_attrf3_end )
/* STDCALL dispatch for attrf4: build a frame holding a pointer to the
 * caller's first float and call the fv handler through the substituted
 * function-pointer slot.  Only 8 of the 12 frame bytes are released
 * afterwards because the callee already removed its 4-byte argument.
 * NOTE(review): the return instruction is not visible in this excerpt --
 * confirm against the full file.
 */
327 GLOBL( _tnl_x86_dispatch_attrf4 )
328 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
329 leal 16(%esp), %edx /* address of first float on stack: 12-byte frame + 4-byte return address */
330 movl %edx, (%esp) /* pass that address as the callee's 'v' argument */
331 call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
332 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
334 GLOBL( _tnl_x86_dispatch_attrf4_end )
335 #endif /* defined(STDCALL_API) */
337 /* The fv case is simpler:
/* _tnl_x86_dispatch_attrfv: the caller's stack already has the layout the
 * fv handler expects, so simply jump through the substituted
 * function-pointer slot; the handler returns straight to the caller.
 */
339 GLOBL( _tnl_x86_dispatch_attrfv )
340 jmp *SUBST(0) /* 0x0 --> tabfv[attr][n]; indirect jump, no new frame */
341 GLOBL( _tnl_x86_dispatch_attrfv_end )
344 /* MultiTexcoord: the address of the function pointer must be
345 * calculated, but can use the index argument slot to hold 'v', and
346 * avoid setting up a new stack frame.
349 * right, this would be the preferred approach, but gcc does not
350 * clean up the stack after each function call when optimizing (-fdefer-pop);
351 * can it make assumptions about what's already on the stack? I dunno,
352 * but in this case, we can't mess with the caller's stack frame, and
353 * we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
356 /* Also, will only need a maximum of four of each of these per context:
358 #if !defined (STDCALL_API)
/* Non-STDCALL MultiTexCoord dispatch for the f1..f4 entry points, which
 * share one body ending in an indirect jump through the function-pointer
 * table substituted for SUBST(0), offset by %ecx.
 * NOTE(review): the instructions that derive %ecx and store 'v' are not
 * visible in this excerpt -- confirm against the full file.
 */
359 GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
360 GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
361 GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
362 GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
368 jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
369 GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
370 GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
371 GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
372 GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
/* Non-STDCALL MultiTexCoord fv dispatch: ends in an indirect jump through
 * the function-pointer table substituted for SUBST(0), offset by %ecx.
 * NOTE(review): the instructions that derive %ecx and move 'v' are not
 * visible in this excerpt -- confirm against the full file.
 */
374 GLOBL( _tnl_x86_dispatch_multitexcoordfv )
380 jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
381 GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
383 #else /* defined (STDCALL_API) */
/* STDCALL MultiTexCoord dispatch for f1: reserve a 12-byte frame, call the
 * handler through the table substituted for SUBST(0) offset by %ecx, then
 * release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the setup of %ecx and of the 'v' argument, and the return,
 * are not visible in this excerpt -- confirm against the full file.
 */
385 GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
386 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
392 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
393 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
395 GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
/* STDCALL MultiTexCoord dispatch for f2: reserve a 12-byte frame, call the
 * handler through the table substituted for SUBST(0) offset by %ecx, then
 * release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the setup of %ecx and of the 'v' argument, and the return,
 * are not visible in this excerpt -- confirm against the full file.
 */
397 GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
398 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
404 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
405 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
407 GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
/* STDCALL MultiTexCoord dispatch for f3: reserve a 12-byte frame, call the
 * handler through the table substituted for SUBST(0) offset by %ecx, then
 * release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the setup of %ecx and of the 'v' argument, and the return,
 * are not visible in this excerpt -- confirm against the full file.
 */
409 GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
410 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
416 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
417 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
419 GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
/* STDCALL MultiTexCoord dispatch for f4: reserve a 12-byte frame, call the
 * handler through the table substituted for SUBST(0) offset by %ecx, then
 * release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the setup of %ecx and of the 'v' argument, and the return,
 * are not visible in this excerpt -- confirm against the full file.
 */
421 GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
422 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
428 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
429 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
431 GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
/* STDCALL MultiTexCoord fv dispatch: reserve a 12-byte frame, call the
 * handler through the table substituted for SUBST(0) offset by %ecx, then
 * release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the setup of %ecx and of the 'v' argument, and the return,
 * are not visible in this excerpt -- confirm against the full file.
 */
433 GLOBL( _tnl_x86_dispatch_multitexcoordfv )
434 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
440 call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; ecx is a byte offset into the table */
441 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
443 GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
444 #endif /* defined (STDCALL_API) */
447 /* VertexAttrib: the address of the function pointer must be
450 #if !defined (STDCALL_API)
/* Non-STDCALL VertexAttrib dispatch for the f1..f4 entry points, which
 * share one body: after a range check that uses a branch rather than a
 * conditional move, the address of the floats is written into the first
 * argument slot and control jumps through the table substituted for
 * SUBST(0), offset by %eax.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.8' label and the scaling of %eax are not visible in this excerpt --
 * confirm against the full file.
 */
451 GLOBL( _tnl_x86_dispatch_vertexattribf1 )
452 GLOBL( _tnl_x86_dispatch_vertexattribf2 )
453 GLOBL( _tnl_x86_dispatch_vertexattribf3 )
454 GLOBL( _tnl_x86_dispatch_vertexattribf4 )
457 jb .8 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
460 leal 8(%esp), %ecx /* calculate 'v': address of the second argument slot */
461 movl %ecx, 4(%esp) /* save in 1st arg slot, replacing what was there */
463 jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
464 GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
465 GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
466 GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
467 GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
/* Non-STDCALL VertexAttrib fv dispatch: after a range check that uses a
 * branch rather than a conditional move, the 'v' pointer is moved from
 * the second argument slot into the first and control jumps through the
 * table substituted for SUBST(0), offset by %eax.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.9' label and the scaling of %eax are not visible in this excerpt --
 * confirm against the full file.
 */
469 GLOBL( _tnl_x86_dispatch_vertexattribfv )
472 jb .9 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
475 movl 8(%esp), %ecx /* load 'v' from the second argument slot */
476 movl %ecx, 4(%esp) /* save in 1st arg slot, replacing what was there */
478 jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
479 GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
481 #else /* defined (STDCALL_API) */
/* STDCALL VertexAttrib dispatch for f1: reserve a 12-byte frame,
 * range-check with a branch, pass the address of the caller's floats as
 * 'v', call through the table substituted for SUBST(0) offset by %eax,
 * then release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.81' label, the scaling of %eax and the return are not visible in this
 * excerpt -- confirm against the full file.
 */
483 GLOBL( _tnl_x86_dispatch_vertexattribf1 )
484 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
487 jb .81 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
490 leal 20(%esp), %ecx /* calculate 'v' (an address, not a load): 12-byte frame + 8 */
491 movl %ecx, (%esp) /* save in the callee's 1st arg slot */
493 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
494 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
496 GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
/* STDCALL VertexAttrib dispatch for f2: reserve a 12-byte frame,
 * range-check with a branch, pass the address of the caller's floats as
 * 'v', call through the table substituted for SUBST(0) offset by %eax,
 * then release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.82' label, the scaling of %eax and the return are not visible in this
 * excerpt -- confirm against the full file.
 */
498 GLOBL( _tnl_x86_dispatch_vertexattribf2 )
499 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
502 jb .82 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
505 leal 20(%esp), %ecx /* calculate 'v' (an address, not a load): 12-byte frame + 8 */
506 movl %ecx, (%esp) /* save in the callee's 1st arg slot */
508 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
509 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
511 GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
/* STDCALL VertexAttrib dispatch for f3: reserve a 12-byte frame,
 * range-check with a branch, pass the address of the caller's floats as
 * 'v', call through the table substituted for SUBST(0) offset by %eax,
 * then release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.83' label, the scaling of %eax and the return are not visible in this
 * excerpt -- confirm against the full file.
 */
513 GLOBL( _tnl_x86_dispatch_vertexattribf3 )
514 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
517 jb .83 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
520 leal 20(%esp), %ecx /* calculate 'v' (an address, not a load): 12-byte frame + 8 */
521 movl %ecx, (%esp) /* save in the callee's 1st arg slot */
523 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
524 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
526 GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
/* STDCALL VertexAttrib dispatch for f4: reserve a 12-byte frame,
 * range-check with a branch, pass the address of the caller's floats as
 * 'v', call through the table substituted for SUBST(0) offset by %eax,
 * then release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.84' label, the scaling of %eax and the return are not visible in this
 * excerpt -- confirm against the full file.
 */
528 GLOBL( _tnl_x86_dispatch_vertexattribf4 )
529 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
532 jb .84 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
535 leal 20(%esp), %ecx /* calculate 'v' (an address, not a load): 12-byte frame + 8 */
536 movl %ecx, (%esp) /* save in the callee's 1st arg slot */
538 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
539 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
541 GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
/* STDCALL VertexAttrib fv dispatch: reserve a 12-byte frame, range-check
 * with a branch, copy the caller's 'v' pointer into the callee's argument
 * slot, call through the table substituted for SUBST(0) offset by %eax,
 * then release the 8 frame bytes the callee did not already remove.
 * NOTE(review): the load and compare of the index, the clamp value, the
 * `.9' label, the scaling of %eax and the return are not visible in this
 * excerpt -- confirm against the full file.
 */
543 GLOBL( _tnl_x86_dispatch_vertexattribfv )
544 subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
547 jb .9 /* unsigned "below": skip ahead when in range ("cmovge" is not supported on all CPUs) */
550 movl 20(%esp), %ecx /* load 'v' (the pointer itself) from 12-byte frame + 8 */
551 movl %ecx, (%esp) /* save in the callee's 1st arg slot */
553 call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; eax is a byte offset into the table */
554 addl $8, %esp /* tear down frame (4 shaved off by the callee) */
556 GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
557 #endif /* defined (STDCALL_API) */