1 /**************************************************************************
3 Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Keith Whitwell <keith@tungstengraphics.com>
33 #if !defined (__DJGPP__)
39 #else /* defined(__DJGPP__) */
45 #endif /* defined(__DJGPP__) */
50 // Someone who knew a lot about this sort of thing would use this
51 // macro to note current offsets, etc in a special region of the
52 // object file & just make everything work out neat. I don't know
53 // enough to do that...
55 #define SUBST( x ) (0x10101010 + x)
60 // Unfold functions for each vertex size?
61 // Build super-specialized MMX/SSE versions?
62 // STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY):
63 // need separate routine for the non "fv" case,
64 // to clean up the stack (I guess we could codegen
65 // 'ret nn' insn)! Also we need to call notify, then
66 // return, instead of jump!
68 GLOBL ( _tnl_x86_Vertex1fv )
72 movl SUBST(0), %edi // 0x0 --> tnl->vtx.vbptr
73 movl (%ecx), %edx // load v[0]
74 movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0]
75 addl $4, %edi // tnl->vtx.vbptr += 1
76 movl $SUBST(1), %ecx // 0x1 --> (tnl->vtx.vertex_size - 1)
77 movl $SUBST(2), %esi // 0x2 --> (tnl->vtx.vertex + 1)
79 movsl %ds:(%esi), %es:(%edi)
80 movl %edi, SUBST(0) // 0x0 --> tnl->vtx.vbptr
81 movl SUBST(3), %edx // 0x3 --> counter
85 movl %edx, SUBST(3) // 0x3 --> counter
86 jne .0 // if (counter != 0) return
87 pushl $SUBST(4) // 0x4 --> ctx
88 .byte 0xe8 // call ...
89 .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx)
93 GLOBL ( _tnl_x86_Vertex1fv_end )
97 GLOBL ( _tnl_x86_Vertex2fv )
101 movl SUBST(0), %edi // load tnl->vtx.vbptr
102 movl (%ecx), %edx // load v[0]
103 movl 4(%ecx), %eax // load v[1]
104 movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0]
105 movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1]
106 addl $8, %edi // tnl->vtx.vbptr += 2
107 movl $SUBST(1), %ecx // vertex_size - 2
108 movl $SUBST(2), %esi // tnl->vtx.vertex + 2
110 movsl %ds:(%esi), %es:(%edi)
111 movl %edi, SUBST(0) // save tnl->vtx.vbptr
112 movl SUBST(3), %edx // load counter
115 dec %edx // counter--
116 movl %edx, SUBST(3) // save counter
117 jne .1 // if (counter != 0) return
118 pushl $SUBST(4) // load ctx
119 .byte 0xe8 // call ...
120 .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx)
124 GLOBL ( _tnl_x86_Vertex2fv_end )
127 GLOBL ( _tnl_x86_Vertex3fv )
131 movl SUBST(0), %edi // load tnl->vtx.vbptr
132 movl (%ecx), %edx // load v[0]
133 movl 4(%ecx), %eax // load v[1]
134 movl 8(%ecx), %esi // load v[2]
135 movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0]
136 movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1]
137 movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2]
138 addl $12, %edi // tnl->vtx.vbptr += 3
139 movl $SUBST(1), %ecx // vertex_size - 3
140 movl $SUBST(2), %esi // tnl->vtx.vertex + 3
142 movsl %ds:(%esi), %es:(%edi)
143 movl %edi, SUBST(0) // save tnl->vtx.vbptr
144 movl SUBST(3), %edx // load counter
147 dec %edx // counter--
148 movl %edx, SUBST(3) // save counter
149 jne .2 // if (counter != 0) return
150 pushl $SUBST(4) // load ctx
151 .byte 0xe8 // call ...
152 .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx)
156 GLOBL ( _tnl_x86_Vertex3fv_end )
160 GLOBL ( _tnl_x86_Vertex4fv )
164 movl SUBST(0), %edi // load tnl->vtx.vbptr
165 movl (%ecx), %edx // load v[0]
166 movl 4(%ecx), %eax // load v[1]
167 movl 8(%ecx), %esi // load v[2]
168 movl 12(%ecx), %ecx // load v[3]
169 movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0]
170 movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1]
171 movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2]
172 movl %ecx, 12(%edi) // tnl->vtx.vbptr[3] = v[3]
173 addl $16, %edi // tnl->vtx.vbptr += 4
174 movl $SUBST(1), %ecx // vertex_size - 4
175 movl $SUBST(2), %esi // tnl->vtx.vertex + 3
177 movsl %ds:(%esi), %es:(%edi)
178 movl %edi, SUBST(0) // save tnl->vtx.vbptr
179 movl SUBST(3), %edx // load counter
182 dec %edx // counter--
183 movl %edx, SUBST(3) // save counter
184 jne .3 // if (counter != 0) return
185 pushl $SUBST(4) // load ctx
186 .byte 0xe8 // call ...
187 .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx)
191 GLOBL ( _tnl_x86_Vertex4fv_end )
196 * Generic handlers for vector format data.
199 GLOBL( _tnl_x86_Attribute1fv)
201 movl (%ecx), %eax /* load v[0] */
202 movl %eax, SUBST(0) /* store v[0] to current vertex */
204 GLOBL ( _tnl_x86_Attribute1fv_end )
206 GLOBL( _tnl_x86_Attribute2fv)
208 movl (%ecx), %eax /* load v[0] */
209 movl 4(%ecx), %edx /* load v[1] */
210 movl %eax, SUBST(0) /* store v[0] to current vertex */
211 movl %edx, SUBST(1) /* store v[1] to current vertex */
213 GLOBL ( _tnl_x86_Attribute2fv_end )
216 GLOBL( _tnl_x86_Attribute3fv)
218 movl (%ecx), %eax /* load v[0] */
219 movl 4(%ecx), %edx /* load v[1] */
220 movl 8(%ecx), %ecx /* load v[2] */
221 movl %eax, SUBST(0) /* store v[0] to current vertex */
222 movl %edx, SUBST(1) /* store v[1] to current vertex */
223 movl %ecx, SUBST(2) /* store v[2] to current vertex */
225 GLOBL ( _tnl_x86_Attribute3fv_end )
227 GLOBL( _tnl_x86_Attribute4fv)
229 movl (%ecx), %eax /* load v[0] */
230 movl 4(%ecx), %edx /* load v[1] */
231 movl %eax, SUBST(0) /* store v[0] to current vertex */
232 movl %edx, SUBST(1) /* store v[1] to current vertex */
233 movl 8(%ecx), %eax /* load v[2] */
234 movl 12(%ecx), %edx /* load v[3] */
235 movl %eax, SUBST(2) /* store v[2] to current vertex */
236 movl %edx, SUBST(3) /* store v[3] to current vertex */
238 GLOBL ( _tnl_x86_Attribute4fv_end )
243 // Must generate all of these ahead of first usage. Generate at
247 GLOBL( _tnl_x86_choose_fv)
248 subl $12, %esp // gcc does 16 byte alignment of stack frames?
249 movl $SUBST(0), (%esp) // arg 0 - attrib
250 movl $SUBST(1), 4(%esp) // arg 1 - N
251 .byte 0xe8 // call ...
252 .long SUBST(2) // ... do_choose
253 add $12, %esp // tear down stack frame
254 jmp *%eax // jump to new func
255 GLOBL ( _tnl_x86_choose_fv_end )
259 // FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
263 // In the 1st level dispatch functions, switch to a different
264 // calling convention -- (const GLfloat *v) in %ecx.
266 // As with regular (x86) dispatch, don't create a new stack frame -
267 // just let the 'ret' in the dispatched function return straight
268 // back to the original caller.
272 // Vertex/Normal/Color, etc: the address of the function pointer
273 // is known at codegen time.
276 // Unfortunately, have to play with the stack in the non-fv case:
278 GLOBL( _tnl_x86_dispatch_attrf )
279 subl $12, %esp // gcc does 16 byte alignment of stack frames?
280 leal 16(%esp), %edx // address of first float on stack
281 movl %edx, (%esp) // save as 'v'
282 call *SUBST(0) // 0x0 --> tabfv[attr][n]
283 addl $12, %esp // tear down frame
285 GLOBL( _tnl_x86_dispatch_attrf_end )
287 // The fv case is simpler:
289 GLOBL( _tnl_x86_dispatch_attrfv )
290 jmp *SUBST(0) // 0x0 --> tabfv[attr][n]
291 GLOBL( _tnl_x86_dispatch_attrfv_end )
294 // MultiTexcoord: the address of the function pointer must be
295 // calculated, but can use the index argument slot to hold 'v', and
296 // avoid setting up a new stack frame.
299 // right, this would be the preferred approach, but gcc does not
300 // clean up the stack after each function call when optimizing (-fdefer-pop);
301 // can it make assumptions about what's already on the stack? I dunno,
302 // but in this case, we can't mess with the caller's stack frame, and
303 // we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
305 // Also, will only need a maximum of four of each of these per context:
307 GLOBL( _tnl_x86_dispatch_multitexcoordf )
313 jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n]
314 GLOBL( _tnl_x86_dispatch_multitexcoordf_end )
316 GLOBL( _tnl_x86_dispatch_multitexcoordfv )
322 jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n]
323 GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
325 // VertexAttrib: the address of the function pointer must be
328 GLOBL( _tnl_x86_dispatch_vertexattribf )
331 jb .8 // "cmovge" is not supported on all CPUs
334 leal 8(%esp), %ecx // calculate 'v'
335 movl %ecx, 4(%esp) // save in 1st arg slot
337 jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n]
338 GLOBL( _tnl_x86_dispatch_vertexattribf_end )
340 GLOBL( _tnl_x86_dispatch_vertexattribfv )
343 jb .9 // "cmovge" is not supported on all CPUs
346 movl 8(%esp), %ecx // load 'v'
347 movl %ecx, 4(%esp) // save in 1st arg slot
349 jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n]
350 GLOBL( _tnl_x86_dispatch_vertexattribfv_end )