fine-tuning x86 gcc codegen
[mesa.git] / src / mesa / tnl / t_vtx_x86.c
1 /**************************************************************************
2
3 Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33
34 #include "glheader.h"
35 #include "context.h"
36 #include "macros.h"
37 #include "vtxfmt.h"
38 #include "dlist.h"
39 #include "state.h"
40 #include "light.h"
41 #include "api_arrayelt.h"
42 #include "api_noop.h"
43 #include "t_vtx_api.h"
44 #include "simple_list.h"
45
46
47 #if defined(USE_X86_ASM) && !defined(HAVE_NONSTANDARD_GLAPIENTRY)
48
/*
 * Declare the start and end labels of one machine-code template.
 *
 * FUNC and FUNC_end are presumably labels bracketing a routine in the
 * accompanying x86 assembly source (not visible from this file -- confirm);
 * DFN() copies the bytes between the two labels.
 *
 * The symbols are the code bytes themselves, not pointer variables, so they
 * are declared as arrays of unknown size.  With this declaration both
 * `FUNC` and `&FUNC` evaluate to the address of the template, and the first
 * bytes of machine code can never be read by mistake as a stored pointer.
 */
#define EXTERN( FUNC )              \
   extern const char FUNC[];        \
   extern const char FUNC##_end[]

/* Templates instantiated by the generators in this file. */
EXTERN( _x86_Attribute1fv );
EXTERN( _x86_Attribute2fv );
EXTERN( _x86_Attribute3fv );
EXTERN( _x86_Attribute4fv );
EXTERN( _x86_Vertex1fv );
EXTERN( _x86_Vertex2fv );
EXTERN( _x86_Vertex3fv );
EXTERN( _x86_Vertex4fv );

/* None of these used yet:
 */
EXTERN( _x86_dispatch_attrf );
EXTERN( _x86_dispatch_attrfv );
EXTERN( _x86_dispatch_multitexcoordf );
EXTERN( _x86_dispatch_multitexcoordfv );
EXTERN( _x86_dispatch_vertexattribf );
EXTERN( _x86_dispatch_vertexattribfv );
70
71
72 static void notify( void )
73 {
74 GET_CURRENT_CONTEXT( ctx );
75 _tnl_wrap_filled_vertex( ctx );
76 }
77
/*
 * Selects how FIXUP()/FIXUPREL() find a placeholder in a copied template:
 * non-zero means "scan the code for the marker value", zero means "trust
 * the KNOWN_OFFSET argument and only assert that the marker is there".
 * Every caller in this file passes 0 as KNOWN_OFFSET, so this has to stay
 * non-zero until real offsets are supplied.
 */
#define DONT_KNOW_OFFSETS 1
79
80
/*
 * Common prologue of every generator function below.
 *
 * FUNC   assembly template to instantiate (the FUNC / FUNC_end label pair)
 * CACHE  list head the new record is linked into
 * KEY    value stored in the record's `key` field
 *
 * Declares these locals in the enclosing function, so it must be the last
 * entry of that function's declaration list:
 *   dfn     newly allocated struct _tnl_dynfn
 *   start   first byte of the template
 *   end     the template's _end label
 *   offset  scan cursor used by FIXUP()/FIXUPREL(), initially 0
 *
 * It then copies the template into a fresh 16-byte-aligned buffer
 * (dfn->code), stores KEY and links the record at the head of CACHE.
 *
 * If either allocation fails the macro returns NULL from the enclosing
 * function (which therefore must return a pointer); nothing is leaked and
 * the cache is left untouched.  The record is only linked into the cache
 * once it is fully initialised.
 * NOTE(review): callers are presumed to treat a NULL generator result as
 * "no generated code available" -- confirm at the call sites.
 * NOTE(review): the buffer comes from the ordinary aligned allocator; on
 * systems that enforce non-executable heaps it may need an executable
 * mapping instead.
 */
#define DFN( FUNC, CACHE, KEY )                                 \
   struct _tnl_dynfn *dfn = MALLOC_STRUCT( _tnl_dynfn );        \
   char *start = (char *)&FUNC;                                 \
   char *end = (char *)&FUNC##_end;                             \
   int offset = 0;                                              \
   if (dfn == NULL)                                             \
      return NULL;                                              \
   dfn->code = ALIGN_MALLOC( end - start, 16 );                 \
   if (dfn->code == NULL) {                                     \
      FREE( dfn );                                              \
      return NULL;                                              \
   }                                                            \
   memcpy( dfn->code, start, end - start );                     \
   dfn->key = (KEY);                                            \
   insert_at_head( &(CACHE), dfn )
90
91
92
/*
 * Overwrite one 32-bit placeholder in a copied code template with an
 * absolute value.
 *
 * CODE          base address of the copied template (addressed in bytes)
 * KNOWN_OFFSET  byte offset of the placeholder; only consulted when
 *               DONT_KNOW_OFFSETS is zero
 * CHECKVAL      placeholder number: the template is expected to contain
 *               the marker 0x10101010 + CHECKVAL at the patch site
 * NEWVAL        value written over the marker, converted to int
 *
 * Relies on locals that DFN() declares in the enclosing function:
 *   offset      scan cursor.  The search starts here and the cursor is
 *               left just past the patched dword, so fixups must be
 *               issued in the order their markers occur in the template.
 *   start, end  template bounds; end - start is the size of the copy and
 *               stops the scan at the end of the buffer.
 *
 * A marker that cannot be found trips the assert; in builds without
 * asserts the patch is skipped rather than scanning (and then writing)
 * beyond the end of the copied code.
 */
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )                   \
do {                                                                    \
   const int subst = 0x10101010 + (CHECKVAL);                           \
                                                                        \
   if (DONT_KNOW_OFFSETS) {                                             \
      const int scan_limit = (int)(end - start) - 4;                    \
      while (offset <= scan_limit &&                                    \
             *(int *)((char *)(CODE) + offset) != subst)                \
         offset++;                                                      \
      assert(offset <= scan_limit);                                     \
      if (offset <= scan_limit) {                                       \
         *(int *)((char *)(CODE) + offset) = (int)(NEWVAL);             \
         if (0) fprintf(stderr, "%s/%d: offset %d, new value: 0x%x\n", __FILE__, __LINE__, offset, (int)(NEWVAL)); \
         offset += 4;                                                   \
      }                                                                 \
   }                                                                    \
   else {                                                               \
      int *icode = (int *)((char *)(CODE) + (KNOWN_OFFSET));            \
      assert(*icode == subst);                                          \
      *icode = (int)(NEWVAL);                                           \
   }                                                                    \
} while (0)
109
110
111
/*
 * Overwrite one 32-bit placeholder in a copied code template with a
 * displacement relative to the end of the patched field, i.e.
 * NEWVAL - (address of the placeholder) - 4.  The generators use it for
 * the address of notify().
 *
 * CODE          base address of the copied template (addressed in bytes)
 * KNOWN_OFFSET  byte offset of the placeholder; only consulted when
 *               DONT_KNOW_OFFSETS is zero
 * CHECKVAL      placeholder number: the template is expected to contain
 *               the marker 0x10101010 + CHECKVAL at the patch site
 * NEWVAL        absolute target address, as an integer
 *
 * Relies on locals that DFN() declares in the enclosing function:
 *   offset      scan cursor.  The search starts here and the cursor is
 *               left just past the patched dword, so fixups must be
 *               issued in the order their markers occur in the template.
 *   start, end  template bounds; end - start is the size of the copy and
 *               stops the scan at the end of the buffer.
 *
 * The displacement is computed in unsigned arithmetic so that it wraps
 * modulo 2^32 instead of overflowing a signed int when target and code
 * are more than 2GB apart.  Addresses are squeezed into 32 bits, as
 * everywhere else in this x86-only file.
 *
 * A marker that cannot be found trips the assert; in builds without
 * asserts the patch is skipped rather than scanning (and then writing)
 * beyond the end of the copied code.
 */
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )                \
do {                                                                    \
   const int subst = 0x10101010 + (CHECKVAL);                           \
                                                                        \
   if (DONT_KNOW_OFFSETS) {                                             \
      const int scan_limit = (int)(end - start) - 4;                    \
      while (offset <= scan_limit &&                                    \
             *(int *)((char *)(CODE) + offset) != subst)                \
         offset++;                                                      \
      assert(offset <= scan_limit);                                     \
      if (offset <= scan_limit) {                                       \
         int *icode = (int *)((char *)(CODE) + offset);                 \
         *icode = (int)((unsigned)(NEWVAL) - (unsigned)(icode) - 4u);   \
         if (0) fprintf(stderr, "%s/%d: offset %d, new value: 0x%x\n", __FILE__, __LINE__, offset, *icode); \
         offset += 4;                                                   \
      }                                                                 \
   }                                                                    \
   else {                                                               \
      int *icode = (int *)((char *)(CODE) + (KNOWN_OFFSET));            \
      assert(*icode == subst);                                          \
      *icode = (int)((unsigned)(NEWVAL) - (unsigned)(icode) - 4u);      \
   }                                                                    \
} while (0)
128
129
130
131
132 /* Build specialized versions of the immediate calls on the fly for
133 * the current state. Generic x86 versions.
134 */
135
136 static struct _tnl_dynfn *makeX86Vertex1fv( GLcontext *ctx, int vertex_size )
137 {
138 TNLcontext *tnl = TNL_CONTEXT(ctx);
139 DFN ( _x86_Vertex1fv, tnl->vtx.cache.Vertex[1-1], vertex_size );
140
141 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
142 FIXUP(dfn->code, 0, 1, vertex_size - 1);
143 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[1]);
144 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
145 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
146 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
147 FIXUPREL(dfn->code, 0, 4, (int)&notify);
148
149 return dfn;
150 }
151
152 static struct _tnl_dynfn *makeX86Vertex2fv( GLcontext *ctx, int vertex_size )
153 {
154 TNLcontext *tnl = TNL_CONTEXT(ctx);
155 DFN ( _x86_Vertex2fv, tnl->vtx.cache.Vertex[2-1], vertex_size );
156
157 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
158 FIXUP(dfn->code, 0, 1, vertex_size - 2);
159 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[2]);
160 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
161 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
162 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
163 FIXUPREL(dfn->code, 0, 4, (int)&notify);
164
165 return dfn;
166 }
167
168 static struct _tnl_dynfn *makeX86Vertex3fv( GLcontext *ctx, int vertex_size )
169 {
170 TNLcontext *tnl = TNL_CONTEXT(ctx);
171 DFN ( _x86_Vertex3fv, tnl->vtx.cache.Vertex[3-1], vertex_size );
172
173 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
174 FIXUP(dfn->code, 0, 1, vertex_size - 3);
175 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[3]);
176 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
177 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
178 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
179 FIXUPREL(dfn->code, 0, 4, (int)&notify);
180 return dfn;
181 }
182
183 static struct _tnl_dynfn *makeX86Vertex4fv( GLcontext *ctx, int vertex_size )
184 {
185 TNLcontext *tnl = TNL_CONTEXT(ctx);
186 DFN ( _x86_Vertex4fv, tnl->vtx.cache.Vertex[4-1], vertex_size );
187
188 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
189 FIXUP(dfn->code, 0, 1, vertex_size - 4);
190 FIXUP(dfn->code, 0, 2, (int)&tnl->vtx.vertex[4]);
191 FIXUP(dfn->code, 0, 0, (int)&tnl->vtx.vbptr);
192 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
193 FIXUP(dfn->code, 0, 3, (int)&tnl->vtx.counter);
194 FIXUPREL(dfn->code, 0, 4, (int)&notify);
195
196 return dfn;
197 }
198
199
200 static struct _tnl_dynfn *makeX86Attribute1fv( GLcontext *ctx, int dest )
201 {
202 TNLcontext *tnl = TNL_CONTEXT(ctx);
203 DFN ( _x86_Attribute1fv, tnl->vtx.cache.Attribute[1-1], dest );
204
205 FIXUP(dfn->code, 0, 0, dest);
206
207 return dfn;
208 }
209
210 static struct _tnl_dynfn *makeX86Attribute2fv( GLcontext *ctx, int dest )
211 {
212 TNLcontext *tnl = TNL_CONTEXT(ctx);
213 DFN ( _x86_Attribute2fv, tnl->vtx.cache.Attribute[2-1], dest );
214
215 FIXUP(dfn->code, 0, 0, dest);
216 FIXUP(dfn->code, 0, 1, 4+dest);
217
218 return dfn;
219 }
220
221 static struct _tnl_dynfn *makeX86Attribute3fv( GLcontext *ctx, int dest )
222 {
223 TNLcontext *tnl = TNL_CONTEXT(ctx);
224 DFN ( _x86_Attribute3fv, tnl->vtx.cache.Attribute[3-1], dest );
225
226 FIXUP(dfn->code, 0, 0, dest);
227 FIXUP(dfn->code, 0, 1, 4+dest);
228 FIXUP(dfn->code, 0, 2, 8+dest);
229
230 return dfn;
231 }
232
233 static struct _tnl_dynfn *makeX86Attribute4fv( GLcontext *ctx, int dest )
234 {
235 TNLcontext *tnl = TNL_CONTEXT(ctx);
236 DFN ( _x86_Attribute4fv, tnl->vtx.cache.Attribute[4-1], dest );
237
238 FIXUP(dfn->code, 0, 0, dest);
239 FIXUP(dfn->code, 0, 1, 4+dest);
240 FIXUP(dfn->code, 0, 2, 8+dest);
241 FIXUP(dfn->code, 0, 3, 12+dest);
242
243 return dfn;
244 }
245
246
247 void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
248 {
249 gen->Vertex[0] = makeX86Vertex1fv;
250 gen->Vertex[1] = makeX86Vertex2fv;
251 gen->Vertex[2] = makeX86Vertex3fv;
252 gen->Vertex[3] = makeX86Vertex4fv;
253 gen->Attribute[0] = makeX86Attribute1fv;
254 gen->Attribute[1] = makeX86Attribute2fv;
255 gen->Attribute[2] = makeX86Attribute3fv;
256 gen->Attribute[3] = makeX86Attribute4fv;
257 }
258
/*
 * Intentionally empty, externally visible function.
 *
 * Nothing in this file calls it.
 * NOTE(review): presumably it satisfies a reference from the assembly
 * dispatch templates (declared above as not used yet) -- confirm before
 * removing or making it static.
 */
void _do_choose( void )
{
   /* nothing to do */
}
262
263 #else
264
/*
 * Fallback used when the x86 assembly templates are not compiled in
 * (USE_X86_ASM undefined, or HAVE_NONSTANDARD_GLAPIENTRY defined).
 * Installs nothing: *gen is left exactly as the caller set it up.
 */
void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
{
   /* Parameter is deliberately unused in this configuration. */
   (void) gen;
}
269
270 #endif