fix from DRI trunk
[mesa.git] / src / mesa / drivers / dri / tdfx / X86 / fx_3dnow_fasttmp.h
1 /* $XFree86: xc/lib/GL/mesa/src/drv/tdfx/X86/fx_3dnow_fasttmp.h,v 1.2 2000/09/26 15:56:51 tsi Exp $ */
2
/*
 * TAGLLBL(a) builds the name of a label that is private to one routine.
 * NASM and MASM builds use the bare name; every other assembler gets a
 * ".L" prefix (the GAS spelling for a non-exported label).  Either way the
 * result goes through TAG() so each instantiation of this template gets
 * its own set of labels.
 */
#if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER)
#define TAGLLBL(a)	TAG(a)
#else
#define TAGLLBL(a)	TAG(.L##a)
#endif
8
/*
 * Byte displacements of the fields written into each output vertex.
 * They are used below as REGOFF(<offset>, ECX), with ECX pointing at the
 * vertex being processed.  The position/colour part of the layout depends
 * on whether GLIDE3 is set; the texture-unit part is common to both.
 */
#if GLIDE3

#define GR_VERTEX_X_OFFSET		0
#define GR_VERTEX_Y_OFFSET		4
#define GR_VERTEX_OOZ_OFFSET		8
#define GR_VERTEX_OOW_OFFSET		12
#define GR_VERTEX_R_OFFSET		16
#define GR_VERTEX_G_OFFSET		20
#define GR_VERTEX_B_OFFSET		24
#define GR_VERTEX_A_OFFSET		28
#define GR_VERTEX_Z_OFFSET		32

#else /* !GLIDE3 */

#define GR_VERTEX_X_OFFSET		0
#define GR_VERTEX_Y_OFFSET		4
#define GR_VERTEX_Z_OFFSET		8
#define GR_VERTEX_R_OFFSET		12
#define GR_VERTEX_G_OFFSET		16
#define GR_VERTEX_B_OFFSET		20
#define GR_VERTEX_OOZ_OFFSET		24
#define GR_VERTEX_A_OFFSET		28
#define GR_VERTEX_OOW_OFFSET		32

#endif /* GLIDE3 */

/* Texture unit 0: s/w, t/w, 1/w */
#define GR_VERTEX_SOW_TMU0_OFFSET	36
#define GR_VERTEX_TOW_TMU0_OFFSET	40
#define GR_VERTEX_OOW_TMU0_OFFSET	44

/* Texture unit 1: s/w, t/w, 1/w */
#define GR_VERTEX_SOW_TMU1_OFFSET	48
#define GR_VERTEX_TOW_TMU1_OFFSET	52
#define GR_VERTEX_OOW_TMU1_OFFSET	56
41
42
43
44
/*
 * Byte displacements into the matrix passed as the third argument
 * (const GLfloat *m), used below as REGOFF(MAT_xx, EBP).  With 4-byte
 * floats these are elements 5, 10, 12, 13 and 14 of the array.
 *
 * The X scale lives at displacement 0 and is read with REGIND(EBP), so
 * no MAT_SX macro is defined for it (it would be: #define MAT_SX 0).
 */
#define MAT_SY	20
#define MAT_SZ	40
#define MAT_TX	48
#define MAT_TY	52
#define MAT_TZ	56
51
52
53
54
/* Do viewport map, device scale and perspective projection.
 *
 * void project_verts( GLfloat *first,
 *                     GLfloat *last,
 *                     const GLfloat *m,
 *                     GLuint stride )
 *
 * Rearrange fxVertices to look like grVertices.
 *
 * Arguments are taken from the stack (after the EBP push they sit at
 * 8/12/16/20(ESP)).  Vertices are rewritten in place.  Nothing is done
 * when first == last; otherwise the loop runs while last > f (unsigned),
 * advancing f by `stride' bytes per vertex.
 *
 * Register roles inside the loop:
 *   ECX  f, pointer to the current vertex
 *   EDX  last
 *   EBP  m (saved/restored around the routine)
 *   EAX  stride in bytes (also used briefly to build the snapper)
 *   MM1  vsz
 *   MM4  snapper | snapper           (only when FX_V2 is not defined)
 *   MM5  vsy | vsx
 *   MM6  ty | tx  (+ snapper when FX_V2 is not defined)
 *   MM7  oow | oow for the current vertex
 *   MM0, MM2, MM3  scratch
 *
 * The snapper constant 0x49400000 is the IEEE single 786432.0 (3 << 18).
 * At that magnitude one float ulp is 1/16, so adding it and subtracting
 * it again rounds x and y to multiples of 1/16.
 */

GLOBL GLNAME( TAG(fx_3dnow_project_vertices) )
GLNAME( TAG(fx_3dnow_project_vertices) ):

    PUSH_L    ( EBP )

    MOV_L     ( REGOFF(8, ESP), ECX )		/* first_vert */
    MOV_L     ( REGOFF(12, ESP), EDX )		/* last_vert */

    CMP_L     ( ECX, EDX )
    JE        ( TAGLLBL(FXPV_end) )		/* empty range: nothing to do */

    FEMMS

    PREFETCH  ( REGIND(ECX) )			/* fetch the first vertex */

    MOV_L     ( REGOFF(16, ESP), EBP )		/* matrix */

    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )	/*            | tx         */
    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )	/* ty         | tx         */

#if !defined(FX_V2)
    /* Build the snapper pair in a register instead of staging it in
     * memory below ESP: that memory is not protected on i386 and could be
     * overwritten by an asynchronous signal frame before it is read back.
     * The whole step is skipped for FX_V2, which must not bias tx/ty.
     */
    MOV_L     ( CONST(0x49400000), EAX )	/* snapper */
    MOVD      ( EAX, MM4 )			/*            | snapper    */
    PUNPCKLDQ ( MM4, MM4 )			/* snapper    | snapper    */
    PFADD     ( MM4, MM6 )			/* ty+snapper | tx+snapper */
#endif

    MOV_L     ( REGOFF(20, ESP), EAX )		/* stride */

    MOVD      ( REGIND(EBP), MM5 )		/*            | vsx        */
    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )	/* vsy        | vsx        */

    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )	/*            | vsz        */


ALIGNTEXT32
TAGLLBL(FXPV_loop_start):

    PREFETCH  ( REGOFF(64, ECX) )		/* fetch the next-ish vertex */

    /* oow = 1/f[3]: PFRCP estimate refined by PFRCPIT1/PFRCPIT2 */
    MOVD      ( REGOFF(12, ECX), MM0 )		/*            | f[3]       */
    PFRCP     ( MM0, MM0 )			/* oow = 1/f[3]            */

    MOVD      ( REGOFF(12, ECX), MM7 )		/*            | f[3]       */
    PFRCPIT1  ( MM0, MM7 )
    PFRCPIT2  ( MM0, MM7 )			/* oow        | oow        */

    PUNPCKLDQ ( MM7, MM7 )			/* replicate oow in both halves */


#if (TYPE & SETUP_RGBA)
    MOVD      ( REGOFF(CLIP_R, ECX ), MM0 )	/* f[RCOORD] = f[CLIP_R]; */
    MOVD      ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
#endif

#if (TYPE & SETUP_TMU1)
    MOVQ      ( REGOFF(CLIP_S1, ECX), MM0 )	/* f[S1COORD] = f[CLIP_S1] * oow */
    PFMUL     ( MM7, MM0 )			/* f[T1COORD] = f[CLIP_T1] * oow */
    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
#endif


#if (TYPE & SETUP_TMU0)
    MOVQ      ( REGOFF(CLIP_S0, ECX), MM0 )	/* f[S0COORD] = f[CLIP_S0] * oow */
    PFMUL     ( MM7, MM0 )			/* f[T0COORD] = f[CLIP_T0] * oow */
    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
#endif


/* DO_SETUP_XYZ */

    MOVQ      ( REGIND(ECX), MM2 )		/* f[1]        | f[0]        */
    PFMUL     ( MM7, MM2 )			/* f[1] * oow  | f[0] * oow  */

    MOVD      ( REGOFF(8, ECX), MM3 )		/*             | f[2]        */
    PFMUL     ( MM7, MM3 )			/*             | f[2] * oow  */

    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )	/*             | vtz         */
    PFMUL     ( MM1, MM3 )			/*             | f[2] *= vsz */

    PFADD     ( MM0, MM3 )			/*             | f[2] += vtz */
    PFMUL     ( MM5, MM2 )			/* f[1] *= vsy | f[0] *= vsx */

    PFADD     ( MM6, MM2 )			/* f[1] += vty | f[0] += vtx */

#if !defined(FX_V2)
    PFSUB     ( MM4, MM2 )			/* f[0,1] -= snapper */
#endif

    MOVQ      ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
    MOVD      ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )


/* end of DO_SETUP_XYZ */

    MOVD      ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) )	/* f[OOWCOORD] = oow */
    ADD_L     ( EAX, ECX )			/* f += stride */

    CMP_L     ( ECX, EDX )			/* stall??? */
    JA        ( TAGLLBL(FXPV_loop_start) )	/* unsigned: loop while last > f */

TAGLLBL(FXPV_end):
    FEMMS
    POP_L     ( EBP )
    RET
175
176
177
178
179
180
181
/* void project_verts( GLfloat *first,
 *                     GLfloat *last,
 *                     const GLfloat *m,
 *                     GLuint stride,
 *                     const GLubyte *mask )
 *
 * Same projection as the unclipped routine, but one byte of `mask' is
 * consulted per vertex: a vertex whose mask byte is non-zero is left
 * untouched.  The mask pointer advances by one byte for every vertex,
 * skipped or not.
 *
 * Arguments are taken from the stack.  first/last are read after the EBP
 * push (8/12(ESP)); m/stride/mask are read after the further EDI and ESI
 * pushes (24/28/32(ESP)).  Vertices are rewritten in place.  Nothing is
 * done when first == last; otherwise the loop runs while last > f
 * (unsigned), advancing f by `stride' bytes per vertex.
 *
 * Register roles inside the loop:
 *   ECX  f, pointer to the current vertex
 *   EDX  last
 *   ESI  pointer to the current mask byte
 *   EBP  m (saved/restored around the routine)
 *   EAX  stride in bytes (also used briefly to build the snapper)
 *   MM1  vsz
 *   MM4  snapper | snapper           (only when FX_V2 is not defined)
 *   MM5  vsy | vsx
 *   MM6  ty | tx  (+ snapper when FX_V2 is not defined)
 *   MM7  oow | oow for the current vertex
 *   MM0, MM2, MM3  scratch
 *
 * The snapper constant 0x49400000 is the IEEE single 786432.0 (3 << 18).
 * At that magnitude one float ulp is 1/16, so adding it and subtracting
 * it again rounds x and y to multiples of 1/16.
 */

GLOBL GLNAME( TAG(fx_3dnow_project_clipped_vertices) )
GLNAME( TAG(fx_3dnow_project_clipped_vertices) ):

    PUSH_L    ( EBP )

    MOV_L     ( REGOFF(8, ESP), ECX )		/* first FXDRIVER(VB)->verts*/
    MOV_L     ( REGOFF(12, ESP), EDX )		/* last FXDRIVER(VB)->last_vert */

    /* Empty range: leave before touching any vertex or mask byte.  Only
     * EBP has been pushed at this point, which is exactly what the code at
     * FXPCV_end unwinds.
     */
    CMP_L     ( ECX, EDX )
    JE        ( TAGLLBL(FXPCV_end) )

    FEMMS

    PUSH_L    ( EDI )				/* not used below; kept so the */
    PUSH_L    ( ESI )				/* argument offsets stay put   */

    PREFETCH  ( REGIND(ECX) )			/* fetch the first vertex */

    MOV_L     ( REGOFF(24, ESP), EBP )		/* mat ctx->Viewport.WindowMap.M */
    MOV_L     ( REGOFF(32, ESP), ESI )		/* VB->ClipMask */

    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )	/*            | tx         */
    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )	/* ty         | tx         */

#if !defined(FX_V2)
    /* Build the snapper pair in a register instead of staging it in
     * memory below ESP: that memory is not protected on i386 and could be
     * overwritten by an asynchronous signal frame before it is read back.
     * The whole step is skipped for FX_V2, which must not bias tx/ty.
     */
    MOV_L     ( CONST(0x49400000), EAX )	/* snapper */
    MOVD      ( EAX, MM4 )			/*            | snapper    */
    PUNPCKLDQ ( MM4, MM4 )			/* snapper    | snapper    */
    PFADD     ( MM4, MM6 )			/* ty+snapper | tx+snapper */
#endif

    MOV_L     ( REGOFF(28, ESP), EAX )		/* stride */

    MOVD      ( REGIND(EBP), MM5 )		/*            | vsx        */
    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )	/* vsy        | vsx        */

    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )	/*            | vsz        */


ALIGNTEXT32
TAGLLBL(FXPCV_loop_start):

    PREFETCH  ( REGOFF(64, ECX) )		/* fetch the next-ish vertex */

    CMP_B     ( CONST(0), REGIND(ESI) )
    JNE       ( TAGLLBL(FXPCV_skip) )		/* non-zero mask byte: skip vertex */

    /* oow = 1/f[3]: PFRCP estimate refined by PFRCPIT1/PFRCPIT2 */
    MOVD      ( REGOFF(12, ECX), MM0 )		/*            | f[3]       */
    PFRCP     ( MM0, MM0 )			/* oow = 1/f[3]            */

    MOVD      ( REGOFF(12, ECX), MM7 )		/*            | f[3]       */
    PFRCPIT1  ( MM0, MM7 )
    PFRCPIT2  ( MM0, MM7 )			/* oow        | oow        */

    PUNPCKLDQ ( MM7, MM7 )			/* replicate oow in both halves */


#if (TYPE & SETUP_RGBA)
    MOVD      ( REGOFF(CLIP_R, ECX ), MM0 )	/* f[RCOORD] = f[CLIP_R]; */
    MOVD      ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
#endif

#if (TYPE & SETUP_TMU1)
    MOVQ      ( REGOFF(CLIP_S1, ECX), MM0 )	/* f[S1COORD] = f[CLIP_S1] * oow */
    PFMUL     ( MM7, MM0 )			/* f[T1COORD] = f[CLIP_T1] * oow */
    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
#endif


#if (TYPE & SETUP_TMU0)
    MOVQ      ( REGOFF(CLIP_S0, ECX), MM0 )	/* f[S0COORD] = f[CLIP_S0] * oow */
    PFMUL     ( MM7, MM0 )			/* f[T0COORD] = f[CLIP_T0] * oow */
    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
#endif


/* DO_SETUP_XYZ */

    MOVQ      ( REGIND(ECX), MM2 )		/* f[1]        | f[0]        */
    PFMUL     ( MM7, MM2 )			/* f[1] * oow  | f[0] * oow  */

    MOVD      ( REGOFF(8, ECX), MM3 )		/*             | f[2]        */
    PFMUL     ( MM7, MM3 )			/*             | f[2] * oow  */

    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )	/*             | vtz         */
    PFMUL     ( MM1, MM3 )			/*             | f[2] *= vsz */

    PFADD     ( MM0, MM3 )			/*             | f[2] += vtz */
    PFMUL     ( MM5, MM2 )			/* f[1] *= vsy | f[0] *= vsx */

    PFADD     ( MM6, MM2 )			/* f[1] += vty | f[0] += vtx */

#if !defined(FX_V2)
    PFSUB     ( MM4, MM2 )			/* f[0,1] -= snapper */
#endif

    MOVQ      ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
    MOVD      ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )


/* end of DO_SETUP_XYZ */

    MOVD      ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) )	/* f[OOWCOORD] = oow */

TAGLLBL(FXPCV_skip):
    ADD_L     ( EAX, ECX )			/* f += stride */

    INC_L     ( ESI )				/* next ClipMask */
    CMP_L     ( ECX, EDX )
    JA        ( TAGLLBL(FXPCV_loop_start) )	/* unsigned: loop while last > f */

    POP_L     ( ESI )
    POP_L     ( EDI )

TAGLLBL(FXPCV_end):
    FEMMS
    POP_L     ( EBP )
    RET
308
309
310
/*
 * Drop the template parameters (presumably so the including file can set
 * them afresh before pulling this file in again -- confirm against the
 * includer).
 */
#undef SIZE
#undef TAG
#undef TYPE
314