SWTC trick, enabled new extensions
[mesa.git] / src / mesa / x86 / 3dnow_xform4.S
1 /* $Id: 3dnow_xform4.S,v 1.3 2004/04/26 10:10:25 alanh Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 3.5
6 *
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #ifdef USE_3DNOW_ASM
28 #include "matypes.h"
29 #include "xform_args.h"
30
SEG_TEXT

/*
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_DEST /
 * ARG_MATRIX / ARG_SOURCE macros (xform_args.h) as the number of bytes
 * pushed before the arguments are read.  Every routine in this file reads
 * its ARG_* operands after exactly one PUSH_L -- confirm against
 * xform_args.h before changing either the value or the push order.
 */
#define FRAME_OFFSET    4
34
35
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Transforms 4-component vertices by all sixteen matrix elements using
 * 3DNow! packed single-precision arithmetic.  With x0..x3 the four floats
 * of a source vertex and m[n] the float at byte offset 4*n from the
 * matrix pointer, every output vertex is
 *
 *     r0 = x0*m[0] + x1*m[4] + x2*m[8]  + x3*m[12]
 *     r1 = x0*m[1] + x1*m[5] + x2*m[9]  + x3*m[13]
 *     r2 = x0*m[2] + x1*m[6] + x2*m[10] + x3*m[14]
 *     r3 = x0*m[3] + x1*m[7] + x2*m[11] + x3*m[15]
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ECX  matrix
 *     ESI  vertices still to process
 *     EDI  source stride
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and
 * MM0-MM7 are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
GLNAME( _mesa_3dnow_transform_points4_general ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_DONE ) )               /* nothing to transform      */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM0 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM4 )               /* x3 | x2                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM0, MM2 )                          /* x1 | x0                   */
    MOVQ      ( MM4, MM6 )                          /* x3 | x2                   */

    PUNPCKLDQ ( MM0, MM0 )                          /* x0 | x0                   */
    PUNPCKHDQ ( MM2, MM2 )                          /* x1 | x1                   */

    MOVQ      ( MM0, MM1 )                          /* x0 | x0                   */
    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */

    PFMUL     ( REGIND(ECX), MM0 )                  /* x0*m[1]  | x0*m[0]        */
    MOVQ      ( MM2, MM3 )                          /* x1 | x1                   */

    PFMUL     ( REGOFF(8, ECX), MM1 )               /* x0*m[3]  | x0*m[2]        */
    PUNPCKLDQ ( MM4, MM4 )                          /* x2 | x2                   */

    PFMUL     ( REGOFF(16, ECX), MM2 )              /* x1*m[5]  | x1*m[4]        */
    MOVQ      ( MM4, MM5 )                          /* x2 | x2                   */

    PFMUL     ( REGOFF(24, ECX), MM3 )              /* x1*m[7]  | x1*m[6]        */
    PUNPCKHDQ ( MM6, MM6 )                          /* x3 | x3                   */

    PFMUL     ( REGOFF(32, ECX), MM4 )              /* x2*m[9]  | x2*m[8]        */
    MOVQ      ( MM6, MM7 )                          /* x3 | x3                   */

    PFMUL     ( REGOFF(40, ECX), MM5 )              /* x2*m[11] | x2*m[10]       */
    PFADD     ( MM0, MM2 )                          /* x0,x1 terms of r1 | r0    */

    PFMUL     ( REGOFF(48, ECX), MM6 )              /* x3*m[13] | x3*m[12]       */
    PFADD     ( MM1, MM3 )                          /* x0,x1 terms of r3 | r2    */

    PFMUL     ( REGOFF(56, ECX), MM7 )              /* x3*m[15] | x3*m[14]       */
    PFADD     ( MM4, MM6 )                          /* x2,x3 terms of r1 | r0    */

    PFADD     ( MM5, MM7 )                          /* x2,x3 terms of r3 | r2    */
    PFADD     ( MM2, MM6 )                          /* r1 | r0                   */

    PFADD     ( MM3, MM7 )                          /* r3 | r2                   */
    MOVQ      ( MM6, REGOFF(-16, EDX) )             /* store r0, r1              */

    MOVQ      ( MM7, REGOFF(-8, EDX) )              /* store r2, r3              */

    DEC_L     ( ESI )                               /* one vertex done           */
    JNZ       ( LLBL( G3TPGR_LOOP ) )               /* more left -> loop         */

LLBL( G3TPGR_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
124
125
126
127
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms 4-component vertices using only five matrix elements.  With
 * x0..x3 the floats of a source vertex and m[n] the float at byte offset
 * 4*n from the matrix pointer, every output vertex is
 *
 *     r0 = x0*m[0]  + x2*m[8]
 *     r1 = x1*m[5]  + x2*m[9]
 *     r2 = x2*m[10] + x3*m[14]
 *     r3 = -x2
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *     EDI  source stride
 *     MM0  m[5]  | m[0]       MM1  m[14] | m[10]
 *     MM2  m[9]  | m[8]       MM7  0     | 0
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and the
 * MMX registers are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
GLNAME( _mesa_3dnow_transform_points4_perspective ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_DONE ) )               /* nothing to transform      */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* loop-invariant matrix pairs */
    MOVD      ( REGIND(ECX), MM0 )                  /*   0   | m[0]              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )              /* m[5]  | m[0]              */

    MOVD      ( REGOFF(40, ECX), MM1 )              /*   0   | m[10]             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )              /* m[14] | m[10]             */

    MOVQ      ( REGOFF(32, ECX), MM2 )              /* m[9]  | m[8]              */
    PXOR      ( MM7, MM7 )                          /*   0   |   0               */

ALIGNTEXT16
LLBL( G3TPPR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM4 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM5 )               /* x3 | x2                   */
    MOVD      ( REGOFF(8, EAX), MM3 )               /*  0 | x2                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGOFF(32, EAX) )                   /* 32 bytes past that vertex */

    MOVQ      ( MM5, MM6 )                          /* x3 | x2                   */
    PFMUL     ( MM0, MM4 )                          /* x1*m[5] | x0*m[0]         */

    PUNPCKLDQ ( MM5, MM5 )                          /* x2 | x2                   */
    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */

    PFMUL     ( MM2, MM5 )                          /* x2*m[9] | x2*m[8]         */
    PFSUBR    ( MM7, MM3 )                          /* 0 - MM3:  0 | -x2         */

    PFMUL     ( MM1, MM6 )                          /* x3*m[14] | x2*m[10]       */
    PFADD     ( MM4, MM5 )                          /* r1 | r0                   */

    PFACC     ( MM3, MM6 )                          /* -x2 | x2*m[10]+x3*m[14]   */
    MOVQ      ( MM5, REGOFF(-16, EDX) )             /* store r0, r1              */

    MOVQ      ( MM6, REGOFF(-8, EDX) )              /* store r2, r3              */
    DEC_L     ( ESI )                               /* one vertex done           */

    JNZ       ( LLBL( G3TPPR_LOOP ) )               /* more left -> loop         */

LLBL( G3TPPR_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
203
204
205
206
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transforms 4-component vertices by the first three matrix rows and
 * passes the fourth component through.  With x0..x3 the floats of a
 * source vertex and m[n] the float at byte offset 4*n from the matrix
 * pointer, every output vertex is
 *
 *     r0 = x0*m[0] + x1*m[4] + x2*m[8]  + x3*m[12]
 *     r1 = x0*m[1] + x1*m[5] + x2*m[9]  + x3*m[13]
 *     r2 = x0*m[2] + x1*m[6] + x2*m[10] + x3*m[14]
 *     r3 = x3
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ECX  matrix
 *     ESI  vertices still to process
 *     EDI  source stride
 *     MM6  m[6]  | m[2]       MM7  m[14] | m[10]
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and
 * MM0-MM7 are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
GLNAME( _mesa_3dnow_transform_points4_3d ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_DONE ) )               /* nothing to transform      */

    /* loop-invariant matrix pairs used for r2 */
    MOVD      ( REGOFF(8, ECX), MM6 )               /*   0   | m[2]              */
    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )              /* m[6]  | m[2]              */

    MOVD      ( REGOFF(40, ECX), MM7 )              /*   0   | m[10]             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )              /* m[14] | m[10]             */

ALIGNTEXT16
LLBL( G3TP3R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */
    PREFETCH  ( REGOFF(32, EAX) )                   /* 32 bytes past this vertex */

    MOVQ      ( REGIND(EAX), MM2 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM3 )               /* x3 | x2                   */

    MOVQ      ( MM2, MM0 )                          /* x1 | x0                   */
    MOVQ      ( MM3, MM4 )                          /* x3 | x2                   */

    MOVQ      ( MM0, MM1 )                          /* x1 | x0                   */
    MOVQ      ( MM4, MM5 )                          /* x3 | x2                   */

    PUNPCKLDQ ( MM0, MM0 )                          /* x0 | x0                   */
    PUNPCKHDQ ( MM1, MM1 )                          /* x1 | x1                   */

    PFMUL     ( REGIND(ECX), MM0 )                  /* x0*m[1] | x0*m[0]         */
    PUNPCKLDQ ( MM3, MM3 )                          /* x2 | x2                   */

    PFMUL     ( REGOFF(16, ECX), MM1 )              /* x1*m[5] | x1*m[4]         */
    PUNPCKHDQ ( MM4, MM4 )                          /* x3 | x3                   */

    PFMUL     ( MM6, MM2 )                          /* x1*m[6] | x0*m[2]         */
    PFADD     ( MM0, MM1 )                          /* x0,x1 terms of r1 | r0    */

    PFMUL     ( REGOFF(32, ECX), MM3 )              /* x2*m[9] | x2*m[8]         */
    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */

    PFMUL     ( REGOFF(48, ECX), MM4 )              /* x3*m[13] | x3*m[12]       */
    PFADD     ( MM1, MM3 )                          /* x0..x2 terms of r1 | r0   */

    PFMUL     ( MM7, MM5 )                          /* x3*m[14] | x2*m[10]       */
    PFADD     ( MM3, MM4 )                          /* r1 | r0                   */

    PFACC     ( MM2, MM5 )              /* x0*m[2]+x1*m[6] | x2*m[10]+x3*m[14]   */
    MOVD      ( REGOFF(12, EAX), MM0 )              /*  0 | x3                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PFACC     ( MM0, MM5 )                          /* r3 (= x3) | r2            */

    MOVQ      ( MM4, REGOFF(-16, EDX) )             /* store r0, r1              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )              /* store r2, r3              */

    DEC_L     ( ESI )                               /* one vertex done           */
    JNZ       ( LLBL( G3TP3R_LOOP ) )               /* more left -> loop         */

LLBL( G3TP3R_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
292
293
294
295
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transforms 4-component vertices using only the three diagonal matrix
 * elements m[0], m[5], m[10] and the three elements m[12..14].  With
 * x0..x3 the floats of a source vertex and m[n] the float at byte offset
 * 4*n from the matrix pointer, every output vertex is
 *
 *     r0 = x0*m[0]  + x3*m[12]
 *     r1 = x1*m[5]  + x3*m[13]
 *     r2 = x2*m[10] + x3*m[14]
 *     r3 = x3
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *     EDI  source stride
 *     MM0  m[5]  | m[0]       MM1  m[13] | m[12]
 *     MM2  m[14] | m[10]
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and the
 * MMX registers are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_DONE ) )             /* nothing to transform      */

    /* loop-invariant matrix pairs */
    MOVD      ( REGIND(ECX), MM0 )                  /*   0   | m[0]              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )              /* m[5]  | m[0]              */

    MOVD      ( REGOFF(40, ECX), MM2 )              /*   0   | m[10]             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )              /* m[14] | m[10]             */

    MOVQ      ( REGOFF(48, ECX), MM1 )              /* m[13] | m[12]             */

ALIGNTEXT16
LLBL( G3TP3NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM4 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM5 )               /* x3 | x2                   */
    MOVD      ( REGOFF(12, EAX), MM7 )              /*  0 | x3                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGOFF(32, EAX) )                   /* 32 bytes past that vertex */

    MOVQ      ( MM5, MM6 )                          /* x3 | x2                   */
    PFMUL     ( MM0, MM4 )                          /* x1*m[5] | x0*m[0]         */

    PUNPCKHDQ ( MM6, MM6 )                          /* x3 | x3                   */
    PFMUL     ( MM2, MM5 )                          /* x3*m[14] | x2*m[10]       */

    PFMUL     ( MM1, MM6 )                          /* x3*m[13] | x3*m[12]       */
    PFACC     ( MM7, MM5 )                          /* x3 | x2*m[10]+x3*m[14]    */

    PFADD     ( MM6, MM4 )                          /* r1 | r0                   */
    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */

    MOVQ      ( MM4, REGOFF(-16, EDX) )             /* store r0, r1              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )              /* store r2, r3              */

    DEC_L     ( ESI )                               /* one vertex done           */
    JNZ       ( LLBL( G3TP3NRR_LOOP ) )             /* more left -> loop         */

LLBL( G3TP3NRR_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
364
365
366
367
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transforms the first two components of 4-component vertices and copies
 * the last two unchanged.  With x0..x3 the floats of a source vertex and
 * m[n] the float at byte offset 4*n from the matrix pointer, every output
 * vertex is
 *
 *     r0 = x0*m[0] + x1*m[4] + x3*m[12]
 *     r1 = x0*m[1] + x1*m[5] + x3*m[13]
 *     r2 = x2
 *     r3 = x3
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *     EDI  source stride
 *     MM0  m[4]  | m[0]       MM1  m[5]  | m[1]
 *     MM2  m[13] | m[12]
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and the
 * MMX registers are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
GLNAME( _mesa_3dnow_transform_points4_2d ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_DONE ) )               /* nothing to transform      */

    /* loop-invariant matrix pairs */
    MOVD      ( REGIND(ECX), MM0 )                  /*   0   | m[0]              */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )              /* m[4]  | m[0]              */

    MOVD      ( REGOFF(4, ECX), MM1 )               /*   0   | m[1]              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )              /* m[5]  | m[1]              */

    MOVQ      ( REGOFF(48, ECX), MM2 )              /* m[13] | m[12]             */

ALIGNTEXT16
LLBL( G3TP2R_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM3 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM5 )               /* x3 | x2                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM3, MM4 )                          /* x1 | x0                   */
    MOVQ      ( MM5, MM6 )                          /* x3 | x2                   */

    PFMUL     ( MM1, MM4 )                          /* x1*m[5] | x0*m[1]         */
    PUNPCKHDQ ( MM6, MM6 )                          /* x3 | x3                   */

    PFMUL     ( MM0, MM3 )                          /* x1*m[4] | x0*m[0]         */
    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */

    PFACC     ( MM4, MM3 )              /* x0*m[1]+x1*m[5] | x0*m[0]+x1*m[4]     */
    PFMUL     ( MM2, MM6 )                          /* x3*m[13] | x3*m[12]       */

    PFADD     ( MM6, MM3 )                          /* r1 | r0                   */
    MOVQ      ( MM5, REGOFF(-8, EDX) )              /* store r2, r3 (= x2, x3)   */

    MOVQ      ( MM3, REGOFF(-16, EDX) )             /* store r0, r1              */

    DEC_L     ( ESI )                               /* one vertex done           */
    JNZ       ( LLBL( G3TP2R_LOOP ) )               /* more left -> loop         */

LLBL( G3TP2R_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
438
439
440
441
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Transforms the first two components of 4-component vertices using only
 * m[0], m[5], m[12] and m[13], and copies the last two unchanged.  With
 * x0..x3 the floats of a source vertex and m[n] the float at byte offset
 * 4*n from the matrix pointer, every output vertex is
 *
 *     r0 = x0*m[0] + x3*m[12]
 *     r1 = x1*m[5] + x3*m[13]
 *     r2 = x2
 *     r3 = x3
 *
 * Operands are taken from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  Before
 * the loop the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed
 * into its V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *     EDI  source stride
 *     MM0  m[5]  | m[0]       MM1  m[13] | m[12]
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags and the
 * MMX registers are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): all ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 *
 * Lane comments below are written "high dword | low dword".
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    /* switch to the loop register assignment */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( ESI, ECX )                          /* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_DONE ) )             /* nothing to transform      */

    /* loop-invariant matrix pairs */
    MOVD      ( REGIND(ECX), MM0 )                  /*   0   | m[0]              */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )              /* m[5]  | m[0]              */

    MOVQ      ( REGOFF(48, ECX), MM1 )              /* m[13] | m[12]             */

ALIGNTEXT16
LLBL( G3TP2NRR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM4 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM5 )               /* x3 | x2                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )                          /* x1*m[5] | x0*m[0]         */
    MOVQ      ( MM5, MM6 )                          /* x3 | x2                   */

    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */
    PUNPCKHDQ ( MM6, MM6 )                          /* x3 | x3                   */

    PFMUL     ( MM1, MM6 )                          /* x3*m[13] | x3*m[12]       */
    PFADD     ( MM4, MM6 )                          /* r1 | r0                   */

    MOVQ      ( MM6, REGOFF(-16, EDX) )             /* store r0, r1              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )              /* store r2, r3 (= x2, x3)   */

    DEC_L     ( ESI )                               /* one vertex done           */

    JNZ       ( LLBL( G3TP2NRR_LOOP ) )             /* more left -> loop         */

LLBL( G3TP2NRR_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
505
506
507
508
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies 4-component vertices from the source to the destination without
 * any arithmetic: each 16-byte source vertex is moved as two quadwords
 * into the next 16-byte output slot.
 *
 * Operands are taken from ARG_DEST and ARG_SOURCE.  The matrix operand is
 * never needed for a plain copy, so it is not loaded.  Before the loop
 * the destination's V4F_SIZE is set to 4, VEC_SIZE_4 is OR-ed into its
 * V4F_FLAGS and its V4F_COUNT is copied from the source.
 *
 * Registers inside the loop:
 *     EAX  current source vertex, advanced by the source V4F_STRIDE
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *     EDI  source stride
 *
 * ESI and EDI are saved and restored.  EAX, ECX, EDX, the flags, MM0 and
 * MM1 are overwritten; FEMMS is executed before returning.
 *
 * NOTE(review): both ARG_* reads sit between the first and the second
 * push, which is presumably what FRAME_OFFSET == 4 accounts for --
 * confirm in xform_args.h before moving either push.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
GLNAME( _mesa_3dnow_transform_points4_identity ):

    PUSH_L    ( ESI )

    /* fetch operands, stamp size/flags/count on the destination */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )       /* ESI = vertex count, read once */
    MOV_L     ( ESI, REGOFF(V4F_COUNT, ECX) )       /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )       /* EDX = first output slot   */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )      /* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )       /* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_DONE ) )               /* nothing to copy           */

ALIGNTEXT16
LLBL( G3TPIR_LOOP ):

    PREFETCHW ( REGOFF(32, EDX) )                   /* two output slots ahead    */

    MOVQ      ( REGIND(EAX), MM0 )                  /* x1 | x0                   */
    MOVQ      ( REGOFF(8, EAX), MM1 )               /* x3 | x2                   */

    ADD_L     ( EDI, EAX )                          /* step to next source vertex */
    PREFETCH  ( REGIND(EAX) )

    ADD_L     ( CONST(16), EDX )                    /* step to next output slot  */
    MOVQ      ( MM0, REGOFF(-16, EDX) )             /* store r0, r1 (= x0, x1)   */

    MOVQ      ( MM1, REGOFF(-8, EDX) )              /* store r2, r3 (= x2, x3)   */

    DEC_L     ( ESI )                               /* one vertex done           */
    JNZ       ( LLBL( G3TPIR_LOOP ) )               /* more left -> loop         */

LLBL( G3TPIR_DONE ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
559 #endif