Merge branch 'gallium-vertex-linear' into gallium-0.1
[mesa.git] / src / mesa / x86 / 3dnow_xform4.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 3.5
5 *
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 #ifdef USE_3DNOW_ASM
27 #include "matypes.h"
28 #include "xform_args.h"
29
30 SEG_TEXT
31 /* NOTE(review): FRAME_OFFSET is not referenced directly in this file; presumably the ARG_* macros from xform_args.h use it to skip the 4 bytes of the ESI push that precedes every argument load -- confirm against xform_args.h. */
32 #define FRAME_OFFSET 4
33
34
35 ALIGNTEXT16
36 GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
37 HIDDEN(_mesa_3dnow_transform_points4_general)
38 GLNAME( _mesa_3dnow_transform_points4_general ):
39 /* Transform 4-component float vertices by a full 4x4 matrix (all of m0..m15 are read); one vertex per loop pass. */
40 PUSH_L ( ESI ) /* save ESI; restored before RET */
41
42 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
43 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
44 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
45 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
46 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
47 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
48 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
49
50 PUSH_L ( EDI ) /* save EDI; restored before RET */
51 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
52 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
53 MOV_L ( ESI, ECX ) /* ECX = matrix */
54 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
55 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
56 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
57
58 TEST_L ( ESI, ESI )
59 JZ ( LLBL( G3TPGR_2 ) ) /* count == 0: skip the loop */
60
61 PREFETCHW ( REGIND(EDX) ) /* prefetch the first dest vertex for writing */
62
63 ALIGNTEXT16
64 LLBL( G3TPGR_1 ):
65
66 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
67
68 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
69 MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
70
71 ADD_L ( EDI, EAX ) /* next vertex */
72 PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */
73
74 MOVQ ( MM0, MM2 ) /* x1 | x0 */
75 MOVQ ( MM4, MM6 ) /* x3 | x2 */
76
77 PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
78 PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
79
80 MOVQ ( MM0, MM1 ) /* x0 | x0 */
81 ADD_L ( CONST(16), EDX ) /* next r */
82
83 PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
84 MOVQ ( MM2, MM3 ) /* x1 | x1 */
85
86 PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
87 PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
88
89 PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
90 MOVQ ( MM4, MM5 ) /* x2 | x2 */
91
92 PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
93 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
94
95 PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
96 MOVQ ( MM6, MM7 ) /* x3 | x3 */
97
98 PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
99 PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
100
101 PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
102 PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
103
104 PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
105 PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */
106
107 PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */
108 PFADD ( MM2, MM6 ) /* r1 | r0 */
109
110 PFADD ( MM3, MM7 ) /* r3 | r2 */
111 MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 (EDX was already advanced by 16) */
112
113 MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */
114
115 DEC_L ( ESI ) /* decrement vertex counter */
116 JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
117
118 LLBL( G3TPGR_2 ):
119
120 FEMMS /* leave MMX/3DNow! state before returning */
121 POP_L ( EDI )
122 POP_L ( ESI )
123 RET
124
125
126
127
128 ALIGNTEXT16
129 GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
130 HIDDEN(_mesa_3dnow_transform_points4_perspective)
131 GLNAME( _mesa_3dnow_transform_points4_perspective ):
132 /* Transform 4-component float vertices reading only m00, m11, m20, m21, m22, m32: r0 = x0*m00+x2*m20, r1 = x1*m11+x2*m21, r2 = x2*m22+x3*m32, r3 = -x2 (no matrix element is read for r3). */
133 PUSH_L ( ESI ) /* save ESI; restored before RET */
134
135 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
136 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
137 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
138 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
139 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
140 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
141 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
142
143 PUSH_L ( EDI ) /* save EDI; restored before RET */
144 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
145 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
146 MOV_L ( ESI, ECX ) /* ECX = matrix */
147 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
148 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
149 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
150
151 TEST_L ( ESI, ESI )
152 JZ ( LLBL( G3TPPR_2 ) ) /* count == 0: skip the loop */
153
154 PREFETCH ( REGIND(EAX) ) /* prefetch the first source vertex */
155 PREFETCHW ( REGIND(EDX) ) /* prefetch the first dest vertex for writing */
156 /* Matrix constants are loaded once and stay in MM0, MM1, MM2 and MM7 for the whole loop. */
157 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
158 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
159
160 MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
161 PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
162
163 MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
164 PXOR ( MM7, MM7 ) /* 0 | 0 */
165
166 ALIGNTEXT16
167 LLBL( G3TPPR_1 ):
168
169 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
170
171 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
172 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
173 MOVD ( REGOFF(8, EAX), MM3 ) /* 0 | x2 (MOVD clears the upper half) */
174
175 ADD_L ( EDI, EAX ) /* next vertex */
176 PREFETCH ( REGOFF(32, EAX) ) /* prefetch 32 bytes past the next vertex; NOTE(review): original note read "hopefully stride is zero" -- presumably means no padding between vertices, confirm */
177
178 MOVQ ( MM5, MM6 ) /* x3 | x2 */
179 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
180
181 PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
182 ADD_L ( CONST(16), EDX ) /* next r */
183
184 PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
185 PFSUBR ( MM7, MM3 ) /* 0 | -x2 (reverse subtract: MM7 - MM3) */
186
187 PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
188 PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
189
190 PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
191 MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
192
193 MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
194 DEC_L ( ESI ) /* decrement vertex counter */
195
196 JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
197
198 LLBL( G3TPPR_2 ):
199
200 FEMMS /* leave MMX/3DNow! state before returning */
201 POP_L ( EDI )
202 POP_L ( ESI )
203 RET
204
205
206
207
208 ALIGNTEXT16
209 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
210 HIDDEN(_mesa_3dnow_transform_points4_3d)
211 GLNAME( _mesa_3dnow_transform_points4_3d ):
212 /* Transform 4-component float vertices reading m0/m1/m2, m4/m5/m6, m8/m9/m10 and m12/m13/m14; r0..r2 are full 4-term dot products and r3 is x3 plus the zero upper half of MM0. */
213 PUSH_L ( ESI ) /* save ESI; restored before RET */
214
215 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
216 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
217 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
218 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
219 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
220 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
221 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
222
223 PUSH_L ( EDI ) /* save EDI; restored before RET */
224 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
225 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
226 MOV_L ( ESI, ECX ) /* ECX = matrix */
227 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
228 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
229 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
230
231 TEST_L ( ESI, ESI )
232 JZ ( LLBL( G3TP3R_2 ) ) /* count == 0: skip the loop */
233 /* The third-column elements are packed once into MM6/MM7; the other matrix elements are read from memory inside the loop. */
234 MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
235 PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
236
237 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
238 PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
239
240 ALIGNTEXT16
241 LLBL( G3TP3R_1 ):
242
243 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
244 PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
245
246 MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
247 MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
248
249 MOVQ ( MM2, MM0 ) /* x1 | x0 */
250 MOVQ ( MM3, MM4 ) /* x3 | x2 */
251
252 MOVQ ( MM0, MM1 ) /* x1 | x0 */
253 MOVQ ( MM4, MM5 ) /* x3 | x2 */
254
255 PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
256 PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
257
258 PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
259 PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
260
261 PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
262 PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
263
264 PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
265 PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
266
267 PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
268 ADD_L ( CONST(16), EDX ) /* next r */
269
270 PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
271 PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
272
273 PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
274 PFADD ( MM3, MM4 ) /* r1 | r0 */
275
276 PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
277 MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */
278
279 ADD_L ( EDI, EAX ) /* next vertex */
280 PFACC ( MM0, MM5 ) /* r3 | r2 */
281
282 MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
283 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
284
285 DEC_L ( ESI ) /* decrement vertex counter */
286 JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
287
288 LLBL( G3TP3R_2 ):
289
290 FEMMS /* leave MMX/3DNow! state before returning */
291 POP_L ( EDI )
292 POP_L ( ESI )
293 RET
294
295
296
297
298 ALIGNTEXT16
299 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
300 HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
301 GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
302 /* Transform 4-component float vertices reading only m00, m11, m22, m30, m31, m32: r0 = x0*m00+x3*m30, r1 = x1*m11+x3*m31, r2 = x2*m22+x3*m32, r3 = x3 plus the zero upper half of MM7. */
303 PUSH_L ( ESI ) /* save ESI; restored before RET */
304 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
305 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
306 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
307 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
308 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
309 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
310 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
311
312 PUSH_L ( EDI ) /* save EDI; restored before RET */
313 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
314 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
315 MOV_L ( ESI, ECX ) /* ECX = matrix */
316 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
317 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
318 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
319
320 TEST_L ( ESI, ESI )
321 JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0: skip the loop */
322 /* Matrix constants are loaded once and stay in MM0, MM1 and MM2 for the whole loop. */
323 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
324 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
325
326 MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
327 PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
328
329 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
330
331 ALIGNTEXT16
332 LLBL( G3TP3NRR_1 ):
333
334 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
335
336 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
337 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
338 MOVD ( REGOFF(12, EAX), MM7 ) /* 0 | x3 (MOVD clears the upper half) */
339
340 ADD_L ( EDI, EAX ) /* next vertex */
341 PREFETCH ( REGOFF(32, EAX) ) /* prefetch 32 bytes past the next vertex; NOTE(review): original note read "hopefully stride is zero" -- presumably means no padding between vertices, confirm */
342
343 MOVQ ( MM5, MM6 ) /* x3 | x2 */
344 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
345
346 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
347 PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
348
349 PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
350 PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
351
352 PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
353 ADD_L ( CONST(16), EDX ) /* next r */
354
355 MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
356 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
357
358 DEC_L ( ESI ) /* decrement vertex counter */
359 JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
360
361 LLBL( G3TP3NRR_2 ):
362
363 FEMMS /* leave MMX/3DNow! state before returning */
364 POP_L ( EDI )
365 POP_L ( ESI )
366 RET
367
368
369
370
371 ALIGNTEXT16
372 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
373 HIDDEN(_mesa_3dnow_transform_points4_2d)
374 GLNAME( _mesa_3dnow_transform_points4_2d ):
375 /* Transform 4-component float vertices reading only m00, m01, m10, m11, m30, m31: r0 = x0*m00+x1*m10+x3*m30, r1 = x0*m01+x1*m11+x3*m31; x2 and x3 are copied through to r2 and r3. */
376 PUSH_L ( ESI ) /* save ESI; restored before RET */
377
378 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
379 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
380 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
381 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
382 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
383 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
384 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
385
386 PUSH_L ( EDI ) /* save EDI; restored before RET */
387 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
388 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
389 MOV_L ( ESI, ECX ) /* ECX = matrix */
390 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
391 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
392 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
393
394 TEST_L ( ESI, ESI )
395 JZ ( LLBL( G3TP2R_2 ) ) /* count == 0: skip the loop */
396 /* Matrix constants are loaded once and stay in MM0, MM1 and MM2 for the whole loop. */
397 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
398 PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
399
400 MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
401 PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
402
403 MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
404
405 ALIGNTEXT16
406 LLBL( G3TP2R_1 ):
407
408 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
409
410 MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
411 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
412
413 ADD_L ( EDI, EAX ) /* next vertex */
414 PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */
415
416 MOVQ ( MM3, MM4 ) /* x1 | x0 */
417 MOVQ ( MM5, MM6 ) /* x3 | x2 */
418
419 PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
420 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
421
422 PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
423 ADD_L ( CONST(16), EDX ) /* next r */
424
425 PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
426 PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
427
428 PFADD ( MM6, MM3 ) /* r1 | r0 */
429 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
430
431 MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
432
433 DEC_L ( ESI ) /* decrement vertex counter */
434 JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
435
436 LLBL( G3TP2R_2 ):
437
438 FEMMS /* leave MMX/3DNow! state before returning */
439 POP_L ( EDI )
440 POP_L ( ESI )
441 RET
442
443
444
445
446 ALIGNTEXT16
447 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
448 HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
449 GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
450 /* Transform 4-component float vertices reading only m00, m11, m30, m31: r0 = x0*m00+x3*m30, r1 = x1*m11+x3*m31; x2 and x3 are copied through to r2 and r3. */
451 PUSH_L ( ESI ) /* save ESI; restored before RET */
452
453 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
454 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below) */
455 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
456 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
457 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
458 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
459 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
460
461 PUSH_L ( EDI ) /* save EDI; restored before RET */
462 /* Loop registers: EAX = source data, EDX = dest data, ECX = matrix, ESI = vertices left, EDI = source stride in bytes. */
463 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
464 MOV_L ( ESI, ECX ) /* ECX = matrix */
465 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
466 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
467 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
468
469 TEST_L ( ESI, ESI )
470 JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0: skip the loop */
471 /* Matrix constants are loaded once and stay in MM0 and MM1 for the whole loop. */
472 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
473 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
474
475 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
476
477 ALIGNTEXT16
478 LLBL( G3TP2NRR_2 ):
479
480 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
481
482 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
483 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
484
485 ADD_L ( EDI, EAX ) /* next vertex */
486 PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */
487
488 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
489 MOVQ ( MM5, MM6 ) /* x3 | x2 */
490
491 ADD_L ( CONST(16), EDX ) /* next r */
492 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
493
494 PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
495 PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
496
497 MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
498 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
499
500 DEC_L ( ESI ) /* decrement vertex counter */
501
502 JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
503
504 LLBL( G3TP2NRR_3 ):
505
506 FEMMS /* leave MMX/3DNow! state before returning */
507 POP_L ( EDI )
508 POP_L ( ESI )
509 RET
510
511
512
513
514 ALIGNTEXT16
515 GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
516 HIDDEN(_mesa_3dnow_transform_points4_identity)
517 GLNAME( _mesa_3dnow_transform_points4_identity ):
518 /* Copy each 16-byte source vertex to the destination unchanged; the matrix pointer is loaded into ECX but never dereferenced. */
519 PUSH_L ( ESI ) /* save ESI; restored before RET */
520
521 MOV_L ( ARG_DEST, ECX ) /* ECX = destination argument */
522 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix argument (moved to ECX below, unused afterwards) */
523 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source argument */
524 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
525 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags byte */
526 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
527 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
528
529 PUSH_L ( EDI ) /* save EDI; restored before RET */
530 /* Loop registers: EAX = source data, EDX = dest data, ESI = vertices left, EDI = source stride in bytes. */
531 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
532 MOV_L ( ESI, ECX ) /* ECX = matrix (not read by the loop) */
533 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertices left to process */
534 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
535 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
536
537 TEST_L ( ESI, ESI )
538 JZ ( LLBL( G3TPIR_2 ) ) /* count == 0: skip the loop */
539
540 ALIGNTEXT16
541 LLBL( G3TPIR_1 ):
542
543 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
544
545 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
546 MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
547
548 ADD_L ( EDI, EAX ) /* next vertex */
549 PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */
550
551 ADD_L ( CONST(16), EDX ) /* next r */
552 MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
553
554 MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
555
556 DEC_L ( ESI ) /* decrement vertex counter */
557 JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
558
559 LLBL( G3TPIR_2 ):
560
561 FEMMS /* leave MMX/3DNow! state before returning */
562 POP_L ( EDI )
563 POP_L ( ESI )
564 RET
565 #endif
566
567 #if defined (__ELF__) && defined (__linux__)
568 .section .note.GNU-stack,"",%progbits /* empty flags: tells the GNU linker this object does not need an executable stack */
569 #endif