3 * Mesa 3-D graphics library
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "xform_args.h"
32 #define FRAME_OFFSET 4
/*
 * _mesa_3dnow_transform_points4_general
 *
 * 3DNow! routine that processes 4-component float vertices one at a time.
 * Each input component x0..x3 is broadcast into both halves of an MMX
 * register and multiplied by a pair of matrix elements; the eight PFMULs
 * read all 16 floats at byte offsets 0..56 from ECX.
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count.
 *
 * NOTE(review): in the lines visible here nothing adds the partial products
 * MM0..MM5 into MM6/MM7 before they are stored, ECX is never reloaded with
 * the matrix pointer that was placed in ESI, nothing sets the flags tested
 * by JZ, and the G3TPGR_1 / G3TPGR_2 labels, register saves and return are
 * not shown. Confirm all of these against the complete file.
 */
36 GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
37 HIDDEN(_mesa_3dnow_transform_points4_general)
38 GLNAME( _mesa_3dnow_transform_points4_general ):
42 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
43 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
44 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
45 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
46 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
47 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
48 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
52 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
54 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
55 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
56 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
59 JZ ( LLBL( G3TPGR_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
61 PREFETCHW ( REGIND(EDX) ) /* prefetch first output vertex for writing */
66 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
68 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
69 MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
71 ADD_L ( EDI, EAX ) /* next vertex */
72 PREFETCH ( REGIND(EAX) ) /* prefetch the next input vertex */
74 MOVQ ( MM0, MM2 ) /* x1 | x0 */
75 MOVQ ( MM4, MM6 ) /* x3 | x2 */
77 PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
78 PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
80 MOVQ ( MM0, MM1 ) /* x0 | x0 */
81 ADD_L ( CONST(16), EDX ) /* next r */
83 PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
84 MOVQ ( MM2, MM3 ) /* x1 | x1 */
86 PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
87 PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
89 PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
90 MOVQ ( MM4, MM5 ) /* x2 | x2 */
92 PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
93 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
95 PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
96 MOVQ ( MM6, MM7 ) /* x3 | x3 */
98 PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
101 PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
104 PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
111 MOVQ ( MM6, REGOFF(-16, EDX) ) /* store first 8 bytes of the output vertex (EDX already advanced) */
113 MOVQ ( MM7, REGOFF(-8, EDX) ) /* store last 8 bytes of the output vertex */
115 DEC_L ( ESI ) /* decrement vertex counter */
116 JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * 3DNow! transform of 4-component float vertices that uses only the matrix
 * elements at byte offsets 0, 20, 32/36, 40 and 56 from ECX (labelled m00,
 * m11, m20/m21, m22 and m32 below). Per vertex the stored result is:
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32
 *   r3 = -x2
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count. MM0, MM1, MM2 and MM7 hold values loaded
 * once before the per-vertex code.
 *
 * NOTE(review): in the lines visible here ECX is never reloaded with the
 * matrix pointer that was placed in ESI, nothing sets the flags tested by
 * JZ, and the G3TPPR_1 / G3TPPR_2 labels, register saves and return are not
 * shown. Confirm against the complete file.
 */
129 GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
130 HIDDEN(_mesa_3dnow_transform_points4_perspective)
131 GLNAME( _mesa_3dnow_transform_points4_perspective ):
135 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
136 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
137 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
138 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
139 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
140 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
141 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
145 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
147 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
148 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
149 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
152 JZ ( LLBL( G3TPPR_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
154 PREFETCH ( REGIND(EAX) ) /* prefetch first input vertex */
155 PREFETCHW ( REGIND(EDX) ) /* prefetch first output vertex for writing */
157 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
158 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
160 MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
161 PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
163 MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
164 PXOR ( MM7, MM7 ) /* 0 | 0 */
169 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
171 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
172 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
173 MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
175 ADD_L ( EDI, EAX ) /* next vertex */
176 PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */
178 MOVQ ( MM5, MM6 ) /* x3 | x2 */
179 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
181 PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
182 ADD_L ( CONST(16), EDX ) /* next r */
184 PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
185 PFSUBR ( MM7, MM3 ) /* | -x2 */
187 PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
188 PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
190 PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
191 MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
193 MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
194 DEC_L ( ESI ) /* decrement vertex counter */
196 JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * 3DNow! transform of 4-component float vertices that uses the first three
 * floats of each 16-byte matrix group at ECX (m0..m2, m4..m6, m8..m10,
 * m12..m14). Per vertex the stored result is:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x3
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count. MM6 and MM7 hold matrix elements loaded
 * once before the per-vertex code.
 *
 * NOTE(review): in the lines visible here ECX is never reloaded with the
 * matrix pointer that was placed in ESI, nothing sets the flags tested by
 * JZ, and the G3TP3R_1 / G3TP3R_2 labels, register saves and return are not
 * shown. Confirm against the complete file.
 */
209 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
210 HIDDEN(_mesa_3dnow_transform_points4_3d)
211 GLNAME( _mesa_3dnow_transform_points4_3d ):
215 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
216 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
217 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
218 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
219 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
220 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
221 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
225 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
227 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
228 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
229 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
232 JZ ( LLBL( G3TP3R_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
234 MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
235 PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
237 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
238 PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
243 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
244 PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
246 MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
247 MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
249 MOVQ ( MM2, MM0 ) /* x1 | x0 */
250 MOVQ ( MM3, MM4 ) /* x3 | x2 */
252 MOVQ ( MM0, MM1 ) /* x1 | x0 */
253 MOVQ ( MM4, MM5 ) /* x3 | x2 */
255 PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
256 PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
258 PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
259 PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
261 PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
262 PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
264 PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
265 PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
267 PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
268 ADD_L ( CONST(16), EDX ) /* next r */
270 PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
271 PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
273 PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
274 PFADD ( MM3, MM4 ) /* r1 | r0 */
276 PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
277 MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */
279 ADD_L ( EDI, EAX ) /* next vertex */
280 PFACC ( MM0, MM5 ) /* r3 | r2 */
282 MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
283 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
285 DEC_L ( ESI ) /* decrement vertex counter */
286 JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * 3DNow! transform of 4-component float vertices that uses only the matrix
 * elements at byte offsets 0, 20, 40, 48/52 and 56 from ECX (labelled m00,
 * m11, m22, m30/m31 and m32 below). Per vertex the stored result is:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32
 *   r3 = x3
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count. MM0, MM1 and MM2 hold matrix elements
 * loaded once before the per-vertex code.
 *
 * NOTE(review): in the lines visible here ECX is never reloaded with the
 * matrix pointer that was placed in ESI, nothing sets the flags tested by
 * JZ, and the G3TP3NRR_1 / G3TP3NRR_2 labels, register saves and return are
 * not shown. Confirm against the complete file.
 */
299 GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
300 HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
301 GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
304 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
305 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
306 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
307 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
308 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
309 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
310 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
314 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
316 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
317 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
318 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
321 JZ ( LLBL( G3TP3NRR_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
323 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
324 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
326 MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
327 PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
329 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
334 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
336 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
337 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
338 MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
340 ADD_L ( EDI, EAX ) /* next vertex */
341 PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */
343 MOVQ ( MM5, MM6 ) /* x3 | x2 */
344 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
346 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
347 PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
349 PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
350 PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
352 PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
353 ADD_L ( CONST(16), EDX ) /* next r */
355 MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
356 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
358 DEC_L ( ESI ) /* decrement vertex counter */
359 JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * 3DNow! transform of 4-component float vertices that uses only the matrix
 * elements at byte offsets 0, 4, 16, 20 and 48/52 from ECX (labelled m00,
 * m01, m10, m11 and m30/m31 below). Per vertex the stored result is:
 *   r0 = x0*m00 + x1*m10 + x3*m30
 *   r1 = x0*m01 + x1*m11 + x3*m31
 *   r2 = x2, r3 = x3 (copied unchanged)
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count. MM0, MM1 and MM2 hold matrix elements
 * loaded once before the per-vertex code.
 *
 * NOTE(review): in the lines visible here ECX is never reloaded with the
 * matrix pointer that was placed in ESI, nothing sets the flags tested by
 * JZ, and the G3TP2R_1 / G3TP2R_2 labels, register saves and return are not
 * shown. Confirm against the complete file.
 */
372 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
373 HIDDEN(_mesa_3dnow_transform_points4_2d)
374 GLNAME( _mesa_3dnow_transform_points4_2d ):
378 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
379 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
380 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
381 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
382 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
383 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
384 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
388 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
390 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
391 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
392 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
395 JZ ( LLBL( G3TP2R_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
397 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
398 PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
400 MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
401 PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
403 MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
408 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
410 MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
411 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
413 ADD_L ( EDI, EAX ) /* next vertex */
414 PREFETCH ( REGIND(EAX) ) /* prefetch the next input vertex */
416 MOVQ ( MM3, MM4 ) /* x1 | x0 */
417 MOVQ ( MM5, MM6 ) /* x3 | x2 */
419 PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
420 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
422 PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
423 ADD_L ( CONST(16), EDX ) /* next r */
425 PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
426 PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
428 PFADD ( MM6, MM3 ) /* r1 | r0 */
429 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
431 MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
433 DEC_L ( ESI ) /* decrement vertex counter */
434 JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * 3DNow! transform of 4-component float vertices that uses only the matrix
 * elements at byte offsets 0, 20 and 48/52 from ECX (labelled m00, m11 and
 * m30/m31 below). Per vertex the stored result is:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2, r3 = x3 (copied unchanged)
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count. MM0 and MM1 hold matrix elements loaded
 * once before the per-vertex code.
 *
 * NOTE(review): in the lines visible here ECX is never reloaded with the
 * matrix pointer that was placed in ESI, nothing sets the flags tested by
 * JZ, and the G3TP2NRR_2 / G3TP2NRR_3 labels, register saves and return are
 * not shown. Confirm against the complete file.
 */
447 GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
448 HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
449 GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
453 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
454 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix */
455 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
456 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
457 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
458 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
459 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
463 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
465 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter (overwrites matrix pointer) */
466 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
467 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
470 JZ ( LLBL( G3TP2NRR_3 ) ) /* skip the loop on ZF (presumably count == 0 - confirm); exit label here is _3, loop label is _2 */
472 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
473 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
475 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
480 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
482 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
483 MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
485 ADD_L ( EDI, EAX ) /* next vertex */
486 PREFETCH ( REGIND(EAX) ) /* prefetch the next input vertex */
488 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
489 MOVQ ( MM5, MM6 ) /* x3 | x2 */
491 ADD_L ( CONST(16), EDX ) /* next r */
492 PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
494 PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
495 PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
497 MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
498 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
500 DEC_L ( ESI ) /* decrement vertex counter */
502 JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies 4-component float vertices from source to destination unchanged:
 * each vertex is read as two quadwords and written back as two quadwords.
 * The matrix argument is loaded into ESI but not read in the visible code;
 * ESI is then reused as the vertex counter.
 *
 * Setup: the destination's size field is set to 4, VEC_SIZE_4 is OR-ed into
 * its flags and its count is copied from the source.
 *
 * Loop registers: EAX = current input vertex (advanced by the source stride
 * in EDI), EDX = current output vertex (advanced by 16 bytes per vertex),
 * ESI = remaining vertex count.
 *
 * NOTE(review): in the lines visible here nothing sets the flags tested by
 * JZ, and the G3TPIR_1 / G3TPIR_2 labels, register saves and return are not
 * shown. Confirm against the complete file.
 */
515 GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
516 HIDDEN(_mesa_3dnow_transform_points4_identity)
517 GLNAME( _mesa_3dnow_transform_points4_identity ):
521 MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
522 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (not read below) */
523 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
524 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
525 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
526 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */
527 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
531 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start (output pointer) */
533 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
534 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride, added to EAX per vertex */
535 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start (input pointer) */
538 JZ ( LLBL( G3TPIR_2 ) ) /* skip the loop on ZF (presumably count == 0 - confirm) */
543 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
545 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
546 MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
548 ADD_L ( EDI, EAX ) /* next vertex */
549 PREFETCH ( REGIND(EAX) ) /* prefetch the next input vertex */
551 ADD_L ( CONST(16), EDX ) /* next r */
552 MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
554 MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
556 DEC_L ( ESI ) /* decrement vertex counter */
557 JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
567 #if defined (__ELF__) && defined (__linux__)
568 .section .note.GNU-stack,"",%progbits