1 /* $Id: 3dnow_normal.S,v 1.8 2004/04/26 10:10:25 alanh Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * 3Dnow assembly code by Holger Waechtler
34 #include "norm_args.h"
/* M(i): memory operand for the i-th 32-bit element (byte offset i*4) relative to ECX.
 * The argument is parenthesized so an expression argument scales as a whole. */
38 #define M(i) REGOFF((i) * 4, ECX)
/* STRIDE: memory operand at byte offset 12 from ESI; the loops below add it to the
 * input pointer to step to the next normal. */
39 #define STRIDE REGOFF(12, ESI)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1 multiplies every input normal by the m0..m10 entries of mat->inv
 * (pre-multiplied by ARG_SCALE when ARG_LENGTHS is zero) and writes the result
 * to dest with a fixed 16-byte step.  Pass 2 normalizes dest in place, either
 * with the supplied per-normal factors from ARG_LENGTHS or with a reciprocal
 * square root computed via PFRSQRT.
 *
 * NOTE(review): this listing appears to be missing lines -- no register saves,
 * no load of ESI, no restore of the counter in EBP after pass 1, no definition
 * of the G3TN_norm / G3TN_end labels and no epilogue are visible.  Confirm
 * against the complete file before relying on the comments below.
 */
43 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
44 GLNAME(_mesa_3dnow_transform_normalize_normals):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
46 #define FRAME_OFFSET 12
52 MOV_L ( ARG_LENGTHS, EDI )
54 MOV_L ( ARG_DEST, EAX )
55 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
56 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
57 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
58 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
59 MOV_L ( ARG_MAT, ECX )
60 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
62 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
63 JE ( LLBL (G3TN_end) )
65 MOV_L ( REGOFF (V4F_COUNT, ESI), EBP ) /* reload in->count */
70 PUSH_L ( EDX ) /* save counter & pointer for */
71 /* the normalize pass */
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
73 #define FRAME_OFFSET 24
75 MOVQ ( M(0), MM3 ) /* m1 | m0 */
76 MOVQ ( M(4), MM4 ) /* m5 | m4 */
78 MOVD ( M(2), MM5 ) /* | m2 */
79 PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */
81 MOVQ ( M(8), MM6 ) /* m9 | m8 */
82 MOVQ ( M(10), MM7 ) /* m11 | m10 (only the low half feeds a stored result) */
84 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
85 JNE ( LLBL (G3TN_scale_end ) ) /* lengths supplied: leave the matrix unscaled */
87 MOVD ( ARG_SCALE, MM0 ) /* | scale */
88 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
90 PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
91 PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
92 PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
93 PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
94 PFMUL ( MM0, MM7 ) /* | scale * m10 */
97 LLBL (G3TN_scale_end):
98 LLBL (G3TN_transform):
99 MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
100 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
102 MOVQ ( MM0, MM1 ) /* x1 | x0 */
103 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
105 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
106 ADD_L ( CONST(16), EAX ) /* next r */
108 PREFETCHW ( REGIND(EAX) )
110 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
111 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
113 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
114 PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */
116 MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
117 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
119 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
120 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
122 PFMUL ( MM7, MM2 ) /* | x2*m10 */
123 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
125 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
126 ADD_L ( STRIDE, EDX ) /* next normal */
128 PREFETCH ( REGIND(EDX) )
130 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
131 DEC_L ( EBP ) /* decrement normal counter */
132 JNE ( LLBL (G3TN_transform) ) /* DEC_L leaves CF untouched, so test ZF only */
135 POP_L ( EDX ) /* end of transform --- */
136 POP_L ( EAX ) /* now normalizing ... */
139 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
140 JE ( LLBL (G3TN_norm ) ) /* calculate lengths */
144 LLBL (G3TN_norm_w_lengths):
146 PREFETCHW ( REGOFF(12,EAX) )
148 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
149 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
151 MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
152 PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
154 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
155 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
157 ADD_L ( STRIDE, EDX ) /* next normal */
158 ADD_L ( CONST(4), EDI ) /* next length */
160 PREFETCH ( REGIND(EDI) )
162 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
163 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
165 ADD_L ( CONST(16), EAX ) /* next r */
166 DEC_L ( EBP ) /* decrement normal counter */
168 JNE ( LLBL (G3TN_norm_w_lengths) ) /* DEC_L leaves CF untouched, so test ZF only */
169 JMP ( LLBL (G3TN_exit_3dnow) )
/* NOTE(review): G3TN_norm is branched to above and below but is not defined in
 * this listing; presumably the loop head is here -- confirm. */
174 PREFETCHW ( REGIND(EAX) )
176 MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
177 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
179 MOVQ ( MM0, MM3 ) /* x1 | x0 */
180 MOVQ ( MM1, MM4 ) /* | x2 */
182 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
183 ADD_L ( CONST(16), EAX ) /* next r */
185 PFMUL ( MM1, MM4 ) /* | x2*x2 */
186 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
188 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */
189 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
192 PUNPCKLDQ ( MM3, MM3 )
194 DEC_L ( EBP ) /* decrement normal counter */
/* NOTE(review): as visible here MM4 still holds x2*x2 when it reaches
 * PFRCPIT2; the refinement sequence may be missing lines -- confirm. */
197 PFRSQIT1 ( MM3, MM5 )
198 PFRCPIT2 ( MM4, MM5 )
200 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
202 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
203 PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
205 MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
206 JNE ( LLBL (G3TN_norm) ) /* ZF from the DEC_L above; branch must not depend on CF */
208 LLBL (G3TN_exit_3dnow):
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Scales each input normal component-wise by m0, m5 and m10 of mat->inv
 * (pre-multiplied by ARG_SCALE when ARG_LENGTHS is zero), normalizes it and
 * writes it to dest with a fixed 16-byte step.  Normalization uses the
 * supplied per-normal factors when ARG_LENGTHS is non-zero, otherwise a
 * reciprocal square root computed via PFRSQRT.
 *
 * NOTE(review): register saves, the G3TNNR_end label and the epilogue are not
 * visible in this listing -- confirm against the complete file.
 */
220 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
221 GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
224 #define FRAME_OFFSET 12
230 MOV_L ( ARG_LENGTHS, EDI )
231 MOV_L ( ARG_IN, ESI )
232 MOV_L ( ARG_DEST, EAX )
233 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
234 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
235 MOV_L ( ARG_MAT, ECX )
236 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
237 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
238 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
240 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
241 JE ( LLBL (G3TNNR_end) )
245 MOVD ( M(0), MM0 ) /* | m0 */
246 PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */
248 MOVD ( M(10), MM2 ) /* | m10 */
249 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
251 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
252 JNE ( LLBL (G3TNNR_scale_end ) ) /* lengths supplied: leave the matrix unscaled */
254 MOVD ( ARG_SCALE, MM7 ) /* | scale */
255 PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */
257 PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
258 PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */
261 LLBL (G3TNNR_scale_end):
262 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
263 JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */
269 LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */
271 PREFETCHW ( REGIND(EAX) )
/* The length is loaded at the top of each iteration; loading the next one at
 * the bottom of the loop would read one element past the end of the lengths
 * array on the final iteration. */
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */
273 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
274 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
276 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
277 ADD_L ( STRIDE, EDX ) /* next normal */
279 PREFETCH ( REGIND(EDX) )
281 PFMUL ( MM2, MM7 ) /* | x2*m10 */
282 ADD_L ( CONST(16), EAX ) /* next r */
284 PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
285 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
287 ADD_L ( CONST(4), EDI ) /* next length */
288 PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */
290 DEC_L ( EBP ) /* decrement normal counter */
291 MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
293 MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
296 JNE ( LLBL (G3TNNR_norm_w_lengths) ) /* ZF from the DEC_L above; branch must not depend on CF */
297 JMP ( LLBL (G3TNNR_exit_3dnow) )
300 LLBL (G3TNNR_norm): /* need to calculate lengths */
302 PREFETCHW ( REGIND(EAX) )
304 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
305 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
307 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
308 ADD_L ( CONST(16), EAX ) /* next r */
310 PFMUL ( MM2, MM7 ) /* | x2*m10 */
311 MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */
313 MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
314 PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */
317 PFMUL ( MM7, MM4 ) /* | x2*x2 */
318 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */
320 PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
321 ADD_L ( STRIDE, EDX ) /* next normal */
323 PREFETCH ( REGIND(EDX) )
325 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
328 PUNPCKLDQ ( MM3, MM3 )
/* NOTE(review): as visible here MM4 still holds x2*x2 when it reaches
 * PFRCPIT2; the refinement sequence may be missing lines -- confirm. */
331 PFRSQIT1 ( MM3, MM5 )
332 DEC_L ( EBP ) /* decrement normal counter */
334 PFRCPIT2 ( MM4, MM5 )
335 PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */
337 MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
338 PFMUL ( MM5, MM7 ) /* | x2 (normalized) */
340 MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
341 JNE ( LLBL (G3TNNR_norm) ) /* ZF from the DEC_L above; branch must not depend on CF */
344 LLBL (G3TNNR_exit_3dnow):
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies each input normal component-wise by scale*m0, scale*m5 and
 * scale*m10 (m taken from mat->inv, scale from ARG_SCALE) and writes the
 * result to dest with a fixed 16-byte step.
 *
 * NOTE(review): register saves, the G3TRNR_end label and the epilogue are not
 * visible in this listing -- confirm against the complete file.
 */
359 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
360 GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
363 #define FRAME_OFFSET 12
369 MOV_L ( ARG_IN, EAX )
370 MOV_L ( ARG_DEST, EDX )
371 MOV_L ( REGOFF(V4F_COUNT, EAX), EBP ) /* dest->count = in->count */
372 MOV_L ( EBP, REGOFF(V4F_COUNT, EDX) )
373 MOV_L ( ARG_IN, ESI ) /* ESI is the base register used by STRIDE */
374 MOV_L ( ARG_MAT, ECX )
375 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
376 MOV_L ( REGOFF(V4F_START, EDX), EAX ) /* dest->start */
377 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
379 CMP_L ( CONST(0), EBP ) /* count == 0 ? nothing to do */
380 JE ( LLBL (G3TRNR_end) )
384 MOVD ( ARG_SCALE, MM6 ) /* | scale */
385 PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */
387 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
388 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
390 PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
391 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
393 PFMUL ( MM6, MM2 ) /* | scale*m10 */
396 LLBL (G3TRNR_rescale):
398 PREFETCHW ( REGIND(EAX) )
400 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
401 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
403 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
404 ADD_L ( STRIDE, EDX ) /* next normal */
406 PREFETCH ( REGIND(EDX) )
408 PFMUL ( MM2, MM5 ) /* | x2*m10 */
409 ADD_L ( CONST(16), EAX ) /* next r */
411 DEC_L ( EBP ) /* decrement normal counter */
412 MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
414 MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
415 JNE ( LLBL (G3TRNR_rescale) ) /* cnt != 0 ? -> next normal; DEC_L leaves CF untouched, so test ZF only */
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies each input normal by the m0..m10 entries of mat->inv, all
 * pre-multiplied by ARG_SCALE, and writes the result to dest with a fixed
 * 16-byte step.
 *
 * NOTE(review): register saves, the G3TR_rescale / G3TR_end labels and the
 * epilogue are not visible in this listing -- confirm against the complete
 * file.
 */
430 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
431 GLNAME(_mesa_3dnow_transform_rescale_normals):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
434 #define FRAME_OFFSET 8
439 MOV_L ( ARG_IN, ESI )
440 MOV_L ( ARG_DEST, EAX )
441 MOV_L ( ARG_MAT, ECX )
442 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
443 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
444 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
445 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
446 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
448 CMP_L ( CONST(0), EDI ) /* count == 0 ? nothing to do */
449 JE ( LLBL (G3TR_end) )
453 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
455 MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
456 MOVD ( ARG_SCALE, MM0 ) /* scale */
458 MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
459 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
461 PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
462 PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */
464 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/
465 PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */
467 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
468 PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */
470 PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */
472 PFMUL ( MM0, MM7 ) /* | scale*m10 */
/* NOTE(review): G3TR_rescale is branched to below but is not defined in this
 * listing; presumably the loop head is here -- confirm. */
477 PREFETCHW ( REGIND(EAX) )
479 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
480 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
482 MOVQ ( MM0, MM1 ) /* x1 | x0 */
483 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
485 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
486 ADD_L ( CONST(16), EAX ) /* next r */
488 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
489 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
491 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
493 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
494 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
496 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
497 ADD_L ( STRIDE, EDX ) /* next normal */
499 PREFETCH ( REGIND(EDX) )
501 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
502 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
504 PFMUL ( MM7, MM2 ) /* | x2*m10 */
505 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
507 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
508 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
510 DEC_L ( EDI ) /* decrement normal counter */
511 JNE ( LLBL (G3TR_rescale) ) /* DEC_L leaves CF untouched, so test ZF only */
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies each input normal component-wise by m0, m5 and m10 of mat->inv
 * and writes the result to dest with a fixed 16-byte step.  No scaling or
 * normalization is applied.
 *
 * NOTE(review): register saves, the G3TNR_end label and the epilogue are not
 * visible in this listing -- confirm against the complete file.
 */
527 GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
528 GLNAME(_mesa_3dnow_transform_normals_no_rot):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
531 #define FRAME_OFFSET 8
536 MOV_L ( ARG_IN, ESI )
537 MOV_L ( ARG_DEST, EAX )
538 MOV_L ( ARG_MAT, ECX )
539 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
540 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
541 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
542 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
543 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
545 CMP_L ( CONST(0), EDI ) /* count == 0 ? nothing to do */
546 JE ( LLBL (G3TNR_end) )
550 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
551 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
553 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
554 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
557 LLBL (G3TNR_transform):
559 PREFETCHW ( REGIND(EAX) )
561 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
562 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
564 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
565 ADD_L ( STRIDE, EDX) /* next normal */
567 PREFETCH ( REGIND(EDX) )
569 PFMUL ( MM2, MM5 ) /* | x2*m10 */
570 ADD_L ( CONST(16), EAX ) /* next r */
572 DEC_L ( EDI ) /* decrement normal counter */
573 MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
575 MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
576 JNE ( LLBL (G3TNR_transform) ) /* ZF from the DEC_L above; branch must not depend on CF */
/*
 * _mesa_3dnow_transform_normals
 *
 * Multiplies each input normal by the m0..m10 entries of mat->inv and writes
 * the three results to dest with a fixed 16-byte step.  No scaling or
 * normalization is applied.
 *
 * NOTE(review): register saves, the G3T_end label and the epilogue are not
 * visible in this listing -- confirm against the complete file.
 */
593 GLOBL GLNAME(_mesa_3dnow_transform_normals)
594 GLNAME(_mesa_3dnow_transform_normals):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
597 #define FRAME_OFFSET 8
602 MOV_L ( ARG_IN, ESI )
603 MOV_L ( ARG_DEST, EAX )
604 MOV_L ( ARG_MAT, ECX )
605 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
606 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
607 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
608 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
609 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
611 CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
612 JE ( LLBL (G3T_end) )
616 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
617 MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */
619 MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
620 PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
622 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
623 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
626 LLBL (G3T_transform):
628 PREFETCHW ( REGIND(EAX) )
630 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
631 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
633 MOVQ ( MM0, MM1 ) /* x1 | x0 */
634 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
636 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
637 ADD_L ( CONST(16), EAX ) /* next r */
639 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
640 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
642 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
643 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
645 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
646 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
648 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
649 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
651 PFMUL ( MM7, MM2 ) /* | x2*m10 */
652 ADD_L ( STRIDE, EDX ) /* next normal */
654 PREFETCH ( REGIND(EDX) )
656 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
657 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
659 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
660 DEC_L ( EDI ) /* decrement normal counter */
662 JNE ( LLBL (G3T_transform) ) /* DEC_L leaves CF untouched, so test ZF only */
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes each input normal and writes it to dest with a fixed 16-byte
 * step.  When ARG_LENGTHS is non-zero the supplied per-normal factor is
 * multiplied in (G3N_norm1); otherwise a reciprocal square root of the squared
 * length is computed via PFRSQRT (G3N_norm2).  No matrix is involved.
 *
 * NOTE(review): register saves, the G3N_end / G3N_end1 labels and the epilogue
 * are not visible in this listing -- confirm against the complete file.
 */
677 GLOBL GLNAME(_mesa_3dnow_normalize_normals)
678 GLNAME(_mesa_3dnow_normalize_normals):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
681 #define FRAME_OFFSET 12
687 MOV_L ( ARG_IN, ESI )
688 MOV_L ( ARG_DEST, EAX )
689 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
690 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
691 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
692 MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
693 MOV_L ( ARG_LENGTHS, EDX )
695 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
696 JE ( LLBL (G3N_end) )
700 CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
701 JE ( LLBL (G3N_norm2) ) /* calculate lengths */
704 LLBL (G3N_norm1): /* use precalculated lengths */
706 PREFETCHW ( REGIND(EAX) ) /* destination is written below, as in G3N_norm2 */
708 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
709 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
711 MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
712 PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
714 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
715 ADD_L ( STRIDE, ECX ) /* next normal */
717 PREFETCH ( REGIND(ECX) )
719 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
720 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
722 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
723 ADD_L ( CONST(16), EAX ) /* next r */
725 ADD_L ( CONST(4), EDX ) /* next length */
726 DEC_L ( EBP ) /* decrement normal counter */
728 JNE ( LLBL (G3N_norm1) ) /* DEC_L leaves CF untouched, so test ZF only */
730 JMP ( LLBL (G3N_end1) )
733 LLBL (G3N_norm2): /* need to calculate lengths */
735 PREFETCHW ( REGIND(EAX) )
737 PREFETCH ( REGIND(ECX) )
739 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
740 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
742 MOVQ ( MM0, MM3 ) /* x1 | x0 */
743 ADD_L ( STRIDE, ECX ) /* next normal */
745 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
746 MOVQ ( MM1, MM4 ) /* | x2 */
748 ADD_L ( CONST(16), EAX ) /* next r */
749 PFMUL ( MM1, MM4 ) /* | x2*x2 */
751 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
752 PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
754 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
757 PUNPCKLDQ ( MM3, MM3 )
/* NOTE(review): as visible here MM4 still holds x2*x2 when it reaches
 * PFRCPIT2; the refinement sequence may be missing lines -- confirm. */
760 PFRSQIT1 ( MM3, MM5 )
761 DEC_L ( EBP ) /* decrement normal counter */
763 PFRCPIT2 ( MM4, MM5 )
765 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
766 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
768 PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
769 MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
771 JNE ( LLBL (G3N_norm2) ) /* ZF from the DEC_L above; branch must not depend on CF */
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies all three components of each input normal by ARG_SCALE and
 * writes the result to dest with a fixed 16-byte step.  No matrix is involved.
 *
 * NOTE(review): register saves, the G3R_rescale / G3R_end labels and the
 * epilogue are not visible in this listing -- confirm against the complete
 * file.
 */
788 GLOBL GLNAME(_mesa_3dnow_rescale_normals)
789 GLNAME(_mesa_3dnow_rescale_normals):
#undef FRAME_OFFSET /* avoid redefining the macro with a different value */
792 #define FRAME_OFFSET 8
796 MOV_L ( ARG_IN, ESI )
797 MOV_L ( ARG_DEST, EAX )
798 MOV_L ( REGOFF(V4F_COUNT, ESI), EDX ) /* dest->count = in->count */
799 MOV_L ( EDX, REGOFF(V4F_COUNT, EAX) )
800 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
801 MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
803 CMP_L ( CONST(0), EDX ) /* count == 0 ? nothing to do */
804 JE ( LLBL (G3R_end) )
808 MOVD ( ARG_SCALE, MM0 ) /* scale */
809 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
/* NOTE(review): G3R_rescale is branched to below but is not defined in this
 * listing; presumably the loop head is here -- confirm. */
814 PREFETCHW ( REGIND(EAX) )
816 MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
817 MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
819 PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
820 ADD_L ( STRIDE, ECX ) /* next normal */
822 PREFETCH ( REGIND(ECX) )
824 PFMUL ( MM0, MM2 ) /* | x2*scale */
825 ADD_L ( CONST(16), EAX ) /* next r */
827 MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
828 MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */
830 DEC_L ( EDX ) /* decrement normal counter */
831 JNE ( LLBL (G3R_rescale) ) /* DEC_L leaves CF untouched, so test ZF only */