3 * Mesa 3-D graphics library
6 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
28 * 3Dnow assembly code by Holger Waechtler
35 #include "norm_args.h"
/* M(i): operand for the i-th 4-byte element relative to ECX; the routines
 * below load ECX with mat->inv before using it, so M(i) names matrix
 * element m<i>. */
39 #define M(i) REGOFF(i * 4, ECX)
/* STRIDE: operand at byte offset 12 from ESI (the input vector); the loops
 * below add it to the input data pointer to step to the next normal. */
40 #define STRIDE REGOFF(12, ESI)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Multiplies every input normal by the 3x3 part of mat->inv (elements
 * m0..m2, m4..m6, m8..m10) and then rescales each result.
 *
 * The work is done in two passes over the data:
 *   1. G3TN_transform writes the transformed normals to the destination,
 *      16 bytes apart, reading the input STRIDE bytes apart.
 *   2. The destination is rescaled in place, either with the per-normal
 *      factors read through ARG_LENGTHS (pointer != 0) or with a factor
 *      derived from each vector with PFRSQRT (pointer == 0).
 *
 * When ARG_LENGTHS is 0 the matrix registers are pre-multiplied by
 * ARG_SCALE before the transform pass.
 *
 * Register use in the visible code:
 *   EDI = lengths pointer, ESI = in, EAX = dest data, EDX = in data,
 *   ECX = mat->inv, EBP = remaining count, MM3..MM7 = matrix elements.
 */
44 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
45 HIDDEN(_mesa_3dnow_transform_normalize_normals)
46 GLNAME(_mesa_3dnow_transform_normalize_normals):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
48 #define FRAME_OFFSET 12
54 MOV_L ( ARG_LENGTHS, EDI )
/* NOTE(review): ESI is read below as the input vector, but no load of
 * ARG_IN into ESI appears among the visible lines -- confirm. */
56 MOV_L ( ARG_DEST, EAX )
57 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
58 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
59 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
60 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
61 MOV_L ( ARG_MAT, ECX )
62 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Nothing to do for an empty input. */
64 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
65 JE ( LLBL (G3TN_end) )
/* Reloads the count that EBP already holds. */
67 MOV_L ( REGOFF (V4F_COUNT, ESI), EBP )
72 PUSH_L ( EDX ) /* save counter & pointer for */
73 /* the normalize pass */
/* FRAME_OFFSET is raised to match the deeper stack; presumably this keeps
 * ARG_SCALE below resolving to the right slot -- confirm. */
75 #define FRAME_OFFSET 24
/* Load the matrix elements used by the transform into MM3..MM7. */
77 MOVQ ( M(0), MM3 ) /* m1 | m0 */
78 MOVQ ( M(4), MM4 ) /* m5 | m4 */
80 MOVD ( M(2), MM5 ) /* | m2 */
81 PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */
83 MOVQ ( M(8), MM6 ) /* m9 | m8 */
/* MOVQ also brings in the element after m10; only the low result that
 * comes from MM7 is stored (see "write r2"). */
84 MOVQ ( M(10), MM7 ) /* | m10 */
/* The matrix is pre-multiplied by scale only when no lengths are given. */
86 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
87 JNE ( LLBL (G3TN_scale_end ) )
89 MOVD ( ARG_SCALE, MM0 ) /* | scale */
90 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
92 PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
93 PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
94 PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
95 PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
96 PFMUL ( MM0, MM7 ) /* | scale * m10 */
99 LLBL (G3TN_scale_end):
/* Pass 1: transform one normal per iteration. */
100 LLBL (G3TN_transform):
101 MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
102 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
104 MOVQ ( MM0, MM1 ) /* x1 | x0 */
105 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
107 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
/* EAX is advanced early; the stores below use negative offsets. */
108 ADD_L ( CONST(16), EAX ) /* next r */
110 PREFETCHW ( REGIND(EAX) )
112 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
113 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
115 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
116 PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */
118 MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
119 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
121 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
122 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
124 PFMUL ( MM7, MM2 ) /* | x2*m10 */
125 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
127 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
128 ADD_L ( STRIDE, EDX ) /* next normal */
130 PREFETCH ( REGIND(EDX) )
132 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
133 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
134 JNZ ( LLBL (G3TN_transform) )
/* Restore the pointers saved before pass 1. */
137 POP_L ( EDX ) /* end of transform --- */
138 POP_L ( EAX ) /* now normalizing ... */
141 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
142 JE ( LLBL (G3TN_norm ) ) /* calculate lengths */
/* Pass 2a: multiply each transformed normal by the factor read from EDI. */
146 LLBL (G3TN_norm_w_lengths):
148 PREFETCHW ( REGOFF(12,EAX) )
150 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
151 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
153 MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
154 PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
156 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
157 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
/* EDX is advanced here but is not otherwise read inside this loop. */
159 ADD_L ( STRIDE, EDX ) /* next normal */
160 ADD_L ( CONST(4), EDI ) /* next length */
162 PREFETCH ( REGIND(EDI) )
164 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
165 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
167 ADD_L ( CONST(16), EAX ) /* next r */
168 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
170 JNZ ( LLBL (G3TN_norm_w_lengths) )
171 JMP ( LLBL (G3TN_exit_3dnow) )
/* Pass 2b: derive the factor from each vector. The jumps to G3TN_norm
 * target this loop (NOTE(review): the label line itself is not among the
 * visible lines). */
176 PREFETCHW ( REGIND(EAX) )
178 MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
179 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
181 MOVQ ( MM0, MM3 ) /* x1 | x0 */
182 MOVQ ( MM1, MM4 ) /* | x2 */
184 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
185 ADD_L ( CONST(16), EAX ) /* next r */
187 PFMUL ( MM1, MM4 ) /* | x2*x2 */
188 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
190 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */
191 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
194 PUNPCKLDQ ( MM3, MM3 )
/* The counter is decremented here; the JNZ at the end of the loop relies
 * on the MMX/3DNow! instructions in between leaving EFLAGS untouched. */
196 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
/* Refinement instructions applied to the PFRSQRT estimate in MM5. */
199 PFRSQIT1 ( MM3, MM5 )
200 PFRCPIT2 ( MM4, MM5 )
202 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
204 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
205 PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
207 MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
208 JNZ ( LLBL (G3TN_norm) )
210 LLBL (G3TN_exit_3dnow):
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Variant of the transform+normalize routine that uses only the diagonal
 * elements m0, m5 and m10 of mat->inv. Each input normal is multiplied
 * component-wise by the diagonal and then rescaled, in a single pass:
 *   - ARG_LENGTHS != 0: the result is multiplied by the per-normal factor
 *     read through EDI (G3TNNR_norm_w_lengths).
 *   - ARG_LENGTHS == 0: the diagonal is first pre-multiplied by ARG_SCALE
 *     and the factor is derived from each transformed vector with PFRSQRT
 *     (G3TNNR_norm).
 *
 * Register use in the visible code:
 *   EDI = lengths pointer, ESI = in, EAX = dest data (16 bytes per
 *   normal), EDX = in data (STRIDE bytes per normal), ECX = mat->inv,
 *   EBP = remaining count, MM0 = m5|m0, MM2 = m10|m10.
 *
 * The length for the current normal is loaded at the top of the
 * G3TNNR_norm_w_lengths loop. The earlier form preloaded the next length
 * at the bottom of every iteration, which on the last iteration read one
 * element past the last length actually used.
 */
222 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
223 HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
224 GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
227 #define FRAME_OFFSET 12
233 MOV_L ( ARG_LENGTHS, EDI )
234 MOV_L ( ARG_IN, ESI )
235 MOV_L ( ARG_DEST, EAX )
236 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
237 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
238 MOV_L ( ARG_MAT, ECX )
239 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
240 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
241 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
/* Nothing to do for an empty input. */
243 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
244 JE ( LLBL (G3TNNR_end) )
/* Gather the diagonal of the matrix. */
248 MOVD ( M(0), MM0 ) /* | m0 */
249 PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */
251 MOVD ( M(10), MM2 ) /* | m10 */
252 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
/* The diagonal is pre-multiplied by scale only when no lengths are given. */
254 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
255 JNE ( LLBL (G3TNNR_scale_end ) )
257 MOVD ( ARG_SCALE, MM7 ) /* | scale */
258 PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */
260 PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
261 PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */
264 LLBL (G3TNNR_scale_end):
265 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
266 JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */
272 LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */
274 PREFETCHW ( REGIND(EAX) )
/* Load the factor for the current normal; EDI is advanced further down,
 * so the element after the last one used is never read. */
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */
276 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
277 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
279 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
280 ADD_L ( STRIDE, EDX ) /* next normal */
282 PREFETCH ( REGIND(EDX) )
284 PFMUL ( MM2, MM7 ) /* | x2*m10 */
/* EAX is advanced early; the stores below use negative offsets. */
285 ADD_L ( CONST(16), EAX ) /* next r */
287 PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
288 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
290 ADD_L ( CONST(4), EDI ) /* next length */
291 PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */
/* The JNZ below tests the flags from this SUB_L; the MMX stores in
 * between leave EFLAGS untouched. */
293 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
294 MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
296 MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
299 JNZ ( LLBL (G3TNNR_norm_w_lengths) )
300 JMP ( LLBL (G3TNNR_exit_3dnow) )
303 LLBL (G3TNNR_norm): /* need to calculate lengths */
305 PREFETCHW ( REGIND(EAX) )
307 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
308 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
310 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
311 ADD_L ( CONST(16), EAX ) /* next r */
313 PFMUL ( MM2, MM7 ) /* | x2*m10 */
314 MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */
316 MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
317 PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */
320 PFMUL ( MM7, MM4 ) /* | x2*x2 */
321 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */
323 PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
324 ADD_L ( STRIDE, EDX ) /* next normal */
326 PREFETCH ( REGIND(EDX) )
328 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
331 PUNPCKLDQ ( MM3, MM3 )
/* Refinement instructions applied to the PFRSQRT estimate in MM5. */
334 PFRSQIT1 ( MM3, MM5 )
/* The JNZ at the end of the loop tests the flags from this SUB_L. */
335 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
337 PFRCPIT2 ( MM4, MM5 )
338 PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */
340 MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
341 PFMUL ( MM5, MM7 ) /* | x2 (normalized) */
343 MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
344 JNZ ( LLBL (G3TNNR_norm) )
347 LLBL (G3TNNR_exit_3dnow):
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies each input normal component-wise by scale*m0, scale*m5 and
 * scale*m10 (the diagonal of mat->inv pre-multiplied by ARG_SCALE) and
 * writes the result to the destination, 16 bytes per normal.
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, EDX = in data, ECX = mat->inv,
 *   EBP = remaining count, MM0 = scale*m5|scale*m0, MM2 = scale*m10.
 */
362 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
363 HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
364 GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
367 #define FRAME_OFFSET 12
/* EAX and EDX hold in/dest only while the count is copied; both are
 * reloaded with the data pointers below. */
373 MOV_L ( ARG_IN, EAX )
374 MOV_L ( ARG_DEST, EDX )
375 MOV_L ( REGOFF(V4F_COUNT, EAX), EBP ) /* dest->count = in->count */
376 MOV_L ( EBP, REGOFF(V4F_COUNT, EDX) )
377 MOV_L ( ARG_IN, ESI )
378 MOV_L ( ARG_MAT, ECX )
379 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
380 MOV_L ( REGOFF(V4F_START, EDX), EAX ) /* dest->start */
381 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
/* Nothing to do for an empty input. */
383 CMP_L ( CONST(0), EBP )
384 JE ( LLBL (G3TRNR_end) )
388 MOVD ( ARG_SCALE, MM6 ) /* | scale */
389 PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */
/* Byte offsets 0, 20 and 40 from ECX are elements 0, 5 and 10. */
391 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
392 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
394 PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
395 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
397 PFMUL ( MM6, MM2 ) /* | scale*m10 */
/* One normal per iteration. */
400 LLBL (G3TRNR_rescale):
402 PREFETCHW ( REGIND(EAX) )
404 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
405 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
407 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
408 ADD_L ( STRIDE, EDX ) /* next normal */
410 PREFETCH ( REGIND(EDX) )
412 PFMUL ( MM2, MM5 ) /* | x2*m10 */
/* EAX is advanced early; the stores below use negative offsets. */
413 ADD_L ( CONST(16), EAX ) /* next r */
/* The JNZ below tests the flags from this SUB_L; the MMX stores in
 * between leave EFLAGS untouched. */
415 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
416 MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
418 MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
419 JNZ ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies each input normal by the 3x3 part of mat->inv (elements
 * m0..m2, m4..m6, m8..m10), with every matrix element pre-multiplied by
 * ARG_SCALE, and writes the result to the destination, 16 bytes per
 * normal.
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, EDX = in data, ECX = mat->inv,
 *   EDI = remaining count, MM3..MM7 = scaled matrix elements.
 */
434 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
435 HIDDEN(_mesa_3dnow_transform_rescale_normals)
436 GLNAME(_mesa_3dnow_transform_rescale_normals):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
439 #define FRAME_OFFSET 8
444 MOV_L ( ARG_IN, ESI )
445 MOV_L ( ARG_DEST, EAX )
446 MOV_L ( ARG_MAT, ECX )
447 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
448 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
449 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
450 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
451 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Nothing to do for an empty input. */
453 CMP_L ( CONST(0), EDI )
454 JE ( LLBL (G3TR_end) )
/* Load the matrix elements and fold the scale factor into each of them;
 * loads and multiplies are interleaved. */
458 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
460 MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
461 MOVD ( ARG_SCALE, MM0 ) /* scale */
463 MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
464 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
466 PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
467 PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */
469 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/
470 PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */
472 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
473 PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */
475 PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */
477 PFMUL ( MM0, MM7 ) /* | scale*m10 */
/* Per-normal loop; the JNZ at the end branches to G3TR_rescale
 * (NOTE(review): the label line itself is not among the visible lines).
 * MM0 is reused for input data from here on. */
482 PREFETCHW ( REGIND(EAX) )
484 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
485 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
487 MOVQ ( MM0, MM1 ) /* x1 | x0 */
488 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
490 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
/* EAX is advanced early; the stores below use negative offsets. */
491 ADD_L ( CONST(16), EAX ) /* next r */
493 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
494 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
496 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
498 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
499 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
501 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
502 ADD_L ( STRIDE, EDX ) /* next normal */
504 PREFETCH ( REGIND(EDX) )
506 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
507 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
509 PFMUL ( MM7, MM2 ) /* | x2*m10 */
510 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
512 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
513 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
515 SUB_L ( CONST(1), EDI ) /* decrement normal counter */
516 JNZ ( LLBL (G3TR_rescale) )
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies each input normal component-wise by the diagonal elements
 * m0, m5 and m10 of mat->inv and writes the result to the destination,
 * 16 bytes per normal. No scaling or normalization is applied.
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, EDX = in data, ECX = mat->inv,
 *   EDI = remaining count, MM0 = m5|m0, MM2 = m10|m10.
 */
532 GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
533 HIDDEN(_mesa_3dnow_transform_normals_no_rot)
534 GLNAME(_mesa_3dnow_transform_normals_no_rot):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
537 #define FRAME_OFFSET 8
542 MOV_L ( ARG_IN, ESI )
543 MOV_L ( ARG_DEST, EAX )
544 MOV_L ( ARG_MAT, ECX )
545 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
546 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
547 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
548 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
549 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Nothing to do for an empty input. */
551 CMP_L ( CONST(0), EDI )
552 JE ( LLBL (G3TNR_end) )
/* Byte offsets 0, 20 and 40 from ECX are elements 0, 5 and 10. */
556 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
557 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
559 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
560 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
/* One normal per iteration. */
563 LLBL (G3TNR_transform):
565 PREFETCHW ( REGIND(EAX) )
567 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
568 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
570 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
571 ADD_L ( STRIDE, EDX) /* next normal */
573 PREFETCH ( REGIND(EDX) )
575 PFMUL ( MM2, MM5 ) /* | x2*m10 */
/* EAX is advanced early; the stores below use negative offsets. */
576 ADD_L ( CONST(16), EAX ) /* next r */
/* The JNZ below tests the flags from this SUB_L; the MMX stores in
 * between leave EFLAGS untouched. */
578 SUB_L ( CONST(1), EDI ) /* decrement normal counter */
579 MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
581 MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
582 JNZ ( LLBL (G3TNR_transform) )
/*
 * _mesa_3dnow_transform_normals
 *
 * Multiplies each input normal by the 3x3 part of mat->inv (elements
 * m0..m2, m4..m6, m8..m10) and writes the result to the destination,
 * 16 bytes per normal. No scaling or normalization is applied.
 *
 * Per normal (x0, x1, x2) the stored result is:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, EDX = in data, ECX = mat->inv,
 *   EDI = remaining count, MM3..MM7 = matrix elements.
 */
599 GLOBL GLNAME(_mesa_3dnow_transform_normals)
600 HIDDEN(_mesa_3dnow_transform_normals)
601 GLNAME(_mesa_3dnow_transform_normals):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
604 #define FRAME_OFFSET 8
609 MOV_L ( ARG_IN, ESI )
610 MOV_L ( ARG_DEST, EAX )
611 MOV_L ( ARG_MAT, ECX )
612 MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
613 MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
614 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
615 MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
616 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Nothing to do for an empty input. */
618 CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
619 JE ( LLBL (G3T_end) )
/* Load the matrix elements used by the transform into MM3..MM7. */
623 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
624 MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */
626 MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
627 PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
629 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
630 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
/* One normal per iteration. */
633 LLBL (G3T_transform):
635 PREFETCHW ( REGIND(EAX) )
637 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
638 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
640 MOVQ ( MM0, MM1 ) /* x1 | x0 */
641 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
643 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
/* EAX is advanced early; the stores below use negative offsets. */
644 ADD_L ( CONST(16), EAX ) /* next r */
646 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
647 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
649 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
650 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
/* The input is read a second time for the third output component. */
652 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
653 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
655 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
656 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
658 PFMUL ( MM7, MM2 ) /* | x2*m10 */
659 ADD_L ( STRIDE, EDX ) /* next normal */
661 PREFETCH ( REGIND(EDX) )
663 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
664 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
666 MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
667 SUB_L ( CONST(1), EDI ) /* decrement normal counter */
669 JNZ ( LLBL (G3T_transform) )
/*
 * _mesa_3dnow_normalize_normals
 *
 * Rescales each input normal and writes it to the destination, 16 bytes
 * per normal. No matrix is used.
 *   - ARG_LENGTHS != 0 (G3N_norm1): each normal is multiplied by the
 *     per-normal factor read through EDX.
 *   - ARG_LENGTHS == 0 (G3N_norm2): the factor is derived from each
 *     vector with PFRSQRT.
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, ECX = in data, EDX = lengths pointer,
 *   EBP = remaining count.
 */
684 GLOBL GLNAME(_mesa_3dnow_normalize_normals)
685 HIDDEN(_mesa_3dnow_normalize_normals)
686 GLNAME(_mesa_3dnow_normalize_normals):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
689 #define FRAME_OFFSET 12
695 MOV_L ( ARG_IN, ESI )
696 MOV_L ( ARG_DEST, EAX )
697 MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
698 MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
699 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
700 MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
701 MOV_L ( ARG_LENGTHS, EDX )
/* Nothing to do for an empty input. */
703 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
704 JE ( LLBL (G3N_end) )
708 CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
709 JE ( LLBL (G3N_norm2) ) /* calculate lengths */
712 LLBL (G3N_norm1): /* use precalculated lengths */
/* This loop uses PREFETCH on the destination, while G3N_norm2 below uses
 * PREFETCHW. */
714 PREFETCH ( REGIND(EAX) )
716 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
717 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
719 MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
720 PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
722 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
723 ADD_L ( STRIDE, ECX ) /* next normal */
725 PREFETCH ( REGIND(ECX) )
727 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
728 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
730 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
731 ADD_L ( CONST(16), EAX ) /* next r */
733 ADD_L ( CONST(4), EDX ) /* next length */
734 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
736 JNZ ( LLBL (G3N_norm1) )
738 JMP ( LLBL (G3N_end1) )
741 LLBL (G3N_norm2): /* need to calculate lengths */
743 PREFETCHW ( REGIND(EAX) )
745 PREFETCH ( REGIND(ECX) )
747 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
748 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
750 MOVQ ( MM0, MM3 ) /* x1 | x0 */
751 ADD_L ( STRIDE, ECX ) /* next normal */
753 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
754 MOVQ ( MM1, MM4 ) /* | x2 */
/* EAX is advanced early; the stores below use negative offsets. */
756 ADD_L ( CONST(16), EAX ) /* next r */
757 PFMUL ( MM1, MM4 ) /* | x2*x2 */
759 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
760 PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
762 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
765 PUNPCKLDQ ( MM3, MM3 )
/* Refinement instructions applied to the PFRSQRT estimate in MM5. */
768 PFRSQIT1 ( MM3, MM5 )
/* The JNZ at the end of the loop tests the flags from this SUB_L; the
 * MMX/3DNow! instructions in between leave EFLAGS untouched. */
769 SUB_L ( CONST(1), EBP ) /* decrement normal counter */
771 PFRCPIT2 ( MM4, MM5 )
773 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
774 MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
776 PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
777 MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
779 JNZ ( LLBL (G3N_norm2) )
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies every component of each input normal by ARG_SCALE and writes
 * the result to the destination, 16 bytes per normal. No matrix is used.
 *
 * Register use in the visible code:
 *   ESI = in, EAX = dest data, ECX = in data, EDX = remaining count,
 *   MM0 = scale|scale.
 */
796 GLOBL GLNAME(_mesa_3dnow_rescale_normals)
797 HIDDEN(_mesa_3dnow_rescale_normals)
798 GLNAME(_mesa_3dnow_rescale_normals):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from norm_args.h -- confirm. */
801 #define FRAME_OFFSET 8
805 MOV_L ( ARG_IN, ESI )
806 MOV_L ( ARG_DEST, EAX )
807 MOV_L ( REGOFF(V4F_COUNT, ESI), EDX ) /* dest->count = in->count */
808 MOV_L ( EDX, REGOFF(V4F_COUNT, EAX) )
809 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
810 MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
/* Nothing to do for an empty input. */
812 CMP_L ( CONST(0), EDX )
813 JE ( LLBL (G3R_end) )
817 MOVD ( ARG_SCALE, MM0 ) /* scale */
818 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
/* Per-normal loop; the JNZ at the end branches to G3R_rescale
 * (NOTE(review): the label line itself is not among the visible lines). */
823 PREFETCHW ( REGIND(EAX) )
825 MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
826 MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
828 PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
829 ADD_L ( STRIDE, ECX ) /* next normal */
831 PREFETCH ( REGIND(ECX) )
833 PFMUL ( MM0, MM2 ) /* | x2*scale */
/* EAX is advanced early; the stores below use negative offsets. */
834 ADD_L ( CONST(16), EAX ) /* next r */
836 MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
837 MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */
839 SUB_L ( CONST(1), EDX ) /* decrement normal counter */
840 JNZ ( LLBL (G3R_rescale) )
/* On ELF/Linux builds, emit an empty .note.GNU-stack section; by GNU
 * toolchain convention this marks the object as not needing an
 * executable stack. */
851 #if defined (__ELF__) && defined (__linux__)
852 .section .note.GNU-stack,"",%progbits