1 /* $Id: 3dnow_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * 3Dnow assembly code by Holger Waechtler
32 #include "norm_args.h"
/* M(i): memory operand for the i-th 4-byte element addressed from ECX
 * (byte offset i * 4).  The routines below load ECX with mat->inv first. */
20 #define M(i) REGOFF(i * 4, ECX)
/* STRIDE: memory operand 12 bytes past ESI.  It is added to the input
 * pointer to step to the next normal.  Presumably the stride field of the
 * input vector struct -- TODO confirm against the struct layout. */
21 #define STRIDE REGOFF(12, ESI)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1 multiplies every input normal by the 3x3 part of mat->inv
 * (elements m0..m2, m4..m6, m8..m10) and stores the result in dest,
 * 12 bytes per normal.  Pass 2 walks dest again and scales each vector,
 * either by the value read from the lengths array (lengths != 0) or by a
 * reciprocal square root derived from the vector itself (lengths == 0).
 * When lengths == 0 the cached matrix elements are pre-multiplied by
 * ARG_SCALE.
 *
 * NOTE(review): the loops close with DEC_L ... JA.  DEC leaves CF
 * untouched, so JA also depends on the carry left by the last ADD_L;
 * the MMX/3DNow! instructions in between do not modify EFLAGS.
 */
41 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
42 GLNAME(_mesa_3dnow_transform_normalize_normals):
44 #define FRAME_OFFSET 12
/* Fetch arguments: EDI = lengths, EBP = count, EDX = input pointer,
 * EAX = output pointer, ECX = mat->inv. */
50 MOV_L ( ARG_LENGTHS, EDI )
52 MOV_L ( ARG_DEST, EAX )
53 MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
54 MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
55 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
56 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
57 MOV_L ( ARG_MAT, ECX )
58 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Leave early when there are no normals to process. */
60 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
61 JE ( LLBL (G3TN_end) )
63 MOV_L ( REGOFF (V3F_COUNT, ESI), EBP )
68 PUSH_L ( EDX ) /* save counter & pointer for */
69 /* the normalize pass */
71 #define FRAME_OFFSET 24
/* Cache the matrix elements in MM3..MM7 for the whole transform pass. */
73 MOVQ ( M(0), MM3 ) /* m1 | m0 */
74 MOVQ ( M(4), MM4 ) /* m5 | m4 */
76 MOVD ( M(2), MM5 ) /* | m2 */
77 PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */
79 MOVQ ( M(8), MM6 ) /* m9 | m8 */
/* NOTE(review): MOVQ also places M(11) in the high half of MM7; the
 * sibling routines in this file load m10 with MOVD.  Only the low half
 * of the resulting product is stored (MOVD write of r2 below). */
80 MOVQ ( M(10), MM7 ) /* | m10 */
/* The scale is folded into the matrix only when no lengths array is
 * supplied; a non-zero lengths pointer skips the multiplication. */
82 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
83 JNE ( LLBL (G3TN_scale_end ) )
85 MOVD ( ARG_SCALE, MM0 ) /* | scale */
86 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
88 PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
89 PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
90 PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
91 PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
92 PFMUL ( MM0, MM7 ) /* | scale * m10 */
/* Preload the first normal so the loop can start with data in registers. */
94 LLBL (G3TN_scale_end):
95 MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
96 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
/* Pass 1: r0 = x0*m0+x1*m1+x2*m2, r1 = x0*m4+x1*m5+x2*m6,
 * r2 = x0*m8+x1*m9+x2*m10.  EAX is advanced first, so the stores use
 * negative offsets.  The next normal is loaded before the counter test,
 * so those loads also execute after the last element. */
99 LLBL (G3TN_transform):
100 MOVQ ( MM0, MM1 ) /* x1 | x0 */
101 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
103 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
104 ADD_L ( CONST(12), EAX ) /* next r */
106 PREFETCHW ( REGIND(EAX) )
108 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
109 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
111 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
112 PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6| x0*m0+...+x2**/
/* The input normal is re-read from memory for the third row because MM0
 * and MM2 were overwritten by the first two rows. */
114 MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
115 MOVQ ( MM0, REGOFF(-12, EAX) ) /* write r0, r1 */
117 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
118 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
120 PFMUL ( MM7, MM2 ) /* | x2*m10 */
121 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
123 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m*/
124 ADD_L ( STRIDE, EDX ) /* next normal */
126 PREFETCH ( REGIND(EDX) )
128 MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */
129 MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
131 MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
132 DEC_L ( EBP ) /* decrement normal counter */
133 JA ( LLBL (G3TN_transform) )
/* Restore the values pushed before pass 1. */
136 POP_L ( EDX ) /* end of transform --- */
137 POP_L ( EAX ) /* now normalizing ... */
/* Pass 2 reads the transformed vectors back through EAX. */
140 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
141 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
143 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
144 JE ( LLBL (G3TN_norm ) ) /* calculate lengths */
/* Pass 2a: multiply each stored vector in place by the next value from
 * the lengths array (EDI advances 4 bytes per normal).  This loop reads
 * and writes vector data through EAX only. */
148 LLBL (G3TN_norm_w_lengths):
150 PREFETCHW ( REGOFF(12,EAX) )
152 MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
153 PFMUL ( MM3, MM1 ) /* | x2 (normalize*/
155 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
156 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalize*/
158 ADD_L ( STRIDE, EDX ) /* next normal */
159 ADD_L ( CONST(4), EDI ) /* next length */
161 PREFETCH ( REGIND(EDI) )
163 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
164 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
166 ADD_L ( CONST(12), EAX ) /* next r */
167 DEC_L ( EBP ) /* decrement normal counter */
169 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
170 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
171 JA ( LLBL (G3TN_norm_w_lengths) )
172 JMP ( LLBL (G3TN_exit_3dnow) )
/* Pass 2b (presumably the body reached via G3TN_norm): sum the squares
 * of the components, take a reciprocal square root estimate with PFRSQRT,
 * refine it with the PFRSQIT1 / PFRCPIT2 Newton-Raphson steps, and
 * multiply the vector by the result in place. */
177 PREFETCHW ( REGIND(EAX) )
179 MOVQ ( MM0, MM3 ) /* x1 | x0 */
180 MOVQ ( MM1, MM4 ) /* | x2 */
182 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
183 ADD_L ( CONST(12), EAX ) /* next r */
185 PFMUL ( MM1, MM4 ) /* | x2*x2 */
186 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
188 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2**/
189 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
192 PUNPCKLDQ ( MM3, MM3 )
194 DEC_L ( EBP ) /* decrement normal counter */
197 PFRSQIT1 ( MM3, MM5 )
198 PFRCPIT2 ( MM4, MM5 )
200 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalize*/
202 MOVQ ( MM0, REGOFF(-12, EAX) ) /* write new x0, x1 */
203 PFMUL ( MM5, MM1 ) /* | x2 (normalize*/
205 MOVD ( MM1, REGOFF(-4, EAX) ) /* write new x2 */
206 MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
208 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
209 JA ( LLBL (G3TN_norm) )
/* Common exit point for both pass-2 variants. */
211 LLBL (G3TN_exit_3dnow):
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single-pass variant that uses only the diagonal elements m0, m5 and
 * m10 of mat->inv.  Each normal is multiplied component-wise by the
 * diagonal and then scaled, either by the value read from the lengths
 * array (lengths != 0) or by a reciprocal square root computed from the
 * transformed vector (lengths == 0).  When lengths == 0 the diagonal is
 * pre-multiplied by ARG_SCALE.
 *
 * NOTE(review): loops end with DEC_L ... JA; DEC does not write CF, so
 * the branch also depends on the carry left by the preceding ADD_L.
 */
223 GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
224 GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
227 #define FRAME_OFFSET 12
/* Fetch arguments: EDI = lengths, ESI = in, EBP = count, EAX = output
 * pointer, ECX = mat->inv, EDX = input pointer. */
233 MOV_L ( ARG_LENGTHS, EDI )
234 MOV_L ( ARG_IN, ESI )
235 MOV_L ( ARG_DEST, EAX )
236 MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
237 MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
238 MOV_L ( ARG_MAT, ECX )
239 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
240 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
241 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
/* Leave early when there are no normals to process. */
243 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
244 JE ( LLBL (G3TNNR_end) )
/* Cache the diagonal: MM0 = m5 | m0, MM2 = m10 | m10. */
248 MOVD ( M(0), MM0 ) /* | m0 */
249 PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */
251 MOVD ( M(10), MM2 ) /* | m10 */
252 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
/* The scale is folded into the diagonal only when lengths == 0. */
254 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
255 JNE ( LLBL (G3TNNR_scale_end ) )
257 MOVD ( ARG_SCALE, MM7 ) /* | scale */
258 PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */
260 PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
261 PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */
/* Preload the first normal, then pick the loop variant. */
264 LLBL (G3TNNR_scale_end):
265 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
266 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
268 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
269 JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */
271 MOVD ( REGIND(EDI), MM3 ) /* | length (x) */
/* Variant with lengths: result = x * diagonal * lengths[i].  The next
 * length and the next normal are loaded before the counter test, so
 * those loads also execute after the last element. */
275 LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */
277 PREFETCHW ( REGIND(EAX) )
279 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
280 ADD_L ( STRIDE, EDX ) /* next normal */
282 PREFETCH ( REGIND(EDX) )
284 PFMUL ( MM2, MM7 ) /* | x2*m10 */
285 ADD_L ( CONST(12), EAX ) /* next r */
287 PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
288 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
290 ADD_L ( CONST(4), EDI ) /* next length */
291 PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */
293 DEC_L ( EBP ) /* decrement normal counter */
294 MOVQ ( MM6, REGOFF(-12, EAX) ) /* write r0, r1 */
296 MOVD ( MM7, REGOFF(-4, EAX) ) /* write r2 */
297 MOVD ( REGIND(EDI), MM3 ) /* | length (x) */
299 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
300 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
302 JA ( LLBL (G3TNNR_norm_w_lengths) )
303 JMP ( LLBL (G3TNNR_exit_3dnow) )
/* Variant without lengths: transform, sum the squares of the transformed
 * components, estimate 1/sqrt with PFRSQRT, refine with PFRSQIT1 /
 * PFRCPIT2, and multiply the transformed vector by the result. */
306 LLBL (G3TNNR_norm): /* need to calculate lengths */
308 PREFETCHW ( REGIND(EAX) )
310 PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
311 ADD_L ( CONST(12), EAX ) /* next r */
313 PFMUL ( MM2, MM7 ) /* | x2*m10 */
314 MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */
316 MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
317 PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */
320 PFMUL ( MM7, MM4 ) /* | x2*x2 */
321 PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */
323 PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
324 ADD_L ( STRIDE, EDX ) /* next normal */
326 PREFETCH ( REGIND(EDX) )
328 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
331 PUNPCKLDQ ( MM3, MM3 )
334 PFRSQIT1 ( MM3, MM5 )
335 DEC_L ( EBP ) /* decrement normal counter */
337 PFRCPIT2 ( MM4, MM5 )
338 PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */
340 MOVQ ( MM6, REGOFF(-12, EAX) ) /* write r0, r1 */
341 PFMUL ( MM5, MM7 ) /* | x2 (normalized) */
343 MOVD ( MM7, REGOFF(-4, EAX) ) /* write r2 */
344 MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
346 MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
347 JA ( LLBL (G3TNNR_norm) )
/* Common exit point for both loop variants. */
350 LLBL (G3TNNR_exit_3dnow):
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies each input normal component-wise by the diagonal elements
 * m0, m5 and m10 of mat->inv, each pre-multiplied by ARG_SCALE, and
 * writes the result to dest (12 bytes per normal).
 *
 * NOTE(review): the loop ends with DEC_L ... JA; DEC does not write CF,
 * so the branch also depends on the carry left by the preceding ADD_L.
 */
365 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
366 GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
369 #define FRAME_OFFSET 12
/* Fetch arguments.  ARG_IN is loaded twice: into EAX to read the count,
 * and into ESI, which the STRIDE operand addresses. */
375 MOV_L ( ARG_IN, EAX )
376 MOV_L ( ARG_DEST, EDX )
377 MOV_L ( REGOFF(V3F_COUNT, EAX), EBP ) /* dest->count = in->count */
378 MOV_L ( EBP, REGOFF(V3F_COUNT, EDX) )
379 MOV_L ( ARG_IN, ESI )
380 MOV_L ( ARG_MAT, ECX )
381 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
382 MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */
383 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
/* Leave early when there are no normals to process. */
385 CMP_L ( CONST(0), EBP )
386 JE ( LLBL (G3TRNR_end) )
/* Build the scaled diagonal (MM0 = scale*m5 | scale*m0, low half of
 * MM2 = scale*m10) and preload the first normal into MM4 / MM5.
 * Byte offsets 20 and 40 from ECX address elements 5 and 10. */
390 MOVD ( ARG_SCALE, MM6 ) /* | scale */
391 PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */
393 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
394 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
396 PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
397 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
399 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
400 PFMUL ( MM6, MM2 ) /* | scale*m10 */
402 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
/* Main loop: multiply, store with negative offsets after EAX has been
 * advanced, then load the next normal before the counter is tested (so
 * the loads also execute after the last element). */
405 LLBL (G3TRNR_rescale):
407 PREFETCHW ( REGIND(EAX) )
409 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
410 ADD_L ( STRIDE, EDX ) /* next normal */
412 PREFETCH ( REGIND(EDX) )
414 PFMUL ( MM2, MM5 ) /* | x2*m10 */
415 ADD_L ( CONST(12), EAX ) /* next r */
417 DEC_L ( EBP ) /* decrement normal counter */
418 MOVQ ( MM4, REGOFF(-12, EAX) ) /* write r0, r1 */
420 MOVD ( MM5, REGOFF(-4, EAX) ) /* write r2 */
421 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
423 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
424 JA ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies each input normal by the 3x3 part of mat->inv (m0..m2,
 * m4..m6, m8..m10), with every cached element pre-multiplied by
 * ARG_SCALE, and writes the result to dest (12 bytes per normal).
 *
 * NOTE(review): the loop ends with DEC_L immediately followed by JA;
 * DEC does not write CF, so the branch also depends on the carry left
 * by the preceding ADD_L.
 */
439 GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
440 GLNAME(_mesa_3dnow_transform_rescale_normals):
443 #define FRAME_OFFSET 8
/* Fetch arguments: ESI = in, EDI = count, EAX = output pointer,
 * EDX = input pointer, ECX = mat->inv. */
448 MOV_L ( ARG_IN, ESI )
449 MOV_L ( ARG_DEST, EAX )
450 MOV_L ( ARG_MAT, ECX )
451 MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
452 MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
453 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
454 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
455 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Leave early when there are no normals to process. */
457 CMP_L ( CONST(0), EDI )
458 JE ( LLBL (G3TR_end) )
/* Load the matrix elements into MM3..MM7 and scale them; the matrix
 * loads are interleaved with the multiplies.  MM0 holds the scale until
 * the last multiply and is then reused for the first normal. */
462 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
464 MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
465 MOVD ( ARG_SCALE, MM0 ) /* scale */
467 MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
468 PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
470 PUNPCKLDQ ( REGOFF(24, ECX), MM5 )
471 PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */
473 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/
474 PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */
476 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
477 PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */
479 PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */
480 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
482 PFMUL ( MM0, MM7 ) /* | scale*m10 */
483 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
/* Loop body (presumably the target of G3TR_rescale): r0 and r1 come from
 * the first two rows; the normal is then re-read from memory for the
 * third row because MM0 and MM2 were overwritten.  The next normal is
 * loaded before the counter test. */
488 PREFETCHW ( REGIND(EAX) )
490 MOVQ ( MM0, MM1 ) /* x1 | x0 */
491 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
493 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
494 ADD_L ( CONST(12), EAX ) /* next r */
496 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
497 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
499 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
501 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
502 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
504 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
505 ADD_L ( STRIDE, EDX ) /* next normal */
507 PREFETCH ( REGIND(EDX) )
509 MOVQ ( MM0, REGOFF(-12, EAX) ) /* write r0, r1 */
510 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
512 PFMUL ( MM7, MM2 ) /* | x2*m10 */
513 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
515 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
516 MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */
518 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
519 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
521 DEC_L ( EDI ) /* decrement normal counter */
522 JA ( LLBL (G3TR_rescale) )
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies each input normal component-wise by the diagonal elements
 * m0, m5 and m10 of mat->inv and writes the result to dest (12 bytes
 * per normal).  No scale factor or normalization is applied.
 *
 * NOTE(review): the loop ends with DEC_L ... JA; DEC does not write CF,
 * so the branch also depends on the carry left by the preceding ADD_L.
 */
538 GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
539 GLNAME(_mesa_3dnow_transform_normals_no_rot):
542 #define FRAME_OFFSET 8
/* Fetch arguments: ESI = in, EDI = count, EAX = output pointer,
 * EDX = input pointer, ECX = mat->inv. */
547 MOV_L ( ARG_IN, ESI )
548 MOV_L ( ARG_DEST, EAX )
549 MOV_L ( ARG_MAT, ECX )
550 MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
551 MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
552 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
553 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
554 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Leave early when there are no normals to process. */
556 CMP_L ( CONST(0), EDI )
557 JE ( LLBL (G3TNR_end) )
/* Cache the diagonal (byte offsets 0, 20 and 40 from ECX) and preload
 * the first normal into MM4 / MM5. */
561 MOVD ( REGIND(ECX), MM0 ) /* | m0 */
562 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
564 MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
565 PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
567 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
568 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
/* Main loop: multiply, store with negative offsets after EAX has been
 * advanced, then load the next normal before the counter is tested (so
 * the loads also execute after the last element). */
571 LLBL (G3TNR_transform):
573 PREFETCHW ( REGIND(EAX) )
575 PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
576 ADD_L ( STRIDE, EDX) /* next normal */
578 PREFETCH ( REGIND(EDX) )
580 PFMUL ( MM2, MM5 ) /* | x2*m10 */
581 ADD_L ( CONST(12), EAX ) /* next r */
583 DEC_L ( EDI ) /* decrement normal counter */
584 MOVQ ( MM4, REGOFF(-12, EAX) ) /* write r0, r1 */
586 MOVD ( MM5, REGOFF(-4, EAX) ) /* write r2 */
587 MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
589 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
590 JA ( LLBL (G3TNR_transform) )
/*
 * _mesa_3dnow_transform_normals
 *
 * Multiplies each input normal by the 3x3 part of mat->inv (m0..m2,
 * m4..m6, m8..m10) and writes the result to dest (12 bytes per normal).
 * No scale factor or normalization is applied.
 *
 * NOTE(review): the loop ends with DEC_L immediately followed by JA;
 * DEC does not write CF, so the branch also depends on the carry left
 * by the preceding ADD_L.
 */
607 GLOBL GLNAME(_mesa_3dnow_transform_normals)
608 GLNAME(_mesa_3dnow_transform_normals):
611 #define FRAME_OFFSET 8
/* Fetch arguments: ESI = in, EDI = count, EAX = output pointer,
 * EDX = input pointer, ECX = mat->inv. */
616 MOV_L ( ARG_IN, ESI )
617 MOV_L ( ARG_DEST, EAX )
618 MOV_L ( ARG_MAT, ECX )
619 MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */
620 MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) )
621 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
622 MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */
623 MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
/* Leave early when there are no normals to process. */
625 CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
626 JE ( LLBL (G3T_end) )
/* Cache the matrix elements in MM3..MM7 and preload the first normal. */
630 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
631 MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */
633 MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
634 PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
636 MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
637 MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
639 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
640 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
/* Main loop: r0 and r1 come from the first two rows; the normal is then
 * re-read from memory for the third row because MM0 and MM2 were
 * overwritten.  The next normal is loaded before the counter test, so
 * those loads also execute after the last element. */
643 LLBL (G3T_transform):
645 PREFETCHW ( REGIND(EAX) )
647 MOVQ ( MM0, MM1 ) /* x1 | x0 */
648 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
650 PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
651 ADD_L ( CONST(12), EAX ) /* next r */
653 PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
654 PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
656 PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
657 PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
659 MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
660 MOVQ ( MM0, REGOFF(-12, EAX) ) /* write r0, r1 */
662 PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
663 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
665 PFMUL ( MM7, MM2 ) /* | x2*m10 */
666 ADD_L ( STRIDE, EDX ) /* next normal */
668 PREFETCH ( REGIND(EDX) )
670 PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
671 PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
673 MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */
674 MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
676 MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
677 DEC_L ( EDI ) /* decrement normal counter */
678 JA ( LLBL (G3T_transform) )
/*
 * _mesa_3dnow_normalize_normals
 *
 * Scales each input normal without applying any matrix and writes the
 * result to dest (12 bytes per normal).  With lengths != 0 each normal
 * is multiplied by the next value from the lengths array; with
 * lengths == 0 the factor is a reciprocal square root computed from the
 * normal itself.
 *
 * Register use here differs from the other routines: ECX is the input
 * pointer and EDX is the lengths pointer.
 *
 * NOTE(review): loops end with DEC_L ... JA; DEC does not write CF, so
 * the branch also depends on the carry left by the preceding ADD_L.
 */
693 GLOBL GLNAME(_mesa_3dnow_normalize_normals)
694 GLNAME(_mesa_3dnow_normalize_normals):
697 #define FRAME_OFFSET 12
/* Fetch arguments: ESI = in, EBP = count, EAX = output pointer,
 * ECX = input pointer, EDX = lengths. */
703 MOV_L ( ARG_IN, ESI )
704 MOV_L ( ARG_DEST, EAX )
705 MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */
706 MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) )
707 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
708 MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */
709 MOV_L ( ARG_LENGTHS, EDX )
/* Leave early when there are no normals to process. */
711 CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
712 JE ( LLBL (G3N_end) )
/* Preload the first normal, then pick the loop variant. */
716 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
717 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
719 CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
720 JE ( LLBL (G3N_norm2) ) /* calculate lengths */
/* Variant with lengths: result = x * lengths[i].
 * NOTE(review): the destination is prefetched with PREFETCH here, while
 * the other loops in this file use PREFETCHW for the output pointer. */
723 LLBL (G3N_norm1): /* use precalculated lengths */
725 PREFETCH ( REGIND(EAX) )
727 MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
728 PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
730 PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
731 ADD_L ( STRIDE, ECX ) /* next normal */
733 PREFETCH ( REGIND(ECX) )
735 PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
736 MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
738 MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
739 ADD_L ( CONST(12), EAX ) /* next r */
741 ADD_L ( CONST(4), EDX ) /* next length */
742 DEC_L ( EBP ) /* decrement normal counter */
744 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
745 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
746 JA ( LLBL (G3N_norm1) )
748 JMP ( LLBL (G3N_end1) )
/* Variant without lengths: sum the squares of the components, estimate
 * 1/sqrt with PFRSQRT, refine with PFRSQIT1 / PFRCPIT2, and multiply the
 * normal by the result.  The next normal is loaded before the counter
 * test, so those loads also execute after the last element. */
751 LLBL (G3N_norm2): /* need to calculate lengths */
753 PREFETCHW ( REGIND(EAX) )
755 MOVQ ( MM0, MM3 ) /* x1 | x0 */
756 ADD_L ( STRIDE, ECX ) /* next normal */
758 PREFETCH ( REGIND(ECX) )
760 PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
761 MOVQ ( MM1, MM4 ) /* | x2 */
763 ADD_L ( CONST(12), EAX ) /* next r */
764 PFMUL ( MM1, MM4 ) /* | x2*x2 */
766 PFADD ( MM4, MM3 ) /* | x0*x0+x2*x2 */
767 PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
769 PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
772 PUNPCKLDQ ( MM3, MM3 )
775 PFRSQIT1 ( MM3, MM5 )
776 DEC_L ( EBP ) /* decrement normal counter */
778 PFRCPIT2 ( MM4, MM5 )
780 PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
781 MOVQ ( MM0, REGOFF(-12, EAX) ) /* write new x0, x1 */
783 PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
784 MOVD ( MM1, REGOFF(-4, EAX) ) /* write new x2 */
786 MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
787 MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
788 JA ( LLBL (G3N_norm2) )
805 GLOBL GLNAME(_mesa_3dnow_rescale_normals)
806 GLNAME(_mesa_3dnow_rescale_normals):
809 #define FRAME_OFFSET 8
813 MOV_L ( ARG_IN, ESI )
814 MOV_L ( ARG_DEST, EAX )
815 MOV_L ( REGOFF(V3F_COUNT, ESI), EDX ) /* dest->count = in->count */
816 MOV_L ( EDX, REGOFF(V3F_COUNT, EAX) )
817 MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */
818 MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */
820 CMP_L ( CONST(0), EDX )
821 JE ( LLBL (G3R_end) )
825 MOVD ( ARG_SCALE, MM0 ) /* scale */
826 PUNPCKLDQ ( MM0, MM0 )
828 MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
829 MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
834 PREFETCHW ( REGIND(EAX) )
836 PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
837 ADD_L ( STRIDE, ECX ) /* next normal */
839 PREFETCH ( REGIND(ECX) )
841 PFMUL ( MM0, MM2 ) /* | x2*scale */
842 ADD_L ( CONST(12), EAX ) /* next r */
844 MOVQ ( MM1, REGOFF(-12, EAX) ) /* write r0, r1 */
845 MOVD ( MM2, REGOFF(-4, EAX) ) /* write r2 */
847 DEC_L ( EDX ) /* decrement normal counter */
848 MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
850 MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
851 JA ( LLBL (G3R_rescale) )