remove non-portable whitespace
[mesa.git] / src / mesa / x86 / 3dnow_normal.S
1 /* $Id: 3dnow_normal.S,v 1.7 2004/04/08 08:10:37 alanh Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*
28 * 3Dnow assembly code by Holger Waechtler
29 */
30
31 #include "matypes.h"
32 #include "norm_args.h"
33
SEG_TEXT

/* M(i): the 32-bit element number i (4 bytes apiece) of the array that
 * ECX points at.  Every routine below loads mat->inv into ECX first.
 */
#define M(i)    REGOFF(i * 4, ECX)

/* STRIDE: the dword at byte offset 12 from ESI.  The loops add it to the
 * input pointer after each normal.
 * NOTE(review): presumably the stride field of the input vector that ESI
 * addresses -- confirm the offset against matypes.h.
 */
#define STRIDE  REGOFF(12, ESI)
38
39
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Works on in->count normals in two passes:
 *
 *   pass 1 (transform): every input normal (x0, x1, x2) is multiplied by
 *   nine elements of mat->inv and written to dest, 16 bytes per result:
 *        r0 = x0*m0 + x1*m1 + x2*m2
 *        r1 = x0*m4 + x1*m5 + x2*m6
 *        r2 = x0*m8 + x1*m9 + x2*m10
 *
 *   pass 2 (normalize, in place on dest): each result is multiplied either
 *   by the caller-supplied factor lengths[i] (lengths != 0) or by a
 *   computed 1/sqrt(r0*r0 + r1*r1 + r2*r2) (lengths == 0).
 *
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * Arguments are fetched through ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS
 * and ARG_DEST, which depend on FRAME_OFFSET (presumably ESP-relative
 * operands from norm_args.h -- confirm).
 *
 * Register roles:
 *   EAX  dest cursor            EDX  input cursor (pass 1)
 *   ECX  mat->inv               EBP  normals left in the current pass
 *   EDI  lengths cursor         ESI  in (base for STRIDE)
 *   MM3..MM7  matrix elements   MM0..MM2  per-normal scratch
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX are clobbered.
 *
 * NOTE(review): the matrix is multiplied by scale only when lengths == 0,
 * i.e. on the path that renormalizes the result anyway, and scale is
 * ignored when precomputed lengths are used.  This looks inverted;
 * confirm against the C reference implementation before changing it.
 * NOTE(review): the lengths == 0 path has no guard for a zero-length
 * vector.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EBP )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    /* Pass 2 needs the count and the start of dest again.  EDX is pushed
     * as well; pass 2 does not read it, but the three pushes are what
     * FRAME_OFFSET 24 below accounts for.
     */
    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                /* m1              | m0         */
    MOVQ       ( M(4), MM4 )                /* m5              | m4         */

    MOVD       ( M(2), MM5 )                /*                 | m2         */
    PUNPCKLDQ  ( M(6), MM5 )                /* m6              | m2         */

    MOVQ       ( M(8), MM6 )                /* m9              | m8         */
    MOVD       ( M(10), MM7 )               /*                 | m10        */

    CMP_L      ( CONST(0), EDI )            /* lengths == 0 ?               */
    JNE        ( LLBL (G3TN_scale_end ) )   /* lengths given: skip scaling  */

    MOVD       ( ARG_SCALE, MM0 )           /*                 | scale      */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale           | scale      */

    PFMUL      ( MM0, MM3 )                 /* scale * m1      | scale * m0 */
    PFMUL      ( MM0, MM4 )                 /* scale * m5      | scale * m4 */
    PFMUL      ( MM0, MM5 )                 /* scale * m6      | scale * m2 */
    PFMUL      ( MM0, MM6 )                 /* scale * m9      | scale * m8 */
    PFMUL      ( MM0, MM7 )                 /*                 | scale * m10 */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):                      /* ---- pass 1: transform ----  */
    MOVQ       ( REGIND (EDX), MM0 )        /* x1              | x0         */
    MOVD       ( REGOFF (8, EDX), MM2 )     /*                 | x2         */

    MOVQ       ( MM0, MM1 )                 /* x1              | x0         */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2              | x2         */

    PFMUL      ( MM3, MM0 )                 /* x1*m1           | x0*m0      */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                 /* x1*m5           | x0*m4      */
    PFACC      ( MM1, MM0 )                 /* x0*m4+x1*m5     | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                 /* x2*m6           | x2*m2      */
    PFADD      ( MM2, MM0 )                 /* r1              | r0         */

    MOVQ       ( REGIND (EDX), MM1 )        /* x1              | x0         */
    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write r0, r1                 */

    PFMUL      ( MM6, MM1 )                 /* x1*m9           | x0*m8      */
    MOVD       ( REGOFF (8, EDX), MM2 )     /*                 | x2         */

    PFMUL      ( MM7, MM2 )                 /*                 | x2*m10     */
    PFACC      ( MM1, MM1 )                 /* *not used*      | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                 /* *not used*      | r2         */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write r2                     */
    DEC_L      ( EBP )                      /* decrement normal counter     */
    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                      /* end of transform ---         */
    POP_L      ( EAX )                      /*    now normalizing ...       */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )            /* lengths == 0 ?               */
    JE         ( LLBL (G3TN_norm ) )        /* calculate lengths            */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                 /* ---- pass 2a: use lengths -- */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EAX), MM1 )      /*                 | x2         */

    MOVD       ( REGIND (EDI), MM3 )        /*                 | length (x) */
    PFMUL      ( MM3, MM1 )                 /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                 /* length (x)      | length (x) */
    PFMUL      ( MM3, MM0 )                 /* x1 (normalized) | x0 (normalized) */

    ADD_L      ( CONST(4), EDI )            /* next length                  */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )         /* write new x0, x1             */
    MOVD       ( MM1, REGOFF(8, EAX) )      /* write new x2                 */

    ADD_L      ( CONST(16), EAX )           /* next r                       */
    DEC_L      ( EBP )                      /* decrement normal counter     */

    JNE        ( LLBL (G3TN_norm_w_lengths) )   /* ZF from DEC only         */
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                           /* ---- pass 2b: compute 1/len  */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )        /* x1              | x0         */
    MOVD       ( REGOFF(8, EAX), MM1 )      /*                 | x2         */

    MOVQ       ( MM0, MM3 )                 /* x1              | x0         */
    MOVQ       ( MM1, MM4 )                 /*                 | x2         */

    PFMUL      ( MM0, MM3 )                 /* x1*x1           | x0*x0      */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PFMUL      ( MM1, MM4 )                 /*                 | x2*x2      */
    PFADD      ( MM4, MM3 )                 /* x1*x1           | x0*x0+x2*x2 */

    PFACC      ( MM3, MM3 )                 /* x0*x0+x1*x1+x2*x2 in both halves */
    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */

    /* refine the reciprocal square root estimate */
    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )

    DEC_L      ( EBP )                      /* decrement normal counter     */
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )
    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )                 /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write new x0, x1             */
    PFMUL      ( MM5, MM1 )                 /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write new x2                 */
    /* Nothing between DEC_L and here writes EFLAGS (only MMX/3DNow! ops
     * and stores), so ZF is still the one produced by DEC_L.
     */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
214
215
216
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass over in->count normals.  Only three elements of mat->inv
 * are used:
 *        t0 = x0*m0      t1 = x1*m5      t2 = x2*m10
 * The result written to dest (16 bytes per normal) is t multiplied either
 * by the caller-supplied factor lengths[i] (lengths != 0) or by a
 * computed 1/sqrt(t0*t0 + t1*t1 + t2*t2) (lengths == 0).
 *
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * Arguments are fetched through the ARG_* macros, which depend on
 * FRAME_OFFSET (presumably ESP-relative operands from norm_args.h --
 * confirm).
 *
 * Register roles:
 *   EAX  dest cursor      EDX  input cursor     ECX  mat->inv
 *   EBP  normals left     EDI  lengths cursor   ESI  in (base for STRIDE)
 *   MM0  m5 | m0          MM2  m10 | m10
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX are clobbered.
 *
 * NOTE(review): the matrix is multiplied by scale only when lengths == 0,
 * i.e. on the path that renormalizes the result anyway.  This looks
 * inverted; confirm against the C reference implementation.
 * NOTE(review): the lengths == 0 path has no guard for a zero-length
 * vector.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */

    CMP_L      ( CONST(0), EBP )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                /*                 | m0         */
    PUNPCKLDQ  ( M(5), MM0 )                /* m5              | m0         */

    MOVD       ( M(10), MM2 )               /*                 | m10        */
    PUNPCKLDQ  ( MM2, MM2 )                 /* m10             | m10        */

    CMP_L      ( CONST(0), EDI )            /* lengths == 0 ?               */
    JNE        ( LLBL (G3TNNR_scale_end ) ) /* lengths given: skip scaling  */

    MOVD       ( ARG_SCALE, MM7 )           /*                 | scale      */
    PUNPCKLDQ  ( MM7, MM7 )                 /* scale           | scale      */

    PFMUL      ( MM7, MM0 )                 /* scale * m5      | scale * m0 */
    PFMUL      ( MM7, MM2 )                 /* scale * m10     | scale * m10 */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )            /* lengths == 0 ?               */
    JE         ( LLBL (G3TNNR_norm) )       /* need to calculate lengths    */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):               /* use precalculated lengths    */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM7 )      /*                 | x2         */

    /* The factor for this normal is loaded here, at the top of the loop.
     * Loading the next factor at the bottom would read one float past the
     * end of the lengths array on the final iteration.
     */
    MOVD       ( REGIND(EDI), MM3 )         /*                 | length (x) */

    PFMUL      ( MM0, MM6 )                 /* x1*m5           | x0*m0      */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                 /*                 | x2*m10     */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PFMUL      ( MM3, MM7 )                 /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                 /* length (x)      | length (x) */

    ADD_L      ( CONST(4), EDI )            /* next length                  */
    PFMUL      ( MM3, MM6 )                 /* x1 (normalized) | x0 (normalized) */

    DEC_L      ( EBP )                      /* decrement normal counter     */
    MOVQ       ( MM6, REGOFF(-16, EAX) )    /* write r0, r1                 */

    MOVD       ( MM7, REGOFF(-8, EAX) )     /* write r2                     */

    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                         /* need to calculate lengths    */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM7 )      /*                 | x2         */

    PFMUL      ( MM0, MM6 )                 /* x1*m5           | x0*m0      */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PFMUL      ( MM2, MM7 )                 /*                 | x2*m10     */
    MOVQ       ( MM6, MM3 )                 /* x1 (transformed)| x0 (transformed) */

    MOVQ       ( MM7, MM4 )                 /*                 | x2 (transformed) */
    PFMUL      ( MM6, MM3 )                 /* x1*x1           | x0*x0      */


    PFMUL      ( MM7, MM4 )                 /*                 | x2*x2      */
    PFACC      ( MM3, MM3 )                 /* **not used**    | x0*x0+x1*x1 */

    PFADD      ( MM4, MM3 )                 /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )

    /* refine the reciprocal square root estimate */
    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )
    DEC_L      ( EBP )                      /* decrement normal counter     */

    PFRCPIT2   ( MM4, MM5 )
    PFMUL      ( MM5, MM6 )                 /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )    /* write r0, r1                 */
    PFMUL      ( MM5, MM7 )                 /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )     /* write r2                     */
    /* Only MMX/3DNow! ops and stores follow DEC_L, so ZF is still its. */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
350
351
352
353
354
355
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For each of in->count normals (x0, x1, x2) writes to dest, 16 bytes per
 * result:
 *        r0 = x0 * (scale*m0)
 *        r1 = x1 * (scale*m5)
 *        r2 = x2 * (scale*m10)
 * where m is mat->inv.  dest->count is set to in->count.  Nothing else is
 * done when count == 0.
 *
 * Arguments are fetched through the ARG_* macros, which depend on
 * FRAME_OFFSET (presumably ESP-relative operands from norm_args.h --
 * confirm).
 *
 * Register roles:
 *   EAX  dest cursor      EDX  input cursor     ECX  mat->inv
 *   EBP  normals left     ESI  in (base for STRIDE)
 *   MM0  scale*m5 | scale*m0        MM2  scale*m10 (low half)
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used, but
 * its push is part of FRAME_OFFSET 12); EAX, ECX, EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EBP )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )           /*                 | scale      */
    PUNPCKLDQ  ( MM6, MM6 )                 /* scale           | scale      */

    MOVD       ( REGIND(ECX), MM0 )         /*                 | m0         */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )     /* m5              | m0         */

    PFMUL      ( MM6, MM0 )                 /* scale*m5        | scale*m0   */
    MOVD       ( REGOFF(40, ECX), MM2 )     /*                 | m10        */

    PFMUL      ( MM6, MM2 )                 /*                 | scale*m10  */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM5 )      /*                 | x2         */

    PFMUL      ( MM0, MM4 )                 /* x1*m5           | x0*m0      */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                 /*                 | x2*m10     */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    DEC_L      ( EBP )                      /* decrement normal counter     */
    MOVQ       ( MM4, REGOFF(-16, EAX) )    /* write r0, r1                 */

    MOVD       ( MM5, REGOFF(-8, EAX) )     /* write r2                     */

    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3TRNR_rescale) )    /* cnt != 0 -> process next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
422
423
424
425
426
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For each of in->count normals (x0, x1, x2) writes to dest, 16 bytes per
 * result:
 *        r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *        r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *        r2 = scale * (x0*m8 + x1*m9 + x2*m10)
 * where m is mat->inv; the scale is folded into the matrix registers once,
 * before the loop.  dest->count is set to in->count.  Nothing else is done
 * when count == 0.
 *
 * Arguments are fetched through the ARG_* macros, which depend on
 * FRAME_OFFSET (presumably ESP-relative operands from norm_args.h --
 * confirm).
 *
 * Register roles:
 *   EAX  dest cursor      EDX  input cursor     ECX  mat->inv
 *   EDI  normals left     ESI  in (base for STRIDE)
 *   MM3..MM7  scaled matrix elements   MM0..MM2  per-normal scratch
 * EDI and ESI are saved and restored; EAX, ECX, EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )         /* m1              | m0         */

    MOVQ       ( REGOFF(16,ECX), MM4 )      /* m5              | m4         */
    MOVD       ( ARG_SCALE, MM0 )           /*                 | scale      */

    MOVD       ( REGOFF(8,ECX), MM5 )       /*                 | m2         */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale           | scale      */

    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )     /* m6              | m2         */
    PFMUL      ( MM0, MM3 )                 /* scale*m1        | scale*m0   */

    MOVQ       ( REGOFF(32, ECX), MM6 )     /* m9              | m8         */
    PFMUL      ( MM0, MM4 )                 /* scale*m5        | scale*m4   */

    MOVD       ( REGOFF(40, ECX), MM7 )     /*                 | m10        */
    PFMUL      ( MM0, MM5 )                 /* scale*m6        | scale*m2   */

    PFMUL      ( MM0, MM6 )                 /* scale*m9        | scale*m8   */

    PFMUL      ( MM0, MM7 )                 /*                 | scale*m10  */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*                 | x2         */

    MOVQ       ( MM0, MM1 )                 /* x1              | x0         */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2              | x2         */

    PFMUL      ( MM3, MM0 )                 /* x1*m1           | x0*m0      */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PFMUL      ( MM4, MM1 )                 /* x1*m5           | x0*m4      */
    PFACC      ( MM1, MM0 )                 /* x0*m4+x1*m5     | x0*m0+x1*m1 */

    MOVQ       ( REGIND(EDX), MM1 )         /* x1              | x0         */

    PFMUL      ( MM5, MM2 )                 /* x2*m6           | x2*m2      */
    PFADD      ( MM2, MM0 )                 /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVD       ( REGOFF(8, EDX), MM2 )      /*                 | x2         */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write r0, r1                 */
    PFMUL      ( MM6, MM1 )                 /* x1*m9           | x0*m8      */

    PFMUL      ( MM7, MM2 )                 /*                 | x2*m10     */
    PFACC      ( MM1, MM1 )                 /* *not used*      | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                 /* *not used*      | x0*m8+x1*m9+x2*m10 */
    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write r2                     */

    DEC_L      ( EDI )                      /* decrement normal counter     */
    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
517
518
519
520
521
522
523
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For each of in->count normals (x0, x1, x2) writes to dest, 16 bytes per
 * result:
 *        r0 = x0*m0      r1 = x1*m5      r2 = x2*m10
 * where m is mat->inv.  dest->count is set to in->count.  Nothing else is
 * done when count == 0.
 *
 * Arguments are fetched through ARG_IN, ARG_DEST and ARG_MAT, which depend
 * on FRAME_OFFSET (presumably ESP-relative operands from norm_args.h --
 * confirm).
 *
 * Register roles:
 *   EAX  dest cursor      EDX  input cursor     ECX  mat->inv
 *   EDI  normals left     ESI  in (base for STRIDE)
 *   MM0  m5 | m0          MM2  m10 | m10
 * EDI and ESI are saved and restored; EAX, ECX, EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( REGIND(ECX), MM0 )         /*                 | m0         */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )     /* m5              | m0         */

    MOVD       ( REGOFF(40, ECX), MM2 )     /*                 | m10        */
    PUNPCKLDQ  ( MM2, MM2 )                 /* m10             | m10        */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM5 )      /*                 | x2         */

    PFMUL      ( MM0, MM4 )                 /* x1*m5           | x0*m0      */
    ADD_L      ( STRIDE, EDX)               /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                 /*                 | x2*m10     */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    DEC_L      ( EDI )                      /* decrement normal counter     */
    MOVQ       ( MM4, REGOFF(-16, EAX) )    /* write r0, r1                 */

    MOVD       ( MM5, REGOFF(-8, EAX) )     /* write r2                     */

    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
582
583
584
585
586
587
588
589
/*
 * _mesa_3dnow_transform_normals
 *
 * For each of in->count normals (x0, x1, x2) writes to dest, 16 bytes per
 * result:
 *        r0 = x0*m0 + x1*m1 + x2*m2
 *        r1 = x0*m4 + x1*m5 + x2*m6
 *        r2 = x0*m8 + x1*m9 + x2*m10
 * where m is mat->inv.  dest->count is set to in->count.  Nothing else is
 * done when count == 0.
 *
 * Arguments are fetched through ARG_IN, ARG_DEST and ARG_MAT, which depend
 * on FRAME_OFFSET (presumably ESP-relative operands from norm_args.h --
 * confirm).
 *
 * Register roles:
 *   EAX  dest cursor      EDX  input cursor     ECX  mat->inv
 *   EDI  normals left     ESI  in (base for STRIDE)
 *   MM3..MM7  matrix elements   MM0..MM2  per-normal scratch
 * EDI and ESI are saved and restored; EAX, ECX, EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )         /* m1              | m0         */
    MOVQ       ( REGOFF(16, ECX), MM4 )     /* m5              | m4         */

    MOVD       ( REGOFF(8, ECX), MM5 )      /*                 | m2         */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )     /* m6              | m2         */

    MOVQ       ( REGOFF(32, ECX), MM6 )     /* m9              | m8         */
    MOVD       ( REGOFF(40, ECX), MM7 )     /*                 | m10        */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*                 | x2         */

    MOVQ       ( MM0, MM1 )                 /* x1              | x0         */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2              | x2         */

    PFMUL      ( MM3, MM0 )                 /* x1*m1           | x0*m0      */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    PFMUL      ( MM4, MM1 )                 /* x1*m5           | x0*m4      */
    PFACC      ( MM1, MM0 )                 /* x0*m4+x1*m5     | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                 /* x2*m6           | x2*m2      */
    PFADD      ( MM2, MM0 )                 /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND(EDX), MM1 )         /* x1              | x0         */
    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write r0, r1                 */

    PFMUL      ( MM6, MM1 )                 /* x1*m9           | x0*m8      */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*                 | x2         */

    PFMUL      ( MM7, MM2 )                 /*                 | x2*m10     */
    ADD_L      ( STRIDE, EDX )              /* next normal                  */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                 /* *not used*      | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                 /* *not used*      | x0*m8+x1*m9+x2*m10 */

    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write r2                     */
    DEC_L      ( EDI )                      /* decrement normal counter     */

    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
668
669
670
671
672
673
/*
 * _mesa_3dnow_normalize_normals
 *
 * For each of in->count normals (x0, x1, x2) writes a rescaled copy to
 * dest, 16 bytes per result.  The factor is either the caller-supplied
 * lengths[i] (lengths != 0) or a computed 1/sqrt(x0*x0 + x1*x1 + x2*x2)
 * (lengths == 0).  No matrix is involved.
 *
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * Arguments are fetched through ARG_IN, ARG_DEST and ARG_LENGTHS, which
 * depend on FRAME_OFFSET (presumably ESP-relative operands from
 * norm_args.h -- confirm).
 *
 * Register roles:
 *   EAX  dest cursor      ECX  input cursor     EDX  lengths cursor
 *   EBP  normals left     ESI  in (base for STRIDE)
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used, but
 * its push is part of FRAME_OFFSET 12); EAX, ECX, EDX are clobbered.
 *
 * NOTE(review): the lengths == 0 path has no guard for a zero-length
 * vector.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )   /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )            /* lengths == 0 ?               */
    JE         ( LLBL (G3N_norm2) )         /* calculate lengths            */

ALIGNTEXT32
LLBL (G3N_norm1):                           /* use precalculated lengths    */

    PREFETCHW  ( REGIND(EAX) )              /* dest is written, as in the   */
                                            /* other loops of this file     */

    MOVQ       ( REGIND(ECX), MM0 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, ECX), MM1 )      /*                 | x2         */

    MOVD       ( REGIND(EDX), MM3 )         /*                 | length (x) */
    PFMUL      ( MM3, MM1 )                 /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                 /* length (x)      | length (x) */
    ADD_L      ( STRIDE, ECX )              /* next normal                  */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                 /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )         /* write new x0, x1             */

    MOVD       ( MM1, REGOFF(8, EAX) )      /* write new x2                 */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    ADD_L      ( CONST(4), EDX )            /* next length                  */
    DEC_L      ( EBP )                      /* decrement normal counter     */

    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                           /* need to calculate lengths    */

    PREFETCHW  ( REGIND(EAX) )

    PREFETCH   ( REGIND(ECX) )

    MOVQ       ( REGIND(ECX), MM0 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, ECX), MM1 )      /*                 | x2         */

    MOVQ       ( MM0, MM3 )                 /* x1              | x0         */
    ADD_L      ( STRIDE, ECX )              /* next normal                  */

    PFMUL      ( MM0, MM3 )                 /* x1*x1           | x0*x0      */
    MOVQ       ( MM1, MM4 )                 /*                 | x2         */

    ADD_L      ( CONST(16), EAX )           /* next r                       */
    PFMUL      ( MM1, MM4 )                 /*                 | x2*x2      */

    PFADD      ( MM4, MM3 )                 /* x1*x1           | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                 /* x0*x0+x1*x1+x2*x2 in both halves */

    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )

    /* refine the reciprocal square root estimate */
    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )
    DEC_L      ( EBP )                      /* decrement normal counter     */

    PFRCPIT2   ( MM4, MM5 )

    PFMUL      ( MM5, MM0 )                 /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write new x0, x1             */

    PFMUL      ( MM5, MM1 )                 /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write new x2                 */

    /* Only MMX/3DNow! ops and stores follow DEC_L, so ZF is still its. */
    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
779
780
781
782
783
784
/*
 * _mesa_3dnow_rescale_normals
 *
 * For each of in->count normals (x0, x1, x2) writes to dest, 16 bytes per
 * result:
 *        r0 = x0*scale      r1 = x1*scale      r2 = x2*scale
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * Arguments are fetched through ARG_IN, ARG_DEST and ARG_SCALE, which
 * depend on FRAME_OFFSET (presumably ESP-relative operands from
 * norm_args.h -- confirm).
 *
 * Register roles:
 *   EAX  dest cursor      ECX  input cursor     EDX  normals left
 *   ESI  in (base for STRIDE)                   MM0  scale | scale
 * EDI and ESI are saved and restored (EDI is not otherwise used, but its
 * push is part of FRAME_OFFSET 8); EAX, ECX, EDX are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )   /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )   /* in->start */

    CMP_L      ( CONST(0), EDX )            /* count == 0 ? nothing to do   */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )           /*                 | scale      */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale           | scale      */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )         /* x1              | x0         */
    MOVD       ( REGOFF(8, ECX), MM2 )      /*                 | x2         */

    PFMUL      ( MM0, MM1 )                 /* x1*scale        | x0*scale   */
    ADD_L      ( STRIDE, ECX )              /* next normal                  */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )                 /*                 | x2*scale   */
    ADD_L      ( CONST(16), EAX )           /* next r                       */

    MOVQ       ( MM1, REGOFF(-16, EAX) )    /* write r0, r1                 */
    MOVD       ( MM2, REGOFF(-8, EAX) )     /* write r2                     */

    DEC_L      ( EDX )                      /* decrement normal counter     */
    /* JNE, not JA: DEC leaves CF untouched, so an "above" test would also
     * depend on the carry left behind by the ADD_L before it.
     */
    JNE        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET