Consolidation of asm code in 3.5
[mesa.git] / src / mesa / x86 / 3dnow_normal.S
1 /* $Id: 3dnow_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 3.5
6 *
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*
28 * 3Dnow assembly code by Holger Waechtler
29 */
30
31 #include "matypes.h"
32 #include "norm_args.h"
33
34 SEG_TEXT
35
/*
 * M(i)   - memory operand for the i-th 4-byte element relative to ECX.
 *          The functions that use it load mat->inv into ECX first.
 * STRIDE - memory operand for the dword at offset 12 from ESI.  Every
 *          function below loads ARG_IN into ESI and adds STRIDE to its
 *          input cursor, so this is presumably the stride field of the
 *          input vector (offset hard-coded here - TODO confirm against
 *          matypes.h).
 */
36 #define M(i) REGOFF(i * 4, ECX)
37 #define STRIDE REGOFF(12, ESI)
38
39
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1 multiplies every input normal by the 3x3 part of mat->inv
 * (elements m0-m2, m4-m6, m8-m10) and stores the results 12 bytes apart
 * at dest->start.  Pass 2 walks those results again and normalizes them
 * in place:
 *   - lengths != 0: each result is multiplied by the matching lengths[]
 *     entry (ARG_SCALE is not applied in this case);
 *   - lengths == 0: the matrix is pre-multiplied by ARG_SCALE in pass 1
 *     and each result is multiplied by 1/sqrt(x0*x0 + x1*x1 + x2*x2),
 *     estimated with PFRSQRT and refined with PFRSQIT1 / PFRCPIT2.
 * dest->count is set to in->count; a count of 0 returns immediately.
 *
 * Arguments are fetched through the ARG_* macros of norm_args.h
 * (presumably addressed relative to FRAME_OFFSET, which is why it is
 * redefined after the extra pushes below).
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor (pass 1 only)
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EDI  lengths                    EBP  normals left to process
 *   MM3-MM7 hold the matrix.  EDI, ESI and EBP are saved and restored.
 *
 * NOTE(review): both passes are software pipelined - the next element is
 * loaded before the counter is tested, so the last iteration reads (and
 * discards) one element past the last input normal / last result.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L      ( CONST(0), EBP )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    PUSH_L     ( EBP )                             /* keep the counter and both */
    PUSH_L     ( EAX )                             /* cursors for the normalize */
    PUSH_L     ( EDX )                             /* pass                      */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                       /* m1 | m0 */
    MOVQ       ( M(4), MM4 )                       /* m5 | m4 */

    MOVD       ( M(2), MM5 )                       /*    | m2 */
    PUNPCKLDQ  ( M(6), MM5 )                       /* m6 | m2 */

    MOVQ       ( M(8), MM6 )                       /* m9 | m8 */
    MOVD       ( M(10), MM7 )                      /*    | m10 (only the low lane is used) */

    CMP_L      ( CONST(0), EDI )                   /* lengths supplied? then no scaling */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )                  /*       | scale */
    PUNPCKLDQ  ( MM0, MM0 )                        /* scale | scale */

    PFMUL      ( MM0, MM3 )                        /* scale * m1 | scale * m0 */
    PFMUL      ( MM0, MM4 )                        /* scale * m5 | scale * m4 */
    PFMUL      ( MM0, MM5 )                        /* scale * m6 | scale * m2 */
    PFMUL      ( MM0, MM6 )                        /* scale * m9 | scale * m8 */
    PFMUL      ( MM0, MM7 )                        /*            | scale * m10 */

LLBL (G3TN_scale_end):
    MOVQ       ( REGIND (EDX), MM0 )               /* x1 | x0 (first normal) */
    MOVD       ( REGOFF (8, EDX), MM2 )            /*    | x2 */

/* ---- pass 1: transform ------------------------------------------------ */
ALIGNTEXT32
LLBL (G3TN_transform):
    MOVQ       ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                        /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND (EDX), MM1 )               /* x1 | x0 */
    MOVQ       ( MM0, REGOFF(-12, EAX) )           /* write r0, r1 */

    PFMUL      ( MM6, MM1 )                        /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF (8, EDX), MM2 )            /*       | x2 */

    PFMUL      ( MM7, MM2 )                        /*       | x2*m10 */
    PFACC      ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-4, EAX) )            /* write r2 */
    MOVQ       ( REGIND (EDX), MM0 )               /* x1 | x0 (next normal) */

    MOVD       ( REGOFF (8, EDX), MM2 )            /*    | x2 */
    DEC_L      ( EBP )                             /* decrement normal counter */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ). */
    JNE        ( LLBL (G3TN_transform) )

/* ---- pass 2: normalize the results in place --------------------------- */
    POP_L      ( EDX )                             /* balances the push; not used below */
    POP_L      ( EAX )                             /* EAX = dest->start again */
    POP_L      ( EBP )                             /* EBP = count again */
    /* No ARG_* macro is used past this point (FRAME_OFFSET is still 24). */

    MOVQ       ( REGIND(EAX), MM0 )                /* x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )             /*    | x2 */

    CMP_L      ( CONST(0), EDI )                   /* lengths == 0 ? */
    JE         ( LLBL (G3TN_norm ) )               /* yes: calculate lengths */

ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                        /* use the supplied lengths */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVD       ( REGIND (EDI), MM3 )               /*            | length (x) */
    PFMUL      ( MM3, MM1 )                        /*            | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                        /* length (x) | length (x) */
    PFMUL      ( MM3, MM0 )                        /* x1 (normalized) | x0 (normalized) */

    ADD_L      ( CONST(4), EDI )                   /* next length */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )                /* write new x0, x1 */
    MOVD       ( MM1, REGOFF(8, EAX) )             /* write new x2 */

    ADD_L      ( CONST(12), EAX )                  /* next r */
    DEC_L      ( EBP )                             /* decrement normal counter */

    MOVQ       ( REGIND(EAX), MM0 )                /* x1 | x0 (MMX moves keep EFLAGS) */
    MOVD       ( REGOFF(8, EAX), MM1 )             /*    | x2 */
    JNE        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                                  /* compute 1/length per normal */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( MM0, MM3 )                        /* x1 | x0 */
    MOVQ       ( MM1, MM4 )                        /*    | x2 */

    PFMUL      ( MM0, MM3 )                        /* x1*x1 | x0*x0 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PFMUL      ( MM1, MM4 )                        /*       | x2*x2 */
    PFADD      ( MM4, MM3 )                        /* x1*x1 | x0*x0+x2*x2 */

    PFACC      ( MM3, MM3 )                        /* both lanes: x0*x0+x1*x1+x2*x2 */
    PFRSQRT    ( MM3, MM5 )                        /* first estimate of 1/sqrt(sum) */

    MOVQ       ( MM5, MM4 )                        /* keep the estimate for PFRCPIT2 */
    PUNPCKLDQ  ( MM3, MM3 )

    DEC_L      ( EBP )                             /* decrement normal counter */
    PFMUL      ( MM5, MM5 )                        /* estimate squared */

    PFRSQIT1   ( MM3, MM5 )                        /* refinement, step 1 */
    PFRCPIT2   ( MM4, MM5 )                        /* refinement, step 2 */

    PFMUL      ( MM5, MM0 )                        /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-12, EAX) )           /* write new x0, x1 */
    PFMUL      ( MM5, MM1 )                        /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-4, EAX) )            /* write new x2 */
    MOVQ       ( REGIND (EAX), MM0 )               /* x1 | x0 */

    MOVD       ( REGOFF(8, EAX), MM1 )             /*    | x2 */
    /* ZF still comes from the DEC_L above: 3DNow!/MMX ops keep EFLAGS. */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
219
220
221
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single-pass variant that uses only the diagonal of mat->inv
 * (m0, m5, m10): every input normal is multiplied component-wise by the
 * diagonal and then normalized, and the result is stored 12 bytes apart
 * at dest->start.
 *   - lengths != 0: the product is multiplied by the matching lengths[]
 *     entry (ARG_SCALE is not applied);
 *   - lengths == 0: the diagonal is pre-multiplied by ARG_SCALE and the
 *     product is multiplied by 1/sqrt(x0*x0 + x1*x1 + x2*x2), estimated
 *     with PFRSQRT and refined with PFRSQIT1 / PFRCPIT2.
 * dest->count is set to in->count; a count of 0 returns immediately.
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EDI  lengths cursor             EBP  normals left to process
 *   MM0 = m5 | m0, MM2 = m10 | m10 (scaled when lengths == 0).
 *
 * NOTE(review): the loops are software pipelined - the next normal (and,
 * in the lengths loop, the next length) is loaded before the counter is
 * tested, so the last iteration reads one element past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */

    CMP_L      ( CONST(0), EBP )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                       /*     | m0 */
    PUNPCKLDQ  ( M(5), MM0 )                       /* m5  | m0 */

    MOVD       ( M(10), MM2 )                      /*     | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                        /* m10 | m10 */

    CMP_L      ( CONST(0), EDI )                   /* lengths supplied? then no scaling */
    JNE        ( LLBL (G3TNNR_scale_end ) )

    MOVD       ( ARG_SCALE, MM7 )                  /*       | scale */
    PUNPCKLDQ  ( MM7, MM7 )                        /* scale | scale */

    PFMUL      ( MM7, MM0 )                        /* scale * m5  | scale * m0 */
    PFMUL      ( MM7, MM2 )                        /* scale * m10 | scale * m10 */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    MOVQ       ( REGIND(EDX), MM6 )                /* x1 | x0 (first normal) */
    MOVD       ( REGOFF(8, EDX), MM7 )             /*    | x2 */

    CMP_L      ( CONST(0), EDI )                   /* lengths == 0 ? */
    JE         ( LLBL (G3TNNR_norm) )              /* yes: calculate lengths */

    MOVD       ( REGIND(EDI), MM3 )                /*    | length (x) */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                      /* use the supplied lengths */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM0, MM6 )                        /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                        /*       | x2*m10 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PFMUL      ( MM3, MM7 )                        /*       | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                        /* length (x) | length (x) */

    ADD_L      ( CONST(4), EDI )                   /* next length */
    PFMUL      ( MM3, MM6 )                        /* x1 (normalized) | x0 (normalized) */

    DEC_L      ( EBP )                             /* decrement normal counter */
    MOVQ       ( MM6, REGOFF(-12, EAX) )           /* write r0, r1 */

    MOVD       ( MM7, REGOFF(-4, EAX) )            /* write r2 */
    MOVD       ( REGIND(EDI), MM3 )                /*    | length (x) (next) */

    MOVQ       ( REGIND(EDX), MM6 )                /* x1 | x0 (next normal) */
    MOVD       ( REGOFF(8, EDX), MM7 )             /*    | x2 */

    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ); the MMX */
    /* moves between DEC_L and the branch do not change EFLAGS.       */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                                /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM0, MM6 )                        /* x1*m5 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PFMUL      ( MM2, MM7 )                        /*       | x2*m10 */
    MOVQ       ( MM6, MM3 )                        /* x1 (transformed) | x0 (transformed) */

    MOVQ       ( MM7, MM4 )                        /*                  | x2 (transformed) */
    PFMUL      ( MM6, MM3 )                        /* x1*x1 | x0*x0 */


    PFMUL      ( MM7, MM4 )                        /*       | x2*x2 */
    PFACC      ( MM3, MM3 )                        /* **not used** | x0*x0+x1*x1 */

    PFADD      ( MM4, MM3 )                        /*       | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                        /* first estimate of 1/sqrt(sum) */
    MOVQ       ( MM5, MM4 )                        /* keep the estimate for PFRCPIT2 */

    PUNPCKLDQ  ( MM3, MM3 )                        /* sum | sum */
    PFMUL      ( MM5, MM5 )                        /* estimate squared */

    PFRSQIT1   ( MM3, MM5 )                        /* refinement, step 1 */
    DEC_L      ( EBP )                             /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )                        /* refinement, step 2 */
    PFMUL      ( MM5, MM6 )                        /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-12, EAX) )           /* write r0, r1 */
    PFMUL      ( MM5, MM7 )                        /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-4, EAX) )            /* write r2 */
    MOVQ       ( REGIND(EDX), MM6 )                /* x1 | x0 (next normal) */

    MOVD       ( REGOFF(8, EDX), MM7 )             /*    | x2 */
    /* ZF still comes from the DEC_L above: 3DNow!/MMX ops keep EFLAGS. */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
358
359
360
361
362
363
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies every input normal component-wise by ARG_SCALE times the
 * diagonal of mat->inv (m0, m5, m10) and stores the result 12 bytes
 * apart at dest->start.  dest->count is set to in->count; a count of 0
 * returns immediately.
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EBP  normals left to process
 *   MM0 = scale*m5 | scale*m0, MM2 = scale*m10 in the low lane.
 *   EDI is saved and restored but not otherwise used here.
 *
 * NOTE(review): the loop is software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */

    CMP_L      ( CONST(0), EBP )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )                  /*       | scale */
    PUNPCKLDQ  ( MM6, MM6 )                        /* scale | scale */

    MOVD       ( REGIND(ECX), MM0 )                /*    | m0 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )            /* m5 | m0 */

    PFMUL      ( MM6, MM0 )                        /* scale*m5 | scale*m0 */
    MOVD       ( REGOFF(40, ECX), MM2 )            /*          | m10 */

    MOVQ       ( REGIND(EDX), MM4 )                /* x1 | x0 (first normal) */
    PFMUL      ( MM6, MM2 )                        /*    | scale*m10 */

    MOVD       ( REGOFF(8, EDX), MM5 )             /*    | x2 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM0, MM4 )                        /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                        /*       | x2*m10 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    DEC_L      ( EBP )                             /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-12, EAX) )           /* write r0, r1 */

    MOVD       ( MM5, REGOFF(-4, EAX) )            /* write r2 */
    MOVQ       ( REGIND(EDX), MM4 )                /* x1 | x0 (next normal) */

    MOVD       ( REGOFF(8, EDX), MM5 )             /*    | x2 */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ); the MMX */
    /* moves between DEC_L and the branch do not change EFLAGS.       */
    JNE        ( LLBL (G3TRNR_rescale) )           /* cnt > 0 ? -> process next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
433
434
435
436
437
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies every input normal by ARG_SCALE times the 3x3 part of
 * mat->inv (elements m0-m2, m4-m6, m8-m10) and stores the result 12
 * bytes apart at dest->start.  dest->count is set to in->count; a count
 * of 0 returns immediately.
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EDI  normals left to process
 *   MM3-MM7 hold the scaled matrix.  EDI and ESI are saved and restored.
 *
 * NOTE(review): the loop is software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L      ( CONST(0), EDI )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )                /* m1 | m0 */

    MOVQ       ( REGOFF(16,ECX), MM4 )             /* m5 | m4 */
    MOVD       ( ARG_SCALE, MM0 )                  /*    | scale */

    MOVD       ( REGOFF(8,ECX), MM5 )              /*    | m2 */
    PUNPCKLDQ  ( MM0, MM0 )                        /* scale | scale */

    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )            /* m6 | m2 */
    PFMUL      ( MM0, MM3 )                        /* scale*m1 | scale*m0 */

    MOVQ       ( REGOFF(32, ECX), MM6 )            /* m9 | m8 */
    PFMUL      ( MM0, MM4 )                        /* scale*m5 | scale*m4 */

    MOVD       ( REGOFF(40, ECX), MM7 )            /*    | m10 */
    PFMUL      ( MM0, MM5 )                        /* scale*m6 | scale*m2 */

    PFMUL      ( MM0, MM6 )                        /* scale*m9 | scale*m8 */
    MOVD       ( REGOFF(8, EDX), MM2 )             /*    | x2 (first normal) */

    PFMUL      ( MM0, MM7 )                        /*    | scale*m10 */
    MOVQ       ( REGIND(EDX), MM0 )                /* x1 | x0 (scale no longer needed) */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PFMUL      ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    MOVQ       ( REGIND(EDX), MM1 )                /* x1 | x0 */

    PFMUL      ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                        /* x0*m4...+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVD       ( REGOFF(8, EDX), MM2 )             /*       | x2 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-12, EAX) )           /* write r0, r1 */
    PFMUL      ( MM6, MM1 )                        /* x1*m9 | x0*m8 */

    PFMUL      ( MM7, MM2 )                        /*       | x2*m10 */
    PFACC      ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */
    MOVD       ( MM1, REGOFF(-4, EAX) )            /* write r2 */

    MOVQ       ( REGIND(EDX), MM0 )                /* x1 | x0 (next normal) */
    MOVD       ( REGOFF(8, EDX), MM2 )             /*    | x2 */

    DEC_L      ( EDI )                             /* decrement normal counter */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ). */
    JNE        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
530
531
532
533
534
535
536
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies every input normal component-wise by the diagonal of
 * mat->inv (m0, m5, m10) and stores the result 12 bytes apart at
 * dest->start.  dest->count is set to in->count; a count of 0 returns
 * immediately.
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EDI  normals left to process
 *   MM0 = m5 | m0, MM2 = m10 | m10.  EDI and ESI are saved and restored.
 *
 * NOTE(review): the loop is software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L      ( CONST(0), EDI )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( REGIND(ECX), MM0 )                /*     | m0 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )            /* m5  | m0 */

    MOVD       ( REGOFF(40, ECX), MM2 )            /*     | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                        /* m10 | m10 */

    MOVQ       ( REGIND(EDX), MM4 )                /* x1 | x0 (first normal) */
    MOVD       ( REGOFF(8, EDX), MM5 )             /*    | x2 */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM0, MM4 )                        /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX)                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                        /*       | x2*m10 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    DEC_L      ( EDI )                             /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-12, EAX) )           /* write r0, r1 */

    MOVD       ( MM5, REGOFF(-4, EAX) )            /* write r2 */
    MOVQ       ( REGIND(EDX), MM4 )                /* x1 | x0 (next normal) */

    MOVD       ( REGOFF(8, EDX), MM5 )             /*    | x2 */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ); the MMX */
    /* moves between DEC_L and the branch do not change EFLAGS.       */
    JNE        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
598
599
600
601
602
603
604
605
/*
 * _mesa_3dnow_transform_normals
 *
 * Multiplies every input normal by the 3x3 part of mat->inv (elements
 * m0-m2, m4-m6, m8-m10) and stores the result 12 bytes apart at
 * dest->start.  No scaling or normalization is applied.  dest->count is
 * set to in->count; a count of 0 returns immediately.
 *
 * Registers:
 *   EAX  output cursor              EDX  input cursor
 *   ECX  mat->inv                   ESI  in (needed by STRIDE)
 *   EDI  normals left to process
 *   MM3-MM7 hold the matrix.  EDI and ESI are saved and restored.
 *
 * NOTE(review): the loop is software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )     /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L      ( CONST(0), EDI )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )                /* m1 | m0 */
    MOVQ       ( REGOFF(16, ECX), MM4 )            /* m5 | m4 */

    MOVD       ( REGOFF(8, ECX), MM5 )             /*    | m2 */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )            /* m6 | m2 */

    MOVQ       ( REGOFF(32, ECX), MM6 )            /* m9 | m8 */
    MOVD       ( REGOFF(40, ECX), MM7 )            /*    | m10 */

    MOVQ       ( REGIND(EDX), MM0 )                /* x1 | x0 (first normal) */
    MOVD       ( REGOFF(8, EDX), MM2 )             /*    | x2 */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    PFMUL      ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                        /* x0*m4...+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND(EDX), MM1 )                /* x1 | x0 */
    MOVQ       ( MM0, REGOFF(-12, EAX) )           /* write r0, r1 */

    PFMUL      ( MM6, MM1 )                        /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF(8, EDX), MM2 )             /*       | x2 */

    PFMUL      ( MM7, MM2 )                        /*       | x2*m10 */
    ADD_L      ( STRIDE, EDX )                     /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */

    MOVD       ( MM1, REGOFF(-4, EAX) )            /* write r2 */
    MOVQ       ( REGIND(EDX), MM0 )                /* x1 | x0 (next normal) */

    MOVD       ( REGOFF(8, EDX), MM2 )             /*    | x2 */
    DEC_L      ( EDI )                             /* decrement normal counter */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ). */
    JNE        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
686
687
688
689
690
691
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes every input normal and stores the result 12 bytes apart at
 * dest->start (no matrix is involved):
 *   - lengths != 0: each normal is multiplied by the matching lengths[]
 *     entry;
 *   - lengths == 0: each normal is multiplied by
 *     1/sqrt(x0*x0 + x1*x1 + x2*x2), estimated with PFRSQRT and refined
 *     with PFRSQIT1 / PFRCPIT2.
 * dest->count is set to in->count; a count of 0 returns immediately.
 *
 * Registers:
 *   EAX  output cursor              ECX  input cursor
 *   EDX  lengths cursor             ESI  in (needed by STRIDE)
 *   EBP  normals left to process
 *   EDI is saved and restored but not otherwise used here.
 *
 * NOTE(review): the loops are software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )     /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3N_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM0 )                /* x1 | x0 (first normal) */
    MOVD       ( REGOFF(8, ECX), MM1 )             /*    | x2 */

    CMP_L      ( CONST(0), EDX )                   /* lengths == 0 ? */
    JE         ( LLBL (G3N_norm2) )                /* yes: calculate lengths */

ALIGNTEXT32
LLBL (G3N_norm1):                                  /* use the supplied lengths */

    PREFETCHW  ( REGIND(EAX) )                     /* destination is written below */

    MOVD       ( REGIND(EDX), MM3 )                /*            | length (x) */
    PFMUL      ( MM3, MM1 )                        /*            | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                        /* length (x) | length (x) */
    ADD_L      ( STRIDE, ECX )                     /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                        /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )                /* write new x0, x1 */

    MOVD       ( MM1, REGOFF(8, EAX) )             /* write new x2 */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    ADD_L      ( CONST(4), EDX )                   /* next length */
    DEC_L      ( EBP )                             /* decrement normal counter */

    MOVQ       ( REGIND(ECX), MM0 )                /* x1 | x0 (next normal) */
    MOVD       ( REGOFF(8, ECX), MM1 )             /*    | x2 */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ); the MMX */
    /* moves between DEC_L and the branch do not change EFLAGS.       */
    JNE        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                                  /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( MM0, MM3 )                        /* x1 | x0 */
    ADD_L      ( STRIDE, ECX )                     /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )                        /* x1*x1 | x0*x0 */
    MOVQ       ( MM1, MM4 )                        /*       | x2 */

    ADD_L      ( CONST(12), EAX )                  /* next r */
    PFMUL      ( MM1, MM4 )                        /*       | x2*x2 */

    PFADD      ( MM4, MM3 )                        /* x1*x1 | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                        /* both lanes: x0*x0+x1*x1+x2*x2 */

    PFRSQRT    ( MM3, MM5 )                        /* first estimate of 1/sqrt(sum) */
    MOVQ       ( MM5, MM4 )                        /* keep the estimate for PFRCPIT2 */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )                        /* estimate squared */

    PFRSQIT1   ( MM3, MM5 )                        /* refinement, step 1 */
    DEC_L      ( EBP )                             /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )                        /* refinement, step 2 */

    PFMUL      ( MM5, MM0 )                        /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-12, EAX) )           /* write new x0, x1 */

    PFMUL      ( MM5, MM1 )                        /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-4, EAX) )            /* write new x2 */

    MOVQ       ( REGIND(ECX), MM0 )                /* x1 | x0 (next normal) */
    MOVD       ( REGOFF(8, ECX), MM1 )             /*    | x2 */
    /* ZF still comes from the DEC_L above: 3DNow!/MMX ops keep EFLAGS. */
    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
798
799
800
801
802
803
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies every component of every input normal by ARG_SCALE and
 * stores the result 12 bytes apart at dest->start (no matrix is
 * involved).  dest->count is set to in->count; a count of 0 returns
 * immediately.
 *
 * Registers:
 *   EAX  output cursor              ECX  input cursor
 *   EDX  normals left to process    ESI  in (needed by STRIDE)
 *   MM0 = scale | scale.
 *   EDI is saved and restored but not otherwise used here.
 *
 * NOTE(review): the loop is software pipelined - the next normal is
 * loaded before the counter is tested, so the last iteration reads one
 * element past the last input normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX )     /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )     /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )     /* in->start */

    CMP_L      ( CONST(0), EDX )                   /* count == 0: nothing to do */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )                  /*       | scale */
    PUNPCKLDQ  ( MM0, MM0 )                        /* scale | scale */

    MOVQ       ( REGIND(ECX), MM1 )                /* x1 | x0 (first normal) */
    MOVD       ( REGOFF(8, ECX), MM2 )             /*    | x2 */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM0, MM1 )                        /* x1*scale | x0*scale */
    ADD_L      ( STRIDE, ECX )                     /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )                        /*          | x2*scale */
    ADD_L      ( CONST(12), EAX )                  /* next r */

    MOVQ       ( MM1, REGOFF(-12, EAX) )           /* write r0, r1 */
    MOVD       ( MM2, REGOFF(-4, EAX) )            /* write r2 */

    DEC_L      ( EDX )                             /* decrement normal counter */
    MOVQ       ( REGIND(ECX), MM1 )                /* x1 | x0 (next normal) */

    MOVD       ( REGOFF(8, ECX), MM2 )             /*    | x2 */
    /* DEC leaves CF untouched, so test ZF only (JNE == JNZ); the MMX */
    /* moves between DEC_L and the branch do not change EFLAGS.       */
    JNE        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET