Remove CVS keywords.
[mesa.git] / src / mesa / x86 / 3dnow_normal.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 5.1
5 *
6 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * 3Dnow assembly code by Holger Waechtler
28 */
29
30 #ifdef USE_3DNOW_ASM
31
32 #include "matypes.h"
33 #include "norm_args.h"
34
SEG_TEXT

/* M(i): memory operand for the i-th 32-bit float of the matrix whose base
 * address is held in ECX (every routine below loads mat->inv into ECX). */
#define M(i)    REGOFF(i * 4, ECX)

/*
 * STRIDE: memory operand for the stride field of the vector ESI points at
 * (every routine below loads ARG_IN into ESI).  Its value is added to the
 * input pointer after each normal.  Use the named offset when matypes.h
 * provides one, so this stays in step with V4F_COUNT / V4F_START; otherwise
 * fall back to the historical literal offset.
 */
#ifdef V4F_STRIDE
#define STRIDE  REGOFF(V4F_STRIDE, ESI)
#else
#define STRIDE  REGOFF(12, ESI)
#endif
39
40
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Arguments are read from the stack through the ARG_MAT, ARG_SCALE, ARG_IN,
 * ARG_LENGTHS and ARG_DEST macros (norm_args.h).  FRAME_OFFSET is kept equal
 * to the number of bytes pushed since entry -- presumably that is what the
 * ARG_* macros use to locate the arguments; confirm against norm_args.h.
 *
 * Pass 1 (transform): for each of the in->count input normals x, stores
 *      r0 = x0*m0 + x1*m1 + x2*m2
 *      r1 = x0*m4 + x1*m5 + x2*m6
 *      r2 = x0*m8 + x1*m9 + x2*m10
 * to dest (16 bytes per result, input advanced by STRIDE), with
 * m = mat->inv.  When ARG_LENGTHS is 0 the matrix elements are multiplied
 * by ARG_SCALE before the loop.
 *
 * Pass 2 (normalize): walks dest again, in place.  With a lengths array each
 * result is multiplied by lengths[i]; without one it is multiplied by
 * 1/sqrt(r0*r0 + r1*r1 + r2*r2), computed with PFRSQRT plus one
 * PFRSQIT1/PFRCPIT2 refinement.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 EDI = lengths, ESI = in (for STRIDE), EBP = counter.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, flags and the MMX
 * registers are clobbered.  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count           */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start                         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start                       */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                          */

    CMP_L      ( CONST(0), EBP )            /* nothing to do for count == 0      */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    PUSH_L     ( EBP )                      /* pass 2 restarts from the original */
    PUSH_L     ( EAX )                      /* count and dest->start             */

#undef FRAME_OFFSET
#define FRAME_OFFSET 20

    MOVQ       ( M(0), MM3 )                /* m1              | m0              */
    MOVQ       ( M(4), MM4 )                /* m5              | m4              */

    MOVD       ( M(2), MM5 )                /*                 | m2              */
    PUNPCKLDQ  ( M(6), MM5 )                /* m6              | m2              */

    MOVQ       ( M(8), MM6 )                /* m9              | m8              */
    MOVD       ( M(10), MM7 )               /*                 | m10             */

    CMP_L      ( CONST(0), EDI )            /* lengths given? then do not scale  */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )           /*                 | scale           */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale           | scale           */

    PFMUL      ( MM0, MM3 )                 /* scale * m1      | scale * m0      */
    PFMUL      ( MM0, MM4 )                 /* scale * m5      | scale * m4      */
    PFMUL      ( MM0, MM5 )                 /* scale * m6      | scale * m2      */
    PFMUL      ( MM0, MM6 )                 /* scale * m9      | scale * m8      */
    PFMUL      ( MM0, MM7 )                 /*                 | scale * m10     */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )        /* x1              | x0              */
    MOVD       ( REGOFF (8, EDX), MM2 )     /*                 | x2              */

    MOVQ       ( MM0, MM1 )                 /* x1              | x0              */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2              | x2              */

    PFMUL      ( MM3, MM0 )                 /* x1*m1           | x0*m0           */
    ADD_L      ( CONST(16), EAX )           /* next r                            */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                 /* x1*m5           | x0*m4           */
    PFACC      ( MM1, MM0 )                 /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL      ( MM5, MM2 )                 /* x2*m6           | x2*m2           */
    PFADD      ( MM2, MM0 )                 /* r1              | r0              */

    MOVQ       ( REGIND (EDX), MM1 )        /* x1              | x0              */
    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write r0, r1                      */

    PFMUL      ( MM6, MM1 )                 /* x1*m9           | x0*m8           */
    MOVD       ( REGOFF (8, EDX), MM2 )     /*                 | x2              */

    PFMUL      ( MM7, MM2 )                 /*                 | x2*m10          */
    PFACC      ( MM1, MM1 )                 /* *not used*      | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )                 /* *not used*      | r2              */
    ADD_L      ( STRIDE, EDX )              /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write r2                          */
    SUB_L      ( CONST(1), EBP )            /* decrement normal counter          */
    JNZ        ( LLBL (G3TN_transform) )


    POP_L      ( EAX )                      /* end of transform: back to         */
    POP_L      ( EBP )                      /* dest->start and the full count    */

    CMP_L      ( CONST(0), EDI )            /* lengths == 0 ?                    */
    JE         ( LLBL (G3TN_norm ) )        /* yes: compute the lengths          */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                 /* multiply by supplied lengths[i]   */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )         /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )      /*                 | x2              */

    MOVD       ( REGIND (EDI), MM3 )        /*                 | length (x)      */
    PFMUL      ( MM3, MM1 )                 /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                 /* length (x)      | length (x)      */
    PFMUL      ( MM3, MM0 )                 /* x1 (normalized) | x0 (normalized) */

    ADD_L      ( CONST(4), EDI )            /* next length                       */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )         /* write new x0, x1                  */
    MOVD       ( MM1, REGOFF(8, EAX) )      /* write new x2                      */

    ADD_L      ( CONST(16), EAX )           /* next r                            */
    SUB_L      ( CONST(1), EBP )            /* decrement normal counter          */

    JNZ        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                           /* no lengths: use 1/sqrt(x.x)       */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )        /* x1              | x0              */
    MOVD       ( REGOFF(8, EAX), MM1 )      /*                 | x2              */

    MOVQ       ( MM0, MM3 )                 /* x1              | x0              */
    MOVQ       ( MM1, MM4 )                 /*                 | x2              */

    PFMUL      ( MM0, MM3 )                 /* x1*x1           | x0*x0           */
    ADD_L      ( CONST(16), EAX )           /* next r                            */

    PFMUL      ( MM1, MM4 )                 /*                 | x2*x2           */
    PFADD      ( MM4, MM3 )                 /* x1*x1           | x0*x0+x2*x2     */

    PFACC      ( MM3, MM3 )                 /* *not used*      | x0*x0+x1*x1+x2*x2 */
    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */

    MOVQ       ( MM5, MM4 )                 /* keep the first approximation      */
    PUNPCKLDQ  ( MM3, MM3 )                 /* sum             | sum             */

    PFMUL      ( MM5, MM5 )                 /* refinement step: approx^2, ...    */
    PFRSQIT1   ( MM3, MM5 )
    PFRCPIT2   ( MM4, MM5 )                 /* ... refined 1/sqrt in both lanes  */

    PFMUL      ( MM5, MM0 )                 /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) )    /* write new x0, x1                  */
    PFMUL      ( MM5, MM1 )                 /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )     /* write new x2                      */
    SUB_L      ( CONST(1), EBP )            /* decrement normal counter          */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
216
217
218
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass over the in->count input normals x using only the diagonal of
 * m = mat->inv:
 *      t0 = x0*m0,  t1 = x1*m5,  t2 = x2*m10
 * and then
 *   - with a lengths array (ARG_LENGTHS != 0):  r = t * lengths[i]
 *   - without one: the diagonal is pre-multiplied by ARG_SCALE and
 *     r = t * 1/sqrt(t0*t0 + t1*t1 + t2*t2)  (PFRSQRT plus one
 *     PFRSQIT1/PFRCPIT2 refinement).
 * Results are written 16 bytes apart to dest; the input advances by STRIDE.
 *
 * Each length is loaded at the top of the iteration that uses it, so exactly
 * `count` elements of the lengths array are read.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 EDI = lengths, ESI = in (for STRIDE), EBP = counter.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, flags and the MMX
 * registers are clobbered.  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )  /* dest->count = in->count           */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* dest->start                       */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv                          */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* in->start                         */

    CMP_L      ( CONST(0), EBP )            /* nothing to do for count == 0      */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                /*                 | m0              */
    PUNPCKLDQ  ( M(5), MM0 )                /* m5              | m0              */

    MOVD       ( M(10), MM2 )               /*                 | m10             */
    PUNPCKLDQ  ( MM2, MM2 )                 /* m10             | m10             */

    CMP_L      ( CONST(0), EDI )            /* lengths given? use them unscaled  */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) )

    MOVD       ( ARG_SCALE, MM7 )           /*                 | scale           */
    PUNPCKLDQ  ( MM7, MM7 )                 /* scale           | scale           */

    PFMUL      ( MM7, MM0 )                 /* scale * m5      | scale * m0      */
    PFMUL      ( MM7, MM2 )                 /* scale * m10     | scale * m10     */

ALIGNTEXT32
LLBL (G3TNNR_norm):                         /* need to calculate lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )         /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )      /*                 | x2              */

    PFMUL      ( MM0, MM6 )                 /* x1*m5           | x0*m0           */
    ADD_L      ( CONST(16), EAX )           /* next r                            */

    PFMUL      ( MM2, MM7 )                 /*                 | x2*m10          */
    MOVQ       ( MM6, MM3 )                 /* t1              | t0              */

    MOVQ       ( MM7, MM4 )                 /*                 | t2              */
    PFMUL      ( MM6, MM3 )                 /* t1*t1           | t0*t0           */

    PFMUL      ( MM7, MM4 )                 /*                 | t2*t2           */
    PFACC      ( MM3, MM3 )                 /* t0*t0+t1*t1     | t0*t0+t1*t1     */

    PFADD      ( MM4, MM3 )                 /* *not used*      | t0*t0+t1*t1+t2*t2 */
    ADD_L      ( STRIDE, EDX )              /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt (t0*t0+t1*t1+t2*t2) */
    MOVQ       ( MM5, MM4 )                 /* keep the first approximation      */

    PUNPCKLDQ  ( MM3, MM3 )                 /* sum             | sum             */
    PFMUL      ( MM5, MM5 )                 /* refinement step: approx^2, ...    */

    PFRSQIT1   ( MM3, MM5 )
    PFRCPIT2   ( MM4, MM5 )                 /* ... refined 1/sqrt in both lanes  */

    PFMUL      ( MM5, MM6 )                 /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )    /* write r0, r1                      */
    PFMUL      ( MM5, MM7 )                 /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )     /* write r2                          */
    SUB_L      ( CONST(1), EBP )            /* decrement normal counter          */
    JNZ        ( LLBL (G3TNNR_norm) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):               /* use precalculated lengths         */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )         /* x1              | x0              */
    MOVD       ( REGOFF(8, EDX), MM7 )      /*                 | x2              */
    MOVD       ( REGIND(EDI), MM3 )         /*                 | length (x)      */

    PFMUL      ( MM0, MM6 )                 /* x1*m5           | x0*m0           */
    ADD_L      ( STRIDE, EDX )              /* next normal                       */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                 /*                 | x2*m10          */
    ADD_L      ( CONST(16), EAX )           /* next r                            */

    PFMUL      ( MM3, MM7 )                 /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                 /* length (x)      | length (x)      */

    ADD_L      ( CONST(4), EDI )            /* next length                       */
    PFMUL      ( MM3, MM6 )                 /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )    /* write r0, r1                      */
    MOVD       ( MM7, REGOFF(-8, EAX) )     /* write r2                          */

    SUB_L      ( CONST(1), EBP )            /* decrement normal counter          */
    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )

LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
353
354
355
356
357
358
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For each of the in->count input normals x, writes
 *      r0 = x0 * (scale * m0)
 *      r1 = x1 * (scale * m5)
 *      r2 = x2 * (scale * m10)
 * to dest, where m = mat->inv and scale = ARG_SCALE.  Results are stored
 * 16 bytes apart; the input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 ESI = in (for STRIDE), EBP = counter.
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used);
 * EAX, ECX, EDX, flags and the MMX registers are clobbered.
 * dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */

    CMP_L      ( CONST(0), EBP )
    JE         ( LLBL (G3TRNR_done) )       /* empty input: skip the MMX part */

    FEMMS

    /* Build the scaled diagonal once, outside the loop. */
    MOVD       ( ARG_SCALE, MM6 )           /*           | scale      */
    PUNPCKLDQ  ( MM6, MM6 )                 /* scale     | scale      */

    MOVD       ( M(0), MM0 )                /*           | m0         */
    PUNPCKLDQ  ( M(5), MM0 )                /* m5        | m0         */
    MOVD       ( M(10), MM2 )               /*           | m10        */

    PFMUL      ( MM6, MM0 )                 /* scale*m5  | scale*m0   */
    PFMUL      ( MM6, MM2 )                 /*           | scale*m10  */

ALIGNTEXT32
LLBL (G3TRNR_loop):

    PREFETCHW  ( REGIND(EAX) )

    /* Load one normal, then step the input pointer. */
    MOVQ       ( REGIND(EDX), MM4 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, EDX), MM5 )      /*           | x2         */
    ADD_L      ( STRIDE, EDX )

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM0, MM4 )                 /* r1        | r0         */
    PFMUL      ( MM2, MM5 )                 /*           | r2         */

    /* Store the result, then step the output pointer. */
    MOVQ       ( MM4, REGIND(EAX) )
    MOVD       ( MM5, REGOFF(8, EAX) )
    ADD_L      ( CONST(16), EAX )

    SUB_L      ( CONST(1), EBP )
    JNZ        ( LLBL (G3TRNR_loop) )       /* more normals left */

    FEMMS

LLBL (G3TRNR_done):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
426
427
428
429
430
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For each of the in->count input normals x, writes
 *      r0 = x0*s0 + x1*s1 + x2*s2
 *      r1 = x0*s4 + x1*s5 + x2*s6
 *      r2 = x0*s8 + x1*s9 + x2*s10
 * to dest, where s[i] = ARG_SCALE * m[i] and m = mat->inv.  Results are
 * stored 16 bytes apart; the input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 ESI = in (for STRIDE), EDI = counter.
 * EDI and ESI are saved and restored; EAX, ECX, EDX, flags and the MMX
 * registers are clobbered.  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

    CMP_L      ( CONST(0), EDI )
    JE         ( LLBL (G3TR_done) )         /* empty input: skip the MMX part */

    FEMMS

    /* Load the 3x3 part of the matrix ... */
    MOVQ       ( M(0), MM3 )                /* m1        | m0         */
    MOVQ       ( M(4), MM4 )                /* m5        | m4         */
    MOVD       ( M(2), MM5 )                /*           | m2         */
    PUNPCKLDQ  ( M(6), MM5 )                /* m6        | m2         */
    MOVQ       ( M(8), MM6 )                /* m9        | m8         */
    MOVD       ( M(10), MM7 )               /*           | m10        */

    /* ... and fold the scale factor into it. */
    MOVD       ( ARG_SCALE, MM0 )           /*           | scale      */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale     | scale      */

    PFMUL      ( MM0, MM3 )                 /* s1        | s0         */
    PFMUL      ( MM0, MM4 )                 /* s5        | s4         */
    PFMUL      ( MM0, MM5 )                 /* s6        | s2         */
    PFMUL      ( MM0, MM6 )                 /* s9        | s8         */
    PFMUL      ( MM0, MM7 )                 /*           | s10        */

ALIGNTEXT32
LLBL (G3TR_loop):

    PREFETCHW  ( REGIND(EAX) )

    /* r0 and r1 */
    MOVQ       ( REGIND(EDX), MM0 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*           | x2         */
    MOVQ       ( MM0, MM1 )                 /* x1        | x0         */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2        | x2         */

    PFMUL      ( MM3, MM0 )                 /* x1*s1     | x0*s0      */
    PFMUL      ( MM4, MM1 )                 /* x1*s5     | x0*s4      */
    PFMUL      ( MM5, MM2 )                 /* x2*s6     | x2*s2      */
    PFACC      ( MM1, MM0 )                 /* x0*s4+x1*s5 | x0*s0+x1*s1 */
    PFADD      ( MM2, MM0 )                 /* r1        | r0         */

    /* r2: only three registers are free, so x is read again */
    MOVQ       ( REGIND(EDX), MM1 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*           | x2         */
    ADD_L      ( STRIDE, EDX )              /* next input normal      */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM6, MM1 )                 /* x1*s9     | x0*s8      */
    PFMUL      ( MM7, MM2 )                 /*           | x2*s10     */
    PFACC      ( MM1, MM1 )                 /* (unused)  | x0*s8+x1*s9 */
    PFADD      ( MM2, MM1 )                 /* (unused)  | r2         */

    /* Store the result, then step the output pointer. */
    MOVQ       ( MM0, REGIND(EAX) )
    MOVD       ( MM1, REGOFF(8, EAX) )
    ADD_L      ( CONST(16), EAX )

    SUB_L      ( CONST(1), EDI )
    JNZ        ( LLBL (G3TR_loop) )         /* more normals left */

    FEMMS

LLBL (G3TR_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
522
523
524
525
526
527
528
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For each of the in->count input normals x, writes
 *      r0 = x0 * m0
 *      r1 = x1 * m5
 *      r2 = x2 * m10
 * to dest, where m = mat->inv.  Results are stored 16 bytes apart; the
 * input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 ESI = in (for STRIDE), EDI = counter.
 * EDI and ESI are saved and restored; EAX, ECX, EDX, flags and the MMX
 * registers are clobbered.  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

    CMP_L      ( CONST(0), EDI )
    JE         ( LLBL (G3TNR_done) )        /* empty input: skip the MMX part */

    FEMMS

    /* Keep the matrix diagonal in registers for the whole loop. */
    MOVD       ( M(0), MM0 )                /*           | m0         */
    PUNPCKLDQ  ( M(5), MM0 )                /* m5        | m0         */

    MOVD       ( M(10), MM2 )               /*           | m10        */
    PUNPCKLDQ  ( MM2, MM2 )                 /* m10       | m10        */

ALIGNTEXT32
LLBL (G3TNR_loop):

    PREFETCHW  ( REGIND(EAX) )

    /* Load one normal, then step the input pointer. */
    MOVQ       ( REGIND(EDX), MM4 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, EDX), MM5 )      /*           | x2         */
    ADD_L      ( STRIDE, EDX )

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM0, MM4 )                 /* r1        | r0         */
    PFMUL      ( MM2, MM5 )                 /*           | r2         */

    /* Store the result, then step the output pointer. */
    MOVQ       ( MM4, REGIND(EAX) )
    MOVD       ( MM5, REGOFF(8, EAX) )
    ADD_L      ( CONST(16), EAX )

    SUB_L      ( CONST(1), EDI )
    JNZ        ( LLBL (G3TNR_loop) )        /* more normals left */

    FEMMS

LLBL (G3TNR_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
588
589
590
591
592
593
594
595
/*
 * _mesa_3dnow_transform_normals
 *
 * For each of the in->count input normals x, writes
 *      r0 = x0*m0 + x1*m1 + x2*m2
 *      r1 = x0*m4 + x1*m5 + x2*m6
 *      r2 = x0*m8 + x1*m9 + x2*m10
 * to dest, where m = mat->inv.  Results are stored 16 bytes apart; the
 * input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, EDX = input pointer, ECX = matrix,
 *                 ESI = in (for STRIDE), EDI = counter.
 * EDI and ESI are saved and restored; EAX, ECX, EDX, flags and the MMX
 * registers are clobbered.  dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )  /* EDX = in->start         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv          */

    CMP_L      ( CONST(0), EDI )
    JE         ( LLBL (G3T_done) )          /* empty input: skip the MMX part */

    FEMMS

    /* Keep the 3x3 part of the matrix in MM3..MM7 for the whole loop. */
    MOVQ       ( M(0), MM3 )                /* m1        | m0         */
    MOVQ       ( M(4), MM4 )                /* m5        | m4         */
    MOVD       ( M(2), MM5 )                /*           | m2         */
    PUNPCKLDQ  ( M(6), MM5 )                /* m6        | m2         */
    MOVQ       ( M(8), MM6 )                /* m9        | m8         */
    MOVD       ( M(10), MM7 )               /*           | m10        */

ALIGNTEXT32
LLBL (G3T_loop):

    PREFETCHW  ( REGIND(EAX) )

    /* r0 and r1 */
    MOVQ       ( REGIND(EDX), MM0 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*           | x2         */
    MOVQ       ( MM0, MM1 )                 /* x1        | x0         */
    PUNPCKLDQ  ( MM2, MM2 )                 /* x2        | x2         */

    PFMUL      ( MM3, MM0 )                 /* x1*m1     | x0*m0      */
    PFMUL      ( MM4, MM1 )                 /* x1*m5     | x0*m4      */
    PFMUL      ( MM5, MM2 )                 /* x2*m6     | x2*m2      */
    PFACC      ( MM1, MM0 )                 /* x0*m4+x1*m5 | x0*m0+x1*m1 */
    PFADD      ( MM2, MM0 )                 /* r1        | r0         */

    /* r2: only three registers are free, so x is read again.  The order
     * load x0/x1, store r0/r1, load x2 is kept as it was. */
    MOVQ       ( REGIND(EDX), MM1 )         /* x1        | x0         */
    MOVQ       ( MM0, REGIND(EAX) )         /* write r0, r1           */
    MOVD       ( REGOFF(8, EDX), MM2 )      /*           | x2         */
    ADD_L      ( STRIDE, EDX )              /* next input normal      */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM6, MM1 )                 /* x1*m9     | x0*m8      */
    PFMUL      ( MM7, MM2 )                 /*           | x2*m10     */
    PFACC      ( MM1, MM1 )                 /* (unused)  | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                 /* (unused)  | r2         */

    MOVD       ( MM1, REGOFF(8, EAX) )      /* write r2               */
    ADD_L      ( CONST(16), EAX )           /* next output slot       */

    SUB_L      ( CONST(1), EDI )
    JNZ        ( LLBL (G3T_loop) )          /* more normals left */

    FEMMS

LLBL (G3T_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
675
676
677
678
679
680
/*
 * _mesa_3dnow_normalize_normals
 *
 * Copies the in->count input normals to dest, scaling each one:
 *   - ARG_LENGTHS != 0:  r = x * lengths[i]
 *   - ARG_LENGTHS == 0:  r = x * 1/sqrt(x0*x0 + x1*x1 + x2*x2), computed
 *     with PFRSQRT plus one PFRSQIT1/PFRCPIT2 refinement.
 * Results are stored 16 bytes apart; the input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, ECX = input pointer, EDX = lengths,
 *                 ESI = in (for STRIDE), EBP = counter.
 * EDI, ESI and EBP are saved and restored (EDI is not otherwise used);
 * EAX, ECX, EDX, flags and the MMX registers are clobbered.
 * dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_LENGTHS, EDX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* ECX = in->start         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */

    CMP_L      ( CONST(0), EBP )
    JE         ( LLBL (G3N_done) )          /* empty input: skip the MMX part */

    FEMMS

    CMP_L      ( CONST(0), EDX )
    JE         ( LLBL (G3N_by_rsqrt) )      /* no lengths array supplied */

ALIGNTEXT32
LLBL (G3N_by_lengths):                      /* scale by the supplied lengths */

    PREFETCH   ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, ECX), MM1 )      /*           | x2         */
    MOVD       ( REGIND(EDX), MM3 )         /*           | len        */
    ADD_L      ( STRIDE, ECX )              /* next input normal      */

    PREFETCH   ( REGIND(ECX) )

    PUNPCKLDQ  ( MM3, MM3 )                 /* len       | len        */
    PFMUL      ( MM3, MM0 )                 /* x1*len    | x0*len     */
    PFMUL      ( MM3, MM1 )                 /* (unused)  | x2*len     */

    MOVQ       ( MM0, REGIND(EAX) )         /* write r0, r1           */
    MOVD       ( MM1, REGOFF(8, EAX) )      /* write r2               */

    ADD_L      ( CONST(4), EDX )            /* next length            */
    ADD_L      ( CONST(16), EAX )           /* next output slot       */

    SUB_L      ( CONST(1), EBP )
    JNZ        ( LLBL (G3N_by_lengths) )

    JMP        ( LLBL (G3N_femms) )

ALIGNTEXT32
LLBL (G3N_by_rsqrt):                        /* derive the factor from x itself */

    PREFETCHW  ( REGIND(EAX) )

    PREFETCH   ( REGIND(ECX) )

    MOVQ       ( REGIND(ECX), MM0 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, ECX), MM1 )      /*           | x2         */
    ADD_L      ( STRIDE, ECX )              /* next input normal      */

    /* Squared length; the summation order is (x0*x0 + x2*x2) + x1*x1. */
    MOVQ       ( MM0, MM3 )
    MOVQ       ( MM1, MM4 )
    PFMUL      ( MM0, MM3 )                 /* x1*x1     | x0*x0      */
    PFMUL      ( MM1, MM4 )                 /*           | x2*x2      */
    PFADD      ( MM4, MM3 )                 /* x1*x1     | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                 /* sum       | sum        */

    /* Reciprocal square root: estimate, then one refinement step. */
    PFRSQRT    ( MM3, MM5 )                 /* approx. 1/sqrt(sum)    */
    MOVQ       ( MM5, MM4 )                 /* keep the estimate      */
    PUNPCKLDQ  ( MM3, MM3 )                 /* sum       | sum        */
    PFMUL      ( MM5, MM5 )                 /* estimate squared       */
    PFRSQIT1   ( MM3, MM5 )
    PFRCPIT2   ( MM4, MM5 )                 /* refined 1/sqrt(sum)    */

    PFMUL      ( MM5, MM0 )                 /* x1 (normalized) | x0 (normalized) */
    PFMUL      ( MM5, MM1 )                 /*                 | x2 (normalized) */

    MOVQ       ( MM0, REGIND(EAX) )         /* write r0, r1           */
    MOVD       ( MM1, REGOFF(8, EAX) )      /* write r2               */
    ADD_L      ( CONST(16), EAX )           /* next output slot       */

    SUB_L      ( CONST(1), EBP )
    JNZ        ( LLBL (G3N_by_rsqrt) )

LLBL (G3N_femms):
    FEMMS

LLBL (G3N_done):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
787
788
789
790
791
792
/*
 * _mesa_3dnow_rescale_normals
 *
 * For each of the in->count input normals x, writes
 *      r0 = x0 * scale,  r1 = x1 * scale,  r2 = x2 * scale
 * to dest, with scale = ARG_SCALE.  Results are stored 16 bytes apart; the
 * input pointer advances by STRIDE.
 *
 * Register roles: EAX = dest pointer, ECX = input pointer,
 *                 ESI = in (for STRIDE), EDX = counter.
 * EDI and ESI are saved and restored (EDI is not otherwise used);
 * EAX, ECX, EDX, flags and the MMX registers are clobbered.
 * dest->count is set to in->count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch the arguments and publish the element count. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )  /* dest->count = in->count */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )  /* ECX = in->start         */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )  /* EAX = dest->start       */

    CMP_L      ( CONST(0), EDX )
    JE         ( LLBL (G3R_done) )          /* empty input: skip the MMX part */

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )           /*           | scale      */
    PUNPCKLDQ  ( MM0, MM0 )                 /* scale     | scale      */

ALIGNTEXT32
LLBL (G3R_loop):

    PREFETCHW  ( REGIND(EAX) )

    /* Load one normal, then step the input pointer. */
    MOVQ       ( REGIND(ECX), MM1 )         /* x1        | x0         */
    MOVD       ( REGOFF(8, ECX), MM2 )      /*           | x2         */
    ADD_L      ( STRIDE, ECX )

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM1 )                 /* x1*scale  | x0*scale   */
    PFMUL      ( MM0, MM2 )                 /* (unused)  | x2*scale   */

    /* Store the result, then step the output pointer. */
    MOVQ       ( MM1, REGIND(EAX) )
    MOVD       ( MM2, REGOFF(8, EAX) )
    ADD_L      ( CONST(16), EAX )

    SUB_L      ( CONST(1), EDX )
    JNZ        ( LLBL (G3R_loop) )          /* more normals left */

    FEMMS

LLBL (G3R_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
846
847 #endif
848
849 #if defined (__ELF__) && defined (__linux__)
850 .section .note.GNU-stack,"",%progbits
851 #endif