mesa: Remove assyntax.h include from generated matypes.h.
[mesa.git] / src / mesa / x86 / 3dnow_normal.S
1 /* $Id: 3dnow_normal.S,v 1.10 2006/04/17 18:58:24 krh Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*
28 * 3Dnow assembly code by Holger Waechtler
29 */
30
31 #ifdef USE_3DNOW_ASM
32
33 #include "assyntax.h"
34 #include "matypes.h"
35 #include "norm_args.h"
36
37 SEG_TEXT
38
/*
 * M(i): memory operand for the i-th 4-byte element of the matrix whose
 * base address is in ECX.  The argument is parenthesized so that an
 * expression argument (e.g. M(n+1)) scales as a whole.
 */
#define M(i)    REGOFF((i) * 4, ECX)

/*
 * STRIDE: memory operand at byte offset 12 of the structure addressed by
 * ESI.  It is added to the input cursor to step to the next normal
 * (presumably the stride field of the input vector -- confirm against
 * the vector structure definition).
 */
#define STRIDE  REGOFF(12, ESI)
41
42
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Transform every input normal by the 3x3 part of mat->inv and then
 * normalize the result, using 3DNow! instructions.
 *
 * Arguments are fetched from the stack through the ARG_* macros
 * (ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST), which are
 * expressed relative to FRAME_OFFSET.
 *
 * Pass 1 (transform), for each normal x:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 *   The input is walked with STRIDE, the output with a fixed 16-byte step.
 * Pass 2 (normalize, in place on the output):
 *     lengths != NULL : r *= lengths[i]
 *     lengths == NULL : r *= 1/sqrt(r.r)  (PFRSQRT + one refinement step)
 *
 * Register roles:
 *     EDI = lengths cursor     ESI = in (base for STRIDE)
 *     EAX = output cursor      EDX = input cursor
 *     ECX = matrix             EBP = remaining count
 * EDI, ESI and EBP are saved on entry and restored on exit.
 *
 * NOTE(review): scale is multiplied into the matrix only when
 * lengths == NULL, i.e. on the path that renormalizes from scratch; the
 * lengths != NULL path uses the unscaled matrix.  Confirm that this is
 * the intended condition.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */

    CMP_L      ( CONST(0), EBP )                    /* nothing to do for count == 0 */
    JE         ( LLBL (G3TN_end) )

    /* EBP still holds in->count here; no reload is needed. */
    FEMMS

    /* Save count and both cursors; pass 2 restarts from the saved count
     * and output pointer.  EDX is not used by pass 2 but is kept on the
     * stack so that the frame layout (FRAME_OFFSET 24) stays unchanged. */
    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )

#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                        /* m1       | m0       */
    MOVQ       ( M(4), MM4 )                        /* m5       | m4       */

    MOVD       ( M(2), MM5 )                        /* 0        | m2       */
    PUNPCKLDQ  ( M(6), MM5 )                        /* m6       | m2       */

    MOVQ       ( M(8), MM6 )                        /* m9       | m8       */
    MOVD       ( M(10), MM7 )                       /* 0        | m10      */

    CMP_L      ( CONST(0), EDI )                    /* lengths == NULL ? */
    JNE        ( LLBL (G3TN_scale_end) )            /* no: leave matrix unscaled */

    MOVD       ( ARG_SCALE, MM0 )                   /* 0        | scale    */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale    | scale    */

    PFMUL      ( MM0, MM3 )                         /* scale*m1 | scale*m0 */
    PFMUL      ( MM0, MM4 )                         /* scale*m5 | scale*m4 */
    PFMUL      ( MM0, MM5 )                         /* scale*m6 | scale*m2 */
    PFMUL      ( MM0, MM6 )                         /* scale*m9 | scale*m8 */
    PFMUL      ( MM0, MM7 )                         /* 0        | scale*m10 */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):                              /* ---- pass 1: transform ---- */
    MOVQ       ( REGIND (EDX), MM0 )                /* x1 | x0 */
    MOVD       ( REGOFF (8, EDX), MM2 )             /* 0  | x2 */

    MOVQ       ( MM0, MM1 )                         /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                         /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(16), EAX )                   /* advance output; stores below use negative offsets */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                         /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                         /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                         /* r1 | r0 */

    MOVQ       ( REGIND (EDX), MM1 )                /* x1 | x0 */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write r0, r1 */

    PFMUL      ( MM6, MM1 )                         /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF (8, EDX), MM2 )             /* 0  | x2 */

    PFMUL      ( MM7, MM2 )                         /* 0  | x2*m10 */
    PFACC      ( MM1, MM1 )                         /* (unused) | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                         /* (unused) | r2 */
    ADD_L      ( STRIDE, EDX )                      /* next input normal */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write r2 */
    SUB_L      ( CONST(1), EBP )                    /* sets ZF for the JNZ below */
    JNZ        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                              /* ---- pass 2: normalize ---- */
    POP_L      ( EAX )                              /* EAX = first output normal again */
    POP_L      ( EBP )                              /* EBP = count again */

    CMP_L      ( CONST(0), EDI )                    /* lengths == NULL ? */
    JE         ( LLBL (G3TN_norm) )                 /* yes: compute the lengths */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                         /* multiply by lengths[i] */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )              /* 0  | x2 */

    MOVD       ( REGIND (EDI), MM3 )                /* 0  | lengths[i] */
    PFMUL      ( MM3, MM1 )                         /* 0  | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                         /* lengths[i] | lengths[i] */
    PFMUL      ( MM3, MM0 )                         /* x1, x0 (normalized) */

    /* This loop works in place on the output, so the input cursor (EDX)
     * is not advanced here. */
    ADD_L      ( CONST(4), EDI )                    /* next length */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )                 /* write new x0, x1 */
    MOVD       ( MM1, REGOFF(8, EAX) )              /* write new x2 */

    ADD_L      ( CONST(16), EAX )                   /* next output normal */
    SUB_L      ( CONST(1), EBP )                    /* decrement counter */

    JNZ        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                                   /* compute 1/sqrt(x.x) */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )                /* x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )              /* 0  | x2 */

    MOVQ       ( MM0, MM3 )                         /* x1 | x0 */
    MOVQ       ( MM1, MM4 )                         /* 0  | x2 */

    PFMUL      ( MM0, MM3 )                         /* x1*x1 | x0*x0 */
    ADD_L      ( CONST(16), EAX )                   /* next output normal */

    PFMUL      ( MM1, MM4 )                         /* 0     | x2*x2 */
    PFADD      ( MM4, MM3 )                         /* x1*x1 | x0*x0+x2*x2 */

    PFACC      ( MM3, MM3 )                         /* x.x   | x.x */
    PFRSQRT    ( MM3, MM5 )                         /* approx 1/sqrt(x.x) */

    MOVQ       ( MM5, MM4 )                         /* keep the first approximation */
    PUNPCKLDQ  ( MM3, MM3 )

    SUB_L      ( CONST(1), EBP )                    /* sets ZF; the MMX/3DNow! ops below leave EFLAGS alone */
    PFMUL      ( MM5, MM5 )                         /* approximation squared */

    PFRSQIT1   ( MM3, MM5 )                         /* refinement, step 1 */
    PFRCPIT2   ( MM4, MM5 )                         /* refinement, step 2: refined 1/sqrt(x.x) */

    PFMUL      ( MM5, MM0 )                         /* x1, x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write new x0, x1 */
    PFMUL      ( MM5, MM1 )                         /* 0 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write new x2 */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
218
219
220
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Transform each input normal using only the diagonal elements m0, m5 and
 * m10 of mat->inv, and normalize it in the same loop:
 *     t = ( x0*m0, x1*m5, x2*m10 )
 *     lengths != NULL : r = t * lengths[i]
 *     lengths == NULL : r = t / sqrt(t.t)  (PFRSQRT + one refinement step)
 *
 * Arguments are fetched from the stack through the ARG_* macros
 * (ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST).
 *
 * Register roles:
 *     EDI = lengths cursor     ESI = in (base for STRIDE)
 *     EAX = output cursor      EDX = input cursor
 *     ECX = matrix             EBP = remaining count
 * EDI, ESI and EBP are saved on entry and restored on exit.
 *
 * lengths[i] is loaded at the top of each iteration, so exactly `count`
 * elements of the lengths array are read.
 *
 * NOTE(review): scale is multiplied into the matrix only when
 * lengths == NULL, i.e. on the path that renormalizes from scratch.
 * Confirm that this is the intended condition.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* in->start */

    CMP_L      ( CONST(0), EBP )                    /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                        /* 0   | m0  */
    PUNPCKLDQ  ( M(5), MM0 )                        /* m5  | m0  */

    MOVD       ( M(10), MM2 )                       /* 0   | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                         /* m10 | m10 */

    CMP_L      ( CONST(0), EDI )                    /* lengths == NULL ? */
    JNE        ( LLBL (G3TNNR_scale_end) )          /* no: leave matrix unscaled */

    MOVD       ( ARG_SCALE, MM7 )                   /* 0     | scale */
    PUNPCKLDQ  ( MM7, MM7 )                         /* scale | scale */

    PFMUL      ( MM7, MM0 )                         /* scale*m5  | scale*m0  */
    PFMUL      ( MM7, MM2 )                         /* scale*m10 | scale*m10 */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )                    /* lengths == NULL ? */
    JE         ( LLBL (G3TNNR_norm) )               /* yes: compute the lengths */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                       /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM7 )              /* 0  | x2 */

    MOVD       ( REGIND(EDI), MM3 )                 /* 0  | lengths[i]; loaded here so lengths[count] is never read */

    PFMUL      ( MM0, MM6 )                         /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX )                      /* next input normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                         /* 0 | x2*m10 */
    ADD_L      ( CONST(16), EAX )                   /* advance output; stores below use negative offsets */

    PFMUL      ( MM3, MM7 )                         /* 0 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                         /* lengths[i] | lengths[i] */

    ADD_L      ( CONST(4), EDI )                    /* next length */
    PFMUL      ( MM3, MM6 )                         /* x1, x0 (normalized) */

    SUB_L      ( CONST(1), EBP )                    /* sets ZF; the MMX stores below leave EFLAGS alone */
    MOVQ       ( MM6, REGOFF(-16, EAX) )            /* write r0, r1 */

    MOVD       ( MM7, REGOFF(-8, EAX) )             /* write r2 */

    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                                 /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM7 )              /* 0  | x2 */

    PFMUL      ( MM0, MM6 )                         /* t1 | t0  = x1*m5 | x0*m0 */
    ADD_L      ( CONST(16), EAX )                   /* advance output */

    PFMUL      ( MM2, MM7 )                         /* 0  | t2  = x2*m10 */
    MOVQ       ( MM6, MM3 )                         /* t1 | t0 */

    MOVQ       ( MM7, MM4 )                         /* 0  | t2 */
    PFMUL      ( MM6, MM3 )                         /* t1*t1 | t0*t0 */


    PFMUL      ( MM7, MM4 )                         /* 0 | t2*t2 */
    PFACC      ( MM3, MM3 )                         /* (unused) | t0*t0+t1*t1 */

    PFADD      ( MM4, MM3 )                         /* (unused) | t.t */
    ADD_L      ( STRIDE, EDX )                      /* next input normal */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                         /* approx 1/sqrt(t.t) */
    MOVQ       ( MM5, MM4 )                         /* keep the first approximation */

    PUNPCKLDQ  ( MM3, MM3 )                         /* t.t | t.t */
    PFMUL      ( MM5, MM5 )                         /* approximation squared */

    PFRSQIT1   ( MM3, MM5 )                         /* refinement, step 1 */
    SUB_L      ( CONST(1), EBP )                    /* sets ZF for the JNZ below */

    PFRCPIT2   ( MM4, MM5 )                         /* refinement, step 2: refined 1/sqrt(t.t) */
    PFMUL      ( MM5, MM6 )                         /* t1, t0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )            /* write r0, r1 */
    PFMUL      ( MM5, MM7 )                         /* 0 | t2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )             /* write r2 */
    JNZ        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
355
356
357
358
359
360
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Transform each input normal using only the diagonal elements m0, m5 and
 * m10 of mat->inv, pre-multiplied by scale:
 *     r = ( x0*scale*m0, x1*scale*m5, x2*scale*m10 )
 *
 * Arguments are fetched from the stack through the ARG_* macros
 * (ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST).
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = matrix
 *     EAX = output cursor          EDX = input cursor
 *     EBP = remaining count
 * EDI, ESI and EBP are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )                      /* loaded once; used for count, start and STRIDE */
    MOV_L      ( ARG_DEST, EDX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EDX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */
    MOV_L      ( REGOFF(V4F_START, EDX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* in->start */

    CMP_L      ( CONST(0), EBP )                    /* nothing to do for count == 0 */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )                   /* 0     | scale */
    PUNPCKLDQ  ( MM6, MM6 )                         /* scale | scale */

    MOVD       ( M(0), MM0 )                        /* 0     | m0 */
    PUNPCKLDQ  ( M(5), MM0 )                        /* m5    | m0 */

    PFMUL      ( MM6, MM0 )                         /* scale*m5 | scale*m0 */
    MOVD       ( M(10), MM2 )                       /* 0     | m10 */

    PFMUL      ( MM6, MM2 )                         /* 0     | scale*m10 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM5 )              /* 0  | x2 */

    PFMUL      ( MM0, MM4 )                         /* x1*scale*m5 | x0*scale*m0 */
    ADD_L      ( STRIDE, EDX )                      /* next input normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                         /* 0 | x2*scale*m10 */
    ADD_L      ( CONST(16), EAX )                   /* advance output; stores below use negative offsets */

    SUB_L      ( CONST(1), EBP )                    /* sets ZF; the MMX stores below leave EFLAGS alone */
    MOVQ       ( MM4, REGOFF(-16, EAX) )            /* write r0, r1 */

    MOVD       ( MM5, REGOFF(-8, EAX) )             /* write r2 */
    JNZ        ( LLBL (G3TRNR_rescale) )            /* more normals left */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
428
429
430
431
432
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiply the 3x3 part of mat->inv by scale once, then transform every
 * input normal x with the scaled matrix:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 *
 * Arguments come from the stack via ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST.
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = matrix
 *     EAX = output cursor          EDX = input cursor
 *     EDI = remaining count
 * EDI and ESI are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch arguments and copy the element count to the output. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )      /* EDI = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )      /* dest->count = EDI */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* EDX = in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* EAX = dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* ECX = mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* empty input: skip everything */
    JE         ( LLBL (G3TR_done) )

    FEMMS

    /* Broadcast scale, load the matrix rows, then scale them. */
    MOVD       ( ARG_SCALE, MM0 )                   /* 0     | scale */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale | scale */

    MOVQ       ( REGIND(ECX), MM3 )                 /* m1 | m0 */
    MOVQ       ( M(4), MM4 )                        /* m5 | m4 */
    MOVD       ( M(2), MM5 )                        /* 0  | m2 */
    PUNPCKLDQ  ( M(6), MM5 )                        /* m6 | m2 */
    MOVQ       ( M(8), MM6 )                        /* m9 | m8 */
    MOVD       ( M(10), MM7 )                       /* 0  | m10 */

    PFMUL      ( MM0, MM3 )                         /* scale*m1 | scale*m0 */
    PFMUL      ( MM0, MM4 )                         /* scale*m5 | scale*m4 */
    PFMUL      ( MM0, MM5 )                         /* scale*m6 | scale*m2 */
    PFMUL      ( MM0, MM6 )                         /* scale*m9 | scale*m8 */
    PFMUL      ( MM0, MM7 )                         /* 0        | scale*m10 */

ALIGNTEXT32
LLBL (G3TR_loop):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM2 )              /* 0  | x2 */

    MOVQ       ( MM0, MM1 )                         /* second copy for row 1 */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                         /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(16), EAX )                   /* step output; stores use negative offsets */

    PFMUL      ( MM4, MM1 )                         /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    MOVQ       ( REGIND(EDX), MM1 )                 /* x1 | x0 again, for row 2 */

    PFMUL      ( MM5, MM2 )                         /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                         /* r1 | r0 */

    MOVD       ( REGOFF(8, EDX), MM2 )              /* 0  | x2 */
    ADD_L      ( STRIDE, EDX )                      /* step input */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* store r0, r1 */
    PFMUL      ( MM6, MM1 )                         /* x1*m9 | x0*m8 */

    PFMUL      ( MM7, MM2 )                         /* 0 | x2*m10 */
    PFACC      ( MM1, MM1 )                         /* (unused) | x0*m8+x1*m9 */

    PFADD      ( MM2, MM1 )                         /* (unused) | r2 */
    MOVD       ( MM1, REGOFF(-8, EAX) )             /* store r2 */

    SUB_L      ( CONST(1), EDI )                    /* one normal done */
    JNZ        ( LLBL (G3TR_loop) )

    FEMMS

LLBL (G3TR_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
524
525
526
527
528
529
530
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Transform each input normal using only the diagonal elements m0, m5 and
 * m10 of mat->inv:
 *     r = ( x0*m0, x1*m5, x2*m10 )
 *
 * Arguments come from the stack via ARG_MAT, ARG_IN, ARG_DEST.
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = matrix
 *     EAX = output cursor          EDX = input cursor
 *     EDI = remaining count
 * EDI and ESI are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch arguments and copy the element count to the output. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )      /* EDI = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )      /* dest->count = EDI */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* EDX = in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* EAX = dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* ECX = mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* empty input: skip everything */
    JE         ( LLBL (G3TNR_done) )

    FEMMS

    /* Gather the three diagonal elements. */
    MOVD       ( REGIND(ECX), MM0 )                 /* 0   | m0 */
    PUNPCKLDQ  ( M(5), MM0 )                        /* m5  | m0 */

    MOVD       ( M(10), MM2 )                       /* 0   | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                         /* m10 | m10 */

ALIGNTEXT32
LLBL (G3TNR_loop):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM5 )              /* 0  | x2 */

    PFMUL      ( MM0, MM4 )                         /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX )                      /* step input */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                         /* 0 | x2*m10 */
    ADD_L      ( CONST(16), EAX )                   /* step output; stores use negative offsets */

    SUB_L      ( CONST(1), EDI )                    /* ZF survives the MMX stores below */
    MOVQ       ( MM4, REGOFF(-16, EAX) )            /* store r0, r1 */

    MOVD       ( MM5, REGOFF(-8, EAX) )             /* store r2 */
    JNZ        ( LLBL (G3TNR_loop) )

    FEMMS

LLBL (G3TNR_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
590
591
592
593
594
595
596
597
/*
 * _mesa_3dnow_transform_normals
 *
 * Transform every input normal x by the 3x3 part of mat->inv:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 *
 * Arguments come from the stack via ARG_MAT, ARG_IN, ARG_DEST.
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = matrix
 *     EAX = output cursor          EDX = input cursor
 *     EDI = remaining count
 * EDI and ESI are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch arguments and copy the element count to the output. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI )      /* EDI = in->count */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )      /* dest->count = EDI */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX )      /* EDX = in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* EAX = dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* ECX = mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* empty input: skip everything */
    JE         ( LLBL (G3T_done) )

    FEMMS

    /* Keep the nine matrix elements in MM3..MM7 for the whole loop. */
    MOVQ       ( REGIND(ECX), MM3 )                 /* m1 | m0 */
    MOVQ       ( M(4), MM4 )                        /* m5 | m4 */

    MOVD       ( M(2), MM5 )                        /* 0  | m2 */
    PUNPCKLDQ  ( M(6), MM5 )                        /* m6 | m2 */

    MOVQ       ( M(8), MM6 )                        /* m9 | m8 */
    MOVD       ( M(10), MM7 )                       /* 0  | m10 */

ALIGNTEXT32
LLBL (G3T_loop):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM2 )              /* 0  | x2 */

    MOVQ       ( MM0, MM1 )                         /* second copy for row 1 */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2 | x2 */

    PFMUL      ( MM3, MM0 )                         /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(16), EAX )                   /* step output; stores use negative offsets */

    PFMUL      ( MM4, MM1 )                         /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL      ( MM5, MM2 )                         /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                         /* r1 | r0 */

    MOVQ       ( REGIND(EDX), MM1 )                 /* x1 | x0 again, for row 2 */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* store r0, r1 */

    PFMUL      ( MM6, MM1 )                         /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF(8, EDX), MM2 )              /* 0  | x2 */

    PFMUL      ( MM7, MM2 )                         /* 0  | x2*m10 */
    ADD_L      ( STRIDE, EDX )                      /* step input */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                         /* (unused) | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                         /* (unused) | r2 */

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* store r2 */
    SUB_L      ( CONST(1), EDI )                    /* one normal done */

    JNZ        ( LLBL (G3T_loop) )

    FEMMS

LLBL (G3T_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
677
678
679
680
681
682
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalize every input normal x and write it to the output:
 *     lengths != NULL : r = x * lengths[i]
 *     lengths == NULL : r = x / sqrt(x.x)  (PFRSQRT + one refinement step)
 *
 * Arguments are fetched from the stack through ARG_IN, ARG_DEST and
 * ARG_LENGTHS.
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = input cursor
 *     EAX = output cursor          EDX = lengths cursor
 *     EBP = remaining count
 * EDI, ESI and EBP are saved on entry and restored on exit.
 *
 * Both loops prefetch the output line with PREFETCHW because it is about
 * to be written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )      /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                    /* nothing to do for count == 0 */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )                    /* lengths == NULL ? */
    JE         ( LLBL (G3N_norm2) )                 /* yes: compute the lengths */

ALIGNTEXT32
LLBL (G3N_norm1):                                   /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )                      /* output line will be written */

    MOVQ       ( REGIND(ECX), MM0 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM1 )              /* 0  | x2 */

    MOVD       ( REGIND(EDX), MM3 )                 /* 0  | lengths[i] */
    PFMUL      ( MM3, MM1 )                         /* 0  | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                         /* lengths[i] | lengths[i] */
    ADD_L      ( STRIDE, ECX )                      /* next input normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                         /* x1, x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )                 /* write new x0, x1 */

    MOVD       ( MM1, REGOFF(8, EAX) )              /* write new x2 */
    ADD_L      ( CONST(16), EAX )                   /* next output normal */

    ADD_L      ( CONST(4), EDX )                    /* next length */
    SUB_L      ( CONST(1), EBP )                    /* decrement counter */

    JNZ        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                                   /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    PREFETCH   ( REGIND(ECX) )

    MOVQ       ( REGIND(ECX), MM0 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM1 )              /* 0  | x2 */

    MOVQ       ( MM0, MM3 )                         /* x1 | x0 */
    ADD_L      ( STRIDE, ECX )                      /* next input normal */

    PFMUL      ( MM0, MM3 )                         /* x1*x1 | x0*x0 */
    MOVQ       ( MM1, MM4 )                         /* 0  | x2 */

    ADD_L      ( CONST(16), EAX )                   /* advance output; stores below use negative offsets */
    PFMUL      ( MM1, MM4 )                         /* 0  | x2*x2 */

    PFADD      ( MM4, MM3 )                         /* x1*x1 | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                         /* x.x | x.x */

    PFRSQRT    ( MM3, MM5 )                         /* approx 1/sqrt(x.x) */
    MOVQ       ( MM5, MM4 )                         /* keep the first approximation */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )                         /* approximation squared */

    PFRSQIT1   ( MM3, MM5 )                         /* refinement, step 1 */
    SUB_L      ( CONST(1), EBP )                    /* sets ZF; the MMX/3DNow! ops below leave EFLAGS alone */

    PFRCPIT2   ( MM4, MM5 )                         /* refinement, step 2: refined 1/sqrt(x.x) */

    PFMUL      ( MM5, MM0 )                         /* x1, x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write new x0, x1 */

    PFMUL      ( MM5, MM1 )                         /* 0 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write new x2 */

    JNZ        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
789
790
791
792
793
794
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiply every input normal by a single scalar:
 *     r = ( x0*scale, x1*scale, x2*scale )
 *
 * Arguments come from the stack via ARG_SCALE, ARG_IN, ARG_DEST.
 *
 * Register roles:
 *     ESI = in (base for STRIDE)   ECX = input cursor
 *     EAX = output cursor          EDX = remaining count
 * EDI and ESI are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    /* Fetch arguments and copy the element count to the output. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX )      /* EDX = in->count */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )      /* dest->count = EDX */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX )      /* ECX = in->start */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )      /* EAX = dest->start */

    CMP_L      ( CONST(0), EDX )                    /* empty input: skip everything */
    JE         ( LLBL (G3R_done) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )                   /* 0     | scale */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale | scale */

ALIGNTEXT32
LLBL (G3R_loop):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )                 /* x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM2 )              /* 0  | x2 */

    PFMUL      ( MM0, MM1 )                         /* x1*scale | x0*scale */
    ADD_L      ( STRIDE, ECX )                      /* step input */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )                         /* 0 | x2*scale */
    ADD_L      ( CONST(16), EAX )                   /* step output; stores use negative offsets */

    MOVQ       ( MM1, REGOFF(-16, EAX) )            /* store r0, r1 */
    MOVD       ( MM2, REGOFF(-8, EAX) )             /* store r2 */

    SUB_L      ( CONST(1), EDX )                    /* one normal done */
    JNZ        ( LLBL (G3R_loop) )

    FEMMS

LLBL (G3R_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
848
849 #endif
850
851 #if defined (__ELF__) && defined (__linux__)
852 .section .note.GNU-stack,"",%progbits
853 #endif