3 * Mesa 3-D graphics library
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
33 #include "xform_args.h"
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0. */
37 #define FP_ONE 1065353216
/*
 * Operand shorthands.  SRCn and DSTn name the dword at byte offset 4*n
 * (n = 0..3) relative to ESI and EDI respectively.
 * NOTE(review): REGOFF is defined outside this listing; presumably it builds
 * a displacement(base) memory operand -- confirm against its definition.
 */
40 #define SRC0 REGOFF(0, ESI)
41 #define SRC1 REGOFF(4, ESI)
42 #define SRC2 REGOFF(8, ESI)
43 #define SRC3 REGOFF(12, ESI)
44 #define DST0 REGOFF(0, EDI)
45 #define DST1 REGOFF(4, EDI)
46 #define DST2 REGOFF(8, EDI)
47 #define DST3 REGOFF(12, EDI)
/* MATn names the dword at byte offset 4*n (n = 0..15) relative to EDX. */
48 #define MAT0 REGOFF(0, EDX)
49 #define MAT1 REGOFF(4, EDX)
50 #define MAT2 REGOFF(8, EDX)
51 #define MAT3 REGOFF(12, EDX)
52 #define MAT4 REGOFF(16, EDX)
53 #define MAT5 REGOFF(20, EDX)
54 #define MAT6 REGOFF(24, EDX)
55 #define MAT7 REGOFF(28, EDX)
56 #define MAT8 REGOFF(32, EDX)
57 #define MAT9 REGOFF(36, EDX)
58 #define MAT10 REGOFF(40, EDX)
59 #define MAT11 REGOFF(44, EDX)
60 #define MAT12 REGOFF(48, EDX)
61 #define MAT13 REGOFF(52, EDX)
62 #define MAT14 REGOFF(56, EDX)
63 #define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points4_general
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX
 * (the base register of the MATn shorthands).  ECX receives the source's
 * V4F_COUNT field and EAX its V4F_STRIDE field.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is then shifted left by 4 (count * 16) and ESI/EDI
 * are replaced by the V4F_START fields of source and destination.
 *
 * Per element, each of SRC0..SRC3 is pushed onto the x87 stack four times and
 * folded into four running values F4..F7; F0..F3 are temporaries.  The
 * trailing comments list the x87 stack with ST(0) leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the instruction that sets the flags tested by JZ/JNE, the definition of
 * the x86_p4_gr_loop label, any multiply against MATn, the store to DST3,
 * the advance of the source pointer, the matching #undef of FRAME_OFFSET and
 * the return.  Confirm all of these against the complete file.
 */
67 GLOBL GLNAME( _mesa_x86_transform_points4_general )
68 HIDDEN(_mesa_x86_transform_points4_general)
69 GLNAME( _mesa_x86_transform_points4_general ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
71 #define FRAME_OFFSET 8
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
82 JZ( LLBL(x86_p4_gr_done) )
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
90 SHL_L( CONST(4), ECX )
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Four copies of SRC0 seed the running values F4..F7. */
99 FLD_S( SRC0 ) /* F4 */
101 FLD_S( SRC0 ) /* F5 F4 */
103 FLD_S( SRC0 ) /* F6 F5 F4 */
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same load-and-fold pattern for SRC2. */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same load-and-fold pattern for SRC3. */
140 FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
142 FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
144 FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
146 FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
149 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
150 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
151 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
152 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
153 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
154 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Bring each result to ST(0) and pop it into the destination: F4, F5, F6. */
156 FXCH( ST(3) ) /* F4 F6 F5 F7 */
157 FSTP_S( DST0 ) /* F6 F5 F7 */
158 FXCH( ST(1) ) /* F5 F6 F7 */
159 FSTP_S( DST1 ) /* F6 F7 */
160 FSTP_S( DST2 ) /* F7 */
163 LLBL(x86_p4_gr_skip):
/* Step the destination pointer by 16 bytes. */
165 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
168 JNE( LLBL(x86_p4_gr_loop) )
170 LLBL(x86_p4_gr_done):
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * Per element, SRC0 and SRC1 are each loaded once (F4, F5), SRC2 is loaded
 * three times (two copies folded into F4 and F5, one kept as F6) and SRC3 is
 * loaded once and folded into F6.  Stack comments list ST(0) leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the flag-setting instructions for JZ/JNE, any multiply against MATn, the
 * load of EBX and the use of its sign-flipped value, the stores to DST2 and
 * DST3, the source-pointer advance, the #undef of FRAME_OFFSET and the
 * return.  Confirm against the complete file.
 */
181 GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
182 HIDDEN(_mesa_x86_transform_points4_perspective)
183 GLNAME( _mesa_x86_transform_points4_perspective ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
185 #define FRAME_OFFSET 12
190 MOV_L( ARG_SOURCE, ESI )
191 MOV_L( ARG_DEST, EDI )
193 MOV_L( ARG_MATRIX, EDX )
194 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
197 JZ( LLBL(x86_p4_pr_done) )
199 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
200 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
202 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
203 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
205 SHL_L( CONST(4), ECX )
206 MOV_L( REGOFF(V4F_START, ESI), ESI )
208 MOV_L( REGOFF(V4F_START, EDI), EDI )
212 LLBL(x86_p4_pr_loop):
214 FLD_S( SRC0 ) /* F4 */
217 FLD_S( SRC1 ) /* F5 F4 */
220 FLD_S( SRC2 ) /* F0 F5 F4 */
222 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
224 FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, then F5+=F1. */
227 FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
228 FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
229 FADDP( ST0, ST(2) ) /* F6 F5 F4 */
231 FLD_S( SRC3 ) /* F2 F6 F5 F4 */
/* F6 += F2. */
234 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* -2147483648 is 0x80000000: the XOR toggles bit 31 of EBX. */
237 XOR_L( CONST(-2147483648), EBX )/* change sign */
/* Bring F4 to ST(0), then pop F4 and F5 into the destination. */
239 FXCH( ST(2) ) /* F4 F5 F6 */
240 FSTP_S( DST0 ) /* F5 F6 */
241 FSTP_S( DST1 ) /* F6 */
245 LLBL(x86_p4_pr_skip):
/* Step the destination pointer by 16 bytes. */
247 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
250 JNE( LLBL(x86_p4_pr_loop) )
252 LLBL(x86_p4_pr_done):
/*
 * _mesa_x86_transform_points4_3d
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * Per element, each of SRC0..SRC3 is loaded three times and folded into the
 * three running values F4..F6 (F0..F2 are temporaries).  Stack comments list
 * ST(0) leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the flag-setting instructions for JZ/JNE, any multiply against MATn, the
 * stores to DST2 and DST3, the source-pointer advance, the #undef of
 * FRAME_OFFSET and the return.  Confirm against the complete file.
 */
264 GLOBL GLNAME( _mesa_x86_transform_points4_3d )
265 HIDDEN(_mesa_x86_transform_points4_3d)
266 GLNAME( _mesa_x86_transform_points4_3d ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
268 #define FRAME_OFFSET 12
273 MOV_L( ARG_SOURCE, ESI )
274 MOV_L( ARG_DEST, EDI )
276 MOV_L( ARG_MATRIX, EDX )
277 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
280 JZ( LLBL(x86_p4_3dr_done) )
282 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
283 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
285 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
286 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
288 SHL_L( CONST(4), ECX )
289 MOV_L( REGOFF(V4F_START, ESI), ESI )
291 MOV_L( REGOFF(V4F_START, EDI), EDI )
295 LLBL(x86_p4_3dr_loop):
/* Three copies of SRC0 seed the running values F4..F6. */
297 FLD_S( SRC0 ) /* F4 */
299 FLD_S( SRC0 ) /* F5 F4 */
301 FLD_S( SRC0 ) /* F6 F5 F4 */
304 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
306 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
308 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, F5+=F1, F6+=F2. */
311 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
312 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
313 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
314 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same load-and-fold pattern for SRC2. */
316 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
318 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
320 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
323 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
324 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
325 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
326 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same load-and-fold pattern for SRC3. */
328 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
330 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
332 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
335 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
336 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
337 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
338 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Bring F4 to ST(0), then pop F4 and F5 into the destination. */
342 FXCH( ST(2) ) /* F4 F5 F6 */
343 FSTP_S( DST0 ) /* F5 F6 */
344 FSTP_S( DST1 ) /* F6 */
348 LLBL(x86_p4_3dr_skip):
/* Step the destination pointer by 16 bytes. */
350 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
353 JNE( LLBL(x86_p4_3dr_loop) )
355 LLBL(x86_p4_3dr_done):
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * Per element, SRC0, SRC1 and SRC2 are each loaded once (F4, F5, F6) and
 * SRC3 is loaded three times, one copy being folded into each of F4..F6.
 * Stack comments list ST(0) leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the flag-setting instructions for JZ/JNE, any multiply against MATn, the
 * stores to DST2 and DST3, the source-pointer advance, the #undef of
 * FRAME_OFFSET and the return.  Confirm against the complete file.
 */
367 GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
368 HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
369 GLNAME(_mesa_x86_transform_points4_3d_no_rot):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
371 #define FRAME_OFFSET 12
376 MOV_L( ARG_SOURCE, ESI )
377 MOV_L( ARG_DEST, EDI )
379 MOV_L( ARG_MATRIX, EDX )
380 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
383 JZ( LLBL(x86_p4_3dnrr_done) )
385 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
386 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
388 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
389 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
391 SHL_L( CONST(4), ECX )
392 MOV_L( REGOFF(V4F_START, ESI), ESI )
394 MOV_L( REGOFF(V4F_START, EDI), EDI )
398 LLBL(x86_p4_3dnrr_loop):
400 FLD_S( SRC0 ) /* F4 */
403 FLD_S( SRC1 ) /* F5 F4 */
406 FLD_S( SRC2 ) /* F6 F5 F4 */
409 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
411 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
413 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, F5+=F1, F6+=F2. */
416 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
417 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
418 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
419 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Bring F4 to ST(0), then pop F4 and F5 into the destination. */
423 FXCH( ST(2) ) /* F4 F5 F6 */
424 FSTP_S( DST0 ) /* F5 F6 */
425 FSTP_S( DST1 ) /* F6 */
429 LLBL(x86_p4_3dnrr_skip):
/* Step the destination pointer by 16 bytes. */
431 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
434 JNE( LLBL(x86_p4_3dnrr_loop) )
436 LLBL(x86_p4_3dnrr_done):
/*
 * _mesa_x86_transform_points4_2d
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * Per element, SRC0, SRC1 and SRC3 are each loaded twice and folded into the
 * two running values F4 and F5 (F0, F1 are temporaries).  SRC2 is not loaded
 * onto the x87 stack in the visible code.  Stack comments list ST(0)
 * leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the flag-setting instructions for JZ/JNE, any multiply against MATn, how
 * SRC2 reaches the destination, the stores to DST1..DST3, the source-pointer
 * advance, the #undef of FRAME_OFFSET and the return.  Confirm against the
 * complete file.
 */
448 GLOBL GLNAME( _mesa_x86_transform_points4_2d )
449 HIDDEN(_mesa_x86_transform_points4_2d)
450 GLNAME( _mesa_x86_transform_points4_2d ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
452 #define FRAME_OFFSET 16
458 MOV_L( ARG_SOURCE, ESI )
459 MOV_L( ARG_DEST, EDI )
461 MOV_L( ARG_MATRIX, EDX )
462 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
465 JZ( LLBL(x86_p4_2dr_done) )
467 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
468 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
470 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
471 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
473 SHL_L( CONST(4), ECX )
474 MOV_L( REGOFF(V4F_START, ESI), ESI )
476 MOV_L( REGOFF(V4F_START, EDI), EDI )
480 LLBL(x86_p4_2dr_loop):
/* Two copies of SRC0 seed the running values F4 and F5. */
482 FLD_S( SRC0 ) /* F4 */
484 FLD_S( SRC0 ) /* F5 F4 */
487 FLD_S( SRC1 ) /* F0 F5 F4 */
489 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, F5+=F1. */
492 FXCH( ST(1) ) /* F0 F1 F5 F4 */
493 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
494 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Same load-and-fold pattern for SRC3. */
496 FLD_S( SRC3 ) /* F0 F5 F4 */
498 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
501 FXCH( ST(1) ) /* F0 F1 F5 F4 */
502 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
503 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Bring F4 to ST(0) and pop it into the destination. */
508 FXCH( ST(1) ) /* F4 F5 */
509 FSTP_S( DST0 ) /* F5 */
514 LLBL(x86_p4_2dr_skip):
/* Step the destination pointer by 16 bytes. */
516 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
519 JNE( LLBL(x86_p4_2dr_loop) )
521 LLBL(x86_p4_2dr_done):
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * Per element, SRC0 and SRC1 are each loaded once (F4, F5) and SRC3 is loaded
 * twice, one copy being folded into each of F4 and F5.  SRC2 is not loaded
 * onto the x87 stack in the visible code.  Stack comments list ST(0)
 * leftmost.
 *
 * NOTE(review): this listing appears to be missing lines.  Not visible here:
 * the flag-setting instructions for JZ/JNE, any multiply against MATn, how
 * SRC2 reaches the destination, the stores to DST1..DST3, the source-pointer
 * advance, the #undef of FRAME_OFFSET and the return.  Confirm against the
 * complete file.
 */
534 GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
535 HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
536 GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
538 #define FRAME_OFFSET 16
544 MOV_L( ARG_SOURCE, ESI )
545 MOV_L( ARG_DEST, EDI )
547 MOV_L( ARG_MATRIX, EDX )
548 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
551 JZ( LLBL(x86_p4_2dnrr_done) )
553 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
554 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
556 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
557 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
559 SHL_L( CONST(4), ECX )
560 MOV_L( REGOFF(V4F_START, ESI), ESI )
562 MOV_L( REGOFF(V4F_START, EDI), EDI )
566 LLBL(x86_p4_2dnrr_loop):
568 FLD_S( SRC0 ) /* F4 */
571 FLD_S( SRC1 ) /* F5 F4 */
574 FLD_S( SRC3 ) /* F0 F5 F4 */
576 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
/* Fold the temporaries in and pop them: F4+=F0, F5+=F1. */
579 FXCH( ST(1) ) /* F0 F1 F5 F4 */
580 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
581 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Bring F4 to ST(0) and pop it into the destination. */
586 FXCH( ST(1) ) /* F4 F5 */
587 FSTP_S( DST0 ) /* F5 */
592 LLBL(x86_p4_2dnrr_skip):
/* Step the destination pointer by 16 bytes. */
594 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
597 JNE( LLBL(x86_p4_2dnrr_loop) )
599 LLBL(x86_p4_2dnrr_done):
/*
 * _mesa_x86_transform_points4_identity
 *
 * Setup visible here: ESI <- ARG_SOURCE, EDI <- ARG_DEST, EDX <- ARG_MATRIX;
 * ECX <- source V4F_COUNT, EAX <- source V4F_STRIDE.  The destination gets
 * VEC_SIZE_4 OR-ed into V4F_FLAGS, V4F_COUNT copied from the source and
 * V4F_SIZE set to 4.  ECX is shifted left by 4 and ESI/EDI are replaced by
 * the V4F_START fields.
 *
 * NOTE(review): this listing appears to be missing lines.  The body between
 * the loop and skip labels is not visible, nor are the flag-setting
 * instructions for JZ/JE/JNE, the source-pointer advance, the #undef of
 * FRAME_OFFSET and the return.  Confirm against the complete file.
 */
612 GLOBL GLNAME( _mesa_x86_transform_points4_identity )
613 HIDDEN(_mesa_x86_transform_points4_identity)
614 GLNAME( _mesa_x86_transform_points4_identity ):
/* NOTE(review): presumably the bytes pushed before the ARG_* loads -- confirm. */
616 #define FRAME_OFFSET 12
621 MOV_L( ARG_SOURCE, ESI )
622 MOV_L( ARG_DEST, EDI )
624 MOV_L( ARG_MATRIX, EDX )
625 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Early exit; the flag-setting instruction is not visible in this listing. */
628 JZ( LLBL(x86_p4_ir_done) )
630 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
631 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
633 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
634 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; EDI is advanced by 16 per element further down. */
636 SHL_L( CONST(4), ECX )
637 MOV_L( REGOFF(V4F_START, ESI), ESI )
639 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* NOTE(review): second early exit; the compare that feeds it is not visible
 * here (presumably source start vs. destination start -- confirm). */
643 JE( LLBL(x86_p4_ir_done) )
646 LLBL(x86_p4_ir_loop):
660 LLBL(x86_p4_ir_skip):
/* Step the destination pointer by 16 bytes. */
662 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible here. */
665 JNE( LLBL(x86_p4_ir_loop) )
667 LLBL(x86_p4_ir_done):
674 #if defined (__ELF__) && defined (__linux__)
675 .section .note.GNU-stack,"",%progbits