1 /* $Id: x86_xform4.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
29 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
30 * in there will break the build on some platforms.
34 #include "xform_args.h"
/*
 * FP_ONE is the integer 1065353216 (0x3F800000), i.e. the bit pattern of
 * 1.0 in IEEE-754 single precision.
 */
38 #define FP_ONE 1065353216
/*
 * Operand shorthands used by every routine in this file.
 *
 *   SRCn  - offset 4*n from ESI (n = 0..3)
 *   DSTn  - offset 4*n from EDI (n = 0..3)
 *   MATn  - offset 4*n from EDX (n = 0..15)
 *
 * REGOFF is defined outside this file; presumably it expands to a
 * displacement(base) memory operand - TODO confirm in the syntax header.
 * The 4-byte spacing matches the 32-bit FLD_S/FSTP_S accesses made
 * through SRCn/DSTn below.
 */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points4_general
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0..SRC3 four times each, folds the values into
 * four running sums (F4..F7 in the stack comments) with FADDP, and stores
 * to DST0..DST2. EDI advances by 16 bytes per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: any register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * each loaded value (presumably multiplies by MATn), the source pointer
 * advance, the DST3 store, the x86_p4_gr_loop label that JNE targets, and
 * the return. Confirm against the complete file before changing code.
 */
68 GLOBL GLNAME( _mesa_x86_transform_points4_general )
69 GLNAME( _mesa_x86_transform_points4_general ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
71 #define FRAME_OFFSET 8
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
82 JZ( LLBL(x86_p4_gr_done) )
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
90 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first.
 * Four copies of the x component become the running sums F4..F7. */
99 FLD_S( SRC0 ) /* F4 */
101 FLD_S( SRC0 ) /* F5 F4 */
103 FLD_S( SRC0 ) /* F6 F5 F4 */
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
/* Four temporaries from SRC1 (F0..F3), then one is added into each sum. */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same load/accumulate pattern for SRC2. */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same load/accumulate pattern for SRC3. */
140 FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
142 FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
144 FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
146 FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
149 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
150 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
151 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
152 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
153 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
154 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Reorder so the sums pop in DST0, DST1, DST2 order. */
156 FXCH( ST(3) ) /* F4 F6 F5 F7 */
157 FSTP_S( DST0 ) /* F6 F5 F7 */
158 FXCH( ST(1) ) /* F5 F6 F7 */
159 FSTP_S( DST1 ) /* F6 F7 */
160 FSTP_S( DST2 ) /* F7 */
163 LLBL(x86_p4_gr_skip):
/* Advance the destination pointer by one 16-byte element. */
165 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
168 JNE( LLBL(x86_p4_gr_loop) )
170 LLBL(x86_p4_gr_done):
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0 and SRC1 once, SRC2 three times and SRC3 once,
 * combines them into three running values (F4..F6) with FADDP, and stores
 * to DST0 and DST1. EBX has bit 31 toggled by the XOR. EDI advances by
 * 16 bytes per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * loaded values (presumably multiplies by MATn), where EBX is loaded from
 * and stored to, the DST2/DST3 stores, the source pointer advance and the
 * return. Confirm against the complete file before changing code.
 */
181 GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
182 GLNAME( _mesa_x86_transform_points4_perspective ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
184 #define FRAME_OFFSET 12
189 MOV_L( ARG_SOURCE, ESI )
190 MOV_L( ARG_DEST, EDI )
192 MOV_L( ARG_MATRIX, EDX )
193 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
196 JZ( LLBL(x86_p4_pr_done) )
198 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
199 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
201 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
202 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
204 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
205 MOV_L( REGOFF(V4F_START, ESI), ESI )
207 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first. */
211 LLBL(x86_p4_pr_loop):
213 FLD_S( SRC0 ) /* F4 */
216 FLD_S( SRC1 ) /* F5 F4 */
/* Three values derived from SRC2: two temporaries (F0, F1) that are added
 * into F4 and F5, and F6 which starts the third running value. */
219 FLD_S( SRC2 ) /* F0 F5 F4 */
221 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
223 FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
226 FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
227 FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
228 FADDP( ST0, ST(2) ) /* F6 F5 F4 */
/* A single SRC3-derived temporary is added into F6 only. */
230 FLD_S( SRC3 ) /* F2 F6 F5 F4 */
233 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* -2147483648 is 0x80000000: the XOR toggles bit 31 of EBX, which is the
 * sign bit if EBX holds a single-precision float (its load is not visible
 * in this view). */
236 XOR_L( CONST(-2147483648), EBX )/* change sign */
238 FXCH( ST(2) ) /* F4 F5 F6 */
239 FSTP_S( DST0 ) /* F5 F6 */
240 FSTP_S( DST1 ) /* F6 */
244 LLBL(x86_p4_pr_skip):
/* Advance the destination pointer by one 16-byte element. */
246 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
249 JNE( LLBL(x86_p4_pr_loop) )
251 LLBL(x86_p4_pr_done):
/*
 * _mesa_x86_transform_points4_3d
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0..SRC3 three times each, folds them into three
 * running sums (F4..F6) with FADDP, and stores to DST0 and DST1. EDI
 * advances by 16 bytes per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * loaded values (presumably multiplies by MATn), the DST2/DST3 stores,
 * the source pointer advance and the return. Confirm against the complete
 * file before changing code.
 */
263 GLOBL GLNAME( _mesa_x86_transform_points4_3d )
264 GLNAME( _mesa_x86_transform_points4_3d ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
266 #define FRAME_OFFSET 12
271 MOV_L( ARG_SOURCE, ESI )
272 MOV_L( ARG_DEST, EDI )
274 MOV_L( ARG_MATRIX, EDX )
275 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
278 JZ( LLBL(x86_p4_3dr_done) )
280 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
281 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
283 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
284 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
286 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
287 MOV_L( REGOFF(V4F_START, ESI), ESI )
289 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first.
 * Three copies of the x component become the running sums F4..F6. */
293 LLBL(x86_p4_3dr_loop):
295 FLD_S( SRC0 ) /* F4 */
297 FLD_S( SRC0 ) /* F5 F4 */
299 FLD_S( SRC0 ) /* F6 F5 F4 */
/* Three temporaries from SRC1 (F0..F2), one added into each sum. */
302 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
304 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
306 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
309 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
310 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
311 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
312 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same load/accumulate pattern for SRC2. */
314 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
316 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
318 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
321 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
322 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
323 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
324 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same load/accumulate pattern for SRC3. */
326 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
328 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
330 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
333 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
334 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
335 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
336 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Bring F4 to the top so the sums pop in DST0, DST1 order. */
340 FXCH( ST(2) ) /* F4 F5 F6 */
341 FSTP_S( DST0 ) /* F5 F6 */
342 FSTP_S( DST1 ) /* F6 */
346 LLBL(x86_p4_3dr_skip):
/* Advance the destination pointer by one 16-byte element. */
348 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
351 JNE( LLBL(x86_p4_3dr_loop) )
353 LLBL(x86_p4_3dr_done):
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0, SRC1 and SRC2 once each (F4, F5, F6), loads
 * SRC3 three times and adds one SRC3-derived value into each of them, then
 * stores to DST0 and DST1. EDI advances by 16 bytes per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * loaded values (presumably multiplies by MATn), the DST2/DST3 stores,
 * the source pointer advance and the return. Confirm against the complete
 * file before changing code.
 */
365 GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
366 GLNAME(_mesa_x86_transform_points4_3d_no_rot):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
368 #define FRAME_OFFSET 12
373 MOV_L( ARG_SOURCE, ESI )
374 MOV_L( ARG_DEST, EDI )
376 MOV_L( ARG_MATRIX, EDX )
377 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
380 JZ( LLBL(x86_p4_3dnrr_done) )
382 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
383 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
385 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
386 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
388 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
389 MOV_L( REGOFF(V4F_START, ESI), ESI )
391 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first. */
395 LLBL(x86_p4_3dnrr_loop):
397 FLD_S( SRC0 ) /* F4 */
400 FLD_S( SRC1 ) /* F5 F4 */
403 FLD_S( SRC2 ) /* F6 F5 F4 */
/* Three temporaries from SRC3 (F0..F2), one added into each of F4..F6. */
406 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
408 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
410 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
413 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
414 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
415 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
416 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Bring F4 to the top so the results pop in DST0, DST1 order. */
420 FXCH( ST(2) ) /* F4 F5 F6 */
421 FSTP_S( DST0 ) /* F5 F6 */
422 FSTP_S( DST1 ) /* F6 */
426 LLBL(x86_p4_3dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
428 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
431 JNE( LLBL(x86_p4_3dnrr_loop) )
433 LLBL(x86_p4_3dnrr_done):
/*
 * _mesa_x86_transform_points4_2d
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0, SRC1 and SRC3 twice each, folds them into two
 * running sums (F4, F5) with FADDP, and stores to DST0. SRC2 is not read
 * by any visible instruction. EDI advances by 16 bytes per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * loaded values (presumably multiplies by MATn), the DST1..DST3 stores,
 * the source pointer advance and the return. Confirm against the complete
 * file before changing code.
 */
445 GLOBL GLNAME( _mesa_x86_transform_points4_2d )
446 GLNAME( _mesa_x86_transform_points4_2d ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
448 #define FRAME_OFFSET 16
454 MOV_L( ARG_SOURCE, ESI )
455 MOV_L( ARG_DEST, EDI )
457 MOV_L( ARG_MATRIX, EDX )
458 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
461 JZ( LLBL(x86_p4_2dr_done) )
463 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
464 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
466 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
467 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
469 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
470 MOV_L( REGOFF(V4F_START, ESI), ESI )
472 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first.
 * Two copies of the x component become the running sums F4 and F5. */
476 LLBL(x86_p4_2dr_loop):
478 FLD_S( SRC0 ) /* F4 */
480 FLD_S( SRC0 ) /* F5 F4 */
/* Two temporaries from SRC1 (F0, F1), one added into each sum. */
483 FLD_S( SRC1 ) /* F0 F5 F4 */
485 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
488 FXCH( ST(1) ) /* F0 F1 F5 F4 */
489 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
490 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Same load/accumulate pattern for SRC3. */
492 FLD_S( SRC3 ) /* F0 F5 F4 */
494 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
497 FXCH( ST(1) ) /* F0 F1 F5 F4 */
498 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
499 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Bring F4 to the top so it is the value popped into DST0. */
504 FXCH( ST(1) ) /* F4 F5 */
505 FSTP_S( DST0 ) /* F5 */
510 LLBL(x86_p4_2dr_skip):
/* Advance the destination pointer by one 16-byte element. */
512 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
515 JNE( LLBL(x86_p4_2dr_loop) )
517 LLBL(x86_p4_2dr_done):
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX  (base register of the MATn operands)
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4.
 *
 * Each iteration loads SRC0 and SRC1 once each (F4, F5), loads SRC3 twice
 * and adds one SRC3-derived value into each of them, then stores to DST0.
 * SRC2 is not read by any visible instruction. EDI advances by 16 bytes
 * per iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. Not visible here: register saves and
 * restores, the flag-setting instructions feeding JZ/JNE, the scaling of
 * loaded values (presumably multiplies by MATn), the DST1..DST3 stores,
 * the source pointer advance and the return. Confirm against the complete
 * file before changing code.
 */
530 GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
531 GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
533 #define FRAME_OFFSET 16
539 MOV_L( ARG_SOURCE, ESI )
540 MOV_L( ARG_DEST, EDI )
542 MOV_L( ARG_MATRIX, EDX )
543 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
546 JZ( LLBL(x86_p4_2dnrr_done) )
548 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
549 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
551 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
552 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
554 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
555 MOV_L( REGOFF(V4F_START, ESI), ESI )
557 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Per-point body. The trailing comments track the x87 stack, top first. */
561 LLBL(x86_p4_2dnrr_loop):
563 FLD_S( SRC0 ) /* F4 */
566 FLD_S( SRC1 ) /* F5 F4 */
/* Two temporaries from SRC3 (F0, F1), one added into each of F4 and F5. */
569 FLD_S( SRC3 ) /* F0 F5 F4 */
571 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
574 FXCH( ST(1) ) /* F0 F1 F5 F4 */
575 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
576 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Bring F4 to the top so it is the value popped into DST0. */
581 FXCH( ST(1) ) /* F4 F5 */
582 FSTP_S( DST0 ) /* F5 */
587 LLBL(x86_p4_2dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
589 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
592 JNE( LLBL(x86_p4_2dnrr_loop) )
594 LLBL(x86_p4_2dnrr_done):
/*
 * _mesa_x86_transform_points4_identity
 *
 * Exported entry point. Register roles established by the visible setup:
 *   ESI <- ARG_SOURCE, then replaced by the source's V4F_START field
 *   EDI <- ARG_DEST,   then replaced by the dest's V4F_START field
 *   EDX <- ARG_MATRIX
 *   ECX <- source V4F_COUNT, then shifted left by 4 (count * 16)
 *   EAX <- source V4F_STRIDE
 * The dest vector gets VEC_SIZE_4 ORed into V4F_FLAGS, V4F_COUNT copied
 * from the source, and V4F_SIZE set to 4. EDI advances by 16 bytes per
 * iteration.
 *
 * NOTE(review): the embedded original line numbers skip, so this view
 * appears to have lines elided. In particular nothing between the
 * x86_p4_ir_loop and x86_p4_ir_skip labels is visible, nor are the
 * register saves and restores, the flag-setting instructions feeding
 * JZ/JE/JNE, or the return. Confirm against the complete file before
 * changing code.
 */
607 GLOBL GLNAME( _mesa_x86_transform_points4_identity )
608 GLNAME( _mesa_x86_transform_points4_identity ):
/* FRAME_OFFSET is presumably consumed by the ARG_* macros from
 * xform_args.h to skip bytes pushed on entry - TODO confirm. */
610 #define FRAME_OFFSET 12
615 MOV_L( ARG_SOURCE, ESI )
616 MOV_L( ARG_DEST, EDI )
618 MOV_L( ARG_MATRIX, EDX )
619 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Skip everything when the zero flag is set; the instruction that sets it
 * (presumably a test of ECX) is not visible in this view. */
622 JZ( LLBL(x86_p4_ir_done) )
624 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
625 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
627 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
628 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI below. */
630 SHL_L( CONST(4), ECX )
/* From here on ESI/EDI hold the V4F_START values, not the vector structs. */
631 MOV_L( REGOFF(V4F_START, ESI), ESI )
633 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Second early exit, taken on "equal". The comparison is not visible;
 * presumably it checks whether source and destination storage coincide,
 * in which case there is nothing to copy - TODO confirm. */
637 JE( LLBL(x86_p4_ir_done) )
640 LLBL(x86_p4_ir_loop):
654 LLBL(x86_p4_ir_skip):
/* Advance the destination pointer by one 16-byte element. */
656 ADD_L( CONST(16), EDI )
/* Loop while the preceding comparison (not visible in this view) reports
 * "not equal". */
659 JNE( LLBL(x86_p4_ir_loop) )
661 LLBL(x86_p4_ir_done):