1 /* $Id: x86_xform3.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
29 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
30 * in there will break the build on some platforms.
34 #include "xform_args.h"
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0. */
38 #define FP_ONE 1065353216
/* SRCn: operand at byte offset 4*n from ESI (n = 0..3). */
/* NOTE(review): REGOFF is defined elsewhere; presumably a displacement(base) memory operand - confirm. */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
/* DSTn: operand at byte offset 4*n from EDI (n = 0..3). */
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
/* MATn: operand at byte offset 4*n from EDX (n = 0..15, i.e. sixteen 4-byte entries). */
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
/* _mesa_x86_transform_points3_general: per vertex, builds four x87 accumulators (F4..F7) from SRC0..SRC2 */
/* and stores them to a 16-byte destination slot; the destination is tagged as size 4. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip). Register saves, flag-setting */
/* instructions, multiplies, the source-pointer advance and the return are not visible here. */
68 GLOBL GLNAME( _mesa_x86_transform_points3_general )
69 GLNAME( _mesa_x86_transform_points3_general ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
71 #define FRAME_OFFSET 8
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST (first operand of MOV_L is the source). */
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
82 JZ( LLBL(x86_p3_gr_done) )
/* EAX <- V4F_STRIDE field of the source. */
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_4 bits in the destination's flags field. */
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 4. */
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
90 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Four loads of SRC0 seed the accumulators F4..F7. */
/* NOTE(review): the per-copy scaling (presumably by matrix entries) is not visible in this listing. */
99 FLD_S( SRC0 ) /* F4 */
101 FLD_S( SRC0 ) /* F5 F4 */
103 FLD_S( SRC0 ) /* F6 F5 F4 */
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
/* Four loads of SRC1 become the temporaries F0..F3. */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
/* Fold the temporaries into the accumulators, popping each: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same pattern for SRC2: four loads, then the same fold into F4..F7. */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Bring F4, F5, F6, F7 to the stack top in turn. */
/* NOTE(review): whatever is applied at the top between these exchanges is not visible in this listing. */
140 FXCH( ST(3) ) /* F4 F6 F5 F7 */
142 FXCH( ST(2) ) /* F5 F6 F4 F7 */
144 FXCH( ST(1) ) /* F6 F5 F4 F7 */
146 FXCH( ST(3) ) /* F7 F5 F4 F6 */
/* Store and pop: DST0 <- F4, DST1 <- F5, DST2 <- F6; F7 is still on the stack after the last visible store. */
149 FXCH( ST(2) ) /* F4 F5 F7 F6 */
150 FSTP_S( DST0 ) /* F5 F7 F6 */
151 FSTP_S( DST1 ) /* F7 F6 */
152 FXCH( ST(1) ) /* F6 F7 */
153 FSTP_S( DST2 ) /* F7 */
156 LLBL(x86_p3_gr_skip):
/* Advance the destination by one 16-byte slot. */
158 ADD_L( CONST(16), EDI )
/* NOTE(review): neither the compare feeding this branch nor the x86_p3_gr_loop label is visible here. */
161 JNE( LLBL(x86_p3_gr_loop) )
163 LLBL(x86_p3_gr_done):
/* _mesa_x86_transform_points3_perspective: per vertex, builds accumulators F4, F5, F6 on the x87 stack, */
/* stores F4/F5 to DST0/DST1 and tags the destination as size 4. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); register saves, multiplies, */
/* the EBX load/store around the XOR_L, the source-pointer advance and the return are not visible. */
174 GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
175 GLNAME( _mesa_x86_transform_points3_perspective ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
177 #define FRAME_OFFSET 12
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
182 MOV_L( ARG_SOURCE, ESI )
183 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
185 MOV_L( ARG_MATRIX, EDX )
186 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
189 JZ( LLBL(x86_p3_pr_done) )
/* EAX <- V4F_STRIDE field of the source. */
191 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_4 bits in the destination's flags field. */
192 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 4. */
194 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
195 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
197 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
198 MOV_L( REGOFF(V4F_START, ESI), ESI )
200 MOV_L( REGOFF(V4F_START, EDI), EDI )
204 LLBL(x86_p3_pr_loop):
/* F4 starts from SRC0 and F5 from SRC1. */
206 FLD_S( SRC0 ) /* F4 */
209 FLD_S( SRC1 ) /* F5 F4 */
/* Three loads of SRC2 become the temporaries F0..F2. */
/* NOTE(review): the per-copy scaling (presumably by matrix entries) is not visible in this listing. */
212 FLD_S( SRC2 ) /* F0 F5 F4 */
214 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
216 FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1. */
219 FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
220 FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
221 FADDP( ST0, ST(2) ) /* F2 F5 F4 */
/* F6 starts as MAT14 and then receives F2 (F6 = MAT14 + F2). */
222 FLD_S( MAT14 ) /* F6 F2 F5 F4 */
223 FXCH( ST(1) ) /* F2 F6 F5 F4 */
224 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* -2147483648 is 0x80000000: toggles bit 31 of EBX, the sign bit if EBX holds a single-precision float. */
/* NOTE(review): the instructions that load and store EBX are not visible in this listing. */
227 XOR_L( CONST(-2147483648), EBX )/* change sign */
/* Store and pop: DST0 <- F4, DST1 <- F5; F6 is still on the stack after the last visible store. */
229 FXCH( ST(2) ) /* F4 F5 F6 */
230 FSTP_S( DST0 ) /* F5 F6 */
231 FSTP_S( DST1 ) /* F6 */
235 LLBL(x86_p3_pr_skip):
/* Advance the destination by one 16-byte slot. */
237 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
240 JNE( LLBL(x86_p3_pr_loop) )
242 LLBL(x86_p3_pr_done):
/* _mesa_x86_transform_points3_3d: per vertex, builds three x87 accumulators (F4..F6) from SRC0..SRC2, */
/* stores F4/F5 to DST0/DST1 and tags the destination as size 3. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); register saves, multiplies, */
/* the final store, the source-pointer advance and the return are not visible here. */
254 GLOBL GLNAME( _mesa_x86_transform_points3_3d )
255 GLNAME( _mesa_x86_transform_points3_3d ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
257 #define FRAME_OFFSET 8
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
261 MOV_L( ARG_SOURCE, ESI )
262 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
264 MOV_L( ARG_MATRIX, EDX )
265 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
268 JZ( LLBL(x86_p3_3dr_done) )
/* EAX <- V4F_STRIDE field of the source. */
270 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_3 bits in the destination's flags field. */
271 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 3. */
273 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
274 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
276 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
277 MOV_L( REGOFF(V4F_START, ESI), ESI )
279 MOV_L( REGOFF(V4F_START, EDI), EDI )
283 LLBL(x86_p3_3dr_loop):
/* Three loads of SRC0 seed the accumulators F4..F6. */
/* NOTE(review): the per-copy scaling (presumably by matrix entries) is not visible in this listing. */
285 FLD_S( SRC0 ) /* F4 */
287 FLD_S( SRC0 ) /* F5 F4 */
289 FLD_S( SRC0 ) /* F6 F5 F4 */
/* Three loads of SRC1 become the temporaries F0..F2. */
292 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
294 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
296 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1, F6 += F2. */
299 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
300 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
301 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
302 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same pattern for SRC2: three loads, then the same fold into F4..F6. */
304 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
306 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
308 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
311 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
312 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
313 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
314 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Bring F4, F5, F6 to the stack top in turn. */
/* NOTE(review): whatever is applied at the top between these exchanges is not visible in this listing. */
316 FXCH( ST(2) ) /* F4 F5 F6 */
318 FXCH( ST(1) ) /* F5 F4 F6 */
320 FXCH( ST(2) ) /* F6 F4 F5 */
/* Store and pop: DST0 <- F4, DST1 <- F5; F6 is still on the stack after the last visible store. */
323 FXCH( ST(1) ) /* F4 F6 F5 */
324 FSTP_S( DST0 ) /* F6 F5 */
325 FXCH( ST(1) ) /* F5 F6 */
326 FSTP_S( DST1 ) /* F6 */
329 LLBL(x86_p3_3dr_skip):
/* Advance the destination by one 16-byte slot. */
331 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
334 JNE( LLBL(x86_p3_3dr_loop) )
336 LLBL(x86_p3_3dr_done):
/* _mesa_x86_transform_points3_3d_no_rot: per vertex, loads SRC0..SRC2 once each, adds the SRC1 and SRC2 */
/* terms onto MAT13 and MAT14, stores the first two results and tags the destination as size 3. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); register saves, multiplies, */
/* the final store, the source-pointer advance and the return are not visible here. */
347 GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
348 GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
350 #define FRAME_OFFSET 8
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
354 MOV_L( ARG_SOURCE, ESI )
355 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
358 MOV_L( ARG_MATRIX, EDX )
359 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
362 JZ( LLBL(x86_p3_3dnrr_done) )
/* EAX <- V4F_STRIDE field of the source. */
364 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_3 bits in the destination's flags field. */
365 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 3. */
367 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
368 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
370 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
371 MOV_L( REGOFF(V4F_START, ESI), ESI )
373 MOV_L( REGOFF(V4F_START, EDI), EDI )
377 LLBL(x86_p3_3dnrr_loop):
/* One load per source component: F4 from SRC0, F1 from SRC1, F2 from SRC2. */
/* NOTE(review): the scaling of each load (presumably by a matrix entry) is not visible in this listing. */
379 FLD_S( SRC0 ) /* F4 */
382 FLD_S( SRC1 ) /* F1 F4 */
385 FLD_S( SRC2 ) /* F2 F1 F4 */
388 FXCH( ST(2) ) /* F4 F1 F2 */
/* F5 starts as MAT13 and receives F1 (F5 = MAT13 + F1). */
390 FLD_S( MAT13 ) /* F5 F4 F1 F2 */
391 FXCH( ST(2) ) /* F1 F4 F5 F2 */
392 FADDP( ST0, ST(2) ) /* F4 F5 F2 */
/* F6 starts as MAT14 and receives F2 (F6 = MAT14 + F2). */
393 FLD_S( MAT14 ) /* F6 F4 F5 F2 */
394 FXCH( ST(3) ) /* F2 F4 F5 F6 */
395 FADDP( ST0, ST(3) ) /* F4 F5 F6 */
/* Store and pop: DST0 <- F4, DST1 <- F5; F6 is still on the stack after the last visible store. */
397 FSTP_S( DST0 ) /* F5 F6 */
398 FSTP_S( DST1 ) /* F6 */
401 LLBL(x86_p3_3dnrr_skip):
/* Advance the destination by one 16-byte slot. */
403 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
406 JNE( LLBL(x86_p3_3dnrr_loop) )
408 LLBL(x86_p3_3dnrr_done):
/* _mesa_x86_transform_points3_2d: per vertex, builds two x87 accumulators (F4, F5) from SRC0 and SRC1, */
/* stores F4 to DST0 and tags the destination as size 3. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); register saves, multiplies, */
/* the remaining stores, the source-pointer advance and the return are not visible here. */
419 GLOBL GLNAME( _mesa_x86_transform_points3_2d )
420 GLNAME( _mesa_x86_transform_points3_2d ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
422 #define FRAME_OFFSET 12
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
427 MOV_L( ARG_SOURCE, ESI )
428 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
430 MOV_L( ARG_MATRIX, EDX )
431 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
434 JZ( LLBL(x86_p3_2dr_done) )
/* EAX <- V4F_STRIDE field of the source. */
436 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_3 bits in the destination's flags field. */
437 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 3. */
439 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
440 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
442 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
443 MOV_L( REGOFF(V4F_START, ESI), ESI )
445 MOV_L( REGOFF(V4F_START, EDI), EDI )
449 LLBL(x86_p3_2dr_loop):
/* Two loads of SRC0 seed the accumulators F4 and F5. */
/* NOTE(review): the per-copy scaling (presumably by matrix entries) is not visible in this listing. */
451 FLD_S( SRC0 ) /* F4 */
453 FLD_S( SRC0 ) /* F5 F4 */
/* Two loads of SRC1 become the temporaries F0 and F1. */
456 FLD_S( SRC1 ) /* F0 F5 F4 */
458 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1. */
461 FXCH( ST(1) ) /* F0 F1 F5 F4 */
462 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
463 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Alternate F4 and F5 at the stack top. */
/* NOTE(review): whatever is applied at the top between these exchanges is not visible in this listing. */
465 FXCH( ST(1) ) /* F4 F5 */
467 FXCH( ST(1) ) /* F5 F4 */
/* Store and pop: DST0 <- F4; F5 is still on the stack after the last visible store. */
472 FXCH( ST(1) ) /* F4 F5 */
473 FSTP_S( DST0 ) /* F5 */
477 LLBL(x86_p3_2dr_skip):
/* Advance the destination by one 16-byte slot. */
479 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
482 JNE( LLBL(x86_p3_2dr_loop) )
484 LLBL(x86_p3_2dr_done):
/* _mesa_x86_transform_points3_2d_no_rot: per vertex, loads SRC0 and SRC1 once each, adds the SRC1 term */
/* onto MAT13, stores the first result to DST0 and tags the destination as size 3. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); register saves, multiplies, */
/* the remaining stores, the source-pointer advance and the return are not visible here. */
496 GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
497 GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
499 #define FRAME_OFFSET 12
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
504 MOV_L( ARG_SOURCE, ESI )
505 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX (base register of MAT0..MAT15); ECX <- V4F_COUNT field of the source. */
507 MOV_L( ARG_MATRIX, EDX )
508 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
511 JZ( LLBL(x86_p3_2dnrr_done) )
/* EAX <- V4F_STRIDE field of the source. */
513 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_3 bits in the destination's flags field. */
514 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 3. */
516 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
517 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
519 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
520 MOV_L( REGOFF(V4F_START, ESI), ESI )
522 MOV_L( REGOFF(V4F_START, EDI), EDI )
526 LLBL(x86_p3_2dnrr_loop):
/* One load per source component: F4 from SRC0, F1 from SRC1. */
/* NOTE(review): the scaling of each load (presumably by a matrix entry) is not visible in this listing. */
528 FLD_S( SRC0 ) /* F4 */
531 FLD_S( SRC1 ) /* F1 F4 */
534 FXCH( ST(1) ) /* F4 F1 */
/* F5 starts as MAT13 and receives F1 (F5 = MAT13 + F1). */
536 FLD_S( MAT13 ) /* F5 F4 F1 */
538 FXCH( ST(2) ) /* F1 F4 F5 */
539 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store and pop: DST0 <- F4; F5 is still on the stack after the last visible store. */
543 FSTP_S( DST0 ) /* F5 */
547 LLBL(x86_p3_2dnrr_skip):
/* Advance the destination by one 16-byte slot. */
549 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
552 JNE( LLBL(x86_p3_2dnrr_loop) )
554 LLBL(x86_p3_2dnrr_done):
/* _mesa_x86_transform_points3_identity: sets up the same source/destination bookkeeping as its siblings */
/* and tags the destination as size 3. */
/* NOTE(review): this listing has gaps (the embedded line numbers skip); the whole loop body, the register */
/* saves, the flag-setting instructions and the return are not visible here. */
566 GLOBL GLNAME( _mesa_x86_transform_points3_identity )
567 GLNAME(_mesa_x86_transform_points3_identity ):
/* NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros from xform_args.h - confirm. */
569 #define FRAME_OFFSET 16
/* ESI <- ARG_SOURCE, EDI <- ARG_DEST. */
575 MOV_L( ARG_SOURCE, ESI )
576 MOV_L( ARG_DEST, EDI )
/* EDX <- ARG_MATRIX; ECX <- V4F_COUNT field of the source. */
578 MOV_L( ARG_MATRIX, EDX )
579 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leave when ZF is set. NOTE(review): the flag-setting instruction is not visible; presumably a test of ECX. */
582 JZ( LLBL(x86_p3_ir_done) )
/* EAX <- V4F_STRIDE field of the source. */
584 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
/* Set the VEC_SIZE_3 bits in the destination's flags field. */
585 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the count into the destination and record its size as 3. */
587 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
588 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX <- count * 16; the destination pointer advances 16 bytes per iteration (ADD_L below). */
590 SHL_L( CONST(4), ECX )
/* Replace both pointers with the V4F_START field they point at. */
591 MOV_L( REGOFF(V4F_START, ESI), ESI )
593 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Second early exit, taken on equality. */
/* NOTE(review): the compare is not visible; presumably it checks whether source and destination data coincide. */
597 JE( LLBL(x86_p3_ir_done) )
/* NOTE(review): the loop body between this label and x86_p3_ir_skip is absent from this listing. */
600 LLBL(x86_p3_ir_loop):
620 LLBL(x86_p3_ir_skip):
/* Advance the destination by one 16-byte slot. */
622 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare feeding this branch is not visible in this listing. */
625 JNE( LLBL(x86_p3_ir_loop) )
627 LLBL(x86_p3_ir_done):