1 /* $Id: x86_xform2.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * NOTE: Avoid putting spaces between the parentheses and the arguments of
29 * a macro call, especially with macros like CONST and LLBL that expand to
30 * CONCAT(...). Spaces there will break the build on some platforms.
34 #include "xform_args.h"
38 #define FP_ONE 1065353216
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
68 GLOBL GLNAME( _mesa_x86_transform_points2_general )
69 GLNAME( _mesa_x86_transform_points2_general ):
71 #define FRAME_OFFSET 8
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
82 JZ( LLBL(x86_p2_gr_done) )
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
90 SHL_L( CONST(4), ECX )
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
99 FLD_S( SRC0 ) /* F4 */
101 FLD_S( SRC0 ) /* F5 F4 */
103 FLD_S( SRC0 ) /* F6 F5 F4 */
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
124 FXCH( ST(3) ) /* F4 F6 F5 F7 */
126 FXCH( ST(2) ) /* F5 F6 F4 F7 */
128 FXCH( ST(1) ) /* F6 F5 F4 F7 */
130 FXCH( ST(3) ) /* F7 F5 F4 F6 */
133 FXCH( ST(2) ) /* F4 F5 F7 F6 */
134 FSTP_S( DST0 ) /* F5 F7 F6 */
135 FSTP_S( DST1 ) /* F7 F6 */
136 FXCH( ST(1) ) /* F6 F7 */
137 FSTP_S( DST2 ) /* F7 */
140 LLBL(x86_p2_gr_skip):
142 ADD_L( CONST(16), EDI )
145 JNE( LLBL(x86_p2_gr_loop) )
147 LLBL(x86_p2_gr_done):
158 GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
159 GLNAME( _mesa_x86_transform_points2_perspective ):
161 #define FRAME_OFFSET 12
166 MOV_L( ARG_SOURCE, ESI )
167 MOV_L( ARG_DEST, EDI )
169 MOV_L( ARG_MATRIX, EDX )
170 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
173 JZ( LLBL(x86_p2_pr_done) )
175 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
176 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
178 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
179 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
181 SHL_L( CONST(4), ECX )
182 MOV_L( REGOFF(V4F_START, ESI), ESI )
184 MOV_L( REGOFF(V4F_START, EDI), EDI )
190 LLBL(x86_p2_pr_loop):
192 FLD_S( SRC0 ) /* F4 */
195 FLD_S( SRC1 ) /* F1 F4 */
198 FXCH( ST(1) ) /* F4 F1 */
199 FSTP_S( DST0 ) /* F1 */
202 MOV_L( CONST(FP_ZERO), DST3 )
204 LLBL(x86_p2_pr_skip):
206 ADD_L( CONST(16), EDI )
209 JNE( LLBL(x86_p2_pr_loop) )
211 LLBL(x86_p2_pr_done):
223 GLOBL GLNAME( _mesa_x86_transform_points2_3d )
224 GLNAME( _mesa_x86_transform_points2_3d ):
226 #define FRAME_OFFSET 8
230 MOV_L( ARG_SOURCE, ESI )
231 MOV_L( ARG_DEST, EDI )
233 MOV_L( ARG_MATRIX, EDX )
234 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
237 JZ( LLBL(x86_p2_3dr_done) )
239 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
240 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
242 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
243 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
245 SHL_L( CONST(4), ECX )
246 MOV_L( REGOFF(V4F_START, ESI), ESI )
248 MOV_L( REGOFF(V4F_START, EDI), EDI )
252 LLBL(x86_p2_3dr_loop):
254 FLD_S( SRC0 ) /* F4 */
256 FLD_S( SRC0 ) /* F5 F4 */
258 FLD_S( SRC0 ) /* F6 F5 F4 */
261 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
263 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
265 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
268 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
269 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
270 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
271 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
273 FXCH( ST(2) ) /* F4 F5 F6 */
275 FXCH( ST(1) ) /* F5 F4 F6 */
277 FXCH( ST(2) ) /* F6 F4 F5 */
280 FXCH( ST(1) ) /* F4 F6 F5 */
281 FSTP_S( DST0 ) /* F6 F5 */
282 FXCH( ST(1) ) /* F5 F6 */
283 FSTP_S( DST1 ) /* F6 */
286 LLBL(x86_p2_3dr_skip):
288 ADD_L( CONST(16), EDI )
291 JNE( LLBL(x86_p2_3dr_loop) )
293 LLBL(x86_p2_3dr_done):
304 GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
305 GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
307 #define FRAME_OFFSET 12
312 MOV_L( ARG_SOURCE, ESI )
313 MOV_L( ARG_DEST, EDI )
315 MOV_L( ARG_MATRIX, EDX )
316 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
319 JZ( LLBL(x86_p2_3dnrr_done) )
321 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
322 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
324 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
325 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
327 SHL_L( CONST(4), ECX )
328 MOV_L( REGOFF(V4F_START, ESI), ESI )
330 MOV_L( REGOFF(V4F_START, EDI), EDI )
336 LLBL(x86_p2_3dnrr_loop):
338 FLD_S( SRC0 ) /* F4 */
341 FLD_S( SRC1 ) /* F1 F4 */
344 FXCH( ST(1) ) /* F4 F1 */
346 FLD_S( MAT13 ) /* F5 F4 F1 */
347 FXCH( ST(2) ) /* F1 F4 F5 */
348 FADDP( ST0, ST(2) ) /* F4 F5 */
350 FSTP_S( DST0 ) /* F5 */
354 LLBL(x86_p2_3dnrr_skip):
356 ADD_L( CONST(16), EDI )
359 JNE( LLBL(x86_p2_3dnrr_loop) )
361 LLBL(x86_p2_3dnrr_done):
373 GLOBL GLNAME( _mesa_x86_transform_points2_2d )
374 GLNAME( _mesa_x86_transform_points2_2d ):
376 #define FRAME_OFFSET 8
380 MOV_L( ARG_SOURCE, ESI )
381 MOV_L( ARG_DEST, EDI )
383 MOV_L( ARG_MATRIX, EDX )
384 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
387 JZ( LLBL(x86_p2_2dr_done) )
389 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
390 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
392 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
393 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
395 SHL_L( CONST(4), ECX )
396 MOV_L( REGOFF(V4F_START, ESI), ESI )
398 MOV_L( REGOFF(V4F_START, EDI), EDI )
402 LLBL(x86_p2_2dr_loop):
404 FLD_S( SRC0 ) /* F4 */
406 FLD_S( SRC0 ) /* F5 F4 */
409 FLD_S( SRC1 ) /* F0 F5 F4 */
411 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
414 FXCH( ST(1) ) /* F0 F1 F5 F4 */
415 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
416 FADDP( ST0, ST(1) ) /* F5 F4 */
418 FXCH( ST(1) ) /* F4 F5 */
420 FXCH( ST(1) ) /* F5 F4 */
423 FXCH( ST(1) ) /* F4 F5 */
424 FSTP_S( DST0 ) /* F5 */
427 LLBL(x86_p2_2dr_skip):
429 ADD_L( CONST(16), EDI )
432 JNE( LLBL(x86_p2_2dr_loop) )
434 LLBL(x86_p2_2dr_done):
445 GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
446 GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
448 #define FRAME_OFFSET 8
452 MOV_L( ARG_SOURCE, ESI )
453 MOV_L( ARG_DEST, EDI )
455 MOV_L( ARG_MATRIX, EDX )
456 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
459 JZ( LLBL(x86_p2_2dnrr_done) )
461 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
462 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
464 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
465 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
467 SHL_L( CONST(4), ECX )
468 MOV_L( REGOFF(V4F_START, ESI), ESI )
470 MOV_L( REGOFF(V4F_START, EDI), EDI )
474 LLBL(x86_p2_2dnrr_loop):
476 FLD_S( SRC0 ) /* F4 */
479 FLD_S( SRC1 ) /* F1 F4 */
482 FXCH( ST(1) ) /* F4 F1 */
484 FLD_S( MAT13 ) /* F5 F4 F1 */
485 FXCH( ST(2) ) /* F1 F4 F5 */
486 FADDP( ST0, ST(2) ) /* F4 F5 */
488 FSTP_S( DST0 ) /* F5 */
491 LLBL(x86_p2_2dnrr_skip):
493 ADD_L( CONST(16), EDI )
496 JNE( LLBL(x86_p2_2dnrr_loop) )
498 LLBL(x86_p2_2dnrr_done):
509 GLOBL GLNAME( _mesa_x86_transform_points2_identity )
510 GLNAME( _mesa_x86_transform_points2_identity ):
512 #define FRAME_OFFSET 12
517 MOV_L( ARG_SOURCE, ESI )
518 MOV_L( ARG_DEST, EDI )
520 MOV_L( ARG_MATRIX, EDX )
521 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
524 JZ( LLBL(x86_p2_ir_done) )
526 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
527 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
529 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
530 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
532 SHL_L( CONST(4), ECX )
533 MOV_L( REGOFF(V4F_START, ESI), ESI )
535 MOV_L( REGOFF(V4F_START, EDI), EDI )
539 JE( LLBL(x86_p2_ir_done) )
542 LLBL(x86_p2_ir_loop):
550 LLBL(x86_p2_ir_skip):
552 ADD_L( CONST(16), EDI )
555 JNE( LLBL(x86_p2_ir_loop) )
557 LLBL(x86_p2_ir_done):