1 /* $Id: x86_xform2.S,v 1.3 2005/01/04 14:33:47 brianp Exp $ */
4 * Mesa 3-D graphics library
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
29 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
30 * in there will break the build on some platforms.
34 #include "xform_args.h"
/*
 * FP_ONE is the integer whose bits equal the IEEE-754 single-precision
 * encoding of 1.0f (1065353216 == 0x3F800000).
 * NOTE(review): FP_ZERO is used further down in this file, but no definition
 * of it is visible in this listing (embedded line 39 is absent) -- confirm
 * that it is defined.
 */
38 #define FP_ONE 1065353216
/*
 * Operand shorthands. Each one passes a byte displacement and a base
 * register to REGOFF, which is defined elsewhere (presumably it forms a
 * displacement(base) memory operand -- TODO confirm).
 *   SRCn  displacement 4*n from ESI, n = 0..3
 *   DSTn  displacement 4*n from EDI, n = 0..3
 *   MATn  displacement 4*n from EDX, n = 0..15
 */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points2_general
 *
 * Register roles established by the visible setup code:
 *   ESI  source vector argument, then replaced by its V4F_START pointer
 *   EDI  destination vector argument, then replaced by its V4F_START pointer
 *   EDX  matrix argument (base register of the MATn operands)
 *   ECX  source V4F_COUNT, then shifted left by 4
 *   EAX  source V4F_STRIDE
 * The destination gets VEC_SIZE_4 OR-ed into V4F_FLAGS, the source count
 * copied into V4F_COUNT and 4 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip in several places
 * (for example 94 to 100, 138 to 141 and 148 to 159), so instructions appear
 * to be missing from this listing: no register saves, flag-setting test,
 * multiplies, x86_p2_gr_loop label definition, source-pointer advance, loop
 * compare or return are visible here. Confirm against the complete file.
 */
68 GLOBL GLNAME( _mesa_x86_transform_points2_general )
69 HIDDEN(_mesa_x86_transform_points2_general)
70 GLNAME( _mesa_x86_transform_points2_general ):
/* Presumably read by the ARG_ macros from xform_args.h to locate the stack
 * arguments -- TODO confirm. */
72 #define FRAME_OFFSET 8
76 MOV_L( ARG_SOURCE, ESI )
77 MOV_L( ARG_DEST, EDI )
79 MOV_L( ARG_MATRIX, EDX )
80 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
83 JZ( LLBL(x86_p2_gr_done) )
85 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
86 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
88 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
89 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
91 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI hold the V4F_START pointers, so the SRCn and
 * DSTn operands address the current element. */
92 MOV_L( REGOFF(V4F_START, ESI), ESI )
94 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Source component 0 is pushed four times (slots F4..F7) and component 1
 * four times (slots F0..F3). Any per-slot arithmetic between the loads is
 * not visible in this listing. */
100 FLD_S( SRC0 ) /* F4 */
102 FLD_S( SRC0 ) /* F5 F4 */
104 FLD_S( SRC0 ) /* F6 F5 F4 */
106 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
109 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
111 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
113 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
115 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
/* Fold the second group into the first with popping adds:
 * F0 into F4, F1 into F5, F2 into F6, F3 into F7. */
118 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
119 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
120 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
123 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Each exchange brings a different accumulator to the top of the stack;
 * whatever is applied to it there is not visible in this listing. */
125 FXCH( ST(3) ) /* F4 F6 F5 F7 */
127 FXCH( ST(2) ) /* F5 F6 F4 F7 */
129 FXCH( ST(1) ) /* F6 F5 F4 F7 */
131 FXCH( ST(3) ) /* F7 F5 F4 F6 */
/* Store F4, F5 and F6 to destination components 0, 1 and 2. F7 is still on
 * the stack after the last visible store. */
134 FXCH( ST(2) ) /* F4 F5 F7 F6 */
135 FSTP_S( DST0 ) /* F5 F7 F6 */
136 FSTP_S( DST1 ) /* F7 F6 */
137 FXCH( ST(1) ) /* F6 F7 */
138 FSTP_S( DST2 ) /* F7 */
141 LLBL(x86_p2_gr_skip):
/* Advance the destination pointer by one 16-byte element. */
143 ADD_L( CONST(16), EDI )
/* NOTE(review): the x86_p2_gr_loop target is not defined anywhere in this
 * listing, and the compare feeding this branch is not visible either. */
146 JNE( LLBL(x86_p2_gr_loop) )
148 LLBL(x86_p2_gr_done):
/*
 * _mesa_x86_transform_points2_perspective
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_4 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 4 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 186 to
 * 192 and 201 to 204), so register saves, the flag-setting test, the
 * arithmetic on the loaded values, the source-pointer advance, the loop
 * compare and the return are not visible in this listing.
 */
159 GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
160 HIDDEN(_mesa_x86_transform_points2_perspective)
161 GLNAME( _mesa_x86_transform_points2_perspective ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
163 #define FRAME_OFFSET 12
168 MOV_L( ARG_SOURCE, ESI )
169 MOV_L( ARG_DEST, EDI )
171 MOV_L( ARG_MATRIX, EDX )
172 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
175 JZ( LLBL(x86_p2_pr_done) )
177 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
178 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
180 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
181 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
183 SHL_L( CONST(4), ECX )
184 MOV_L( REGOFF(V4F_START, ESI), ESI )
186 MOV_L( REGOFF(V4F_START, EDI), EDI )
192 LLBL(x86_p2_pr_loop):
/* Load source components 0 and 1 into slots F4 and F1. */
194 FLD_S( SRC0 ) /* F4 */
197 FLD_S( SRC1 ) /* F1 F4 */
/* Store F4 to destination component 0. F1 is still on the stack after the
 * last visible store. */
200 FXCH( ST(1) ) /* F4 F1 */
201 FSTP_S( DST0 ) /* F1 */
/* Write the FP_ZERO constant to destination component 3 with an integer
 * move. NOTE(review): no definition of FP_ZERO is visible in this listing. */
204 MOV_L( CONST(FP_ZERO), DST3 )
206 LLBL(x86_p2_pr_skip):
/* Advance the destination pointer by one 16-byte element. */
208 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
211 JNE( LLBL(x86_p2_pr_loop) )
213 LLBL(x86_p2_pr_done):
/*
 * _mesa_x86_transform_points2_3d
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_3 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 3 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 251 to
 * 255 and 286 to 289), so register saves, the flag-setting test, the
 * multiplies, the store of the third result, the source-pointer advance,
 * the loop compare and the return are not visible in this listing.
 */
225 GLOBL GLNAME( _mesa_x86_transform_points2_3d )
226 HIDDEN(_mesa_x86_transform_points2_3d)
227 GLNAME( _mesa_x86_transform_points2_3d ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
229 #define FRAME_OFFSET 8
233 MOV_L( ARG_SOURCE, ESI )
234 MOV_L( ARG_DEST, EDI )
236 MOV_L( ARG_MATRIX, EDX )
237 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
240 JZ( LLBL(x86_p2_3dr_done) )
242 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
243 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
245 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
246 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
248 SHL_L( CONST(4), ECX )
249 MOV_L( REGOFF(V4F_START, ESI), ESI )
251 MOV_L( REGOFF(V4F_START, EDI), EDI )
255 LLBL(x86_p2_3dr_loop):
/* Source component 0 is pushed three times (slots F4..F6) and component 1
 * three times (slots F0..F2). */
257 FLD_S( SRC0 ) /* F4 */
259 FLD_S( SRC0 ) /* F5 F4 */
261 FLD_S( SRC0 ) /* F6 F5 F4 */
264 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
266 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
268 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold with popping adds: F0 into F4, F1 into F5, F2 into F6. */
271 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
272 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
273 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
274 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Each exchange brings a different accumulator to the top of the stack;
 * whatever is applied to it there is not visible in this listing. */
276 FXCH( ST(2) ) /* F4 F5 F6 */
278 FXCH( ST(1) ) /* F5 F4 F6 */
280 FXCH( ST(2) ) /* F6 F4 F5 */
/* Store F4 and F5 to destination components 0 and 1. F6 is still on the
 * stack after the last visible store. */
283 FXCH( ST(1) ) /* F4 F6 F5 */
284 FSTP_S( DST0 ) /* F6 F5 */
285 FXCH( ST(1) ) /* F5 F6 */
286 FSTP_S( DST1 ) /* F6 */
289 LLBL(x86_p2_3dr_skip):
/* Advance the destination pointer by one 16-byte element. */
291 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
294 JNE( LLBL(x86_p2_3dr_loop) )
296 LLBL(x86_p2_3dr_done):
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_3 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 3 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 334 to
 * 340 and 354 to 358), so register saves, the flag-setting test, any
 * scaling of the loaded values, the remaining stores, the source-pointer
 * advance, the loop compare and the return are not visible in this listing.
 */
307 GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
308 HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
309 GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
311 #define FRAME_OFFSET 12
316 MOV_L( ARG_SOURCE, ESI )
317 MOV_L( ARG_DEST, EDI )
319 MOV_L( ARG_MATRIX, EDX )
320 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
323 JZ( LLBL(x86_p2_3dnrr_done) )
325 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
326 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
328 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
329 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
331 SHL_L( CONST(4), ECX )
332 MOV_L( REGOFF(V4F_START, ESI), ESI )
334 MOV_L( REGOFF(V4F_START, EDI), EDI )
340 LLBL(x86_p2_3dnrr_loop):
/* Load source components 0 and 1 into slots F4 and F1. */
342 FLD_S( SRC0 ) /* F4 */
345 FLD_S( SRC1 ) /* F1 F4 */
348 FXCH( ST(1) ) /* F4 F1 */
/* Push matrix entry 13 as F5, then add F1 into it with a popping add. */
350 FLD_S( MAT13 ) /* F5 F4 F1 */
351 FXCH( ST(2) ) /* F1 F4 F5 */
352 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store F4 to destination component 0. F5 is still on the stack after the
 * last visible store. */
354 FSTP_S( DST0 ) /* F5 */
358 LLBL(x86_p2_3dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
360 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
363 JNE( LLBL(x86_p2_3dnrr_loop) )
365 LLBL(x86_p2_3dnrr_done):
/*
 * _mesa_x86_transform_points2_2d
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_2 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 2 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 403 to
 * 407 and 429 to 432), so register saves, the flag-setting test, the
 * multiplies, the store of the second result, the source-pointer advance,
 * the loop compare and the return are not visible in this listing.
 */
377 GLOBL GLNAME( _mesa_x86_transform_points2_2d )
378 HIDDEN(_mesa_x86_transform_points2_2d)
379 GLNAME( _mesa_x86_transform_points2_2d ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
381 #define FRAME_OFFSET 8
385 MOV_L( ARG_SOURCE, ESI )
386 MOV_L( ARG_DEST, EDI )
388 MOV_L( ARG_MATRIX, EDX )
389 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
392 JZ( LLBL(x86_p2_2dr_done) )
394 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
395 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
397 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
398 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
400 SHL_L( CONST(4), ECX )
401 MOV_L( REGOFF(V4F_START, ESI), ESI )
403 MOV_L( REGOFF(V4F_START, EDI), EDI )
407 LLBL(x86_p2_2dr_loop):
/* Source component 0 is pushed twice (slots F4, F5) and component 1 twice
 * (slots F0, F1). */
409 FLD_S( SRC0 ) /* F4 */
411 FLD_S( SRC0 ) /* F5 F4 */
414 FLD_S( SRC1 ) /* F0 F5 F4 */
416 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
/* Fold with popping adds: F0 into F4, F1 into F5. */
419 FXCH( ST(1) ) /* F0 F1 F5 F4 */
420 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
421 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Each exchange brings the other accumulator to the top of the stack;
 * whatever is applied to it there is not visible in this listing. */
423 FXCH( ST(1) ) /* F4 F5 */
425 FXCH( ST(1) ) /* F5 F4 */
/* Store F4 to destination component 0. F5 is still on the stack after the
 * last visible store. */
428 FXCH( ST(1) ) /* F4 F5 */
429 FSTP_S( DST0 ) /* F5 */
432 LLBL(x86_p2_2dr_skip):
/* Advance the destination pointer by one 16-byte element. */
434 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
437 JNE( LLBL(x86_p2_2dr_loop) )
439 LLBL(x86_p2_2dr_done):
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_2 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 2 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 476 to
 * 480 and 494 to 497), so register saves, the flag-setting test, any
 * scaling of the loaded values, the remaining stores, the source-pointer
 * advance, the loop compare and the return are not visible in this listing.
 */
450 GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
451 HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
452 GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
454 #define FRAME_OFFSET 8
458 MOV_L( ARG_SOURCE, ESI )
459 MOV_L( ARG_DEST, EDI )
461 MOV_L( ARG_MATRIX, EDX )
462 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
465 JZ( LLBL(x86_p2_2dnrr_done) )
467 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
468 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
470 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
471 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
473 SHL_L( CONST(4), ECX )
474 MOV_L( REGOFF(V4F_START, ESI), ESI )
476 MOV_L( REGOFF(V4F_START, EDI), EDI )
480 LLBL(x86_p2_2dnrr_loop):
/* Load source components 0 and 1 into slots F4 and F1. */
482 FLD_S( SRC0 ) /* F4 */
485 FLD_S( SRC1 ) /* F1 F4 */
488 FXCH( ST(1) ) /* F4 F1 */
/* Push matrix entry 13 as F5, then add F1 into it with a popping add. */
490 FLD_S( MAT13 ) /* F5 F4 F1 */
491 FXCH( ST(2) ) /* F1 F4 F5 */
492 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store F4 to destination component 0. F5 is still on the stack after the
 * last visible store. */
494 FSTP_S( DST0 ) /* F5 */
497 LLBL(x86_p2_2dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
499 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
502 JNE( LLBL(x86_p2_2dnrr_loop) )
504 LLBL(x86_p2_2dnrr_done):
/*
 * _mesa_x86_transform_points2_identity
 *
 * Visible setup: ESI/EDI are loaded from the source/destination arguments
 * and then replaced by their V4F_START pointers, EDX holds the matrix
 * argument, EAX the source V4F_STRIDE and ECX the source V4F_COUNT shifted
 * left by 4. The destination gets VEC_SIZE_2 OR-ed into V4F_FLAGS, the
 * count copied into V4F_COUNT and 2 written to V4F_SIZE.
 *
 * NOTE(review): the embedded original line numbers skip (for example 542 to
 * 546 and 549 to 557), so register saves, the flag-setting instructions,
 * the entire loop body, the source-pointer advance, the loop compare and
 * the return are not visible in this listing.
 */
515 GLOBL GLNAME( _mesa_x86_transform_points2_identity )
516 HIDDEN(_mesa_x86_transform_points2_identity)
517 GLNAME( _mesa_x86_transform_points2_identity ):
/* NOTE(review): FRAME_OFFSET was defined earlier in this file and no #undef
 * is visible in this listing -- confirm one exists. */
519 #define FRAME_OFFSET 12
524 MOV_L( ARG_SOURCE, ESI )
525 MOV_L( ARG_DEST, EDI )
527 MOV_L( ARG_MATRIX, EDX )
528 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Leaves through the exit label when ZF is set; the instruction that sets
 * the flags is not visible in this listing. */
531 JZ( LLBL(x86_p2_ir_done) )
533 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
534 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
536 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
537 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count << 4; the loop tail below steps EDI by 16 per element. */
539 SHL_L( CONST(4), ECX )
540 MOV_L( REGOFF(V4F_START, ESI), ESI )
542 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Second early exit, taken on equality. The compare is not visible here;
 * presumably it checks whether source and destination data pointers are the
 * same -- TODO confirm. */
546 JE( LLBL(x86_p2_ir_done) )
/* NOTE(review): no instructions between the loop label and the skip label
 * are visible in this listing. */
549 LLBL(x86_p2_ir_loop):
557 LLBL(x86_p2_ir_skip):
/* Advance the destination pointer by one 16-byte element. */
559 ADD_L( CONST(16), EDI )
/* The compare feeding this branch is not visible in this listing. */
562 JNE( LLBL(x86_p2_ir_loop) )
564 LLBL(x86_p2_ir_done):