3 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
34 #include "xform_args.h"
/*
 * FP_ONE is 0x3F800000 written in decimal, which is the bit pattern of the
 * IEEE-754 single-precision value 1.0.  It is not referenced by any line
 * visible in this listing.
 */
38 #define FP_ONE 1065353216
/*
 * SRCn: the n-th 4-byte slot (byte offsets 0, 4, 8, 12) addressed through
 * ESI.  The routines below point ESI at the V4F_START field of the source
 * argument before using these.
 */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
/*
 * DSTn: the n-th 4-byte slot (byte offsets 0, 4, 8, 12) addressed through
 * EDI.  The routines below point EDI at the V4F_START field of the
 * destination argument before using these.
 */
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
/*
 * MATn: the n-th of sixteen consecutive 4-byte slots (byte offset 4*n)
 * addressed through EDX, which the routines below load from ARG_MATRIX.
 * NOTE(review): presumably a 4x4 float matrix whose slots 12..14 hold the
 * translation terms (MAT13 and MAT14 are added to results below) - confirm
 * the element layout against the caller.
 */
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points3_general
 *
 * x87 routine that reads three source components (SRC0..SRC2) per element
 * and produces a 4-component destination element (destination size is set
 * to 4 and VEC_SIZE_4 is ORed into the destination flags).
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no definition of the x86_p3_gr_loop label, no advance of
 * ESI, no compare before JNE, no store to DST3 and no RET.  Treat the
 * listing as incomplete and confirm against the full file.
 */
68 GLOBL GLNAME( _mesa_x86_transform_points3_general )
69 HIDDEN(_mesa_x86_transform_points3_general)
70 GLNAME( _mesa_x86_transform_points3_general ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
72 #define FRAME_OFFSET 8
76 MOV_L( ARG_SOURCE, ESI )
77 MOV_L( ARG_DEST, EDI )
79 MOV_L( ARG_MATRIX, EDX )
80 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
83 JZ( LLBL(x86_p3_gr_done) )
85 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
86 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 4 components. */
88 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
89 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
91 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
92 MOV_L( REGOFF(V4F_START, ESI), ESI )
94 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Push SRC0 four times; the trailing comments name the x87 stack contents,
 * top of stack first, with F4..F7 as the four running results. */
100 FLD_S( SRC0 ) /* F4 */
102 FLD_S( SRC0 ) /* F5 F4 */
104 FLD_S( SRC0 ) /* F6 F5 F4 */
106 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
/* Push SRC1 four times as temporaries F0..F3. */
109 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
111 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
113 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
115 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
/* Fold the temporaries into the running results and pop them:
 * F4 += F0, F5 += F1, F6 += F2, F7 += F3. */
118 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
119 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
120 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
123 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Same pattern for SRC2: four temporaries, then fold into F4..F7. */
125 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
127 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
129 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
131 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
134 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
135 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
136 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
139 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Rotate each result to the top of the stack in turn.
 * NOTE(review): presumably an add of a matrix term followed each exchange
 * in the full file; in the visible lines the exchanges only reorder. */
141 FXCH( ST(3) ) /* F4 F6 F5 F7 */
143 FXCH( ST(2) ) /* F5 F6 F4 F7 */
145 FXCH( ST(1) ) /* F6 F5 F4 F7 */
147 FXCH( ST(3) ) /* F7 F5 F4 F6 */
/* Store and pop the results into destination slots 0..2.  F7 is still on
 * the x87 stack after the last visible store. */
150 FXCH( ST(2) ) /* F4 F5 F7 F6 */
151 FSTP_S( DST0 ) /* F5 F7 F6 */
152 FSTP_S( DST1 ) /* F7 F6 */
153 FXCH( ST(1) ) /* F6 F7 */
154 FSTP_S( DST2 ) /* F7 */
157 LLBL(x86_p3_gr_skip):
/* Advance the destination by one 4-float element. */
159 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; the target label is not
 * defined in the visible lines. */
162 JNE( LLBL(x86_p3_gr_loop) )
164 LLBL(x86_p3_gr_done):
/*
 * _mesa_x86_transform_points3_perspective
 *
 * x87 routine that reads SRC0..SRC2 per element and writes a 4-component
 * destination element (destination size is set to 4 and VEC_SIZE_4 is ORed
 * into the destination flags).  MAT14 is added to the third result.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *   EBX  integer scratch whose top bit is flipped inside the loop
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no load or store of EBX, no stores to DST2/DST3, no
 * advance of ESI, no compare before JNE and no RET.  Treat the listing as
 * incomplete and confirm against the full file.
 */
175 GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
176 HIDDEN(_mesa_x86_transform_points3_perspective)
177 GLNAME( _mesa_x86_transform_points3_perspective ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
179 #define FRAME_OFFSET 12
184 MOV_L( ARG_SOURCE, ESI )
185 MOV_L( ARG_DEST, EDI )
187 MOV_L( ARG_MATRIX, EDX )
188 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
191 JZ( LLBL(x86_p3_pr_done) )
193 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
194 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 4 components. */
196 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
197 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
199 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
200 MOV_L( REGOFF(V4F_START, ESI), ESI )
202 MOV_L( REGOFF(V4F_START, EDI), EDI )
206 LLBL(x86_p3_pr_loop):
/* The trailing comments name the x87 stack contents, top of stack first.
 * F4 and F5 start from SRC0 and SRC1; SRC2 is pushed three times as the
 * temporaries F0..F2. */
208 FLD_S( SRC0 ) /* F4 */
211 FLD_S( SRC1 ) /* F5 F4 */
214 FLD_S( SRC2 ) /* F0 F5 F4 */
216 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
218 FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1. */
221 FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
222 FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
223 FADDP( ST0, ST(2) ) /* F2 F5 F4 */
/* F6 starts as MAT14 and then accumulates F2. */
224 FLD_S( MAT14 ) /* F6 F2 F5 F4 */
225 FXCH( ST(1) ) /* F2 F6 F5 F4 */
226 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* -2147483648 is 0x80000000, the sign bit of a 32-bit float, so this negates
 * a float held as raw bits in EBX.
 * NOTE(review): presumably EBX holds a copy of a source component that is
 * later written to DST3 - neither the load nor the store is visible. */
229 XOR_L( CONST(-2147483648), EBX )/* change sign */
/* Store and pop F4 and F5; F6 is still on the x87 stack after the last
 * visible store. */
231 FXCH( ST(2) ) /* F4 F5 F6 */
232 FSTP_S( DST0 ) /* F5 F6 */
233 FSTP_S( DST1 ) /* F6 */
237 LLBL(x86_p3_pr_skip):
/* Advance the destination by one 4-float element. */
239 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
242 JNE( LLBL(x86_p3_pr_loop) )
244 LLBL(x86_p3_pr_done):
/*
 * _mesa_x86_transform_points3_3d
 *
 * x87 routine that reads SRC0..SRC2 per element and keeps three running
 * results (F4..F6); the destination size is set to 3 and VEC_SIZE_3 is
 * ORed into the destination flags.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no store to DST2, no advance of ESI, no compare before
 * JNE and no RET.  Treat the listing as incomplete and confirm against the
 * full file.
 */
256 GLOBL GLNAME( _mesa_x86_transform_points3_3d )
257 HIDDEN(_mesa_x86_transform_points3_3d)
258 GLNAME( _mesa_x86_transform_points3_3d ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
260 #define FRAME_OFFSET 8
264 MOV_L( ARG_SOURCE, ESI )
265 MOV_L( ARG_DEST, EDI )
267 MOV_L( ARG_MATRIX, EDX )
268 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
271 JZ( LLBL(x86_p3_3dr_done) )
273 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
274 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 3 components. */
276 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
277 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
279 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
280 MOV_L( REGOFF(V4F_START, ESI), ESI )
282 MOV_L( REGOFF(V4F_START, EDI), EDI )
286 LLBL(x86_p3_3dr_loop):
/* Push SRC0 three times; the trailing comments name the x87 stack contents,
 * top of stack first, with F4..F6 as the three running results. */
288 FLD_S( SRC0 ) /* F4 */
290 FLD_S( SRC0 ) /* F5 F4 */
292 FLD_S( SRC0 ) /* F6 F5 F4 */
/* Push SRC1 three times as temporaries F0..F2. */
295 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
297 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
299 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1, F6 += F2. */
302 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
303 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
304 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
305 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Same pattern for SRC2: three temporaries, then fold into F4..F6. */
307 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
309 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
311 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
314 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
315 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
316 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
317 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Rotate each result to the top of the stack in turn.
 * NOTE(review): presumably an add of a matrix term followed each exchange
 * in the full file; in the visible lines the exchanges only reorder. */
319 FXCH( ST(2) ) /* F4 F5 F6 */
321 FXCH( ST(1) ) /* F5 F4 F6 */
323 FXCH( ST(2) ) /* F6 F4 F5 */
/* Store and pop F4 and F5; F6 is still on the x87 stack after the last
 * visible store. */
326 FXCH( ST(1) ) /* F4 F6 F5 */
327 FSTP_S( DST0 ) /* F6 F5 */
328 FXCH( ST(1) ) /* F5 F6 */
329 FSTP_S( DST1 ) /* F6 */
332 LLBL(x86_p3_3dr_skip):
/* Advance the destination by 16 bytes per element. */
334 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
337 JNE( LLBL(x86_p3_3dr_loop) )
339 LLBL(x86_p3_3dr_done):
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * x87 routine that reads SRC0..SRC2 per element once each and adds MAT13
 * and MAT14 to the second and third values; the destination size is set to
 * 3 and VEC_SIZE_3 is ORed into the destination flags.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no store to DST2, no advance of ESI, no compare before
 * JNE and no RET.  Treat the listing as incomplete and confirm against the
 * full file.
 */
350 GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
351 HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
352 GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
354 #define FRAME_OFFSET 8
358 MOV_L( ARG_SOURCE, ESI )
359 MOV_L( ARG_DEST, EDI )
362 MOV_L( ARG_MATRIX, EDX )
363 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
366 JZ( LLBL(x86_p3_3dnrr_done) )
368 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
369 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 3 components. */
371 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
372 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
374 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
375 MOV_L( REGOFF(V4F_START, ESI), ESI )
377 MOV_L( REGOFF(V4F_START, EDI), EDI )
381 LLBL(x86_p3_3dnrr_loop):
/* Load one value per source component; the trailing comments name the x87
 * stack contents, top of stack first. */
383 FLD_S( SRC0 ) /* F4 */
386 FLD_S( SRC1 ) /* F1 F4 */
389 FLD_S( SRC2 ) /* F2 F1 F4 */
392 FXCH( ST(2) ) /* F4 F1 F2 */
/* F5 starts as MAT13 and then accumulates F1 (popped). */
394 FLD_S( MAT13 ) /* F5 F4 F1 F2 */
395 FXCH( ST(2) ) /* F1 F4 F5 F2 */
396 FADDP( ST0, ST(2) ) /* F4 F5 F2 */
/* F6 starts as MAT14 and then accumulates F2 (popped). */
397 FLD_S( MAT14 ) /* F6 F4 F5 F2 */
398 FXCH( ST(3) ) /* F2 F4 F5 F6 */
399 FADDP( ST0, ST(3) ) /* F4 F5 F6 */
/* Store and pop F4 and F5; F6 is still on the x87 stack after the last
 * visible store. */
401 FSTP_S( DST0 ) /* F5 F6 */
402 FSTP_S( DST1 ) /* F6 */
405 LLBL(x86_p3_3dnrr_skip):
/* Advance the destination by 16 bytes per element. */
407 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
410 JNE( LLBL(x86_p3_3dnrr_loop) )
412 LLBL(x86_p3_3dnrr_done):
/*
 * _mesa_x86_transform_points3_2d
 *
 * x87 routine that keeps two running results (F4, F5) built from SRC0 and
 * SRC1; the destination size is set to 3 and VEC_SIZE_3 is ORed into the
 * destination flags.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no stores to DST1/DST2, no advance of ESI, no compare
 * before JNE and no RET.  Treat the listing as incomplete and confirm
 * against the full file.
 */
423 GLOBL GLNAME( _mesa_x86_transform_points3_2d )
424 HIDDEN(_mesa_x86_transform_points3_2d)
425 GLNAME( _mesa_x86_transform_points3_2d ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
427 #define FRAME_OFFSET 12
432 MOV_L( ARG_SOURCE, ESI )
433 MOV_L( ARG_DEST, EDI )
435 MOV_L( ARG_MATRIX, EDX )
436 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
439 JZ( LLBL(x86_p3_2dr_done) )
441 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
442 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 3 components. */
444 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
445 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
447 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
448 MOV_L( REGOFF(V4F_START, ESI), ESI )
450 MOV_L( REGOFF(V4F_START, EDI), EDI )
454 LLBL(x86_p3_2dr_loop):
/* Push SRC0 twice as the running results F4 and F5; the trailing comments
 * name the x87 stack contents, top of stack first. */
456 FLD_S( SRC0 ) /* F4 */
458 FLD_S( SRC0 ) /* F5 F4 */
/* Push SRC1 twice as temporaries F0 and F1. */
461 FLD_S( SRC1 ) /* F0 F5 F4 */
463 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
/* Fold and pop: F4 += F0, F5 += F1. */
466 FXCH( ST(1) ) /* F0 F1 F5 F4 */
467 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
468 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Bring each result to the top of the stack in turn.
 * NOTE(review): presumably an add of a matrix term followed each exchange
 * in the full file; in the visible lines the exchanges only reorder. */
470 FXCH( ST(1) ) /* F4 F5 */
472 FXCH( ST(1) ) /* F5 F4 */
/* Store and pop F4; F5 is still on the x87 stack after the last visible
 * store. */
477 FXCH( ST(1) ) /* F4 F5 */
478 FSTP_S( DST0 ) /* F5 */
482 LLBL(x86_p3_2dr_skip):
/* Advance the destination by 16 bytes per element. */
484 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
487 JNE( LLBL(x86_p3_2dr_loop) )
489 LLBL(x86_p3_2dr_done):
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * x87 routine that loads SRC0 and SRC1 once each and adds MAT13 to the
 * second value; the destination size is set to 3 and VEC_SIZE_3 is ORed
 * into the destination flags.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (base of the MATn operands)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous, and several
 * instructions this code depends on are not among the visible lines: no
 * register save/restore, no multiply by the MATn entries, no flag-setting
 * test before JZ, no stores to DST1/DST2, no advance of ESI, no compare
 * before JNE and no RET.  Treat the listing as incomplete and confirm
 * against the full file.
 */
501 GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
502 HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
503 GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
505 #define FRAME_OFFSET 12
510 MOV_L( ARG_SOURCE, ESI )
511 MOV_L( ARG_DEST, EDI )
513 MOV_L( ARG_MATRIX, EDX )
514 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
517 JZ( LLBL(x86_p3_2dnrr_done) )
519 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
520 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 3 components. */
522 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
523 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
525 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
526 MOV_L( REGOFF(V4F_START, ESI), ESI )
528 MOV_L( REGOFF(V4F_START, EDI), EDI )
532 LLBL(x86_p3_2dnrr_loop):
/* Load one value per source component; the trailing comments name the x87
 * stack contents, top of stack first. */
534 FLD_S( SRC0 ) /* F4 */
537 FLD_S( SRC1 ) /* F1 F4 */
540 FXCH( ST(1) ) /* F4 F1 */
/* F5 starts as MAT13 and then accumulates F1 (popped). */
542 FLD_S( MAT13 ) /* F5 F4 F1 */
544 FXCH( ST(2) ) /* F1 F4 F5 */
545 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store and pop F4; F5 is still on the x87 stack after the last visible
 * store. */
549 FSTP_S( DST0 ) /* F5 */
553 LLBL(x86_p3_2dnrr_skip):
/* Advance the destination by 16 bytes per element. */
555 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
558 JNE( LLBL(x86_p3_2dnrr_loop) )
560 LLBL(x86_p3_2dnrr_done):
/*
 * _mesa_x86_transform_points3_identity
 *
 * Sets up the destination as a 3-component vector (size 3, VEC_SIZE_3 ORed
 * into the flags, count copied from the source) and then loops over the
 * elements.  No x87 instructions appear in the visible lines of this
 * routine.
 *
 * Register roles established by the setup code:
 *   ESI  pointer from ARG_SOURCE, then replaced by its V4F_START field
 *   EDI  pointer from ARG_DEST, then replaced by its V4F_START field
 *   EDX  pointer from ARG_MATRIX (not otherwise used in the visible lines)
 *   ECX  V4F_COUNT of the source, later shifted left by 4 (count * 16)
 *   EAX  V4F_STRIDE of the source
 *
 * NOTE(review): the embedded line numbers are not contiguous (607 jumps to
 * 627), so the whole loop body is missing from this listing, along with any
 * register save/restore, the flag-setting instructions before JZ and JE,
 * the advance of ESI, the compare before JNE and RET.  Presumably the body
 * copies the three source components unchanged - confirm against the full
 * file.
 */
572 GLOBL GLNAME( _mesa_x86_transform_points3_identity )
573 HIDDEN(_mesa_x86_transform_points3_identity)
574 GLNAME(_mesa_x86_transform_points3_identity ):
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h
 * to skip registers pushed in a prologue - TODO confirm; no pushes are
 * visible here. */
576 #define FRAME_OFFSET 16
582 MOV_L( ARG_SOURCE, ESI )
583 MOV_L( ARG_DEST, EDI )
585 MOV_L( ARG_MATRIX, EDX )
586 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* MOV_L does not modify EFLAGS, so the zero test this JZ relies on is not
 * among the visible lines. */
589 JZ( LLBL(x86_p3_ir_done) )
591 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
592 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
/* Copy the element count to the destination and record 3 components. */
594 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
595 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16; 16 is also the per-iteration advance applied to EDI. */
597 SHL_L( CONST(4), ECX )
/* From here on ESI and EDI address the data, not the argument structures. */
598 MOV_L( REGOFF(V4F_START, ESI), ESI )
600 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Second early exit.
 * NOTE(review): presumably taken when source and destination data are the
 * same buffer; the compare that sets the flags is not visible. */
604 JE( LLBL(x86_p3_ir_done) )
607 LLBL(x86_p3_ir_loop):
627 LLBL(x86_p3_ir_skip):
/* Advance the destination by 16 bytes per element. */
629 ADD_L( CONST(16), EDI )
/* Branch back while the zero flag is clear; no compare against an end
 * address is visible before this jump. */
632 JNE( LLBL(x86_p3_ir_loop) )
634 LLBL(x86_p3_ir_done):
/*
 * On ELF Linux builds, emit an empty .note.GNU-stack section.  By GNU
 * toolchain convention this marks the object as not needing an executable
 * stack.
 * NOTE(review): the matching #endif is not within the visible lines.
 */
642 #if defined (__ELF__) && defined (__linux__)
643 .section .note.GNU-stack,"",%progbits