3 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
33 #define MATH_ASM_PTR_SIZE 4
34 #include "math/m_vector_asm.h"
35 #include "xform_args.h"
/* 1065353216 is 0x3F800000, the bit pattern of the IEEE-754 single-precision value 1.0. */
39 #define FP_ONE 1065353216
/*
 * Bit pattern of single-precision 0.0.  FP_ZERO is stored into DST3 by
 * _mesa_x86_transform_points2_perspective but had no definition in this
 * file.  The guard keeps this harmless if an included header already
 * provides the macro.
 */
#ifndef FP_ZERO
#define FP_ZERO 0
#endif
/* Source operands: consecutive 4-byte slots addressed from ESI. */
42 #define SRC0 REGOFF(0, ESI)
43 #define SRC1 REGOFF(4, ESI)
44 #define SRC2 REGOFF(8, ESI)
45 #define SRC3 REGOFF(12, ESI)
/* Destination operands: consecutive 4-byte slots addressed from EDI. */
46 #define DST0 REGOFF(0, EDI)
47 #define DST1 REGOFF(4, EDI)
48 #define DST2 REGOFF(8, EDI)
49 #define DST3 REGOFF(12, EDI)
/* Matrix operands: MATn is the 4-byte slot at byte offset 4*n from EDX (n = 0..15). */
50 #define MAT0 REGOFF(0, EDX)
51 #define MAT1 REGOFF(4, EDX)
52 #define MAT2 REGOFF(8, EDX)
53 #define MAT3 REGOFF(12, EDX)
54 #define MAT4 REGOFF(16, EDX)
55 #define MAT5 REGOFF(20, EDX)
56 #define MAT6 REGOFF(24, EDX)
57 #define MAT7 REGOFF(28, EDX)
58 #define MAT8 REGOFF(32, EDX)
59 #define MAT9 REGOFF(36, EDX)
60 #define MAT10 REGOFF(40, EDX)
61 #define MAT11 REGOFF(44, EDX)
62 #define MAT12 REGOFF(48, EDX)
63 #define MAT13 REGOFF(52, EDX)
64 #define MAT14 REGOFF(56, EDX)
65 #define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points2_general
 *
 * Register use established by the setup code below:
 *   ESI - ARG_SOURCE, then replaced by the source vector's V4F_START value
 *   EDI - ARG_DEST, then replaced by the destination vector's V4F_START value
 *   EDX - ARG_MATRIX, the base register of the MATn operands
 *   ECX - source V4F_COUNT, later shifted left by 4 (count * 16)
 *   EAX - source V4F_STRIDE
 * The destination vector gets the VEC_SIZE_4 flag, V4F_SIZE 4 and the
 * same V4F_COUNT as the source.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
69 GLOBL GLNAME( _mesa_x86_transform_points2_general )
70 HIDDEN(_mesa_x86_transform_points2_general)
71 GLNAME( _mesa_x86_transform_points2_general ):
73 #define FRAME_OFFSET 8
77 MOV_L( ARG_SOURCE, ESI )
78 MOV_L( ARG_DEST, EDI )
80 MOV_L( ARG_MATRIX, EDX )
81 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
84 JZ( LLBL(x86_p2_gr_done) )
86 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
87 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
89 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
90 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
92 SHL_L( CONST(4), ECX )
93 MOV_L( REGOFF(V4F_START, ESI), ESI )
95 MOV_L( REGOFF(V4F_START, EDI), EDI )
/*
 * Loop entry targeted by the JNE near the end of this routine.  The label
 * was not defined anywhere; it is placed directly ahead of the first
 * source load, the same position every other routine in this file uses
 * for its _loop label.
 */
LLBL(x86_p2_gr_loop):
/*
 * Push four copies of SRC0, then four copies of SRC1.  The trailing
 * comments track the x87 stack, top of stack first.
 * NOTE(review): presumably each copy is scaled by a matrix element in
 * instructions not shown here - confirm.
 */
101 FLD_S( SRC0 ) /* F4 */
103 FLD_S( SRC0 ) /* F5 F4 */
105 FLD_S( SRC0 ) /* F6 F5 F4 */
107 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
110 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
112 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
114 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
116 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
/* Fold each SRC1 term into its SRC0 partner and pop it: F0 into F4, F1 into F5, F2 into F6, F3 into F7. */
119 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
121 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
123 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
124 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Rotate each of the four sums through the top of the stack in turn. */
126 FXCH( ST(3) ) /* F4 F6 F5 F7 */
128 FXCH( ST(2) ) /* F5 F6 F4 F7 */
130 FXCH( ST(1) ) /* F6 F5 F4 F7 */
132 FXCH( ST(3) ) /* F7 F5 F4 F6 */
/* Store and pop: F4 to DST0, F5 to DST1, F6 to DST2. */
135 FXCH( ST(2) ) /* F4 F5 F7 F6 */
136 FSTP_S( DST0 ) /* F5 F7 F6 */
137 FSTP_S( DST1 ) /* F7 F6 */
138 FXCH( ST(1) ) /* F6 F7 */
139 FSTP_S( DST2 ) /* F7 */
/* NOTE(review): F7 is still on the x87 stack here and no DST3 store is visible - confirm. */
142 LLBL(x86_p2_gr_skip):
/* Advance the destination pointer by one 16-byte element. */
144 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
147 JNE( LLBL(x86_p2_gr_loop) )
149 LLBL(x86_p2_gr_done):
/*
 * _mesa_x86_transform_points2_perspective
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_4 and V4F_SIZE 4.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
160 GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
161 HIDDEN(_mesa_x86_transform_points2_perspective)
162 GLNAME( _mesa_x86_transform_points2_perspective ):
/* NOTE(review): FRAME_OFFSET was already defined as 8 earlier in the file and no undef is visible in between - confirm. */
164 #define FRAME_OFFSET 12
169 MOV_L( ARG_SOURCE, ESI )
170 MOV_L( ARG_DEST, EDI )
172 MOV_L( ARG_MATRIX, EDX )
173 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
176 JZ( LLBL(x86_p2_pr_done) )
/* EAX = source stride. */
178 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
179 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
181 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
182 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
184 SHL_L( CONST(4), ECX )
185 MOV_L( REGOFF(V4F_START, ESI), ESI )
187 MOV_L( REGOFF(V4F_START, EDI), EDI )
193 LLBL(x86_p2_pr_loop):
/* Push SRC0 then SRC1.  The trailing comments track the x87 stack, top of stack first. */
195 FLD_S( SRC0 ) /* F4 */
198 FLD_S( SRC1 ) /* F1 F4 */
/* Bring F4 back to the top, then store and pop it into DST0. */
201 FXCH( ST(1) ) /* F4 F1 */
202 FSTP_S( DST0 ) /* F1 */
/* Write the integer constant FP_ZERO straight into the fourth destination slot. */
205 MOV_L( CONST(FP_ZERO), DST3 )
/* NOTE(review): F1 is still on the x87 stack here and no DST1 or DST2 store is visible - confirm. */
207 LLBL(x86_p2_pr_skip):
/* Advance the destination pointer by one 16-byte element. */
209 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
212 JNE( LLBL(x86_p2_pr_loop) )
214 LLBL(x86_p2_pr_done):
/*
 * _mesa_x86_transform_points2_3d
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_3 and V4F_SIZE 3.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
226 GLOBL GLNAME( _mesa_x86_transform_points2_3d )
227 HIDDEN(_mesa_x86_transform_points2_3d)
228 GLNAME( _mesa_x86_transform_points2_3d ):
/* NOTE(review): FRAME_OFFSET was already defined as 12 earlier in the file and no undef is visible in between - confirm. */
230 #define FRAME_OFFSET 8
234 MOV_L( ARG_SOURCE, ESI )
235 MOV_L( ARG_DEST, EDI )
237 MOV_L( ARG_MATRIX, EDX )
238 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
241 JZ( LLBL(x86_p2_3dr_done) )
/* EAX = source stride. */
243 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
244 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
246 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
247 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
249 SHL_L( CONST(4), ECX )
250 MOV_L( REGOFF(V4F_START, ESI), ESI )
252 MOV_L( REGOFF(V4F_START, EDI), EDI )
256 LLBL(x86_p2_3dr_loop):
/*
 * Push three copies of SRC0, then three copies of SRC1.  The trailing
 * comments track the x87 stack, top of stack first.
 * NOTE(review): presumably each copy is scaled by a matrix element in
 * instructions not shown here - confirm.
 */
258 FLD_S( SRC0 ) /* F4 */
260 FLD_S( SRC0 ) /* F5 F4 */
262 FLD_S( SRC0 ) /* F6 F5 F4 */
265 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
267 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
269 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
/* Fold each SRC1 term into its SRC0 partner and pop it: F0 into F4, F1 into F5, F2 into F6. */
272 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
273 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
274 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
275 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Rotate each of the three sums through the top of the stack in turn. */
277 FXCH( ST(2) ) /* F4 F5 F6 */
279 FXCH( ST(1) ) /* F5 F4 F6 */
281 FXCH( ST(2) ) /* F6 F4 F5 */
/* Store and pop: F4 to DST0, F5 to DST1. */
284 FXCH( ST(1) ) /* F4 F6 F5 */
285 FSTP_S( DST0 ) /* F6 F5 */
286 FXCH( ST(1) ) /* F5 F6 */
287 FSTP_S( DST1 ) /* F6 */
/* NOTE(review): F6 is still on the x87 stack here and no DST2 store is visible - confirm. */
290 LLBL(x86_p2_3dr_skip):
/* Advance the destination pointer by one 16-byte element. */
292 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
295 JNE( LLBL(x86_p2_3dr_loop) )
297 LLBL(x86_p2_3dr_done):
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_3 and V4F_SIZE 3.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
308 GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
309 HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
310 GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
/* NOTE(review): FRAME_OFFSET was already defined as 8 earlier in the file and no undef is visible in between - confirm. */
312 #define FRAME_OFFSET 12
317 MOV_L( ARG_SOURCE, ESI )
318 MOV_L( ARG_DEST, EDI )
320 MOV_L( ARG_MATRIX, EDX )
321 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
324 JZ( LLBL(x86_p2_3dnrr_done) )
/* EAX = source stride. */
326 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
327 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
329 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
330 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
332 SHL_L( CONST(4), ECX )
333 MOV_L( REGOFF(V4F_START, ESI), ESI )
335 MOV_L( REGOFF(V4F_START, EDI), EDI )
341 LLBL(x86_p2_3dnrr_loop):
/* Push SRC0 then SRC1.  The trailing comments track the x87 stack, top of stack first. */
343 FLD_S( SRC0 ) /* F4 */
346 FLD_S( SRC1 ) /* F1 F4 */
349 FXCH( ST(1) ) /* F4 F1 */
/* Push matrix element 13, then add the SRC1 term F1 into it and pop F1. */
351 FLD_S( MAT13 ) /* F5 F4 F1 */
352 FXCH( ST(2) ) /* F1 F4 F5 */
353 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store and pop F4 into DST0. */
355 FSTP_S( DST0 ) /* F5 */
/* NOTE(review): F5 is still on the x87 stack here and no DST1 or DST2 store is visible - confirm. */
359 LLBL(x86_p2_3dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
361 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
364 JNE( LLBL(x86_p2_3dnrr_loop) )
366 LLBL(x86_p2_3dnrr_done):
/*
 * _mesa_x86_transform_points2_2d
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_2 and V4F_SIZE 2.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
378 GLOBL GLNAME( _mesa_x86_transform_points2_2d )
379 HIDDEN(_mesa_x86_transform_points2_2d)
380 GLNAME( _mesa_x86_transform_points2_2d ):
/* NOTE(review): FRAME_OFFSET was already defined as 12 earlier in the file and no undef is visible in between - confirm. */
382 #define FRAME_OFFSET 8
386 MOV_L( ARG_SOURCE, ESI )
387 MOV_L( ARG_DEST, EDI )
389 MOV_L( ARG_MATRIX, EDX )
390 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
393 JZ( LLBL(x86_p2_2dr_done) )
/* EAX = source stride. */
395 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
396 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
398 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
399 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
401 SHL_L( CONST(4), ECX )
402 MOV_L( REGOFF(V4F_START, ESI), ESI )
404 MOV_L( REGOFF(V4F_START, EDI), EDI )
408 LLBL(x86_p2_2dr_loop):
/*
 * Push two copies of SRC0, then two copies of SRC1.  The trailing
 * comments track the x87 stack, top of stack first.
 * NOTE(review): presumably each copy is scaled by a matrix element in
 * instructions not shown here - confirm.
 */
410 FLD_S( SRC0 ) /* F4 */
412 FLD_S( SRC0 ) /* F5 F4 */
415 FLD_S( SRC1 ) /* F0 F5 F4 */
417 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
/* Fold each SRC1 term into its SRC0 partner and pop it: F0 into F4, F1 into F5. */
420 FXCH( ST(1) ) /* F0 F1 F5 F4 */
421 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
422 FADDP( ST0, ST(1) ) /* F5 F4 */
/* Alternate the two sums through the top of the stack. */
424 FXCH( ST(1) ) /* F4 F5 */
426 FXCH( ST(1) ) /* F5 F4 */
/* Store and pop F4 into DST0. */
429 FXCH( ST(1) ) /* F4 F5 */
430 FSTP_S( DST0 ) /* F5 */
/* NOTE(review): F5 is still on the x87 stack here and no DST1 store is visible - confirm. */
433 LLBL(x86_p2_2dr_skip):
/* Advance the destination pointer by one 16-byte element. */
435 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
438 JNE( LLBL(x86_p2_2dr_loop) )
440 LLBL(x86_p2_2dr_done):
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_2 and V4F_SIZE 2.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): this listing appears to be missing instructions (no
 * register saves, flag-setting tests, multiplies, source advance or RET
 * are visible).  Confirm against the complete file.
 */
451 GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
452 HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
453 GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
/* NOTE(review): FRAME_OFFSET is already defined earlier in the file (also as 8) and no undef is visible in between - confirm. */
455 #define FRAME_OFFSET 8
459 MOV_L( ARG_SOURCE, ESI )
460 MOV_L( ARG_DEST, EDI )
462 MOV_L( ARG_MATRIX, EDX )
463 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
466 JZ( LLBL(x86_p2_2dnrr_done) )
/* EAX = source stride. */
468 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
469 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
471 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
472 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
474 SHL_L( CONST(4), ECX )
475 MOV_L( REGOFF(V4F_START, ESI), ESI )
477 MOV_L( REGOFF(V4F_START, EDI), EDI )
481 LLBL(x86_p2_2dnrr_loop):
/* Push SRC0 then SRC1.  The trailing comments track the x87 stack, top of stack first. */
483 FLD_S( SRC0 ) /* F4 */
486 FLD_S( SRC1 ) /* F1 F4 */
489 FXCH( ST(1) ) /* F4 F1 */
/* Push matrix element 13, then add the SRC1 term F1 into it and pop F1. */
491 FLD_S( MAT13 ) /* F5 F4 F1 */
492 FXCH( ST(2) ) /* F1 F4 F5 */
493 FADDP( ST0, ST(2) ) /* F4 F5 */
/* Store and pop F4 into DST0. */
495 FSTP_S( DST0 ) /* F5 */
/* NOTE(review): F5 is still on the x87 stack here and no DST1 store is visible - confirm. */
498 LLBL(x86_p2_2dnrr_skip):
/* Advance the destination pointer by one 16-byte element. */
500 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
503 JNE( LLBL(x86_p2_2dnrr_loop) )
505 LLBL(x86_p2_2dnrr_done):
/*
 * _mesa_x86_transform_points2_identity
 *
 * Setup loads ARG_SOURCE into ESI, ARG_DEST into EDI and ARG_MATRIX into
 * EDX, copies the source V4F_COUNT to the destination, and tags the
 * destination with VEC_SIZE_2 and V4F_SIZE 2.  ESI and EDI are then
 * replaced by the V4F_START values of their vectors.
 *
 * NOTE(review): the loop body is empty in this listing and no register
 * saves, flag-setting tests, source advance or RET are visible.  Confirm
 * against the complete file.
 */
516 GLOBL GLNAME( _mesa_x86_transform_points2_identity )
517 HIDDEN(_mesa_x86_transform_points2_identity)
518 GLNAME( _mesa_x86_transform_points2_identity ):
/* NOTE(review): FRAME_OFFSET was already defined as 8 earlier in the file and no undef is visible in between - confirm. */
520 #define FRAME_OFFSET 12
525 MOV_L( ARG_SOURCE, ESI )
526 MOV_L( ARG_DEST, EDI )
528 MOV_L( ARG_MATRIX, EDX )
529 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* NOTE(review): no flag-setting instruction is visible ahead of this JZ - confirm. */
532 JZ( LLBL(x86_p2_ir_done) )
/* EAX = source stride. */
534 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
535 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
537 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
538 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = count * 16, matching the 16-byte step applied to EDI at the skip label. */
540 SHL_L( CONST(4), ECX )
541 MOV_L( REGOFF(V4F_START, ESI), ESI )
543 MOV_L( REGOFF(V4F_START, EDI), EDI )
/* Second early exit.  NOTE(review): the comparison that feeds this JE is not visible - confirm what is being compared. */
547 JE( LLBL(x86_p2_ir_done) )
550 LLBL(x86_p2_ir_loop):
/* NOTE(review): no copy instructions are visible between the loop and skip labels - confirm. */
558 LLBL(x86_p2_ir_skip):
/* Advance the destination pointer by one 16-byte element. */
560 ADD_L( CONST(16), EDI )
/* NOTE(review): the compare that feeds this JNE is not visible - confirm. */
563 JNE( LLBL(x86_p2_ir_loop) )
565 LLBL(x86_p2_ir_done):
573 #if defined (__ELF__) && defined (__linux__)
574 .section .note.GNU-stack,"",%progbits