Merge branch 'gallium-vertex-linear' into gallium-0.1
[mesa.git] / src / mesa / x86 / x86_xform2.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 3.5
5 *
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32 #include "matypes.h"
33 #include "xform_args.h"
34
/* SEG_TEXT is defined outside this file; presumably it selects the code section - confirm. */
35 SEG_TEXT
36
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0. Not referenced elsewhere in this file. */
37 #define FP_ONE 1065353216
/* All-zero word, i.e. single-precision +0.0; stored as DST3 by the perspective routine. */
38 #define FP_ZERO 0
39
/*
 * SRCn: n-th 32-bit component of the current input point, addressed off ESI.
 * Only SRC0 and SRC1 are read by the routines in this file.
 */
40 #define SRC0 REGOFF(0, ESI)
41 #define SRC1 REGOFF(4, ESI)
42 #define SRC2 REGOFF(8, ESI)
43 #define SRC3 REGOFF(12, ESI)
/* DSTn: n-th 32-bit component of the current output point, addressed off EDI. */
44 #define DST0 REGOFF(0, EDI)
45 #define DST1 REGOFF(4, EDI)
46 #define DST2 REGOFF(8, EDI)
47 #define DST3 REGOFF(12, EDI)
/* MATn: n-th 32-bit element (byte offset 4*n) of the matrix addressed off EDX. */
48 #define MAT0 REGOFF(0, EDX)
49 #define MAT1 REGOFF(4, EDX)
50 #define MAT2 REGOFF(8, EDX)
51 #define MAT3 REGOFF(12, EDX)
52 #define MAT4 REGOFF(16, EDX)
53 #define MAT5 REGOFF(20, EDX)
54 #define MAT6 REGOFF(24, EDX)
55 #define MAT7 REGOFF(28, EDX)
56 #define MAT8 REGOFF(32, EDX)
57 #define MAT9 REGOFF(36, EDX)
58 #define MAT10 REGOFF(40, EDX)
59 #define MAT11 REGOFF(44, EDX)
60 #define MAT12 REGOFF(48, EDX)
61 #define MAT13 REGOFF(52, EDX)
62 #define MAT14 REGOFF(56, EDX)
63 #define MAT15 REGOFF(60, EDX)
64
65
/*
 * _mesa_x86_transform_points2_general
 *
 * For every input point, reads SRC0 and SRC1 and writes four floats:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + MAT14
 *   DST3 = SRC0*MAT3 + SRC1*MAT7 + MAT15
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *
 * Destination vector fields updated: flags |= VEC_SIZE_4, count = source
 * count, size = 4.  When the source count is zero the routine returns
 * without writing anything to the destination.
 *
 * In the stack comments the leftmost name is ST(0).  F4..F7 are the four
 * SRC0 products (the accumulators); F0..F3 are the four SRC1 products.
 */
66 ALIGNTEXT16
67 GLOBL GLNAME( _mesa_x86_transform_points2_general )
68 HIDDEN(_mesa_x86_transform_points2_general)
69 GLNAME( _mesa_x86_transform_points2_general ):
70
/* 8 = the two 4-byte registers pushed below; presumably consumed by the ARG_* macros to step over them - confirm in xform_args.h. */
71 #define FRAME_OFFSET 8
72 PUSH_L( ESI )
73 PUSH_L( EDI )
74
75 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
76 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
77
78 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
80
81 TEST_L( ECX, ECX )
82 JZ( LLBL(x86_p2_gr_done) ) /* zero count: leave the destination untouched */
83
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
86
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */
89
90 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
91 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
92
93 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
94 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
95
96 ALIGNTEXT16
97 LLBL(x86_p2_gr_loop):
98
/* Four SRC0 products; these stay on the stack as accumulators F4..F7. */
99 FLD_S( SRC0 ) /* F4 */
100 FMUL_S( MAT0 )
101 FLD_S( SRC0 ) /* F5 F4 */
102 FMUL_S( MAT1 )
103 FLD_S( SRC0 ) /* F6 F5 F4 */
104 FMUL_S( MAT2 )
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
106 FMUL_S( MAT3 )
107
/* Four SRC1 products F0..F3; the x87 stack now holds all eight values. */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
109 FMUL_S( MAT4 )
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
111 FMUL_S( MAT5 )
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
113 FMUL_S( MAT6 )
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
115 FMUL_S( MAT7 )
116
/* Add each SRC1 product into its accumulator and pop it: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
123
/* Bring each accumulator to ST(0) in turn and add its MAT12..MAT15 term. */
124 FXCH( ST(3) ) /* F4 F6 F5 F7 */
125 FADD_S( MAT12 )
126 FXCH( ST(2) ) /* F5 F6 F4 F7 */
127 FADD_S( MAT13 )
128 FXCH( ST(1) ) /* F6 F5 F4 F7 */
129 FADD_S( MAT14 )
130 FXCH( ST(3) ) /* F7 F5 F4 F6 */
131 FADD_S( MAT15 )
132
/* Store in component order; each FSTP pops, so the stack ends at the depth the iteration started with. */
133 FXCH( ST(2) ) /* F4 F5 F7 F6 */
134 FSTP_S( DST0 ) /* F5 F7 F6 */
135 FSTP_S( DST1 ) /* F7 F6 */
136 FXCH( ST(1) ) /* F6 F7 */
137 FSTP_S( DST2 ) /* F7 */
138 FSTP_S( DST3 ) /* */
139
/* This label is not the target of any jump in this file. */
140 LLBL(x86_p2_gr_skip):
141
142 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
143 ADD_L( EAX, ESI ) /* next input point (source stride) */
144 CMP_L( ECX, EDI )
145 JNE( LLBL(x86_p2_gr_loop) ) /* repeat until EDI reaches the end pointer */
146
147 LLBL(x86_p2_gr_done):
148
/* Restore the saved registers in reverse push order. */
149 POP_L( EDI )
150 POP_L( ESI )
151 RET
152 #undef FRAME_OFFSET
153
154
155
156
/*
 * _mesa_x86_transform_points2_perspective
 *
 * For every input point, reads SRC0 and SRC1 and writes four 32-bit words:
 *   DST0 = SRC0*MAT0
 *   DST1 = SRC1*MAT5
 *   DST2 = MAT14   (raw word, loaded once into EBX before the loop)
 *   DST3 = FP_ZERO
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *   EBX = copy of the MAT14 word
 *
 * Destination vector fields updated: flags |= VEC_SIZE_4, count = source
 * count, size = 4.  A zero source count returns without touching the
 * destination.  In the stack comments the leftmost name is ST(0).
 */
157 ALIGNTEXT16
158 GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
159 HIDDEN(_mesa_x86_transform_points2_perspective)
160 GLNAME( _mesa_x86_transform_points2_perspective ):
161
/* 12 = the three 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
162 #define FRAME_OFFSET 12
163 PUSH_L( ESI )
164 PUSH_L( EDI )
165 PUSH_L( EBX ) /* saved because EBX carries MAT14 through the loop */
166
167 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
168 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
169
170 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
171 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
172
173 TEST_L( ECX, ECX )
174 JZ( LLBL(x86_p2_pr_done) ) /* zero count: leave the destination untouched */
175
176 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
177 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
178
179 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
180 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */
181
182 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
183 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
184
185 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
186 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
187
188 MOV_L( MAT14, EBX ) /* loop-invariant: the same word is written to every DST2 */
189
190 ALIGNTEXT16
191 LLBL(x86_p2_pr_loop):
192
193 FLD_S( SRC0 ) /* F4 */
194 FMUL_S( MAT0 )
195
196 FLD_S( SRC1 ) /* F1 F4 */
197 FMUL_S( MAT5 )
198
199 FXCH( ST(1) ) /* F4 F1 */
200 FSTP_S( DST0 ) /* F1 */
201 FSTP_S( DST1 ) /* */
202 MOV_L( EBX, DST2 ) /* integer copy of the MAT14 word */
203 MOV_L( CONST(FP_ZERO), DST3 )
204
/* This label is not the target of any jump in this file. */
205 LLBL(x86_p2_pr_skip):
206
207 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
208 ADD_L( EAX, ESI ) /* next input point (source stride) */
209 CMP_L( ECX, EDI )
210 JNE( LLBL(x86_p2_pr_loop) ) /* repeat until EDI reaches the end pointer */
211
212 LLBL(x86_p2_pr_done):
213
/* Restore the saved registers in reverse push order. */
214 POP_L( EBX )
215 POP_L( EDI )
216 POP_L( ESI )
217 RET
218 #undef FRAME_OFFSET
219
220
221
222
/*
 * _mesa_x86_transform_points2_3d
 *
 * For every input point, reads SRC0 and SRC1 and writes three floats:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + MAT14
 * DST3 is not written.
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *
 * Destination vector fields updated: flags |= VEC_SIZE_3, count = source
 * count, size = 3.  A zero source count returns without touching the
 * destination.
 *
 * In the stack comments the leftmost name is ST(0).  F4..F6 are the SRC0
 * products (the accumulators); F0..F2 are the SRC1 products.
 */
223 ALIGNTEXT16
224 GLOBL GLNAME( _mesa_x86_transform_points2_3d )
225 HIDDEN(_mesa_x86_transform_points2_3d)
226 GLNAME( _mesa_x86_transform_points2_3d ):
227
/* 8 = the two 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
228 #define FRAME_OFFSET 8
229 PUSH_L( ESI )
230 PUSH_L( EDI )
231
232 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
233 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
234
235 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
236 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
237
238 TEST_L( ECX, ECX )
239 JZ( LLBL(x86_p2_3dr_done) ) /* zero count: leave the destination untouched */
240
241 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
242 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
243
244 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
245 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */
246
247 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
248 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
249
250 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
251 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
252
253 ALIGNTEXT16
254 LLBL(x86_p2_3dr_loop):
255
/* Three SRC0 products; these stay on the stack as accumulators F4..F6. */
256 FLD_S( SRC0 ) /* F4 */
257 FMUL_S( MAT0 )
258 FLD_S( SRC0 ) /* F5 F4 */
259 FMUL_S( MAT1 )
260 FLD_S( SRC0 ) /* F6 F5 F4 */
261 FMUL_S( MAT2 )
262
/* Three SRC1 products F0..F2. */
263 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
264 FMUL_S( MAT4 )
265 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
266 FMUL_S( MAT5 )
267 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
268 FMUL_S( MAT6 )
269
/* Add each SRC1 product into its accumulator and pop it: F4+=F0, F5+=F1, F6+=F2. */
270 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
271 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
272 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
273 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
274
/* Bring each accumulator to ST(0) in turn and add its MAT12..MAT14 term. */
275 FXCH( ST(2) ) /* F4 F5 F6 */
276 FADD_S( MAT12 )
277 FXCH( ST(1) ) /* F5 F4 F6 */
278 FADD_S( MAT13 )
279 FXCH( ST(2) ) /* F6 F4 F5 */
280 FADD_S( MAT14 )
281
/* Store in component order; each FSTP pops, so the stack ends at the depth the iteration started with. */
282 FXCH( ST(1) ) /* F4 F6 F5 */
283 FSTP_S( DST0 ) /* F6 F5 */
284 FXCH( ST(1) ) /* F5 F6 */
285 FSTP_S( DST1 ) /* F6 */
286 FSTP_S( DST2 ) /* */
287
/* This label is not the target of any jump in this file. */
288 LLBL(x86_p2_3dr_skip):
289
290 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
291 ADD_L( EAX, ESI ) /* next input point (source stride) */
292 CMP_L( ECX, EDI )
293 JNE( LLBL(x86_p2_3dr_loop) ) /* repeat until EDI reaches the end pointer */
294
295 LLBL(x86_p2_3dr_done):
296
/* Restore the saved registers in reverse push order. */
297 POP_L( EDI )
298 POP_L( ESI )
299 RET
300 #undef FRAME_OFFSET
301
302
303
304
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * For every input point, reads SRC0 and SRC1 and writes three 32-bit words:
 *   DST0 = SRC0*MAT0 + MAT12
 *   DST1 = SRC1*MAT5 + MAT13
 *   DST2 = MAT14   (raw word, loaded once into EBX before the loop)
 * DST3 is not written.
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *   EBX = copy of the MAT14 word
 *
 * Destination vector fields updated: flags |= VEC_SIZE_3, count = source
 * count, size = 3.  A zero source count returns without touching the
 * destination.  In the stack comments the leftmost name is ST(0).
 */
305 ALIGNTEXT16
306 GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
307 HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
308 GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
309
/* 12 = the three 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
310 #define FRAME_OFFSET 12
311 PUSH_L( ESI )
312 PUSH_L( EDI )
313 PUSH_L( EBX ) /* saved because EBX carries MAT14 through the loop */
314
315 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
316 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
317
318 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
319 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
320
321 TEST_L( ECX, ECX )
322 JZ( LLBL(x86_p2_3dnrr_done) ) /* zero count: leave the destination untouched */
323
324 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
325 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
326
327 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
328 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */
329
330 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
331 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
332
333 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
334 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
335
336 MOV_L( MAT14, EBX ) /* loop-invariant: the same word is written to every DST2 */
337
338 ALIGNTEXT16
339 LLBL(x86_p2_3dnrr_loop):
340
341 FLD_S( SRC0 ) /* F4 */
342 FMUL_S( MAT0 )
343
344 FLD_S( SRC1 ) /* F1 F4 */
345 FMUL_S( MAT5 )
346
347 FXCH( ST(1) ) /* F4 F1 */
348 FADD_S( MAT12 )
/* F5 starts as MAT13; the SRC1*MAT5 product F1 is then added into it and popped. */
349 FLD_S( MAT13 ) /* F5 F4 F1 */
350 FXCH( ST(2) ) /* F1 F4 F5 */
351 FADDP( ST0, ST(2) ) /* F4 F5 */
352
353 FSTP_S( DST0 ) /* F5 */
354 FSTP_S( DST1 ) /* */
355 MOV_L( EBX, DST2 ) /* integer copy of the MAT14 word */
356
/* This label is not the target of any jump in this file. */
357 LLBL(x86_p2_3dnrr_skip):
358
359 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
360 ADD_L( EAX, ESI ) /* next input point (source stride) */
361 CMP_L( ECX, EDI )
362 JNE( LLBL(x86_p2_3dnrr_loop) ) /* repeat until EDI reaches the end pointer */
363
364 LLBL(x86_p2_3dnrr_done):
365
/* Restore the saved registers in reverse push order. */
366 POP_L( EBX )
367 POP_L( EDI )
368 POP_L( ESI )
369 RET
370 #undef FRAME_OFFSET
371
372
373
374
/*
 * _mesa_x86_transform_points2_2d
 *
 * For every input point, reads SRC0 and SRC1 and writes two floats:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + MAT13
 * DST2 and DST3 are not written.
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *
 * Destination vector fields updated: flags |= VEC_SIZE_2, count = source
 * count, size = 2.  A zero source count returns without touching the
 * destination.
 *
 * In the stack comments the leftmost name is ST(0).  F4/F5 are the SRC0
 * products (the accumulators); F0/F1 are the SRC1 products.
 */
375 ALIGNTEXT16
376 GLOBL GLNAME( _mesa_x86_transform_points2_2d )
377 HIDDEN(_mesa_x86_transform_points2_2d)
378 GLNAME( _mesa_x86_transform_points2_2d ):
379
/* 8 = the two 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
380 #define FRAME_OFFSET 8
381 PUSH_L( ESI )
382 PUSH_L( EDI )
383
384 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
385 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
386
387 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
388 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
389
390 TEST_L( ECX, ECX )
391 JZ( LLBL(x86_p2_2dr_done) ) /* zero count: leave the destination untouched */
392
393 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
394 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
395
396 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
397 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */
398
399 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
400 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
401
402 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
403 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
404
405 ALIGNTEXT16
406 LLBL(x86_p2_2dr_loop):
407
/* Two SRC0 products; these stay on the stack as accumulators F4 and F5. */
408 FLD_S( SRC0 ) /* F4 */
409 FMUL_S( MAT0 )
410 FLD_S( SRC0 ) /* F5 F4 */
411 FMUL_S( MAT1 )
412
/* Two SRC1 products F0 and F1. */
413 FLD_S( SRC1 ) /* F0 F5 F4 */
414 FMUL_S( MAT4 )
415 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
416 FMUL_S( MAT5 )
417
/* Add each SRC1 product into its accumulator and pop it: F4+=F0, F5+=F1. */
418 FXCH( ST(1) ) /* F0 F1 F5 F4 */
419 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
420 FADDP( ST0, ST(1) ) /* F5 F4 */
421
/* Add the MAT12 and MAT13 terms. */
422 FXCH( ST(1) ) /* F4 F5 */
423 FADD_S( MAT12 )
424 FXCH( ST(1) ) /* F5 F4 */
425 FADD_S( MAT13 )
426
/* Store in component order; each FSTP pops, so the stack ends at the depth the iteration started with. */
427 FXCH( ST(1) ) /* F4 F5 */
428 FSTP_S( DST0 ) /* F5 */
429 FSTP_S( DST1 ) /* */
430
/* This label is not the target of any jump in this file. */
431 LLBL(x86_p2_2dr_skip):
432
433 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
434 ADD_L( EAX, ESI ) /* next input point (source stride) */
435 CMP_L( ECX, EDI )
436 JNE( LLBL(x86_p2_2dr_loop) ) /* repeat until EDI reaches the end pointer */
437
438 LLBL(x86_p2_2dr_done):
439
/* Restore the saved registers in reverse push order. */
440 POP_L( EDI )
441 POP_L( ESI )
442 RET
443 #undef FRAME_OFFSET
444
445
446
447
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * For every input point, reads SRC0 and SRC1 and writes two floats:
 *   DST0 = SRC0*MAT0 + MAT12
 *   DST1 = SRC1*MAT5 + MAT13
 * DST2 and DST3 are not written.
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EDX = matrix base used by the MATn macros
 *
 * Destination vector fields updated: flags |= VEC_SIZE_2, count = source
 * count, size = 2.  A zero source count returns without touching the
 * destination.  In the stack comments the leftmost name is ST(0).
 *
 * NOTE(review): every other entry point in this file is preceded by
 * ALIGNTEXT16; confirm that ALIGNTEXT4 is intended for this one.
 */
448 ALIGNTEXT4
449 GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
450 HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
451 GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
452
/* 8 = the two 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
453 #define FRAME_OFFSET 8
454 PUSH_L( ESI )
455 PUSH_L( EDI )
456
457 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
458 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
459
460 MOV_L( ARG_MATRIX, EDX ) /* EDX = base address for MATn */
461 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
462
463 TEST_L( ECX, ECX )
464 JZ( LLBL(x86_p2_2dnrr_done) ) /* zero count: leave the destination untouched */
465
466 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
467 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
468
469 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
470 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */
471
472 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
473 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
474
475 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
476 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
477
478 ALIGNTEXT16
479 LLBL(x86_p2_2dnrr_loop):
480
481 FLD_S( SRC0 ) /* F4 */
482 FMUL_S( MAT0 )
483
484 FLD_S( SRC1 ) /* F1 F4 */
485 FMUL_S( MAT5 )
486
487 FXCH( ST(1) ) /* F4 F1 */
488 FADD_S( MAT12 )
/* F5 starts as MAT13; the SRC1*MAT5 product F1 is then added into it and popped. */
489 FLD_S( MAT13 ) /* F5 F4 F1 */
490 FXCH( ST(2) ) /* F1 F4 F5 */
491 FADDP( ST0, ST(2) ) /* F4 F5 */
492
493 FSTP_S( DST0 ) /* F5 */
494 FSTP_S( DST1 ) /* */
495
/* This label is not the target of any jump in this file. */
496 LLBL(x86_p2_2dnrr_skip):
497
498 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
499 ADD_L( EAX, ESI ) /* next input point (source stride) */
500 CMP_L( ECX, EDI )
501 JNE( LLBL(x86_p2_2dnrr_loop) ) /* repeat until EDI reaches the end pointer */
502
503 LLBL(x86_p2_2dnrr_done):
504
/* Restore the saved registers in reverse push order. */
505 POP_L( EDI )
506 POP_L( ESI )
507 RET
508 #undef FRAME_OFFSET
509
510
511
512
/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit components of every input point to the
 * output unchanged:
 *   DST0 = SRC0
 *   DST1 = SRC1
 * DST2 and DST3 are not written.  The copy goes through integer registers,
 * so the words are moved bit-for-bit.
 *
 * Registers inside the loop:
 *   ESI = current input point, advanced by the source stride held in EAX
 *   EDI = current output point, advanced by a fixed 16 bytes
 *   ECX = output start + 16 * count (loop end pointer)
 *   EBX, EDX = scratch for the two words being copied
 *
 * Destination vector fields updated: flags |= VEC_SIZE_2, count = source
 * count, size = 2.  A zero source count returns without touching the
 * destination.  The fields are written before the start pointers are
 * compared, so they are updated even when the copy itself is skipped
 * because input and output start at the same address.
 */
513 ALIGNTEXT16
514 GLOBL GLNAME( _mesa_x86_transform_points2_identity )
515 HIDDEN(_mesa_x86_transform_points2_identity)
516 GLNAME( _mesa_x86_transform_points2_identity ):
517
/* 12 = the three 4-byte registers pushed below; presumably consumed by the ARG_* macros - confirm in xform_args.h. */
518 #define FRAME_OFFSET 12
519 PUSH_L( ESI )
520 PUSH_L( EDI )
521 PUSH_L( EBX ) /* saved because EBX is used as copy scratch */
522
523 MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector (V4F_* fields) */
524 MOV_L( ARG_DEST, EDI ) /* EDI = destination vector (V4F_* fields) */
525
526 /* ARG_MATRIX is not loaded: no MATn element is read here and EDX is plain copy scratch in the loop. */
527 MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */
528
529 TEST_L( ECX, ECX )
530 JZ( LLBL(x86_p2_ir_done) ) /* zero count: leave the destination untouched */
531
532 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = byte step between input points */
533 OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
534
535 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
536 MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */
537
538 SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
539 MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first input point */
540
541 MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */
542 ADD_L( EDI, ECX ) /* ECX = one past the last output point */
543
544 CMP_L( ESI, EDI )
545 JE( LLBL(x86_p2_ir_done) ) /* same start address: nothing to copy */
546
547 ALIGNTEXT16
548 LLBL(x86_p2_ir_loop):
549
550 MOV_L( SRC0, EBX )
551 MOV_L( SRC1, EDX )
552
553 MOV_L( EBX, DST0 )
554 MOV_L( EDX, DST1 )
555
/* This label is not the target of any jump in this file. */
556 LLBL(x86_p2_ir_skip):
557
558 ADD_L( CONST(16), EDI ) /* next output point (fixed 16-byte step) */
559 ADD_L( EAX, ESI ) /* next input point (source stride) */
560 CMP_L( ECX, EDI )
561 JNE( LLBL(x86_p2_ir_loop) ) /* repeat until EDI reaches the end pointer */
562
563 LLBL(x86_p2_ir_done):
564
/* Restore the saved registers in reverse push order. */
565 POP_L( EBX )
566 POP_L( EDI )
567 POP_L( ESI )
568 RET
569 #undef FRAME_OFFSET
570
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section (no flags),
 * the conventional marker that this object does not need an executable stack.
 */
571 #if defined (__ELF__) && defined (__linux__)
572 .section .note.GNU-stack,"",%progbits
573 #endif