/*
 * Repository-viewer residue kept for provenance (not part of the source):
 *   minimize the number of DIVs
 *   [mesa.git] / src / mesa / x86 / x86_xform4.S
 */
/* $Id: x86_xform4.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */

/*
 * Mesa 3-D graphics library
 * Version: 3.5
 *
 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
 * in there will break the build on some platforms.
 */

#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Integer constants holding IEEE-754 single-precision bit patterns:
 * 1065353216 == 0x3F800000 is the encoding of 1.0f, 0 is the encoding of +0.0f.
 */
#define FP_ONE	1065353216
#define FP_ZERO	0

/*
 * Operand shorthands used by every routine in this file.
 *
 *   SRCn  - 32-bit component n of the current source point, addressed off ESI
 *   DSTn  - 32-bit component n of the current output point, addressed off EDI
 *   MATn  - 32-bit element n of the 16-element matrix, addressed off EDX
 *
 * Each index step is 4 bytes, so the routines must keep ESI/EDI/EDX pointing
 * at the start of the current point / matrix while these names are in use.
 */
#define SRC0	REGOFF(0, ESI)
#define SRC1	REGOFF(4, ESI)
#define SRC2	REGOFF(8, ESI)
#define SRC3	REGOFF(12, ESI)
#define DST0	REGOFF(0, EDI)
#define DST1	REGOFF(4, EDI)
#define DST2	REGOFF(8, EDI)
#define DST3	REGOFF(12, EDI)
#define MAT0	REGOFF(0, EDX)
#define MAT1	REGOFF(4, EDX)
#define MAT2	REGOFF(8, EDX)
#define MAT3	REGOFF(12, EDX)
#define MAT4	REGOFF(16, EDX)
#define MAT5	REGOFF(20, EDX)
#define MAT6	REGOFF(24, EDX)
#define MAT7	REGOFF(28, EDX)
#define MAT8	REGOFF(32, EDX)
#define MAT9	REGOFF(36, EDX)
#define MAT10	REGOFF(40, EDX)
#define MAT11	REGOFF(44, EDX)
#define MAT12	REGOFF(48, EDX)
#define MAT13	REGOFF(52, EDX)
#define MAT14	REGOFF(56, EDX)
#define MAT15	REGOFF(60, EDX)


/*
 * _mesa_x86_transform_points4_general
 *
 * Multiply every 4-component point of the source vector by a full
 * 16-element matrix using x87 arithmetic.  With (x, y, z, w) the source
 * components and m0..m15 the matrix elements:
 *
 *	dst[0] = x*m0 + y*m4 + z*m8  + w*m12
 *	dst[1] = x*m1 + y*m5 + z*m9  + w*m13
 *	dst[2] = x*m2 + y*m6 + z*m10 + w*m14
 *	dst[3] = x*m3 + y*m7 + z*m11 + w*m15
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI and EDI are saved and restored; EAX, ECX, EDX and EFLAGS are
 *       clobbered.
 * x87:  eight values are live at the deepest point and all are popped again,
 *       so the x87 register stack has to be empty on entry.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4..F7 are the four
 * running sums (dst[0]..dst[3]); F0..F3 are products about to be folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_general)
GLNAME(_mesa_x86_transform_points4_general):

/* Bytes pushed by the prologue below (two registers). */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* x * column 0 of the stack picture: starts the four sums. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* y products, then fold each into its sum. */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* z products. */
	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* w products. */
	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* Bring the sums to the top in output order and store them. */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )			/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transform 4-component points using only the matrix elements this routine
 * reads (m0, m5, m8, m9, m10, m14).  With (x, y, z, w) the source
 * components:
 *
 *	dst[0] = x*m0 + z*m8
 *	dst[1] = y*m5 + z*m9
 *	dst[2] = z*m10 + w*m14
 *	dst[3] = -z	(made by flipping the sign bit of z's raw 32 bits)
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and EFLAGS
 *       are clobbered.
 * x87:  at most five values are live at once; all are popped before return.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4..F6 are the running
 * sums (dst[0]..dst[2]); F0..F2 are products about to be folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_perspective)
GLNAME(_mesa_x86_transform_points4_perspective):

/* Bytes pushed by the prologue below (three registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* dst[3] = -z, computed in the integer unit on the raw float bits. */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_3d
 *
 * Transform 4-component points with the first three result rows of the
 * matrix and pass w through unchanged.  With (x, y, z, w) the source
 * components and m0..m14 the matrix elements this routine reads:
 *
 *	dst[0] = x*m0 + y*m4 + z*m8  + w*m12
 *	dst[1] = x*m1 + y*m5 + z*m9  + w*m13
 *	dst[2] = x*m2 + y*m6 + z*m10 + w*m14
 *	dst[3] = w	(raw 32-bit copy)
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and EFLAGS
 *       are clobbered.
 * x87:  at most six values are live at once; all are popped before return.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4..F6 are the running
 * sums (dst[0]..dst[2]); F0..F2 are products about to be folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d)
GLNAME(_mesa_x86_transform_points4_3d):

/* Bytes pushed by the prologue below (three registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	/* x products start the three sums. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y products. */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* z products. */
	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* w products. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* w is carried over as raw bits through the integer unit. */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transform 4-component points using only the matrix elements this routine
 * reads (m0, m5, m10 and m12..m14) and pass w through unchanged.  With
 * (x, y, z, w) the source components:
 *
 *	dst[0] = x*m0  + w*m12
 *	dst[1] = y*m5  + w*m13
 *	dst[2] = z*m10 + w*m14
 *	dst[3] = w	(raw 32-bit copy)
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and EFLAGS
 *       are clobbered.
 * x87:  at most six values are live at once; all are popped before return.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4..F6 are the running
 * sums (dst[0]..dst[2]); F0..F2 are products about to be folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

/* Bytes pushed by the prologue below (three registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	/* Per-axis scale terms start the three sums. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	/* w products, folded into the sums below. */
	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* w is carried over as raw bits through the integer unit. */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_2d
 *
 * Transform the x and y components of 4-component points and pass z and w
 * through unchanged.  With (x, y, z, w) the source components and the
 * matrix elements this routine reads (m0, m1, m4, m5, m12, m13):
 *
 *	dst[0] = x*m0 + y*m4 + w*m12
 *	dst[1] = x*m1 + y*m5 + w*m13
 *	dst[2] = z	(raw 32-bit copy)
 *	dst[3] = w	(raw 32-bit copy)
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI, EDI, EBX and EBP are saved and restored (EBP is used as plain
 *       scratch, not as a frame pointer); EAX, ECX, EDX and EFLAGS are
 *       clobbered.
 * x87:  at most four values are live at once; all are popped before return.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4 and F5 are the
 * running sums (dst[0], dst[1]); F0 and F1 are products about to be folded
 * in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d)
GLNAME(_mesa_x86_transform_points4_2d):

/* Bytes pushed by the prologue below (four registers). */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	/* x products start the two sums. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y products. */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* w products. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* z and w are carried over as raw bits through the integer unit. */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transform the x and y components of 4-component points using only the
 * matrix elements this routine reads (m0, m5, m12, m13) and pass z and w
 * through unchanged.  With (x, y, z, w) the source components:
 *
 *	dst[0] = x*m0 + w*m12
 *	dst[1] = y*m5 + w*m13
 *	dst[2] = z	(raw 32-bit copy)
 *	dst[3] = w	(raw 32-bit copy)
 *
 * In:   stack arguments fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 *       (defined outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  results are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 * Regs: ESI, EDI, EBX and EBP are saved and restored (EBP is used as plain
 *       scratch, not as a frame pointer); EAX, ECX, EDX and EFLAGS are
 *       clobbered.
 * x87:  at most four values are live at once; all are popped before return.
 *
 * Every component of a point is loaded before the first store to the
 * matching output point.
 *
 * Stack pictures in the comments list ST(0) first.  F4 and F5 are the
 * running sums (dst[0], dst[1]); F0 and F1 are products about to be folded
 * in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d_no_rot)
GLNAME(_mesa_x86_transform_points4_2d_no_rot):

/* Bytes pushed by the prologue below (four registers). */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	/* Per-axis scale terms start the two sums. */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	/* w products, folded into the sums below. */
	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* z and w are carried over as raw bits through the integer unit. */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET


/*
 * _mesa_x86_transform_points4_identity
 *
 * Copy every 4-component point of the source vector to the destination
 * unchanged, as four raw 32-bit moves per point.  No x87 instructions are
 * used and the matrix argument is never read.
 *
 * In:   stack arguments fetched through ARG_SOURCE and ARG_DEST (defined
 *       outside this file; presumably in xform_args.h, relative to
 *       FRAME_OFFSET).  Source points are read from the source's V4F_START,
 *       advancing by V4F_STRIDE bytes per point.
 * Out:  points are stored 16 bytes apart beginning at the destination's
 *       V4F_START.  The destination's V4F_COUNT, V4F_SIZE (4) and V4F_FLAGS
 *       (|= VEC_SIZE_4) are updated even when the source holds no points.
 *       When source and destination start at the same address the copy is
 *       skipped; only the bookkeeping is updated.
 * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and EFLAGS
 *       are clobbered.  EBX and EDX serve as copy scratch.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_identity)
GLNAME(_mesa_x86_transform_points4_identity):

/* Bytes pushed by the prologue below (three registers). */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/*
	 * Update the destination's bookkeeping before the empty-input test so
	 * that a stale count from an earlier use is never left behind.
	 */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first output point */
	ADD_L( EDI, ECX )			/* ECX = one past the last output point */

	/* Same start address: nothing to copy. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET