syntax fixes for Solaris (David Dawes)
[mesa.git] / src / mesa / x86 / x86_xform3.S
1 /* $Id: x86_xform3.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 3.5
6 *
7 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 /*
28 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
29 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
30 * in there will break the build on some platforms.
31 */
32
33 #include "matypes.h"
34 #include "xform_args.h"
35
36 SEG_TEXT
37
/* FP_ONE is the IEEE-754 single-precision bit pattern of 1.0 (0x3F800000),
 * FP_ZERO the all-zero pattern. Neither is referenced by the code visible
 * in this file. */
38 #define FP_ONE 1065353216
39 #define FP_ZERO 0
40
/* SRCn: n-th 4-byte element of the current source point (base ESI). */
41 #define SRC0 REGOFF(0, ESI)
42 #define SRC1 REGOFF(4, ESI)
43 #define SRC2 REGOFF(8, ESI)
44 #define SRC3 REGOFF(12, ESI)
/* DSTn: n-th 4-byte element of the current destination point (base EDI). */
45 #define DST0 REGOFF(0, EDI)
46 #define DST1 REGOFF(4, EDI)
47 #define DST2 REGOFF(8, EDI)
48 #define DST3 REGOFF(12, EDI)
/* MATn: n-th 4-byte element of the matrix (base EDX, byte offset 4*n). */
49 #define MAT0 REGOFF(0, EDX)
50 #define MAT1 REGOFF(4, EDX)
51 #define MAT2 REGOFF(8, EDX)
52 #define MAT3 REGOFF(12, EDX)
53 #define MAT4 REGOFF(16, EDX)
54 #define MAT5 REGOFF(20, EDX)
55 #define MAT6 REGOFF(24, EDX)
56 #define MAT7 REGOFF(28, EDX)
57 #define MAT8 REGOFF(32, EDX)
58 #define MAT9 REGOFF(36, EDX)
59 #define MAT10 REGOFF(40, EDX)
60 #define MAT11 REGOFF(44, EDX)
61 #define MAT12 REGOFF(48, EDX)
62 #define MAT13 REGOFF(52, EDX)
63 #define MAT14 REGOFF(56, EDX)
64 #define MAT15 REGOFF(60, EDX)
65
66
/*
 * _mesa_x86_transform_points3_general
 *
 * x87 loop that transforms every 3-component source point (x, y, z) by all
 * sixteen matrix elements and writes four floats per destination point:
 *
 *   DSTi = x*MAT(i) + y*MAT(4+i) + z*MAT(8+i) + MAT(12+i)     for i = 0..3
 *
 * The destination vector header is updated with the source count, size 4
 * and the VEC_SIZE_4 flag bits. If the source count is zero the routine
 * returns before touching the destination at all.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base
 *
 * ARG_SOURCE / ARG_DEST / ARG_MATRIX and the V4F_* offsets are not defined
 * in this file; presumably they come from xform_args.h / matypes.h.
 *
 * The trailing comments on the FPU instructions track the x87 register
 * stack, ST(0) first. F4..F7 are the four running sums, F0..F3 the
 * per-coordinate products that get folded into them.
 */
67 ALIGNTEXT16
68 GLOBL GLNAME( _mesa_x86_transform_points3_general )
69 GLNAME( _mesa_x86_transform_points3_general ):
70
/* 8 = the two 4-byte register pushes below; presumably read by the ARG_*
 * macros to locate the stack arguments -- confirm in xform_args.h. */
71 #define FRAME_OFFSET 8
72 PUSH_L( ESI )
73 PUSH_L( EDI )
74
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
77
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
80
/* Nothing to do for an empty source vector. */
81 TEST_L( ECX, ECX )
82 JZ( LLBL(x86_p3_gr_done) )
83
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
86
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
89
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
90 SHL_L( CONST(4), ECX )
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
92
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
94 ADD_L( EDI, ECX )
95
96 ALIGNTEXT16
97 LLBL(x86_p3_gr_loop):
98
/* x * (MAT0..MAT3) starts the four running sums. */
99 FLD_S( SRC0 ) /* F4 */
100 FMUL_S( MAT0 )
101 FLD_S( SRC0 ) /* F5 F4 */
102 FMUL_S( MAT1 )
103 FLD_S( SRC0 ) /* F6 F5 F4 */
104 FMUL_S( MAT2 )
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
106 FMUL_S( MAT3 )
107
/* y * (MAT4..MAT7), fills all eight x87 stack slots. */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
109 FMUL_S( MAT4 )
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
111 FMUL_S( MAT5 )
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
113 FMUL_S( MAT6 )
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
115 FMUL_S( MAT7 )
116
/* Fold the y products into the sums: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
123
/* z * (MAT8..MAT11). */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
125 FMUL_S( MAT8 )
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
127 FMUL_S( MAT9 )
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
129 FMUL_S( MAT10 )
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
131 FMUL_S( MAT11 )
132
/* Fold the z products into the sums, same pattern as above. */
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
139
/* Add MAT12..MAT15 to F4..F7; each FXCH brings the next sum to ST(0). */
140 FXCH( ST(3) ) /* F4 F6 F5 F7 */
141 FADD_S( MAT12 )
142 FXCH( ST(2) ) /* F5 F6 F4 F7 */
143 FADD_S( MAT13 )
144 FXCH( ST(1) ) /* F6 F5 F4 F7 */
145 FADD_S( MAT14 )
146 FXCH( ST(3) ) /* F7 F5 F4 F6 */
147 FADD_S( MAT15 )
148
/* Store F4..F7 to DST0..DST3, leaving the x87 stack empty. */
149 FXCH( ST(2) ) /* F4 F5 F7 F6 */
150 FSTP_S( DST0 ) /* F5 F7 F6 */
151 FSTP_S( DST1 ) /* F7 F6 */
152 FXCH( ST(1) ) /* F6 F7 */
153 FSTP_S( DST2 ) /* F7 */
154 FSTP_S( DST3 ) /* */
155
/* This label is not the target of any jump in the visible source. */
156 LLBL(x86_p3_gr_skip):
157
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
158 ADD_L( CONST(16), EDI )
159 ADD_L( EAX, ESI )
160 CMP_L( ECX, EDI )
161 JNE( LLBL(x86_p3_gr_loop) )
162
163 LLBL(x86_p3_gr_done):
164
/* Restore the saved registers in reverse push order. */
165 POP_L( EDI )
166 POP_L( ESI )
167 RET
168 #undef FRAME_OFFSET
169
170
171
172
/*
 * _mesa_x86_transform_points3_perspective
 *
 * Transforms 3-component source points using only MAT0, MAT5, MAT8, MAT9,
 * MAT10 and MAT14, and writes four floats per destination point:
 *
 *   DST0 = x*MAT0 + z*MAT8
 *   DST1 = y*MAT5 + z*MAT9
 *   DST2 = z*MAT10 + MAT14
 *   DST3 = -z   (sign bit of the raw 32-bit value flipped in EBX)
 *
 * The destination header gets the source count, size 4 and the VEC_SIZE_4
 * flag bits. A zero source count returns before the destination is touched.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base, EBX = integer scratch for the negated z
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h. Trailing FPU comments list the x87 stack with
 * ST(0) first.
 */
173 ALIGNTEXT16
174 GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
175 GLNAME( _mesa_x86_transform_points3_perspective ):
176
/* 12 = the three 4-byte register pushes below; presumably read by the
 * ARG_* macros to locate the stack arguments -- confirm in xform_args.h. */
177 #define FRAME_OFFSET 12
178 PUSH_L( ESI )
179 PUSH_L( EDI )
180 PUSH_L( EBX )
181
182 MOV_L( ARG_SOURCE, ESI )
183 MOV_L( ARG_DEST, EDI )
184
185 MOV_L( ARG_MATRIX, EDX )
186 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
187
/* Nothing to do for an empty source vector. */
188 TEST_L( ECX, ECX )
189 JZ( LLBL(x86_p3_pr_done) )
190
191 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
192 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
193
194 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
195 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
196
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
197 SHL_L( CONST(4), ECX )
198 MOV_L( REGOFF(V4F_START, ESI), ESI )
199
200 MOV_L( REGOFF(V4F_START, EDI), EDI )
201 ADD_L( EDI, ECX )
202
203 ALIGNTEXT16
204 LLBL(x86_p3_pr_loop):
205
206 FLD_S( SRC0 ) /* F4 */
207 FMUL_S( MAT0 )
208
209 FLD_S( SRC1 ) /* F5 F4 */
210 FMUL_S( MAT5 )
211
212 FLD_S( SRC2 ) /* F0 F5 F4 */
213 FMUL_S( MAT8 )
214 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
215 FMUL_S( MAT9 )
216 FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
217 FMUL_S( MAT10 )
218
/* F4 += z*MAT8, F5 += z*MAT9, then F6 = MAT14 + z*MAT10. */
219 FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
220 FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
221 FADDP( ST0, ST(2) ) /* F2 F5 F4 */
222 FLD_S( MAT14 ) /* F6 F2 F5 F4 */
223 FXCH( ST(1) ) /* F2 F6 F5 F4 */
224 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
225
/* Negate z with integer ops: -2147483648 is 0x80000000, the sign bit. */
226 MOV_L( SRC2, EBX )
227 XOR_L( CONST(-2147483648), EBX )/* change sign */
228
229 FXCH( ST(2) ) /* F4 F5 F6 */
230 FSTP_S( DST0 ) /* F5 F6 */
231 FSTP_S( DST1 ) /* F6 */
232 FSTP_S( DST2 ) /* */
233 MOV_L( EBX, DST3 )
234
/* This label is not the target of any jump in the visible source. */
235 LLBL(x86_p3_pr_skip):
236
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
237 ADD_L( CONST(16), EDI )
238 ADD_L( EAX, ESI )
239 CMP_L( ECX, EDI )
240 JNE( LLBL(x86_p3_pr_loop) )
241
242 LLBL(x86_p3_pr_done):
243
/* Restore the saved registers in reverse push order. */
244 POP_L( EBX )
245 POP_L( EDI )
246 POP_L( ESI )
247 RET
248 #undef FRAME_OFFSET
249
250
251
252
/*
 * _mesa_x86_transform_points3_3d
 *
 * Transforms 3-component source points with the upper three rows of each
 * matrix column and writes three floats per destination point:
 *
 *   DSTi = x*MAT(i) + y*MAT(4+i) + z*MAT(8+i) + MAT(12+i)     for i = 0..2
 *
 * DST3 is not written. The destination header gets the source count,
 * size 3 and the VEC_SIZE_3 flag bits. A zero source count returns before
 * the destination is touched.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h. Trailing FPU comments list the x87 stack with
 * ST(0) first; F4..F6 are the running sums.
 */
253 ALIGNTEXT16
254 GLOBL GLNAME( _mesa_x86_transform_points3_3d )
255 GLNAME( _mesa_x86_transform_points3_3d ):
256
/* 8 = the two 4-byte register pushes below; presumably read by the ARG_*
 * macros to locate the stack arguments -- confirm in xform_args.h. */
257 #define FRAME_OFFSET 8
258 PUSH_L( ESI )
259 PUSH_L( EDI )
260
261 MOV_L( ARG_SOURCE, ESI )
262 MOV_L( ARG_DEST, EDI )
263
264 MOV_L( ARG_MATRIX, EDX )
265 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
266
/* Nothing to do for an empty source vector. */
267 TEST_L( ECX, ECX )
268 JZ( LLBL(x86_p3_3dr_done) )
269
270 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
271 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
272
273 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
274 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
275
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
276 SHL_L( CONST(4), ECX )
277 MOV_L( REGOFF(V4F_START, ESI), ESI )
278
279 MOV_L( REGOFF(V4F_START, EDI), EDI )
280 ADD_L( EDI, ECX )
281
282 ALIGNTEXT16
283 LLBL(x86_p3_3dr_loop):
284
/* x * (MAT0..MAT2) starts the three running sums. */
285 FLD_S( SRC0 ) /* F4 */
286 FMUL_S( MAT0 )
287 FLD_S( SRC0 ) /* F5 F4 */
288 FMUL_S( MAT1 )
289 FLD_S( SRC0 ) /* F6 F5 F4 */
290 FMUL_S( MAT2 )
291
/* y * (MAT4..MAT6). */
292 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
293 FMUL_S( MAT4 )
294 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
295 FMUL_S( MAT5 )
296 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
297 FMUL_S( MAT6 )
298
/* Fold the y products into the sums: F4+=F0, F5+=F1, F6+=F2. */
299 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
300 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
301 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
302 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
303
/* z * (MAT8..MAT10). */
304 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
305 FMUL_S( MAT8 )
306 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
307 FMUL_S( MAT9 )
308 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
309 FMUL_S( MAT10 )
310
/* Fold the z products into the sums, same pattern as above. */
311 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
312 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
313 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
314 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
315
/* Add MAT12..MAT14 to F4..F6; each FXCH brings the next sum to ST(0). */
316 FXCH( ST(2) ) /* F4 F5 F6 */
317 FADD_S( MAT12 )
318 FXCH( ST(1) ) /* F5 F4 F6 */
319 FADD_S( MAT13 )
320 FXCH( ST(2) ) /* F6 F4 F5 */
321 FADD_S( MAT14 )
322
/* Store F4..F6 to DST0..DST2, leaving the x87 stack empty. */
323 FXCH( ST(1) ) /* F4 F6 F5 */
324 FSTP_S( DST0 ) /* F6 F5 */
325 FXCH( ST(1) ) /* F5 F6 */
326 FSTP_S( DST1 ) /* F6 */
327 FSTP_S( DST2 ) /* */
328
/* This label is not the target of any jump in the visible source. */
329 LLBL(x86_p3_3dr_skip):
330
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
331 ADD_L( CONST(16), EDI )
332 ADD_L( EAX, ESI )
333 CMP_L( ECX, EDI )
334 JNE( LLBL(x86_p3_3dr_loop) )
335
336 LLBL(x86_p3_3dr_done):
337
/* Restore the saved registers in reverse push order. */
338 POP_L( EDI )
339 POP_L( ESI )
340 RET
341 #undef FRAME_OFFSET
342
343
344
345
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * Transforms 3-component source points using only the diagonal elements
 * MAT0, MAT5, MAT10 and the offsets MAT12..MAT14:
 *
 *   DST0 = x*MAT0  + MAT12
 *   DST1 = y*MAT5  + MAT13
 *   DST2 = z*MAT10 + MAT14
 *
 * DST3 is not written. The destination header gets the source count,
 * size 3 and the VEC_SIZE_3 flag bits. A zero source count returns before
 * the destination is touched.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h. Trailing FPU comments list the x87 stack with
 * ST(0) first.
 */
346 ALIGNTEXT16
347 GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
348 GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
349
/* 8 = the two 4-byte register pushes below; presumably read by the ARG_*
 * macros to locate the stack arguments -- confirm in xform_args.h. */
350 #define FRAME_OFFSET 8
351 PUSH_L( ESI )
352 PUSH_L( EDI )
353
354 MOV_L( ARG_SOURCE, ESI )
355 MOV_L( ARG_DEST, EDI )
356
357
358 MOV_L( ARG_MATRIX, EDX )
359 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
360
/* Nothing to do for an empty source vector. */
361 TEST_L( ECX, ECX )
362 JZ( LLBL(x86_p3_3dnrr_done) )
363
364 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
365 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
366
367 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
368 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
369
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
370 SHL_L( CONST(4), ECX )
371 MOV_L( REGOFF(V4F_START, ESI), ESI )
372
373 MOV_L( REGOFF(V4F_START, EDI), EDI )
374 ADD_L( EDI, ECX )
375
376 ALIGNTEXT16
377 LLBL(x86_p3_3dnrr_loop):
378
379 FLD_S( SRC0 ) /* F4 */
380 FMUL_S( MAT0 )
381
382 FLD_S( SRC1 ) /* F1 F4 */
383 FMUL_S( MAT5 )
384
385 FLD_S( SRC2 ) /* F2 F1 F4 */
386 FMUL_S( MAT10 )
387
/* F4 += MAT12; F5 = MAT13 + F1; F6 = MAT14 + F2. The offsets are loaded
 * onto the stack and the products are added into them. */
388 FXCH( ST(2) ) /* F4 F1 F2 */
389 FADD_S( MAT12 )
390 FLD_S( MAT13 ) /* F5 F4 F1 F2 */
391 FXCH( ST(2) ) /* F1 F4 F5 F2 */
392 FADDP( ST0, ST(2) ) /* F4 F5 F2 */
393 FLD_S( MAT14 ) /* F6 F4 F5 F2 */
394 FXCH( ST(3) ) /* F2 F4 F5 F6 */
395 FADDP( ST0, ST(3) ) /* F4 F5 F6 */
396
397 FSTP_S( DST0 ) /* F5 F6 */
398 FSTP_S( DST1 ) /* F6 */
399 FSTP_S( DST2 ) /* */
400
/* This label is not the target of any jump in the visible source. */
401 LLBL(x86_p3_3dnrr_skip):
402
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
403 ADD_L( CONST(16), EDI )
404 ADD_L( EAX, ESI )
405 CMP_L( ECX, EDI )
406 JNE( LLBL(x86_p3_3dnrr_loop) )
407
408 LLBL(x86_p3_3dnrr_done):
409
/* Restore the saved registers in reverse push order. */
410 POP_L( EDI )
411 POP_L( ESI )
412 RET
413 #undef FRAME_OFFSET
414
415
416
417
/*
 * _mesa_x86_transform_points3_2d
 *
 * Transforms the x and y components of each source point with MAT0, MAT1,
 * MAT4, MAT5 plus the offsets MAT12, MAT13, and copies z unchanged:
 *
 *   DST0 = x*MAT0 + y*MAT4 + MAT12
 *   DST1 = x*MAT1 + y*MAT5 + MAT13
 *   DST2 = z   (raw 32-bit copy through EBX, no FPU involvement)
 *
 * DST3 is not written. The destination header gets the source count,
 * size 3 and the VEC_SIZE_3 flag bits. A zero source count returns before
 * the destination is touched.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base, EBX = integer scratch for z
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h. Trailing FPU comments list the x87 stack with
 * ST(0) first.
 */
418 ALIGNTEXT16
419 GLOBL GLNAME( _mesa_x86_transform_points3_2d )
420 GLNAME( _mesa_x86_transform_points3_2d ):
421
/* 12 = the three 4-byte register pushes below; presumably read by the
 * ARG_* macros to locate the stack arguments -- confirm in xform_args.h. */
422 #define FRAME_OFFSET 12
423 PUSH_L( ESI )
424 PUSH_L( EDI )
425 PUSH_L( EBX )
426
427 MOV_L( ARG_SOURCE, ESI )
428 MOV_L( ARG_DEST, EDI )
429
430 MOV_L( ARG_MATRIX, EDX )
431 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
432
/* Nothing to do for an empty source vector. */
433 TEST_L( ECX, ECX )
434 JZ( LLBL(x86_p3_2dr_done) )
435
436 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
437 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
438
439 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
440 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
441
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
442 SHL_L( CONST(4), ECX )
443 MOV_L( REGOFF(V4F_START, ESI), ESI )
444
445 MOV_L( REGOFF(V4F_START, EDI), EDI )
446 ADD_L( EDI, ECX )
447
448 ALIGNTEXT16
449 LLBL(x86_p3_2dr_loop):
450
451 FLD_S( SRC0 ) /* F4 */
452 FMUL_S( MAT0 )
453 FLD_S( SRC0 ) /* F5 F4 */
454 FMUL_S( MAT1 )
455
456 FLD_S( SRC1 ) /* F0 F5 F4 */
457 FMUL_S( MAT4 )
458 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
459 FMUL_S( MAT5 )
460
/* Fold the y products into the sums: F4+=F0, F5+=F1. */
461 FXCH( ST(1) ) /* F0 F1 F5 F4 */
462 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
463 FADDP( ST0, ST(1) ) /* F5 F4 */
464
465 FXCH( ST(1) ) /* F4 F5 */
466 FADD_S( MAT12 )
467 FXCH( ST(1) ) /* F5 F4 */
468 FADD_S( MAT13 )
469
/* z passes through untouched as a 32-bit integer move. */
470 MOV_L( SRC2, EBX )
471
472 FXCH( ST(1) ) /* F4 F5 */
473 FSTP_S( DST0 ) /* F5 */
474 FSTP_S( DST1 ) /* */
475 MOV_L( EBX, DST2 )
476
/* This label is not the target of any jump in the visible source. */
477 LLBL(x86_p3_2dr_skip):
478
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
479 ADD_L( CONST(16), EDI )
480 ADD_L( EAX, ESI )
481 CMP_L( ECX, EDI )
482 JNE( LLBL(x86_p3_2dr_loop) )
483
484 LLBL(x86_p3_2dr_done):
485
/* Restore the saved registers in reverse push order. */
486 POP_L( EBX )
487 POP_L( EDI )
488 POP_L( ESI )
489 RET
490 #undef FRAME_OFFSET
491
492
493
494
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * Transforms the x and y components of each source point using only MAT0,
 * MAT5 and the offsets MAT12, MAT13, and copies z unchanged:
 *
 *   DST0 = x*MAT0 + MAT12
 *   DST1 = y*MAT5 + MAT13
 *   DST2 = z   (raw 32-bit copy through EBX, no FPU involvement)
 *
 * DST3 is not written. The destination header gets the source count,
 * size 3 and the VEC_SIZE_3 flag bits. A zero source count returns before
 * the destination is touched.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EDX = matrix base, EBX = integer scratch for z
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h. Trailing FPU comments list the x87 stack with
 * ST(0) first.
 */
495 ALIGNTEXT16
496 GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
497 GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
498
/* 12 = the three 4-byte register pushes below; presumably read by the
 * ARG_* macros to locate the stack arguments -- confirm in xform_args.h. */
499 #define FRAME_OFFSET 12
500 PUSH_L( ESI )
501 PUSH_L( EDI )
502 PUSH_L( EBX )
503
504 MOV_L( ARG_SOURCE, ESI )
505 MOV_L( ARG_DEST, EDI )
506
507 MOV_L( ARG_MATRIX, EDX )
508 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
509
/* Nothing to do for an empty source vector. */
510 TEST_L( ECX, ECX )
511 JZ( LLBL(x86_p3_2dnrr_done) )
512
513 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
514 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
515
516 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
517 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
518
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
519 SHL_L( CONST(4), ECX )
520 MOV_L( REGOFF(V4F_START, ESI), ESI )
521
522 MOV_L( REGOFF(V4F_START, EDI), EDI )
523 ADD_L( EDI, ECX )
524
525 ALIGNTEXT16
526 LLBL(x86_p3_2dnrr_loop):
527
528 FLD_S( SRC0 ) /* F4 */
529 FMUL_S( MAT0 )
530
531 FLD_S( SRC1 ) /* F1 F4 */
532 FMUL_S( MAT5 )
533
/* F4 += MAT12; then MAT13 is loaded as F5 and the y product is added in. */
534 FXCH( ST(1) ) /* F4 F1 */
535 FADD_S( MAT12 )
536 FLD_S( MAT13 ) /* F5 F4 F1 */
537
538 FXCH( ST(2) ) /* F1 F4 F5 */
539 FADDP( ST0, ST(2) ) /* F4 F5 */
540
/* z passes through untouched as a 32-bit integer move. */
541 MOV_L( SRC2, EBX )
542
543 FSTP_S( DST0 ) /* F5 */
544 FSTP_S( DST1 ) /* */
545 MOV_L( EBX, DST2 )
546
/* This label is not the target of any jump in the visible source. */
547 LLBL(x86_p3_2dnrr_skip):
548
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
549 ADD_L( CONST(16), EDI )
550 ADD_L( EAX, ESI )
551 CMP_L( ECX, EDI )
552 JNE( LLBL(x86_p3_2dnrr_loop) )
553
554 LLBL(x86_p3_2dnrr_done):
555
/* Restore the saved registers in reverse push order. */
556 POP_L( EBX )
557 POP_L( EDI )
558 POP_L( ESI )
559 RET
560 #undef FRAME_OFFSET
561
562
563
564
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three 32-bit elements of every source point to the
 * destination without any arithmetic:
 *
 *   DST0 = SRC0, DST1 = SRC1, DST2 = SRC2
 *
 * DST3 is not written. The destination header gets the source count,
 * size 3 and the VEC_SIZE_3 flag bits. A zero source count returns before
 * the destination is touched. If the source and destination data pointers
 * are equal, the header is still updated but the copy loop is skipped.
 *
 * Register roles inside the loop:
 *   ESI = current source point      (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination start + 16*count, the loop-end sentinel
 *   EBX, EBP, EDX = scratch for the three copied elements
 *
 * EDX is loaded with ARG_MATRIX on entry, but the matrix is never read in
 * this routine and EDX is overwritten inside the loop.
 *
 * ARG_* and V4F_* are not defined in this file; presumably they come from
 * xform_args.h / matypes.h.
 *
 * NOTE(review): unlike the other routines in this file there is no
 * "#undef FRAME_OFFSET" after the RET in the visible source.
 */
565 ALIGNTEXT16
566 GLOBL GLNAME( _mesa_x86_transform_points3_identity )
567 GLNAME(_mesa_x86_transform_points3_identity ):
568
/* 16 = the four 4-byte register pushes below; presumably read by the
 * ARG_* macros to locate the stack arguments -- confirm in xform_args.h. */
569 #define FRAME_OFFSET 16
570 PUSH_L( ESI )
571 PUSH_L( EDI )
572 PUSH_L( EBX )
573 PUSH_L( EBP )
574
575 MOV_L( ARG_SOURCE, ESI )
576 MOV_L( ARG_DEST, EDI )
577
578 MOV_L( ARG_MATRIX, EDX )
579 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
580
/* Nothing to do for an empty source vector. */
581 TEST_L( ECX, ECX )
582 JZ( LLBL(x86_p3_ir_done) )
583
584 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
585 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
586
587 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
588 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
589
/* ECX = count * 16 bytes; after the ADD below it is the end address of the
 * destination data. ESI/EDI switch from header pointers to data pointers. */
590 SHL_L( CONST(4), ECX )
591 MOV_L( REGOFF(V4F_START, ESI), ESI )
592
593 MOV_L( REGOFF(V4F_START, EDI), EDI )
594 ADD_L( EDI, ECX )
595
/* Same data pointer for source and destination: skip the copy loop. */
596 CMP_L( ESI, EDI )
597 JE( LLBL(x86_p3_ir_done) )
598
599 ALIGNTEXT16
600 LLBL(x86_p3_ir_loop):
601
/* The "#if 1" selects the integer-register copy; the x87 variant in the
 * #else branch is compiled out. */
602 #if 1
603 MOV_L( SRC0, EBX )
604 MOV_L( SRC1, EBP )
605 MOV_L( SRC2, EDX )
606
607 MOV_L( EBX, DST0 )
608 MOV_L( EBP, DST1 )
609 MOV_L( EDX, DST2 )
610 #else
611 FLD_S( SRC0 )
612 FLD_S( SRC1 )
613 FLD_S( SRC2 )
614
615 FSTP_S( DST2 )
616 FSTP_S( DST1 )
617 FSTP_S( DST0 )
618 #endif
619
/* This label is not the target of any jump in the visible source. */
620 LLBL(x86_p3_ir_skip):
621
/* Next point: dest += 16 bytes, source += stride; stop at the sentinel. */
622 ADD_L( CONST(16), EDI )
623 ADD_L( EAX, ESI )
624 CMP_L( ECX, EDI )
625 JNE( LLBL(x86_p3_ir_loop) )
626
627 LLBL(x86_p3_ir_done):
628
/* Restore the saved registers in reverse push order. */
629 POP_L( EBP )
630 POP_L( EBX )
631 POP_L( EDI )
632 POP_L( ESI )
633 RET