Remove CVS keywords.
[mesa.git] / src / mesa / x86 / x86_xform3.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 3.5
5 *
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32 #include "matypes.h"
33 #include "xform_args.h"
34
35 SEG_TEXT
36
/*
 * Integer constants holding single-precision bit patterns:
 * 1065353216 == 0x3F800000, the IEEE-754 encoding of 1.0f, and 0 is the
 * encoding of +0.0f.  Neither is referenced by the routines shown in
 * this file.
 */
37 #define FP_ONE 1065353216
38 #define FP_ZERO 0
39
/*
 * Memory operands shared by every routine below.  Each names one 32-bit
 * element at byte offset 4*n from a base register:
 *   SRCn  - element n of the current source point      (base ESI)
 *   DSTn  - element n of the current destination point (base EDI)
 *   MATn  - element n of the 16-entry matrix           (base EDX)
 */
40 #define SRC0 REGOFF(0, ESI)
41 #define SRC1 REGOFF(4, ESI)
42 #define SRC2 REGOFF(8, ESI)
43 #define SRC3 REGOFF(12, ESI)
44 #define DST0 REGOFF(0, EDI)
45 #define DST1 REGOFF(4, EDI)
46 #define DST2 REGOFF(8, EDI)
47 #define DST3 REGOFF(12, EDI)
48 #define MAT0 REGOFF(0, EDX)
49 #define MAT1 REGOFF(4, EDX)
50 #define MAT2 REGOFF(8, EDX)
51 #define MAT3 REGOFF(12, EDX)
52 #define MAT4 REGOFF(16, EDX)
53 #define MAT5 REGOFF(20, EDX)
54 #define MAT6 REGOFF(24, EDX)
55 #define MAT7 REGOFF(28, EDX)
56 #define MAT8 REGOFF(32, EDX)
57 #define MAT9 REGOFF(36, EDX)
58 #define MAT10 REGOFF(40, EDX)
59 #define MAT11 REGOFF(44, EDX)
60 #define MAT12 REGOFF(48, EDX)
61 #define MAT13 REGOFF(52, EDX)
62 #define MAT14 REGOFF(56, EDX)
63 #define MAT15 REGOFF(60, EDX)
64
65
66 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_general
 *
 * Transforms every 3-component source point by all 16 matrix entries and
 * stores a 4-component result:
 *
 *   dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i],  i = 0..3
 *
 * where m[n] is the float at byte offset 4*n from the matrix pointer.
 *
 * The source, destination and matrix pointers are read through ARG_SOURCE,
 * ARG_DEST and ARG_MATRIX, which are defined outside this file (presumably
 * in xform_args.h, using FRAME_OFFSET to step over the registers pushed
 * here -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point, advanced by the source stride held in EAX
 *   EDI = current destination point, advanced by 16 bytes per point
 *   ECX = destination end pointer (destination data start + count*16)
 *   EDX = matrix base
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 *
 * If the source count is zero the routine returns without touching the
 * destination's flags, count or size fields.
 */
67 GLOBL GLNAME( _mesa_x86_transform_points3_general )
68 HIDDEN(_mesa_x86_transform_points3_general)
69 GLNAME( _mesa_x86_transform_points3_general ):
70
/* 8 = bytes pushed by the two PUSH_L instructions below. */
71 #define FRAME_OFFSET 8
72 PUSH_L( ESI )
73 PUSH_L( EDI )
74
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
77
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
80
/* Empty source vector: skip everything, including the header updates. */
81 TEST_L( ECX, ECX )
82 JZ( LLBL(x86_p3_gr_done) )
83
/* EAX = source stride in bytes; flag the destination as 4-component. */
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
86
/* Propagate the point count and record the output size (4). */
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
89
/*
 * ECX = count * 16 (bytes of output); ESI/EDI are replaced by the data
 * pointers read from the V4F_START fields; ECX then becomes the
 * destination end pointer used as the loop bound.
 */
90 SHL_L( CONST(4), ECX )
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
92
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
94 ADD_L( EDI, ECX )
95
96 ALIGNTEXT16
97 LLBL(x86_p3_gr_loop):
98
/* F4..F7 = src[0] * m0..m3.  Stack comments list ST(0) first. */
99 FLD_S( SRC0 ) /* F4 */
100 FMUL_S( MAT0 )
101 FLD_S( SRC0 ) /* F5 F4 */
102 FMUL_S( MAT1 )
103 FLD_S( SRC0 ) /* F6 F5 F4 */
104 FMUL_S( MAT2 )
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
106 FMUL_S( MAT3 )
107
/*
 * F0..F3 = src[1] * m4..m7.  After the fourth load eight values are live,
 * i.e. every x87 stack register is in use, so the routine relies on the
 * x87 stack being empty on entry.
 */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
109 FMUL_S( MAT4 )
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
111 FMUL_S( MAT5 )
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
113 FMUL_S( MAT6 )
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
115 FMUL_S( MAT7 )
116
/* Fold the four products into the accumulators: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
123
/* F0..F3 = src[2] * m8..m11. */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
125 FMUL_S( MAT8 )
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
127 FMUL_S( MAT9 )
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
129 FMUL_S( MAT10 )
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
131 FMUL_S( MAT11 )
132
/* Same folding pattern as above for the src[2] products. */
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
139
/* Add the constant terms: F4+=m12, F5+=m13, F6+=m14, F7+=m15. */
140 FXCH( ST(3) ) /* F4 F6 F5 F7 */
141 FADD_S( MAT12 )
142 FXCH( ST(2) ) /* F5 F6 F4 F7 */
143 FADD_S( MAT13 )
144 FXCH( ST(1) ) /* F6 F5 F4 F7 */
145 FADD_S( MAT14 )
146 FXCH( ST(3) ) /* F7 F5 F4 F6 */
147 FADD_S( MAT15 )
148
/* Store F4..F7 to dst[0..3], popping each value; the stack ends empty. */
149 FXCH( ST(2) ) /* F4 F5 F7 F6 */
150 FSTP_S( DST0 ) /* F5 F7 F6 */
151 FSTP_S( DST1 ) /* F7 F6 */
152 FXCH( ST(1) ) /* F6 F7 */
153 FSTP_S( DST2 ) /* F7 */
154 FSTP_S( DST3 ) /* */
155
/* This label is not the target of any jump in the code shown here. */
156 LLBL(x86_p3_gr_skip):
157
/* Next point: fixed 16-byte destination step, variable source stride. */
158 ADD_L( CONST(16), EDI )
159 ADD_L( EAX, ESI )
160 CMP_L( ECX, EDI )
161 JNE( LLBL(x86_p3_gr_loop) )
162
163 LLBL(x86_p3_gr_done):
164
165 POP_L( EDI )
166 POP_L( ESI )
167 RET
168 #undef FRAME_OFFSET
169
170
171
172
173 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_perspective
 *
 * Transforms 3-component source points using only the matrix entries
 * m0, m5, m8, m9, m10 and m14, and stores a 4-component result:
 *
 *   dst[0] = src[0]*m0 + src[2]*m8
 *   dst[1] = src[1]*m5 + src[2]*m9
 *   dst[2] = src[2]*m10 + m14
 *   dst[3] = -src[2]      (sign bit flipped with an integer XOR)
 *
 * Arguments come from ARG_SOURCE / ARG_DEST / ARG_MATRIX (defined outside
 * this file; presumably FRAME_OFFSET-relative -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer, EDX = matrix base
 *   EBX = scratch for the negated src[2] bit pattern
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are overwritten.
 * Returns without touching the destination if the source count is zero.
 */
174 GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
175 HIDDEN(_mesa_x86_transform_points3_perspective)
176 GLNAME( _mesa_x86_transform_points3_perspective ):
177
/* 12 = bytes pushed by the three PUSH_L instructions below. */
178 #define FRAME_OFFSET 12
179 PUSH_L( ESI )
180 PUSH_L( EDI )
181 PUSH_L( EBX )
182
183 MOV_L( ARG_SOURCE, ESI )
184 MOV_L( ARG_DEST, EDI )
185
186 MOV_L( ARG_MATRIX, EDX )
187 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
188
/* Empty source vector: nothing to transform. */
189 TEST_L( ECX, ECX )
190 JZ( LLBL(x86_p3_pr_done) )
191
/* EAX = source stride in bytes; flag the destination as 4-component. */
192 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
193 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
194
195 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
196 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
197
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
198 SHL_L( CONST(4), ECX )
199 MOV_L( REGOFF(V4F_START, ESI), ESI )
200
201 MOV_L( REGOFF(V4F_START, EDI), EDI )
202 ADD_L( EDI, ECX )
203
204 ALIGNTEXT16
205 LLBL(x86_p3_pr_loop):
206
/* F4 = src[0]*m0.  Stack comments list ST(0) first. */
207 FLD_S( SRC0 ) /* F4 */
208 FMUL_S( MAT0 )
209
/* F5 = src[1]*m5. */
210 FLD_S( SRC1 ) /* F5 F4 */
211 FMUL_S( MAT5 )
212
/* F0..F2 = src[2] * m8, m9, m10. */
213 FLD_S( SRC2 ) /* F0 F5 F4 */
214 FMUL_S( MAT8 )
215 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
216 FMUL_S( MAT9 )
217 FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
218 FMUL_S( MAT10 )
219
/* F4 += F0, F5 += F1, then F6 = m14 + F2. */
220 FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
221 FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
222 FADDP( ST0, ST(2) ) /* F2 F5 F4 */
223 FLD_S( MAT14 ) /* F6 F2 F5 F4 */
224 FXCH( ST(1) ) /* F2 F6 F5 F4 */
225 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
226
/*
 * Negate src[2] in the integer unit: load its raw 32 bits and flip bit 31
 * (CONST(-2147483648) == 0x80000000, the single-precision sign bit).
 */
227 MOV_L( SRC2, EBX )
228 XOR_L( CONST(-2147483648), EBX )/* change sign */
229
/* Store F4, F5, F6 to dst[0..2]; dst[3] receives the negated src[2]. */
230 FXCH( ST(2) ) /* F4 F5 F6 */
231 FSTP_S( DST0 ) /* F5 F6 */
232 FSTP_S( DST1 ) /* F6 */
233 FSTP_S( DST2 ) /* */
234 MOV_L( EBX, DST3 )
235
/* This label is not the target of any jump in the code shown here. */
236 LLBL(x86_p3_pr_skip):
237
/* Next point: fixed 16-byte destination step, variable source stride. */
238 ADD_L( CONST(16), EDI )
239 ADD_L( EAX, ESI )
240 CMP_L( ECX, EDI )
241 JNE( LLBL(x86_p3_pr_loop) )
242
243 LLBL(x86_p3_pr_done):
244
245 POP_L( EBX )
246 POP_L( EDI )
247 POP_L( ESI )
248 RET
249 #undef FRAME_OFFSET
250
251
252
253
254 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_3d
 *
 * Transforms 3-component source points by the first three columns of each
 * matrix row group and stores a 3-component result:
 *
 *   dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i],  i = 0..2
 *
 * dst[3] is not written.  Arguments come from ARG_SOURCE / ARG_DEST /
 * ARG_MATRIX (defined outside this file; presumably FRAME_OFFSET-relative
 * -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer, EDX = matrix base
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * Returns without touching the destination if the source count is zero.
 */
255 GLOBL GLNAME( _mesa_x86_transform_points3_3d )
256 HIDDEN(_mesa_x86_transform_points3_3d)
257 GLNAME( _mesa_x86_transform_points3_3d ):
258
/* 8 = bytes pushed by the two PUSH_L instructions below. */
259 #define FRAME_OFFSET 8
260 PUSH_L( ESI )
261 PUSH_L( EDI )
262
263 MOV_L( ARG_SOURCE, ESI )
264 MOV_L( ARG_DEST, EDI )
265
266 MOV_L( ARG_MATRIX, EDX )
267 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
268
/* Empty source vector: nothing to transform. */
269 TEST_L( ECX, ECX )
270 JZ( LLBL(x86_p3_3dr_done) )
271
/* EAX = source stride in bytes; flag the destination as 3-component. */
272 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
273 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
274
275 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
276 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
277
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
278 SHL_L( CONST(4), ECX )
279 MOV_L( REGOFF(V4F_START, ESI), ESI )
280
281 MOV_L( REGOFF(V4F_START, EDI), EDI )
282 ADD_L( EDI, ECX )
283
284 ALIGNTEXT16
285 LLBL(x86_p3_3dr_loop):
286
/* F4..F6 = src[0] * m0..m2.  Stack comments list ST(0) first. */
287 FLD_S( SRC0 ) /* F4 */
288 FMUL_S( MAT0 )
289 FLD_S( SRC0 ) /* F5 F4 */
290 FMUL_S( MAT1 )
291 FLD_S( SRC0 ) /* F6 F5 F4 */
292 FMUL_S( MAT2 )
293
/* F0..F2 = src[1] * m4..m6. */
294 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
295 FMUL_S( MAT4 )
296 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
297 FMUL_S( MAT5 )
298 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
299 FMUL_S( MAT6 )
300
/* Fold into the accumulators: F4+=F0, F5+=F1, F6+=F2. */
301 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
302 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
303 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
304 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
305
/* F0..F2 = src[2] * m8..m10. */
306 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
307 FMUL_S( MAT8 )
308 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
309 FMUL_S( MAT9 )
310 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
311 FMUL_S( MAT10 )
312
/* Same folding pattern for the src[2] products. */
313 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
314 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
315 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
316 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
317
/* Add the constant terms: F4+=m12, F5+=m13, F6+=m14. */
318 FXCH( ST(2) ) /* F4 F5 F6 */
319 FADD_S( MAT12 )
320 FXCH( ST(1) ) /* F5 F4 F6 */
321 FADD_S( MAT13 )
322 FXCH( ST(2) ) /* F6 F4 F5 */
323 FADD_S( MAT14 )
324
/* Store F4, F5, F6 to dst[0..2], popping each; the stack ends empty. */
325 FXCH( ST(1) ) /* F4 F6 F5 */
326 FSTP_S( DST0 ) /* F6 F5 */
327 FXCH( ST(1) ) /* F5 F6 */
328 FSTP_S( DST1 ) /* F6 */
329 FSTP_S( DST2 ) /* */
330
/* This label is not the target of any jump in the code shown here. */
331 LLBL(x86_p3_3dr_skip):
332
/* Next point: fixed 16-byte destination step, variable source stride. */
333 ADD_L( CONST(16), EDI )
334 ADD_L( EAX, ESI )
335 CMP_L( ECX, EDI )
336 JNE( LLBL(x86_p3_3dr_loop) )
337
338 LLBL(x86_p3_3dr_done):
339
340 POP_L( EDI )
341 POP_L( ESI )
342 RET
343 #undef FRAME_OFFSET
344
345
346
347
348 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * Transforms 3-component source points using only the matrix entries
 * m0, m5, m10 (per-axis factors) and m12, m13, m14 (constant terms):
 *
 *   dst[0] = src[0]*m0  + m12
 *   dst[1] = src[1]*m5  + m13
 *   dst[2] = src[2]*m10 + m14
 *
 * dst[3] is not written.  Arguments come from ARG_SOURCE / ARG_DEST /
 * ARG_MATRIX (defined outside this file; presumably FRAME_OFFSET-relative
 * -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer, EDX = matrix base
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * Returns without touching the destination if the source count is zero.
 */
349 GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
350 HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
351 GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
352
/* 8 = bytes pushed by the two PUSH_L instructions below. */
353 #define FRAME_OFFSET 8
354 PUSH_L( ESI )
355 PUSH_L( EDI )
356
357 MOV_L( ARG_SOURCE, ESI )
358 MOV_L( ARG_DEST, EDI )
359
360
361 MOV_L( ARG_MATRIX, EDX )
362 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
363
/* Empty source vector: nothing to transform. */
364 TEST_L( ECX, ECX )
365 JZ( LLBL(x86_p3_3dnrr_done) )
366
/* EAX = source stride in bytes; flag the destination as 3-component. */
367 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
368 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
369
370 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
371 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
372
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
373 SHL_L( CONST(4), ECX )
374 MOV_L( REGOFF(V4F_START, ESI), ESI )
375
376 MOV_L( REGOFF(V4F_START, EDI), EDI )
377 ADD_L( EDI, ECX )
378
379 ALIGNTEXT16
380 LLBL(x86_p3_3dnrr_loop):
381
/* F4 = src[0]*m0.  Stack comments list ST(0) first. */
382 FLD_S( SRC0 ) /* F4 */
383 FMUL_S( MAT0 )
384
/* F1 = src[1]*m5. */
385 FLD_S( SRC1 ) /* F1 F4 */
386 FMUL_S( MAT5 )
387
/* F2 = src[2]*m10. */
388 FLD_S( SRC2 ) /* F2 F1 F4 */
389 FMUL_S( MAT10 )
390
/*
 * F4 += m12 directly from memory; m13 and m14 are loaded as F5 and F6 and
 * the products F1 and F2 are added into them, leaving F4 F5 F6 in store
 * order.
 */
391 FXCH( ST(2) ) /* F4 F1 F2 */
392 FADD_S( MAT12 )
393 FLD_S( MAT13 ) /* F5 F4 F1 F2 */
394 FXCH( ST(2) ) /* F1 F4 F5 F2 */
395 FADDP( ST0, ST(2) ) /* F4 F5 F2 */
396 FLD_S( MAT14 ) /* F6 F4 F5 F2 */
397 FXCH( ST(3) ) /* F2 F4 F5 F6 */
398 FADDP( ST0, ST(3) ) /* F4 F5 F6 */
399
/* Store dst[0..2], popping each; the stack ends empty. */
400 FSTP_S( DST0 ) /* F5 F6 */
401 FSTP_S( DST1 ) /* F6 */
402 FSTP_S( DST2 ) /* */
403
/* This label is not the target of any jump in the code shown here. */
404 LLBL(x86_p3_3dnrr_skip):
405
/* Next point: fixed 16-byte destination step, variable source stride. */
406 ADD_L( CONST(16), EDI )
407 ADD_L( EAX, ESI )
408 CMP_L( ECX, EDI )
409 JNE( LLBL(x86_p3_3dnrr_loop) )
410
411 LLBL(x86_p3_3dnrr_done):
412
413 POP_L( EDI )
414 POP_L( ESI )
415 RET
416 #undef FRAME_OFFSET
417
418
419
420
421 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_2d
 *
 * Transforms the first two components of each 3-component source point
 * and passes the third through unchanged:
 *
 *   dst[0] = src[0]*m0 + src[1]*m4 + m12
 *   dst[1] = src[0]*m1 + src[1]*m5 + m13
 *   dst[2] = src[2]            (raw 32-bit copy through EBX)
 *
 * dst[3] is not written.  Arguments come from ARG_SOURCE / ARG_DEST /
 * ARG_MATRIX (defined outside this file; presumably FRAME_OFFSET-relative
 * -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer, EDX = matrix base
 *   EBX = scratch for the src[2] copy
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are overwritten.
 * Returns without touching the destination if the source count is zero.
 */
422 GLOBL GLNAME( _mesa_x86_transform_points3_2d )
423 HIDDEN(_mesa_x86_transform_points3_2d)
424 GLNAME( _mesa_x86_transform_points3_2d ):
425
/* 12 = bytes pushed by the three PUSH_L instructions below. */
426 #define FRAME_OFFSET 12
427 PUSH_L( ESI )
428 PUSH_L( EDI )
429 PUSH_L( EBX )
430
431 MOV_L( ARG_SOURCE, ESI )
432 MOV_L( ARG_DEST, EDI )
433
434 MOV_L( ARG_MATRIX, EDX )
435 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
436
/* Empty source vector: nothing to transform. */
437 TEST_L( ECX, ECX )
438 JZ( LLBL(x86_p3_2dr_done) )
439
/* EAX = source stride in bytes; flag the destination as 3-component. */
440 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
441 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
442
443 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
444 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
445
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
446 SHL_L( CONST(4), ECX )
447 MOV_L( REGOFF(V4F_START, ESI), ESI )
448
449 MOV_L( REGOFF(V4F_START, EDI), EDI )
450 ADD_L( EDI, ECX )
451
452 ALIGNTEXT16
453 LLBL(x86_p3_2dr_loop):
454
/* F4 = src[0]*m0, F5 = src[0]*m1.  Stack comments list ST(0) first. */
455 FLD_S( SRC0 ) /* F4 */
456 FMUL_S( MAT0 )
457 FLD_S( SRC0 ) /* F5 F4 */
458 FMUL_S( MAT1 )
459
/* F0 = src[1]*m4, F1 = src[1]*m5. */
460 FLD_S( SRC1 ) /* F0 F5 F4 */
461 FMUL_S( MAT4 )
462 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
463 FMUL_S( MAT5 )
464
/* F4 += F0, F5 += F1. */
465 FXCH( ST(1) ) /* F0 F1 F5 F4 */
466 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
467 FADDP( ST0, ST(1) ) /* F5 F4 */
468
/* Add the constant terms: F4 += m12, F5 += m13. */
469 FXCH( ST(1) ) /* F4 F5 */
470 FADD_S( MAT12 )
471 FXCH( ST(1) ) /* F5 F4 */
472 FADD_S( MAT13 )
473
/* src[2] is fetched as an integer before any store to the destination. */
474 MOV_L( SRC2, EBX )
475
/* Store dst[0], dst[1] from the x87 stack and dst[2] from EBX. */
476 FXCH( ST(1) ) /* F4 F5 */
477 FSTP_S( DST0 ) /* F5 */
478 FSTP_S( DST1 ) /* */
479 MOV_L( EBX, DST2 )
480
/* This label is not the target of any jump in the code shown here. */
481 LLBL(x86_p3_2dr_skip):
482
/* Next point: fixed 16-byte destination step, variable source stride. */
483 ADD_L( CONST(16), EDI )
484 ADD_L( EAX, ESI )
485 CMP_L( ECX, EDI )
486 JNE( LLBL(x86_p3_2dr_loop) )
487
488 LLBL(x86_p3_2dr_done):
489
490 POP_L( EBX )
491 POP_L( EDI )
492 POP_L( ESI )
493 RET
494 #undef FRAME_OFFSET
495
496
497
498
499 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * Transforms the first two components of each 3-component source point
 * using only m0, m5, m12 and m13, and passes the third through unchanged:
 *
 *   dst[0] = src[0]*m0 + m12
 *   dst[1] = src[1]*m5 + m13
 *   dst[2] = src[2]            (raw 32-bit copy through EBX)
 *
 * dst[3] is not written.  Arguments come from ARG_SOURCE / ARG_DEST /
 * ARG_MATRIX (defined outside this file; presumably FRAME_OFFSET-relative
 * -- confirm).
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer, EDX = matrix base
 *   EBX = scratch for the src[2] copy
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are overwritten.
 * Returns without touching the destination if the source count is zero.
 */
500 GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
501 HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
502 GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
503
/* 12 = bytes pushed by the three PUSH_L instructions below. */
504 #define FRAME_OFFSET 12
505 PUSH_L( ESI )
506 PUSH_L( EDI )
507 PUSH_L( EBX )
508
509 MOV_L( ARG_SOURCE, ESI )
510 MOV_L( ARG_DEST, EDI )
511
512 MOV_L( ARG_MATRIX, EDX )
513 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
514
/* Empty source vector: nothing to transform. */
515 TEST_L( ECX, ECX )
516 JZ( LLBL(x86_p3_2dnrr_done) )
517
/* EAX = source stride in bytes; flag the destination as 3-component. */
518 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
519 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
520
521 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
522 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
523
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
524 SHL_L( CONST(4), ECX )
525 MOV_L( REGOFF(V4F_START, ESI), ESI )
526
527 MOV_L( REGOFF(V4F_START, EDI), EDI )
528 ADD_L( EDI, ECX )
529
530 ALIGNTEXT16
531 LLBL(x86_p3_2dnrr_loop):
532
/* F4 = src[0]*m0.  Stack comments list ST(0) first. */
533 FLD_S( SRC0 ) /* F4 */
534 FMUL_S( MAT0 )
535
/* F1 = src[1]*m5. */
536 FLD_S( SRC1 ) /* F1 F4 */
537 FMUL_S( MAT5 )
538
/* F4 += m12; m13 is loaded as F5 so that F1 can be added into it. */
539 FXCH( ST(1) ) /* F4 F1 */
540 FADD_S( MAT12 )
541 FLD_S( MAT13 ) /* F5 F4 F1 */
542
/* F5 += F1, leaving F4 F5 in store order. */
543 FXCH( ST(2) ) /* F1 F4 F5 */
544 FADDP( ST0, ST(2) ) /* F4 F5 */
545
/* src[2] is fetched as an integer before any store to the destination. */
546 MOV_L( SRC2, EBX )
547
/* Store dst[0], dst[1] from the x87 stack and dst[2] from EBX. */
548 FSTP_S( DST0 ) /* F5 */
549 FSTP_S( DST1 ) /* */
550 MOV_L( EBX, DST2 )
551
/* This label is not the target of any jump in the code shown here. */
552 LLBL(x86_p3_2dnrr_skip):
553
/* Next point: fixed 16-byte destination step, variable source stride. */
554 ADD_L( CONST(16), EDI )
555 ADD_L( EAX, ESI )
556 CMP_L( ECX, EDI )
557 JNE( LLBL(x86_p3_2dnrr_loop) )
558
559 LLBL(x86_p3_2dnrr_done):
560
561 POP_L( EBX )
562 POP_L( EDI )
563 POP_L( ESI )
564 RET
565 #undef FRAME_OFFSET
566
567
568
569
570 ALIGNTEXT16
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three 32-bit elements of every source point to the
 * destination without any arithmetic:
 *
 *   dst[0] = src[0];  dst[1] = src[1];  dst[2] = src[2]
 *
 * dst[3] is not written.  The destination's flags, count and size fields
 * are updated first; the copy loop is then skipped entirely when the
 * source and destination data pointers are equal (in-place case).
 *
 * Arguments come from ARG_SOURCE / ARG_DEST / ARG_MATRIX (defined outside
 * this file; presumably FRAME_OFFSET-relative -- confirm).  The matrix
 * pointer is loaded into EDX but never dereferenced; EDX is reused as a
 * copy register inside the loop.
 *
 * Register roles inside the loop:
 *   ESI = current source point (advanced by the source stride in EAX)
 *   EDI = current destination point (advanced by 16 bytes)
 *   ECX = destination end pointer
 *   EBX, EBP, EDX = scratch for the three copied elements
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * overwritten.  Returns without touching the destination if the source
 * count is zero.
 *
 * NOTE(review): unlike the other routines in this file, FRAME_OFFSET is
 * not #undef'd after this function.
 */
571 GLOBL GLNAME( _mesa_x86_transform_points3_identity )
572 HIDDEN(_mesa_x86_transform_points3_identity)
573 GLNAME(_mesa_x86_transform_points3_identity ):
574
/* 16 = bytes pushed by the four PUSH_L instructions below. */
575 #define FRAME_OFFSET 16
576 PUSH_L( ESI )
577 PUSH_L( EDI )
578 PUSH_L( EBX )
579 PUSH_L( EBP )
580
581 MOV_L( ARG_SOURCE, ESI )
582 MOV_L( ARG_DEST, EDI )
583
584 MOV_L( ARG_MATRIX, EDX )
585 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
586
/* Empty source vector: nothing to copy. */
587 TEST_L( ECX, ECX )
588 JZ( LLBL(x86_p3_ir_done) )
589
/* EAX = source stride in bytes; flag the destination as 3-component. */
590 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
591 OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
592
593 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
594 MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
595
/* ECX = count*16, then ESI/EDI = data pointers, ECX = destination end. */
596 SHL_L( CONST(4), ECX )
597 MOV_L( REGOFF(V4F_START, ESI), ESI )
598
599 MOV_L( REGOFF(V4F_START, EDI), EDI )
600 ADD_L( EDI, ECX )
601
/* Source and destination data are the same buffer: no copy needed. */
602 CMP_L( ESI, EDI )
603 JE( LLBL(x86_p3_ir_done) )
604
605 ALIGNTEXT16
606 LLBL(x86_p3_ir_loop):
607
/*
 * Integer-register copy is the variant that is assembled; the x87
 * load/store variant in the #else branch is compiled out by "#if 1".
 */
608 #if 1
609 MOV_L( SRC0, EBX )
610 MOV_L( SRC1, EBP )
611 MOV_L( SRC2, EDX )
612
613 MOV_L( EBX, DST0 )
614 MOV_L( EBP, DST1 )
615 MOV_L( EDX, DST2 )
616 #else
617 FLD_S( SRC0 )
618 FLD_S( SRC1 )
619 FLD_S( SRC2 )
620
621 FSTP_S( DST2 )
622 FSTP_S( DST1 )
623 FSTP_S( DST0 )
624 #endif
625
/* This label is not the target of any jump in the code shown here. */
626 LLBL(x86_p3_ir_skip):
627
/* Next point: fixed 16-byte destination step, variable source stride. */
628 ADD_L( CONST(16), EDI )
629 ADD_L( EAX, ESI )
630 CMP_L( ECX, EDI )
631 JNE( LLBL(x86_p3_ir_loop) )
632
633 LLBL(x86_p3_ir_done):
634
635 POP_L( EBP )
636 POP_L( EBX )
637 POP_L( EDI )
638 POP_L( ESI )
639 RET
640
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section with no
 * flags.  By GNU toolchain convention this marks the object file as not
 * requiring an executable stack.
 */
641 #if defined (__ELF__) && defined (__linux__)
642 .section .note.GNU-stack,"",%progbits
643 #endif