Remove CVS keywords.
[mesa.git] / src / mesa / x86 / x86_xform4.S
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 3.5
5 *
6 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32 #include "matypes.h"
33 #include "xform_args.h"
34
35 SEG_TEXT
36
/*
 * Integer bit patterns for float constants: 1065353216 == 0x3F800000, the
 * IEEE-754 single-precision encoding of 1.0. Neither FP_ONE nor FP_ZERO is
 * referenced by the routines visible in this file.
 */
37 #define FP_ONE 1065353216
38 #define FP_ZERO 0
39
/*
 * SRCn / DSTn: the n-th 4-byte component of the current input vertex
 * (based at ESI) and of the current output vertex (based at EDI).
 */
40 #define SRC0 REGOFF(0, ESI)
41 #define SRC1 REGOFF(4, ESI)
42 #define SRC2 REGOFF(8, ESI)
43 #define SRC3 REGOFF(12, ESI)
44 #define DST0 REGOFF(0, EDI)
45 #define DST1 REGOFF(4, EDI)
46 #define DST2 REGOFF(8, EDI)
47 #define DST3 REGOFF(12, EDI)
/*
 * MATn: the n-th 4-byte element of the 16-element matrix based at EDX.
 * As used by the general routine below, output component i is
 * SRC0*MAT(i) + SRC1*MAT(4+i) + SRC2*MAT(8+i) + SRC3*MAT(12+i).
 */
48 #define MAT0 REGOFF(0, EDX)
49 #define MAT1 REGOFF(4, EDX)
50 #define MAT2 REGOFF(8, EDX)
51 #define MAT3 REGOFF(12, EDX)
52 #define MAT4 REGOFF(16, EDX)
53 #define MAT5 REGOFF(20, EDX)
54 #define MAT6 REGOFF(24, EDX)
55 #define MAT7 REGOFF(28, EDX)
56 #define MAT8 REGOFF(32, EDX)
57 #define MAT9 REGOFF(36, EDX)
58 #define MAT10 REGOFF(40, EDX)
59 #define MAT11 REGOFF(44, EDX)
60 #define MAT12 REGOFF(48, EDX)
61 #define MAT13 REGOFF(52, EDX)
62 #define MAT14 REGOFF(56, EDX)
63 #define MAT15 REGOFF(60, EDX)
64
65
/*
 * _mesa_x86_transform_points4_general
 *
 * Transforms every 4-component vertex of the source vector by the full
 * 4x4 matrix using the x87 FPU:
 *
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC2*MAT8  + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC2*MAT9  + SRC3*MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC0*MAT3 + SRC1*MAT7 + SRC2*MAT11 + SRC3*MAT15
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex at the destination start.
 *          When the count is zero the destination is left untouched.
 * Registers: ESI and EDI are saved/restored; EAX, ECX and EDX are modified.
 */
66 ALIGNTEXT16
67 GLOBL GLNAME( _mesa_x86_transform_points4_general )
68 HIDDEN(_mesa_x86_transform_points4_general)
69 GLNAME( _mesa_x86_transform_points4_general ):
70
/* FRAME_OFFSET matches the two 4-byte pushes below. */
71 #define FRAME_OFFSET 8
72 PUSH_L( ESI )
73 PUSH_L( EDI )
74
75 MOV_L( ARG_SOURCE, ESI )
76 MOV_L( ARG_DEST, EDI )
77
78 MOV_L( ARG_MATRIX, EDX )
79 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
80
/* Nothing to do for an empty source vector. */
81 TEST_L( ECX, ECX )
82 JZ( LLBL(x86_p4_gr_done) )
83
/* EAX = source stride in bytes; mark the destination as 4-component. */
84 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
85 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
86
87 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
88 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
89
/* ECX = count * 16; the descriptor pointers become data pointers. */
90 SHL_L( CONST(4), ECX )
91 MOV_L( REGOFF(V4F_START, ESI), ESI )
92
/* ECX = destination start + count * 16 = end-of-output sentinel. */
93 MOV_L( REGOFF(V4F_START, EDI), EDI )
94 ADD_L( EDI, ECX )
95
96 ALIGNTEXT16
97 LLBL(x86_p4_gr_loop):
98
/* Four partial sums F4..F7 start as SRC0 * MAT0..MAT3. */
99 FLD_S( SRC0 ) /* F4 */
100 FMUL_S( MAT0 )
101 FLD_S( SRC0 ) /* F5 F4 */
102 FMUL_S( MAT1 )
103 FLD_S( SRC0 ) /* F6 F5 F4 */
104 FMUL_S( MAT2 )
105 FLD_S( SRC0 ) /* F7 F6 F5 F4 */
106 FMUL_S( MAT3 )
107
/* SRC1 * MAT4..MAT7 products, folded into F4..F7 below. */
108 FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
109 FMUL_S( MAT4 )
110 FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
111 FMUL_S( MAT5 )
112 FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
113 FMUL_S( MAT6 )
114 FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
115 FMUL_S( MAT7 )
116
117 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
118 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
119 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
120 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
121 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
122 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
123
/* SRC2 * MAT8..MAT11 products, folded the same way. */
124 FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
125 FMUL_S( MAT8 )
126 FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
127 FMUL_S( MAT9 )
128 FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
129 FMUL_S( MAT10 )
130 FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
131 FMUL_S( MAT11 )
132
133 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
134 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
135 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
136 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
137 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
138 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
139
/* SRC3 * MAT12..MAT15 products, folded the same way. */
140 FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
141 FMUL_S( MAT12 )
142 FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
143 FMUL_S( MAT13 )
144 FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
145 FMUL_S( MAT14 )
146 FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
147 FMUL_S( MAT15 )
148
149 FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
150 FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
151 FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
152 FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
153 FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
154 FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
155
/*
 * Store F4..F7 to DST0..DST3. All source loads are done by now, so the
 * stores cannot disturb this vertex's inputs.
 */
156 FXCH( ST(3) ) /* F4 F6 F5 F7 */
157 FSTP_S( DST0 ) /* F6 F5 F7 */
158 FXCH( ST(1) ) /* F5 F6 F7 */
159 FSTP_S( DST1 ) /* F6 F7 */
160 FSTP_S( DST2 ) /* F7 */
161 FSTP_S( DST3 ) /* */
162
/* This label is not referenced anywhere in the visible source. */
163 LLBL(x86_p4_gr_skip):
164
/* Output advances by 16 bytes, input by the source stride. */
165 ADD_L( CONST(16), EDI )
166 ADD_L( EAX, ESI )
167 CMP_L( ECX, EDI )
168 JNE( LLBL(x86_p4_gr_loop) )
169
170 LLBL(x86_p4_gr_done):
171
172 POP_L( EDI )
173 POP_L( ESI )
174 RET
175 #undef FRAME_OFFSET
176
177
178
179
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transforms 4-component vertices using only the matrix elements read
 * below (MAT0, MAT5, MAT8, MAT9, MAT10, MAT14):
 *
 *   DST0 = SRC0*MAT0  + SRC2*MAT8
 *   DST1 = SRC1*MAT5  + SRC2*MAT9
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC2 with its sign bit flipped (i.e. -SRC2)
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI and EBX are saved/restored; EAX, ECX and EDX are
 *          modified.
 */
180 ALIGNTEXT16
181 GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
182 HIDDEN(_mesa_x86_transform_points4_perspective)
183 GLNAME( _mesa_x86_transform_points4_perspective ):
184
/* FRAME_OFFSET matches the three 4-byte pushes below. */
185 #define FRAME_OFFSET 12
186 PUSH_L( ESI )
187 PUSH_L( EDI )
188 PUSH_L( EBX )
189
190 MOV_L( ARG_SOURCE, ESI )
191 MOV_L( ARG_DEST, EDI )
192
193 MOV_L( ARG_MATRIX, EDX )
194 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
195
/* Nothing to do for an empty source vector. */
196 TEST_L( ECX, ECX )
197 JZ( LLBL(x86_p4_pr_done) )
198
/* EAX = source stride in bytes; mark the destination as 4-component. */
199 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
200 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
201
202 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
203 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
204
/* ECX = count * 16; the descriptor pointers become data pointers. */
205 SHL_L( CONST(4), ECX )
206 MOV_L( REGOFF(V4F_START, ESI), ESI )
207
/* ECX = destination start + count * 16 = end-of-output sentinel. */
208 MOV_L( REGOFF(V4F_START, EDI), EDI )
209 ADD_L( EDI, ECX )
210
211 ALIGNTEXT16
212 LLBL(x86_p4_pr_loop):
213
214 FLD_S( SRC0 ) /* F4 */
215 FMUL_S( MAT0 )
216
217 FLD_S( SRC1 ) /* F5 F4 */
218 FMUL_S( MAT5 )
219
/* SRC2 contributes to all three computed outputs. */
220 FLD_S( SRC2 ) /* F0 F5 F4 */
221 FMUL_S( MAT8 )
222 FLD_S( SRC2 ) /* F1 F0 F5 F4 */
223 FMUL_S( MAT9 )
224 FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
225 FMUL_S( MAT10 )
226
227 FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
228 FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
229 FADDP( ST0, ST(2) ) /* F6 F5 F4 */
230
231 FLD_S( SRC3 ) /* F2 F6 F5 F4 */
232 FMUL_S( MAT14 )
233
234 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
235
/*
 * Negate SRC2 in the integer unit by toggling bit 31 of its raw bits.
 * It is read before any DST store, presumably so that an in-place
 * transform (destination aliasing source) still sees the original value.
 */
236 MOV_L( SRC2, EBX )
237 XOR_L( CONST(-2147483648), EBX )/* change sign */
238
239 FXCH( ST(2) ) /* F4 F5 F6 */
240 FSTP_S( DST0 ) /* F5 F6 */
241 FSTP_S( DST1 ) /* F6 */
242 FSTP_S( DST2 ) /* */
243 MOV_L( EBX, DST3 )
244
/* This label is not referenced anywhere in the visible source. */
245 LLBL(x86_p4_pr_skip):
246
/* Output advances by 16 bytes, input by the source stride. */
247 ADD_L( CONST(16), EDI )
248 ADD_L( EAX, ESI )
249 CMP_L( ECX, EDI )
250 JNE( LLBL(x86_p4_pr_loop) )
251
252 LLBL(x86_p4_pr_done):
253
254 POP_L( EBX )
255 POP_L( EDI )
256 POP_L( ESI )
257 RET
258 #undef FRAME_OFFSET
259
260
261
262
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transforms 4-component vertices by the first three rows of outputs only;
 * the fourth component is copied through unchanged:
 *
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC2*MAT8  + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC2*MAT9  + SRC3*MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (copied as a raw 32-bit word)
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI and EBX are saved/restored; EAX, ECX and EDX are
 *          modified.
 */
263 ALIGNTEXT16
264 GLOBL GLNAME( _mesa_x86_transform_points4_3d )
265 HIDDEN(_mesa_x86_transform_points4_3d)
266 GLNAME( _mesa_x86_transform_points4_3d ):
267
/* FRAME_OFFSET matches the three 4-byte pushes below. */
268 #define FRAME_OFFSET 12
269 PUSH_L( ESI )
270 PUSH_L( EDI )
271 PUSH_L( EBX )
272
273 MOV_L( ARG_SOURCE, ESI )
274 MOV_L( ARG_DEST, EDI )
275
276 MOV_L( ARG_MATRIX, EDX )
277 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
278
/* Nothing to do for an empty source vector. */
279 TEST_L( ECX, ECX )
280 JZ( LLBL(x86_p4_3dr_done) )
281
/* EAX = source stride in bytes; mark the destination as 4-component. */
282 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
283 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
284
285 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
286 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
287
/* ECX = count * 16; the descriptor pointers become data pointers. */
288 SHL_L( CONST(4), ECX )
289 MOV_L( REGOFF(V4F_START, ESI), ESI )
290
/* ECX = destination start + count * 16 = end-of-output sentinel. */
291 MOV_L( REGOFF(V4F_START, EDI), EDI )
292 ADD_L( EDI, ECX )
293
294 ALIGNTEXT16
295 LLBL(x86_p4_3dr_loop):
296
/* Three partial sums F4..F6 start as SRC0 * MAT0..MAT2. */
297 FLD_S( SRC0 ) /* F4 */
298 FMUL_S( MAT0 )
299 FLD_S( SRC0 ) /* F5 F4 */
300 FMUL_S( MAT1 )
301 FLD_S( SRC0 ) /* F6 F5 F4 */
302 FMUL_S( MAT2 )
303
/* SRC1 * MAT4..MAT6 products, folded into F4..F6 below. */
304 FLD_S( SRC1 ) /* F0 F6 F5 F4 */
305 FMUL_S( MAT4 )
306 FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
307 FMUL_S( MAT5 )
308 FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
309 FMUL_S( MAT6 )
310
311 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
312 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
313 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
314 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
315
/* SRC2 * MAT8..MAT10 products, folded the same way. */
316 FLD_S( SRC2 ) /* F0 F6 F5 F4 */
317 FMUL_S( MAT8 )
318 FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
319 FMUL_S( MAT9 )
320 FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
321 FMUL_S( MAT10 )
322
323 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
324 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
325 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
326 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
327
/* SRC3 * MAT12..MAT14 products, folded the same way. */
328 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
329 FMUL_S( MAT12 )
330 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
331 FMUL_S( MAT13 )
332 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
333 FMUL_S( MAT14 )
334
335 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
336 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
337 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
338 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
339
/*
 * Fetch the pass-through fourth component before any DST store,
 * presumably so an in-place transform still sees the original value.
 */
340 MOV_L( SRC3, EBX )
341
342 FXCH( ST(2) ) /* F4 F5 F6 */
343 FSTP_S( DST0 ) /* F5 F6 */
344 FSTP_S( DST1 ) /* F6 */
345 FSTP_S( DST2 ) /* */
346 MOV_L( EBX, DST3 )
347
/* This label is not referenced anywhere in the visible source. */
348 LLBL(x86_p4_3dr_skip):
349
/* Output advances by 16 bytes, input by the source stride. */
350 ADD_L( CONST(16), EDI )
351 ADD_L( EAX, ESI )
352 CMP_L( ECX, EDI )
353 JNE( LLBL(x86_p4_3dr_loop) )
354
355 LLBL(x86_p4_3dr_done):
356
357 POP_L( EBX )
358 POP_L( EDI )
359 POP_L( ESI )
360 RET
361 #undef FRAME_OFFSET
362
363
364
365
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transforms 4-component vertices using only the diagonal scale elements
 * and the MAT12..MAT14 elements; the fourth component is copied through:
 *
 *   DST0 = SRC0*MAT0  + SRC3*MAT12
 *   DST1 = SRC1*MAT5  + SRC3*MAT13
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (copied as a raw 32-bit word)
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI and EBX are saved/restored; EAX, ECX and EDX are
 *          modified.
 */
366 ALIGNTEXT16
367 GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
368 HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
369 GLNAME(_mesa_x86_transform_points4_3d_no_rot):
370
/* FRAME_OFFSET matches the three 4-byte pushes below. */
371 #define FRAME_OFFSET 12
372 PUSH_L( ESI )
373 PUSH_L( EDI )
374 PUSH_L( EBX )
375
376 MOV_L( ARG_SOURCE, ESI )
377 MOV_L( ARG_DEST, EDI )
378
379 MOV_L( ARG_MATRIX, EDX )
380 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
381
/* Nothing to do for an empty source vector. */
382 TEST_L( ECX, ECX )
383 JZ( LLBL(x86_p4_3dnrr_done) )
384
/* EAX = source stride in bytes; mark the destination as 4-component. */
385 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
386 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
387
388 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
389 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
390
/* ECX = count * 16; the descriptor pointers become data pointers. */
391 SHL_L( CONST(4), ECX )
392 MOV_L( REGOFF(V4F_START, ESI), ESI )
393
/* ECX = destination start + count * 16 = end-of-output sentinel. */
394 MOV_L( REGOFF(V4F_START, EDI), EDI )
395 ADD_L( EDI, ECX )
396
397 ALIGNTEXT16
398 LLBL(x86_p4_3dnrr_loop):
399
/* Per-axis scale terms: F4, F5, F6. */
400 FLD_S( SRC0 ) /* F4 */
401 FMUL_S( MAT0 )
402
403 FLD_S( SRC1 ) /* F5 F4 */
404 FMUL_S( MAT5 )
405
406 FLD_S( SRC2 ) /* F6 F5 F4 */
407 FMUL_S( MAT10 )
408
/* SRC3 * MAT12..MAT14 terms, added to F4..F6 below. */
409 FLD_S( SRC3 ) /* F0 F6 F5 F4 */
410 FMUL_S( MAT12 )
411 FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
412 FMUL_S( MAT13 )
413 FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
414 FMUL_S( MAT14 )
415
416 FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
417 FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
418 FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
419 FADDP( ST0, ST(1) ) /* F6 F5 F4 */
420
/*
 * Fetch the pass-through fourth component before any DST store,
 * presumably so an in-place transform still sees the original value.
 */
421 MOV_L( SRC3, EBX )
422
423 FXCH( ST(2) ) /* F4 F5 F6 */
424 FSTP_S( DST0 ) /* F5 F6 */
425 FSTP_S( DST1 ) /* F6 */
426 FSTP_S( DST2 ) /* */
427 MOV_L( EBX, DST3 )
428
/* This label is not referenced anywhere in the visible source. */
429 LLBL(x86_p4_3dnrr_skip):
430
/* Output advances by 16 bytes, input by the source stride. */
431 ADD_L( CONST(16), EDI )
432 ADD_L( EAX, ESI )
433 CMP_L( ECX, EDI )
434 JNE( LLBL(x86_p4_3dnrr_loop) )
435
436 LLBL(x86_p4_3dnrr_done):
437
438 POP_L( EBX )
439 POP_L( EDI )
440 POP_L( ESI )
441 RET
442 #undef FRAME_OFFSET
443
444
445
446
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transforms only the first two components of each 4-component vertex;
 * the third and fourth are copied through unchanged:
 *
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (copied as a raw 32-bit word)
 *   DST3 = SRC3 (copied as a raw 32-bit word)
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI, EBX and EBP are saved/restored; EAX, ECX and EDX
 *          are modified.
 */
447 ALIGNTEXT16
448 GLOBL GLNAME( _mesa_x86_transform_points4_2d )
449 HIDDEN(_mesa_x86_transform_points4_2d)
450 GLNAME( _mesa_x86_transform_points4_2d ):
451
/* FRAME_OFFSET matches the four 4-byte pushes below. */
452 #define FRAME_OFFSET 16
453 PUSH_L( ESI )
454 PUSH_L( EDI )
455 PUSH_L( EBX )
456 PUSH_L( EBP )
457
458 MOV_L( ARG_SOURCE, ESI )
459 MOV_L( ARG_DEST, EDI )
460
461 MOV_L( ARG_MATRIX, EDX )
462 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
463
/* Nothing to do for an empty source vector. */
464 TEST_L( ECX, ECX )
465 JZ( LLBL(x86_p4_2dr_done) )
466
/* EAX = source stride in bytes; mark the destination as 4-component. */
467 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
468 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
469
470 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
471 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
472
/* ECX = count * 16; the descriptor pointers become data pointers. */
473 SHL_L( CONST(4), ECX )
474 MOV_L( REGOFF(V4F_START, ESI), ESI )
475
/* ECX = destination start + count * 16 = end-of-output sentinel. */
476 MOV_L( REGOFF(V4F_START, EDI), EDI )
477 ADD_L( EDI, ECX )
478
479 ALIGNTEXT16
480 LLBL(x86_p4_2dr_loop):
481
/* Two partial sums F4, F5 start as SRC0 * MAT0, MAT1. */
482 FLD_S( SRC0 ) /* F4 */
483 FMUL_S( MAT0 )
484 FLD_S( SRC0 ) /* F5 F4 */
485 FMUL_S( MAT1 )
486
/* SRC1 * MAT4, MAT5 products, folded into F4, F5 below. */
487 FLD_S( SRC1 ) /* F0 F5 F4 */
488 FMUL_S( MAT4 )
489 FLD_S( SRC1 ) /* F1 F0 F5 F4 */
490 FMUL_S( MAT5 )
491
492 FXCH( ST(1) ) /* F0 F1 F5 F4 */
493 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
494 FADDP( ST0, ST(1) ) /* F5 F4 */
495
/* SRC3 * MAT12, MAT13 products, folded the same way. */
496 FLD_S( SRC3 ) /* F0 F5 F4 */
497 FMUL_S( MAT12 )
498 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
499 FMUL_S( MAT13 )
500
501 FXCH( ST(1) ) /* F0 F1 F5 F4 */
502 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
503 FADDP( ST0, ST(1) ) /* F5 F4 */
504
/*
 * Fetch the two pass-through components before any DST store,
 * presumably so an in-place transform still sees the original values.
 */
505 MOV_L( SRC2, EBX )
506 MOV_L( SRC3, EBP )
507
508 FXCH( ST(1) ) /* F4 F5 */
509 FSTP_S( DST0 ) /* F5 */
510 FSTP_S( DST1 ) /* */
511 MOV_L( EBX, DST2 )
512 MOV_L( EBP, DST3 )
513
/* This label is not referenced anywhere in the visible source. */
514 LLBL(x86_p4_2dr_skip):
515
/* Output advances by 16 bytes, input by the source stride. */
516 ADD_L( CONST(16), EDI )
517 ADD_L( EAX, ESI )
518 CMP_L( ECX, EDI )
519 JNE( LLBL(x86_p4_2dr_loop) )
520
521 LLBL(x86_p4_2dr_done):
522
523 POP_L( EBP )
524 POP_L( EBX )
525 POP_L( EDI )
526 POP_L( ESI )
527 RET
528 #undef FRAME_OFFSET
529
530
531
532
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Scales and offsets the first two components of each 4-component vertex;
 * the third and fourth are copied through unchanged:
 *
 *   DST0 = SRC0*MAT0 + SRC3*MAT12
 *   DST1 = SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (copied as a raw 32-bit word)
 *   DST3 = SRC3 (copied as a raw 32-bit word)
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count, sets the destination size
 *          to 4 and writes 16 bytes per vertex. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI, EBX and EBP are saved/restored; EAX, ECX and EDX
 *          are modified.
 */
533 ALIGNTEXT16
534 GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
535 HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
536 GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
537
/* FRAME_OFFSET matches the four 4-byte pushes below. */
538 #define FRAME_OFFSET 16
539 PUSH_L( ESI )
540 PUSH_L( EDI )
541 PUSH_L( EBX )
542 PUSH_L( EBP )
543
544 MOV_L( ARG_SOURCE, ESI )
545 MOV_L( ARG_DEST, EDI )
546
547 MOV_L( ARG_MATRIX, EDX )
548 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
549
/* Nothing to do for an empty source vector. */
550 TEST_L( ECX, ECX )
551 JZ( LLBL(x86_p4_2dnrr_done) )
552
/* EAX = source stride in bytes; mark the destination as 4-component. */
553 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
554 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
555
556 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
557 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
558
/* ECX = count * 16; the descriptor pointers become data pointers. */
559 SHL_L( CONST(4), ECX )
560 MOV_L( REGOFF(V4F_START, ESI), ESI )
561
/* ECX = destination start + count * 16 = end-of-output sentinel. */
562 MOV_L( REGOFF(V4F_START, EDI), EDI )
563 ADD_L( EDI, ECX )
564
565 ALIGNTEXT16
566 LLBL(x86_p4_2dnrr_loop):
567
/* Per-axis scale terms: F4, F5. */
568 FLD_S( SRC0 ) /* F4 */
569 FMUL_S( MAT0 )
570
571 FLD_S( SRC1 ) /* F5 F4 */
572 FMUL_S( MAT5 )
573
/* SRC3 * MAT12, MAT13 terms, added to F4, F5 below. */
574 FLD_S( SRC3 ) /* F0 F5 F4 */
575 FMUL_S( MAT12 )
576 FLD_S( SRC3 ) /* F1 F0 F5 F4 */
577 FMUL_S( MAT13 )
578
579 FXCH( ST(1) ) /* F0 F1 F5 F4 */
580 FADDP( ST0, ST(3) ) /* F1 F5 F4 */
581 FADDP( ST0, ST(1) ) /* F5 F4 */
582
/*
 * Fetch the two pass-through components before any DST store,
 * presumably so an in-place transform still sees the original values.
 */
583 MOV_L( SRC2, EBX )
584 MOV_L( SRC3, EBP )
585
586 FXCH( ST(1) ) /* F4 F5 */
587 FSTP_S( DST0 ) /* F5 */
588 FSTP_S( DST1 ) /* */
589 MOV_L( EBX, DST2 )
590 MOV_L( EBP, DST3 )
591
/* This label is not referenced anywhere in the visible source. */
592 LLBL(x86_p4_2dnrr_skip):
593
/* Output advances by 16 bytes, input by the source stride. */
594 ADD_L( CONST(16), EDI )
595 ADD_L( EAX, ESI )
596 CMP_L( ECX, EDI )
597 JNE( LLBL(x86_p4_2dnrr_loop) )
598
599 LLBL(x86_p4_2dnrr_done):
600
601 POP_L( EBP )
602 POP_L( EBX )
603 POP_L( EDI )
604 POP_L( ESI )
605 RET
606 #undef FRAME_OFFSET
607
608
609
610
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies each 4-component vertex from source to destination as four raw
 * 32-bit words (no FPU involvement):
 *
 *   DST0..DST3 = SRC0..SRC3
 *
 * Inputs:  ARG_SOURCE, ARG_DEST, ARG_MATRIX (macros defined outside this
 *          file; presumably stack arguments located via FRAME_OFFSET --
 *          confirm in xform_args.h).
 * Effects: when the source count is non-zero, ORs VEC_SIZE_4 into the
 *          destination flags, copies the count and sets the destination
 *          size to 4. The per-vertex copy is skipped when the source and
 *          destination start pointers are equal. A zero count leaves the
 *          destination untouched.
 * Registers: ESI, EDI and EBX are saved/restored; EAX, ECX and EDX are
 *          modified.
 *
 * NOTE(review): unlike the other routines in this file, no
 * "#undef FRAME_OFFSET" follows this routine in the visible source.
 */
611 ALIGNTEXT16
612 GLOBL GLNAME( _mesa_x86_transform_points4_identity )
613 HIDDEN(_mesa_x86_transform_points4_identity)
614 GLNAME( _mesa_x86_transform_points4_identity ):
615
/* FRAME_OFFSET matches the three 4-byte pushes below. */
616 #define FRAME_OFFSET 12
617 PUSH_L( ESI )
618 PUSH_L( EDI )
619 PUSH_L( EBX )
620
621 MOV_L( ARG_SOURCE, ESI )
622 MOV_L( ARG_DEST, EDI )
623
/*
 * NOTE(review): the matrix pointer loaded into EDX here is never read;
 * EDX is reused as a copy scratch register inside the loop.
 */
624 MOV_L( ARG_MATRIX, EDX )
625 MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
626
/* Nothing to do for an empty source vector. */
627 TEST_L( ECX, ECX )
628 JZ( LLBL(x86_p4_ir_done) )
629
/* EAX = source stride in bytes; mark the destination as 4-component. */
630 MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
631 OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
632
633 MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
634 MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
635
/* ECX = count * 16; the descriptor pointers become data pointers. */
636 SHL_L( CONST(4), ECX )
637 MOV_L( REGOFF(V4F_START, ESI), ESI )
638
/* ECX = destination start + count * 16 = end-of-output sentinel. */
639 MOV_L( REGOFF(V4F_START, EDI), EDI )
640 ADD_L( EDI, ECX )
641
/* Same start pointer for input and output: skip the copy loop. */
642 CMP_L( ESI, EDI )
643 JE( LLBL(x86_p4_ir_done) )
644
645 ALIGNTEXT16
646 LLBL(x86_p4_ir_loop):
647
/* Copy the vertex two words at a time through EBX and EDX. */
648 MOV_L( SRC0, EBX )
649 MOV_L( SRC1, EDX )
650
651 MOV_L( EBX, DST0 )
652 MOV_L( EDX, DST1 )
653
654 MOV_L( SRC2, EBX )
655 MOV_L( SRC3, EDX )
656
657 MOV_L( EBX, DST2 )
658 MOV_L( EDX, DST3 )
659
/* This label is not referenced anywhere in the visible source. */
660 LLBL(x86_p4_ir_skip):
661
/* Output advances by 16 bytes, input by the source stride. */
662 ADD_L( CONST(16), EDI )
663 ADD_L( EAX, ESI )
664 CMP_L( ECX, EDI )
665 JNE( LLBL(x86_p4_ir_loop) )
666
667 LLBL(x86_p4_ir_done):
668
669 POP_L( EBX )
670 POP_L( EDI )
671 POP_L( ESI )
672 RET
673
/*
 * On Linux ELF targets, emit an empty .note.GNU-stack section; GNU
 * toolchains read this as "this object does not need an executable stack".
 */
674 #if defined (__ELF__) && defined (__linux__)
675 .section .note.GNU-stack,"",%progbits
676 #endif