mesa: fix formatting of messages printed using _mesa_log
[mesa.git] / src / mesa / x86 / x86_xform4.S
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32 #include "assyntax.h"
33 #define MATH_ASM_PTR_SIZE 4
34 #include "math/m_vector_asm.h"
35 #include "xform_args.h"
36
SEG_TEXT

/*
 * Integer constants.  1065353216 is 0x3F800000, the IEEE-754 single
 * precision bit pattern of 1.0f; 0 is the bit pattern of +0.0f.
 */
#define FP_ONE      1065353216
#define FP_ZERO     0

/*
 * Operand shorthands used by every routine below.
 *
 *   SRCn  - n-th 32-bit component of the current source vertex  (ESI based)
 *   DSTn  - n-th 32-bit component of the current output vertex  (EDI based)
 *   MATn  - n-th 32-bit element of the matrix                   (EDX based)
 *
 * Each index n maps to byte offset 4*n from the base register.
 */
#define SRC0        REGOFF(0, ESI)
#define SRC1        REGOFF(4, ESI)
#define SRC2        REGOFF(8, ESI)
#define SRC3        REGOFF(12, ESI)

#define DST0        REGOFF(0, EDI)
#define DST1        REGOFF(4, EDI)
#define DST2        REGOFF(8, EDI)
#define DST3        REGOFF(12, EDI)

#define MAT0        REGOFF(0, EDX)
#define MAT1        REGOFF(4, EDX)
#define MAT2        REGOFF(8, EDX)
#define MAT3        REGOFF(12, EDX)
#define MAT4        REGOFF(16, EDX)
#define MAT5        REGOFF(20, EDX)
#define MAT6        REGOFF(24, EDX)
#define MAT7        REGOFF(28, EDX)
#define MAT8        REGOFF(32, EDX)
#define MAT9        REGOFF(36, EDX)
#define MAT10       REGOFF(40, EDX)
#define MAT11       REGOFF(44, EDX)
#define MAT12       REGOFF(48, EDX)
#define MAT13       REGOFF(52, EDX)
#define MAT14       REGOFF(56, EDX)
#define MAT15       REGOFF(60, EDX)

66
67
/*
 * _mesa_x86_transform_points4_general
 *
 * Transforms every 4-component source vertex (x, y, z, w) by the full
 * 16-element matrix addressed through EDX:
 *
 *     dst[j] = x*m[j] + y*m[4+j] + z*m[8+j] + w*m[12+j]        (j = 0..3)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EDX  matrix base
 *
 * ESI and EDI are saved and restored; EAX, ECX and EDX are overwritten.
 * The loop needs eight free x87 stack slots and leaves the x87 stack at
 * the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.  F4..F7 are the
 * running sums for dst 0..3; F0..F3 are the products being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_general)
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME(_mesa_x86_transform_points4_general):

/* Bytes pushed by the prologue below (2 registers * 4). */
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_gr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

    /* x * m[0..3] starts the four accumulators. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )
    FLD_S( SRC0 )                       /* F7 F6 F5 F4 */
    FMUL_S( MAT3 )

    /* y * m[4..7] */
    FLD_S( SRC1 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT6 )
    FLD_S( SRC1 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT7 )

    /* Fold each product into its accumulator, popping as we go. */
    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* z * m[8..11] */
    FLD_S( SRC2 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT10 )
    FLD_S( SRC2 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT11 )

    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* w * m[12..15] */
    FLD_S( SRC3 )                       /* F0 F7 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT14 )
    FLD_S( SRC3 )                       /* F3 F2 F1 F0 F7 F6 F5 F4 */
    FMUL_S( MAT15 )

    FXCH( ST(3) )                       /* F0 F2 F1 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(7) )                 /* F2 F1 F3 F7 F6 F5 F4 */
    FXCH( ST(1) )                       /* F1 F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F2 F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F3 F7 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F7 F6 F5 F4 */

    /* All source reads are finished; write the four sums out in order. */
    FXCH( ST(3) )                       /* F4 F6 F5 F7 */
    FSTP_S( DST0 )                      /* F6 F5 F7 */
    FXCH( ST(1) )                       /* F5 F6 F7 */
    FSTP_S( DST1 )                      /* F6 F7 */
    FSTP_S( DST2 )                      /* F7 */
    FSTP_S( DST3 )                      /* (empty) */

/* Not the target of any jump in this routine. */
LLBL(x86_p4_gr_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
178
179
180
181
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transforms every 4-component source vertex (x, y, z, w) using only the
 * matrix elements read below:
 *
 *     dst0 = x*m[0]  + z*m[8]
 *     dst1 = y*m[5]  + z*m[9]
 *     dst2 = z*m[10] + w*m[14]
 *     dst3 = -z        (sign bit of the raw 32-bit value is flipped)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EDX  matrix base
 *   EBX  integer scratch carrying the negated z
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  At most five x87 stack slots are in use at once and the
 * x87 stack is left at the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.  F4, F5, F6 are the
 * running sums for dst 0..2; F0..F2 are the products being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_perspective)
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME(_mesa_x86_transform_points4_perspective):

/* Bytes pushed by the prologue below (3 registers * 4). */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_pr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    /* z contributes to all three computed outputs. */
    FLD_S( SRC2 )                       /* F0 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F6 F1 F0 F5 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F0 F1 F6 F5 F4 */
    FADDP( ST0, ST(4) )                 /* F1 F6 F5 F4 */
    FADDP( ST0, ST(2) )                 /* F6 F5 F4 */

    FLD_S( SRC3 )                       /* F2 F6 F5 F4 */
    FMUL_S( MAT14 )

    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* dst3 = -z, done in the integer unit; read before any store. */
    MOV_L( SRC2, EBX )
    XOR_L( CONST(-2147483648), EBX )    /* flip bit 31: change sign */

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* (empty) */
    MOV_L( EBX, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_pr_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
261
262
263
264
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transforms every 4-component source vertex (x, y, z, w) by the first
 * three elements of each group of four in the matrix; w passes through:
 *
 *     dst[j] = x*m[j] + y*m[4+j] + z*m[8+j] + w*m[12+j]        (j = 0..2)
 *     dst3   = w       (copied as a raw 32-bit value)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EDX  matrix base
 *   EBX  integer scratch carrying w
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  At most six x87 stack slots are in use at once and the
 * x87 stack is left at the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.  F4, F5, F6 are the
 * running sums for dst 0..2; F0..F2 are the products being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d)
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME(_mesa_x86_transform_points4_3d):

/* Bytes pushed by the prologue below (3 registers * 4). */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_3dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

    /* x * m[0..2] starts the three accumulators. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )
    FLD_S( SRC0 )                       /* F6 F5 F4 */
    FMUL_S( MAT2 )

    /* y * m[4..6] */
    FLD_S( SRC1 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT5 )
    FLD_S( SRC1 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT6 )

    /* Fold each product into its accumulator, popping as we go. */
    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* z * m[8..10] */
    FLD_S( SRC2 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT8 )
    FLD_S( SRC2 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT9 )
    FLD_S( SRC2 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT10 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* w * m[12..14] */
    FLD_S( SRC3 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT14 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* Fetch w before any store to the output vertex. */
    MOV_L( SRC3, EBX )

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* (empty) */
    MOV_L( EBX, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_3dr_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
364
365
366
367
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Per-axis scale plus w-weighted offset for every 4-component source
 * vertex (x, y, z, w); w itself passes through:
 *
 *     dst0 = x*m[0]  + w*m[12]
 *     dst1 = y*m[5]  + w*m[13]
 *     dst2 = z*m[10] + w*m[14]
 *     dst3 = w         (copied as a raw 32-bit value)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * When the source count is non-zero the destination header receives
 * flags |= VEC_SIZE_4, count = source count and size = 4.
 *
 * Loop registers: ESI = source vertex (stepped by stride in EAX),
 * EDI = output vertex (stepped by 16), ECX = end of output,
 * EDX = matrix base, EBX = scratch for w.
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.  At most six x87 stack slots are in use at once and the
 * x87 stack is left at the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

/* Bytes pushed by the prologue below (3 registers * 4). */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    /* Argument fetches are independent of one another. */
    MOV_L( ARG_DEST, EDI )
    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_MATRIX, EDX )

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_3dnrr_done) )       /* empty input: leave dest alone */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* From here on ESI/EDI address vertex data, not the headers. */
    MOV_L( REGOFF(V4F_START, ESI), ESI )
    MOV_L( REGOFF(V4F_START, EDI), EDI )

    SHL_L( CONST(4), ECX )              /* count * 16 output bytes */
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

    /* Diagonal terms seed the three sums. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    FLD_S( SRC2 )                       /* F6 F5 F4 */
    FMUL_S( MAT10 )

    /* w-weighted offsets. */
    FLD_S( SRC3 )                       /* F0 F6 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F6 F5 F4 */
    FMUL_S( MAT13 )
    FLD_S( SRC3 )                       /* F2 F1 F0 F6 F5 F4 */
    FMUL_S( MAT14 )

    FXCH( ST(2) )                       /* F0 F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(5) )                 /* F1 F2 F6 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F2 F6 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F6 F5 F4 */

    /* Pick up w while the source vertex is still unmodified. */
    MOV_L( SRC3, EBX )

    FXCH( ST(2) )                       /* F4 F5 F6 */
    FSTP_S( DST0 )                      /* F5 F6 */
    FSTP_S( DST1 )                      /* F6 */
    FSTP_S( DST2 )                      /* (empty) */
    MOV_L( EBX, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_3dnrr_skip):

    ADD_L( EAX, ESI )                   /* source += stride */
    ADD_L( CONST(16), EDI )             /* output += 16 */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
445
446
447
448
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transforms the first two components of every 4-component source
 * vertex (x, y, z, w); z and w pass through:
 *
 *     dst0 = x*m[0] + y*m[4] + w*m[12]
 *     dst1 = x*m[1] + y*m[5] + w*m[13]
 *     dst2 = z         (copied as a raw 32-bit value)
 *     dst3 = w         (copied as a raw 32-bit value)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EDX  matrix base
 *   EBX  integer scratch carrying z
 *   EBP  integer scratch carrying w (used as a plain register here)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * overwritten.  At most four x87 stack slots are in use at once and the
 * x87 stack is left at the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.  F4 and F5 are the
 * running sums for dst 0..1; F0 and F1 are the products being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d)
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME(_mesa_x86_transform_points4_2d):

/* Bytes pushed by the prologue below (4 registers * 4). */
#define FRAME_OFFSET 16
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )
    PUSH_L( EBP )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_2dr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

    /* x * m[0..1] starts the two accumulators. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )
    FLD_S( SRC0 )                       /* F5 F4 */
    FMUL_S( MAT1 )

    /* y * m[4..5] */
    FLD_S( SRC1 )                       /* F0 F5 F4 */
    FMUL_S( MAT4 )
    FLD_S( SRC1 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT5 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* w * m[12..13] */
    FLD_S( SRC3 )                       /* F0 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT13 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* Fetch z and w before any store to the output vertex. */
    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EBP )

    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* (empty) */
    MOV_L( EBX, DST2 )
    MOV_L( EBP, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_2dr_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

    POP_L( EBP )
    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
531
532
533
534
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Per-axis scale plus w-weighted offset on the first two components of
 * every 4-component source vertex (x, y, z, w); z and w pass through:
 *
 *     dst0 = x*m[0] + w*m[12]
 *     dst1 = y*m[5] + w*m[13]
 *     dst2 = z         (copied as a raw 32-bit value)
 *     dst3 = w         (copied as a raw 32-bit value)
 *
 * Inputs are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (presumably stack slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EDX  matrix base
 *   EBX  integer scratch carrying z
 *   EBP  integer scratch carrying w (used as a plain register here)
 *
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are
 * overwritten.  At most four x87 stack slots are in use at once and the
 * x87 stack is left at the depth it had on entry.
 *
 * Stack diagrams in the comments list ST(0) first.  F4 and F5 are the
 * running sums for dst 0..1; F0 and F1 are the products being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_2d_no_rot)
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME(_mesa_x86_transform_points4_2d_no_rot):

/* Bytes pushed by the prologue below (4 registers * 4). */
#define FRAME_OFFSET 16
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )
    PUSH_L( EBP )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    MOV_L( ARG_MATRIX, EDX )
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_2dnrr_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

    /* Diagonal terms seed the two sums. */
    FLD_S( SRC0 )                       /* F4 */
    FMUL_S( MAT0 )

    FLD_S( SRC1 )                       /* F5 F4 */
    FMUL_S( MAT5 )

    /* w * m[12..13] */
    FLD_S( SRC3 )                       /* F0 F5 F4 */
    FMUL_S( MAT12 )
    FLD_S( SRC3 )                       /* F1 F0 F5 F4 */
    FMUL_S( MAT13 )

    FXCH( ST(1) )                       /* F0 F1 F5 F4 */
    FADDP( ST0, ST(3) )                 /* F1 F5 F4 */
    FADDP( ST0, ST(1) )                 /* F5 F4 */

    /* Fetch z and w before any store to the output vertex. */
    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EBP )

    FXCH( ST(1) )                       /* F4 F5 */
    FSTP_S( DST0 )                      /* F5 */
    FSTP_S( DST1 )                      /* (empty) */
    MOV_L( EBX, DST2 )
    MOV_L( EBP, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_2dnrr_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

    POP_L( EBP )
    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
609
610
611
612
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies every 4-component source vertex to the output unchanged, as
 * four raw 32-bit moves per vertex.  No floating-point instructions are
 * used and the matrix is never read.
 *
 * Inputs are fetched through ARG_SOURCE and ARG_DEST (presumably stack
 * slots located via FRAME_OFFSET - see xform_args.h).
 *
 * Effects on the destination vector header (skipped when count is 0):
 *   - flags |= VEC_SIZE_4
 *   - count  = source count
 *   - size   = 4
 *
 * The header is updated first; the copy loop is then skipped entirely
 * when the source and output data pointers are equal.
 *
 * Register roles inside the loop:
 *   ESI  current source vertex, advanced by the source stride (EAX)
 *   EDI  current output vertex, advanced by 16 bytes
 *   ECX  end-of-output address (output start + count * 16)
 *   EBX, EDX  copy scratch
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are
 * overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_identity)
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME(_mesa_x86_transform_points4_identity):

/* Bytes pushed by the prologue below (3 registers * 4). */
#define FRAME_OFFSET 12
    PUSH_L( ESI )
    PUSH_L( EDI )
    PUSH_L( EBX )

    MOV_L( ARG_SOURCE, ESI )
    MOV_L( ARG_DEST, EDI )

    /* The matrix argument is deliberately not loaded: EDX serves only */
    /* as a scratch register in the copy loop below.                   */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

    /* Nothing to do for an empty source vector. */
    TEST_L( ECX, ECX )
    JZ( LLBL(x86_p4_ir_done) )

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

    /* ECX = count * 16 = number of output bytes. */
    SHL_L( CONST(4), ECX )
    MOV_L( REGOFF(V4F_START, ESI), ESI )

    /* Switch both pointers from the vector headers to the vertex data. */
    MOV_L( REGOFF(V4F_START, EDI), EDI )
    ADD_L( EDI, ECX )                   /* ECX = one past last output */

    /* Source and output data coincide: the copy would be a no-op. */
    CMP_L( ESI, EDI )
    JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

    /* Components 0 and 1. */
    MOV_L( SRC0, EBX )
    MOV_L( SRC1, EDX )

    MOV_L( EBX, DST0 )
    MOV_L( EDX, DST1 )

    /* Components 2 and 3. */
    MOV_L( SRC2, EBX )
    MOV_L( SRC3, EDX )

    MOV_L( EBX, DST2 )
    MOV_L( EDX, DST3 )

/* Not the target of any jump in this routine. */
LLBL(x86_p4_ir_skip):

    ADD_L( CONST(16), EDI )             /* next output vertex */
    ADD_L( EAX, ESI )                   /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

    POP_L( EBX )
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
675
676 #if defined (__ELF__) && defined (__linux__)
677 .section .note.GNU-stack,"",%progbits
678 #endif