iris: propagate error from gen_perf_begin_query to glBeginPerfQueryINTEL
[mesa.git] / src / mesa / x86 / 3dnow_xform3.S
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 #ifdef USE_3DNOW_ASM
27 #include "assyntax.h"
28 #define MATH_ASM_PTR_SIZE 4
29 #include "math/m_vector_asm.h"
30 #include "xform_args.h"
31
32 SEG_TEXT
33
34 #define FRAME_OFFSET 4
35
36
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Transforms every 3-component source vertex (x0, x1, x2) by the full
 * matrix and writes four output components per vertex.  The m12..m15
 * terms are added unscaled:
 *
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).  They are read
 * while only ESI has been pushed -- presumably what FRAME_OFFSET 4
 * accounts for; confirm against xform_args.h.
 *
 * Side effects on the destination vector: its V4F_SIZE field is set
 * to 4, VEC_SIZE_4 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from
 * the source vector.
 *
 * Register roles inside the loop:
 *   EAX  source read pointer, advanced by the source stride (EDI)
 *   EDX  dest write pointer, advanced by 16 bytes per vertex
 *   ECX  matrix base, indexed by byte offset (0..56)
 *   ESI  vertices remaining
 * ESI and EDI are saved on entry and restored before RET.
 */
37 ALIGNTEXT16
38 GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
39 HIDDEN(_mesa_3dnow_transform_points3_general)
40 GLNAME( _mesa_3dnow_transform_points3_general ):
41 _CET_ENDBR
42 PUSH_L ( ESI )
43
44 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
45 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
46 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
47 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
48 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
49 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
50 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
51
52 PUSH_L ( EDI )
53
54 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
55 MOV_L ( ESI, ECX ) /* ECX = matrix base */
56 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
57 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
58 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
59
60 TEST_L ( ESI, ESI )
61 JZ ( LLBL( G3TPGR_2 ) ) /* count == 0 -> skip the loop */
62
63 PREFETCHW ( REGIND(EDX) )
64
65 ALIGNTEXT16
66 LLBL( G3TPGR_1 ):
67
68 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
69
70 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
71 MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
72
73 ADD_L ( EDI, EAX ) /* next vertex */
74 PREFETCH ( REGIND(EAX) )
75
76 MOVQ ( MM0, MM1 ) /* x1 | x0 */
77 PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
78
79 PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
80 MOVQ ( MM2, MM5 ) /* x2 | x2 */
81
82 PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
83 PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */
84
85 MOVQ ( MM0, MM3 ) /* x0 | x0 */
86 PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
87
88 MOVQ ( MM1, MM4 ) /* x1 | x1 */
89 PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
90
91 PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
92 PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
93
94 PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
95 PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
96
97 PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
98 PFADD ( MM1, MM2 ) /* r1 | r0 */
99
100 PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
101 ADD_L ( CONST(16), EDX ) /* next output vertex */
102
103 PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
104 MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */
105
106 PFADD ( MM4, MM5 ) /* r3 | r2 */
107 MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
108
109 DEC_L ( ESI ) /* decrement vertex counter */
110 JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
111
112 LLBL( G3TPGR_2 ):
113
114 FEMMS /* leave MMX/3DNow! state before returning */
115 POP_L ( EDI )
116 POP_L ( ESI )
117 RET
118
119
120
121
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms every 3-component source vertex (x0, x1, x2) using only the
 * matrix entries read below (byte offsets 0, 20, 32, 36, 40 and 56, named
 * m00, m11, m20, m21, m22 and m32 in the comments):
 *
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + m32
 *   r3 = -x2
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 4,
 * VEC_SIZE_4 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX       source read pointer, advanced by the source stride (EDI)
 *   EDX       dest write pointer, advanced by 16 bytes per vertex
 *   ESI       vertices remaining
 *   MM0..MM3  loop-invariant matrix entries loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET.
 */
122 ALIGNTEXT16
123 GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
124 HIDDEN(_mesa_3dnow_transform_points3_perspective)
125 GLNAME( _mesa_3dnow_transform_points3_perspective ):
126 _CET_ENDBR
127 PUSH_L ( ESI )
128
129 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
130 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
131 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
132 MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
133 OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_4 */
134 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
135 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
136
137 PUSH_L ( EDI )
138
139 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
140 MOV_L ( ESI, ECX ) /* ECX = matrix base */
141 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
142 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
143 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
144
145 TEST_L ( ESI, ESI )
146 JZ ( LLBL( G3TPPR_2 ) ) /* count == 0 -> skip the loop */
147
148 PREFETCH ( REGIND(EAX) )
149 PREFETCHW ( REGIND(EDX) )
150
151 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
152 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
153
154 MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
155 MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
156
157 MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
158
159 ALIGNTEXT16
160 LLBL( G3TPPR_1 ):
161
162 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
163
164 MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
165 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
166
167 ADD_L ( EDI, EAX ) /* next vertex */
168 PREFETCH ( REGIND(EAX) )
169
170 PXOR ( MM7, MM7 ) /* 0 | 0 */
171 MOVQ ( MM5, MM6 ) /* | x2 */
172
173 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
174 PFSUB ( MM5, MM7 ) /* | -x2 */
175
176 PFMUL ( MM2, MM6 ) /* | x2*m22 */
177 PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
178
179 ADD_L ( CONST(16), EDX ) /* next r */
180 PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */
181
182 PFADD ( MM3, MM6 ) /* | x2*m22+m32 */
183 PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
184
185 MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
186 MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */
187
188 MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 (= -x2) */
189
190 DEC_L ( ESI ) /* decrement vertex counter */
191 JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
192
193 LLBL( G3TPPR_2 ):
194
195 FEMMS /* leave MMX/3DNow! state before returning */
196 POP_L ( EDI )
197 POP_L ( ESI )
198 RET
199
200
201
202
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms every 3-component source vertex (x0, x1, x2) and writes
 * three output components per vertex:
 *
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * Only the first 12 bytes of each 16-byte destination slot are stored;
 * the fourth component is left untouched.
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 3,
 * VEC_SIZE_3 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX  source read pointer, advanced by the source stride (EDI)
 *   EDX  dest write pointer, advanced by 16 bytes per vertex
 *   ECX  matrix base, indexed by byte offset
 *   ESI  vertices remaining
 *   MM7  loop-invariant pair m6 | m2
 * ESI and EDI are saved on entry and restored before RET.
 */
203 ALIGNTEXT16
204 GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
205 HIDDEN(_mesa_3dnow_transform_points3_3d)
206 GLNAME( _mesa_3dnow_transform_points3_3d ):
207 _CET_ENDBR
208 PUSH_L ( ESI )
209
210 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
211 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
212 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
213 MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
214 OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_3 */
215 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
216 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
217
218 PUSH_L ( EDI )
219
220 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
221 MOV_L ( ESI, ECX ) /* ECX = matrix base */
222 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
223 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
224 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
225
226 TEST_L ( ESI, ESI )
227 JZ ( LLBL( G3TP3R_2 ) ) /* count == 0 -> skip the loop */
228
229 PREFETCH ( REGIND(EAX) )
230 PREFETCHW ( REGIND(EDX) ) /* dest is written below, so hint write intent */
231
232 MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */
233 PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */
234
235
236 ALIGNTEXT16
237 LLBL( G3TP3R_1 ):
238
239 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
240
241 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
242 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
243
244 ADD_L ( EDI, EAX ) /* next vertex */
245 PREFETCH ( REGIND(EAX) )
246
247 MOVQ ( MM0, MM2 ) /* x1 | x0 */
248 ADD_L ( CONST(16), EDX ) /* next r */
249
250 PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
251 MOVQ ( MM0, MM3 ) /* x1 | x0 */
252
253 PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
254 PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */
255
256 MOVQ ( MM1, MM4 ) /* | x2 */
257 PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */
258
259 PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
260 PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
261
262 PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
263 PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */
264
265 PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
266 PFADD ( MM4, MM3 ) /* r1 | r0 */
267
268 PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */
269 PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */
270
271 PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */
272
273 MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
274 PFACC ( MM1, MM1 ) /* | r2 */
275
276 MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */
277
278 DEC_L ( ESI ) /* decrement vertex counter */
279 JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
280
281 LLBL( G3TP3R_2 ):
282
283 FEMMS /* leave MMX/3DNow! state before returning */
284 POP_L ( EDI )
285 POP_L ( ESI )
286 RET
287
288
289
290
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transforms every 3-component source vertex (x0, x1, x2) using only the
 * matrix entries read below (byte offsets 0, 20, 40, 48, 52 and 56, named
 * m00, m11, m22, m30, m31 and m32 in the comments):
 *
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *   r2 = x2*m22 + m32
 *
 * Only the first 12 bytes of each 16-byte destination slot are stored;
 * the fourth component is left untouched.
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 3,
 * VEC_SIZE_3 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX       source read pointer, advanced by the source stride (EDI)
 *   EDX       dest write pointer, advanced by 16 bytes per vertex
 *   ESI       vertices remaining
 *   MM0..MM3  loop-invariant matrix entries loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET.
 */
291 ALIGNTEXT16
292 GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
293 HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
294 GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
295 _CET_ENDBR
296 PUSH_L ( ESI )
297
298 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
299 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
300 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
301 MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
302 OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_3 */
303 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
304 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
305
306 PUSH_L ( EDI )
307
308 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
309 MOV_L ( ESI, ECX ) /* ECX = matrix base */
310 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
311 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
312 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
313
314 TEST_L ( ESI, ESI )
315 JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0 -> skip the loop */
316
317 PREFETCH ( REGIND(EAX) )
318 PREFETCHW ( REGIND(EDX) )
319
320 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
321 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
322
323 MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
324 PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */
325
326 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
327 MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
328
329 PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */
330
331
332 ALIGNTEXT16
333 LLBL( G3TP3NRR_1 ):
334
335 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
336
337 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
338 MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
339
340 ADD_L ( EDI, EAX ) /* next vertex */
341 PREFETCH ( REGIND(EAX) ) /* source is only read, so no write-intent hint */
342
343 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
344
345 PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
346 PFMUL ( MM2, MM5 ) /* | x2*m22 */
347
348 PFADD ( MM3, MM5 ) /* | x2*m22+m32 */
349 MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */
350
351 ADD_L ( CONST(16), EDX ) /* next r */
352 DEC_L ( ESI ) /* decrement vertex counter */
353
354 MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (MMX store between DEC and JNZ leaves EFLAGS alone) */
355 JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
356
357 LLBL( G3TP3NRR_2 ):
358
359 FEMMS /* leave MMX/3DNow! state before returning */
360 POP_L ( EDI )
361 POP_L ( ESI )
362 RET
363
364
365
366
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transforms the first two components of every source vertex
 * (x0, x1, x2) and copies the third through unchanged.  Matrix entries
 * are read from byte offsets 0, 4, 16, 20, 48 and 52 (named m00, m01,
 * m10, m11, m30 and m31 in the comments):
 *
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x2
 *
 * Only the first 12 bytes of each 16-byte destination slot are stored;
 * the fourth component is left untouched.
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 3,
 * VEC_SIZE_3 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX       source read pointer, advanced by the source stride (EDI)
 *   EDX       dest write pointer, advanced by 16 bytes per vertex
 *   ESI       vertices remaining
 *   MM0..MM2  loop-invariant matrix entries loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET.
 */
367 ALIGNTEXT16
368 GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
369 HIDDEN(_mesa_3dnow_transform_points3_2d)
370 GLNAME( _mesa_3dnow_transform_points3_2d ):
371 _CET_ENDBR
372 PUSH_L ( ESI )
373
374 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
375 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
376 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
377 MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
378 OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_3 */
379 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
380 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
381
382 PUSH_L ( EDI )
383
384 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
385 MOV_L ( ESI, ECX ) /* ECX = matrix base */
386 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
387 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
388 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
389
390 TEST_L ( ESI, ESI )
391 JZ ( LLBL( G3TP2R_3) ) /* count == 0 -> skip the loop */
392
393 PREFETCH ( REGIND(EAX) )
394 PREFETCHW ( REGIND(EDX) )
395
396 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
397 PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
398
399 MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
400 PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
401
402 MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
403
404 ALIGNTEXT16
405 LLBL( G3TP2R_2 ):
406
407 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
408
409 MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
410 MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
411
412 ADD_L ( EDI, EAX ) /* next vertex */
413 PREFETCH ( REGIND(EAX) )
414
415 MOVQ ( MM3, MM4 ) /* x1 | x0 */
416 PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
417
418 ADD_L ( CONST(16), EDX ) /* next r */
419 PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
420
421 PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
422 MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
423
424 PFADD ( MM2, MM3 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
425 MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
426
427 DEC_L ( ESI ) /* decrement vertex counter */
428 JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
429
430 LLBL( G3TP2R_3 ):
431
432 FEMMS /* leave MMX/3DNow! state before returning */
433 POP_L ( EDI )
434 POP_L ( ESI )
435 RET
436
437
438
439
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scales and offsets the first two components of every source vertex
 * (x0, x1, x2) and copies the third through unchanged.  Matrix entries
 * are read from byte offsets 0, 20, 48 and 52 (named m00, m11, m30 and
 * m31 in the comments):
 *
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *   r2 = x2
 *
 * Only the first 12 bytes of each 16-byte destination slot are stored;
 * the fourth component is left untouched.
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 3,
 * VEC_SIZE_3 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX       source read pointer, advanced by the source stride (EDI)
 *   EDX       dest write pointer, advanced by 16 bytes per vertex
 *   ESI       vertices remaining
 *   MM0, MM1  loop-invariant matrix entries loaded once before the loop
 * ESI and EDI are saved on entry and restored before RET.
 */
440 ALIGNTEXT16
441 GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
442 HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
443 GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
444 _CET_ENDBR
445 PUSH_L ( ESI )
446
447 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
448 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
449 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
450 MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
451 OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_3 */
452 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
453 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
454
455 PUSH_L ( EDI )
456
457 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
458 MOV_L ( ESI, ECX ) /* ECX = matrix base */
459 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
460 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
461 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
462
463 TEST_L ( ESI, ESI )
464 JZ ( LLBL( G3TP2NRR_2 ) ) /* count == 0 -> skip the loop */
465
466 PREFETCH ( REGIND(EAX) )
467 PREFETCHW ( REGIND(EDX) )
468
469 MOVD ( REGIND(ECX), MM0 ) /* | m00 */
470 PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
471
472 MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
473
474
475 ALIGNTEXT16
476 LLBL( G3TP2NRR_1 ):
477
478 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
479
480 MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
481 MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
482
483 ADD_L ( EDI, EAX ) /* next vertex */
484 PREFETCH ( REGIND(EAX) )
485
486 PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
487 ADD_L ( CONST(16), EDX ) /* next r */
488
489 PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
490
491 MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
492 MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
493
494 DEC_L ( ESI ) /* decrement vertex counter */
495 JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
496
497 LLBL( G3TP2NRR_2 ):
498
499 FEMMS /* leave MMX/3DNow! state before returning */
500 POP_L ( EDI )
501 POP_L ( ESI )
502 RET
503
504
505
506
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies the three components (x0, x1, x2) of every source vertex to the
 * destination without any arithmetic:
 *
 *   r0 = x0,  r1 = x1,  r2 = x2
 *
 * Only the first 12 bytes of each 16-byte destination slot are stored;
 * the fourth component is left untouched.
 *
 * Arguments are fetched through ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * (defined in xform_args.h, which is not visible here).
 *
 * Side effects on the destination vector: V4F_SIZE is set to 3,
 * VEC_SIZE_3 is OR-ed into V4F_FLAGS and V4F_COUNT is copied from the
 * source vector.
 *
 * Register roles inside the loop:
 *   EAX  source read pointer, advanced by the source stride (EDI)
 *   EDX  dest write pointer, advanced by 16 bytes per vertex
 *   ESI  vertices remaining
 * ESI and EDI are saved on entry and restored before RET.
 *
 * NOTE(review): the matrix pointer is loaded into ESI and moved to ECX
 * like in the sibling routines, but it is never dereferenced here.
 */
507 ALIGNTEXT16
508 GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
509 HIDDEN(_mesa_3dnow_transform_points3_identity)
510 GLNAME( _mesa_3dnow_transform_points3_identity ):
511 _CET_ENDBR
512 PUSH_L ( ESI )
513
514 MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
515 MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (unused by this routine) */
516 MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
517 MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
518 OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* dest flags |= VEC_SIZE_3 */
519 MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
520 MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
521
522 PUSH_L ( EDI )
523
524 MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest write pointer */
525 MOV_L ( ESI, ECX ) /* ECX = matrix (never read afterwards) */
526 MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
527 MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride (bytes) */
528 MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source read pointer */
529
530 TEST_L ( ESI, ESI )
531 JZ ( LLBL( G3TPIR_2 ) ) /* count == 0 -> skip the loop */
532
533 PREFETCHW ( REGIND(EDX) )
534
535 ALIGNTEXT16
536 LLBL( G3TPIR_1 ):
537
538 PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
539
540 MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
541 MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
542
543 ADD_L ( EDI, EAX ) /* next vertex */
544 ADD_L ( CONST(16), EDX ) /* next r */
545
546 DEC_L ( ESI ) /* decrement vertex counter */
547 MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
548
549 MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 (MMX stores after DEC leave EFLAGS alone) */
550 JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
551
552 LLBL( G3TPIR_2 ):
553
554 FEMMS /* leave MMX/3DNow! state before returning */
555 POP_L ( EDI )
556 POP_L ( ESI )
557 RET
558 #endif
559
560 #if defined (__ELF__) && defined (__linux__)
561 .section .note.GNU-stack,"",%progbits
562 #endif