368fdd98c1057739f9c316a36ccdce8744a68646
[mesa.git] / src / mesa / sparc / xform.S
1 /* $Id: xform.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
2
3 /* TODO
4 *
5 * 1) It would be nice if load/store double could be used
6 * at least for the matrix parts. I think for the matrices
7 * it is safe, but for the vertices it probably is not due to
8 * things like glInterleavedArrays etc.
9 *
10 * UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
11 *
12 * 2) One extremely slick trick would be if we could enclose
13 * groups of xform calls on the same vertices such that
14 * we just load the matrix into f16-->f31 before the calls
15 * and then we would not have to do them here. This may be
16 * tricky and not much of a gain though.
17 */
18
/*
 * Field offsets into the vector descriptors that the routines below receive
 * in %o0 and %o2, plus LDPTR, the load used for their pointer fields.
 * In the __sparc_v9__ layout DATA and START occupy 8-byte slots (LDPTR is
 * ldx); otherwise they occupy 4-byte slots (LDPTR is ld).
 * NOTE(review): these offsets presumably mirror a C structure declared
 * elsewhere -- confirm they stay in sync with it.
 */
19 #ifdef __sparc_v9__
20 #define LDPTR ldx
21 #define V4F_DATA 0x00
22 #define V4F_START 0x08
23 #define V4F_COUNT 0x10
24 #define V4F_STRIDE 0x14
25 #define V4F_SIZE 0x18
26 #define V4F_FLAGS 0x1c
27 #else
28 #define LDPTR ld
29 #define V4F_DATA 0x00
30 #define V4F_START 0x04
31 #define V4F_COUNT 0x08
32 #define V4F_STRIDE 0x0c
33 #define V4F_SIZE 0x10
34 #define V4F_FLAGS 0x14
35 #endif
36
/* Bit masks OR'ed into the V4F_FLAGS word by the __set_v4f_N helpers. */
37 #define VEC_SIZE_1 1
38 #define VEC_SIZE_2 3
39 #define VEC_SIZE_3 7
40 #define VEC_SIZE_4 15
41
42 .text
43 .align 64
44
/*
 * __set_v4f_1: common exit reached with a plain `ba` from the transform
 * routines, so the retl here returns to the transform routine's caller.
 * In:    %o0 = destination vector descriptor.
 * Does:  stores 1 to [%o0 + V4F_SIZE] and ORs VEC_SIZE_1 into
 *        [%o0 + V4F_FLAGS] (the flags store sits in the retl delay slot).
 * Clobb: %g1, %g2.
 */
45 __set_v4f_1:
46 ld [%o0 + V4F_FLAGS], %g2
47 mov 1, %g1
48 st %g1, [%o0 + V4F_SIZE]
49 or %g2, VEC_SIZE_1, %g2
50 retl
51 st %g2, [%o0 + V4F_FLAGS]
/*
 * __set_v4f_2: common exit reached with a plain `ba` from the transform
 * routines, so the retl here returns to the transform routine's caller.
 * In:    %o0 = destination vector descriptor.
 * Does:  stores 2 to [%o0 + V4F_SIZE] and ORs VEC_SIZE_2 into
 *        [%o0 + V4F_FLAGS] (the flags store sits in the retl delay slot).
 * Clobb: %g1, %g2.
 */
52 __set_v4f_2:
53 ld [%o0 + V4F_FLAGS], %g2
54 mov 2, %g1
55 st %g1, [%o0 + V4F_SIZE]
56 or %g2, VEC_SIZE_2, %g2
57 retl
58 st %g2, [%o0 + V4F_FLAGS]
/*
 * __set_v4f_3: common exit reached with a plain `ba` from the transform
 * routines, so the retl here returns to the transform routine's caller.
 * In:    %o0 = destination vector descriptor.
 * Does:  stores 3 to [%o0 + V4F_SIZE] and ORs VEC_SIZE_3 into
 *        [%o0 + V4F_FLAGS] (the flags store sits in the retl delay slot).
 * Clobb: %g1, %g2.
 */
59 __set_v4f_3:
60 ld [%o0 + V4F_FLAGS], %g2
61 mov 3, %g1
62 st %g1, [%o0 + V4F_SIZE]
63 or %g2, VEC_SIZE_3, %g2
64 retl
65 st %g2, [%o0 + V4F_FLAGS]
/*
 * __set_v4f_4: common exit reached with a plain `ba` from the transform
 * routines, so the retl here returns to the transform routine's caller.
 * In:    %o0 = destination vector descriptor.
 * Does:  stores 4 to [%o0 + V4F_SIZE] and ORs VEC_SIZE_4 into
 *        [%o0 + V4F_FLAGS] (the flags store sits in the retl delay slot).
 * Clobb: %g1, %g2.
 */
66 __set_v4f_4:
67 ld [%o0 + V4F_FLAGS], %g2
68 mov 4, %g1
69 st %g1, [%o0 + V4F_SIZE]
70 or %g2, VEC_SIZE_4, %g2
71 retl
72 st %g2, [%o0 + V4F_FLAGS]
73
74 #include "sparc_matrix.h"
75
76 /* First the raw versions. */
77
/*
 * _mesa_sparc_transform_points1_general
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read (the source pointer advances
 * by the source stride) and four floats are written to a 16-byte output
 * slot:  x*M0+M12, x*M1+M13, x*M2+M14, x*M3+M15.
 * The source count is copied to the destination; size and flags are set
 * by the final branch to __set_v4f_4, which also performs the return.
 * After the matrix is loaded %o1 is reused as the element counter and %o2
 * as the count rounded down to an even number.
 * NOTE(review): M<n> and LDMATRIX_* come from sparc_matrix.h, which is not
 * visible here; presumably M<n> names the FP register holding matrix
 * element n.
 */
78 .globl _mesa_sparc_transform_points1_general
79 _mesa_sparc_transform_points1_general:
80 ld [%o2 + V4F_STRIDE], %o5
81 LDPTR [%o2 + V4F_START], %g1
82 LDPTR [%o0 + V4F_START], %g2
83 ld [%o2 + V4F_COUNT], %g3
84
85 LDMATRIX_0_1_2_3_12_13_14_15(%o1)
86
/* count < 1: skip to exit; count == 1: only the single-element tail. */
87 cmp %g3, 1
88 st %g3, [%o0 + V4F_COUNT]
89 bl 3f
90 clr %o1
91
92 be 2f
93 andn %g3, 1, %o2
94
/* Main loop: two elements per pass. */
95 1: ld [%g1 + 0x00], %f0 ! LSU Group
96 add %g1, %o5, %g1 ! IEU0
97 ld [%g1 + 0x00], %f8 ! LSU Group
98 add %o1, 2, %o1 ! IEU0
99 add %g1, %o5, %g1 ! IEU1
100 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
101 fmuls %f0, M1, %f2 ! FGM Group
102 fmuls %f0, M2, %f3 ! FGM Group
103 fmuls %f0, M3, %f4 ! FGM Group
104 fmuls %f8, M0, %f9 ! FGM Group f1 available
105 fadds %f1, M12, %f1 ! FGA
106 st %f1, [%g2 + 0x00] ! LSU
107 fmuls %f8, M1, %f10 ! FGM Group f2 available
108 fadds %f2, M13, %f2 ! FGA
109 st %f2, [%g2 + 0x04] ! LSU
110 fmuls %f8, M2, %f11 ! FGM Group f3 available
111 fadds %f3, M14, %f3 ! FGA
112 st %f3, [%g2 + 0x08] ! LSU
113 fmuls %f8, M3, %f12 ! FGM Group f4 available
114 fadds %f4, M15, %f4 ! FGA
115 st %f4, [%g2 + 0x0c] ! LSU
116 fadds %f9, M12, %f9 ! FGA Group f9 available
117 st %f9, [%g2 + 0x10] ! LSU
118 fadds %f10, M13, %f10 ! FGA Group f10 available
119 st %f10, [%g2 + 0x14] ! LSU
120 fadds %f11, M14, %f11 ! FGA Group f11 available
121 st %f11, [%g2 + 0x18] ! LSU
122 fadds %f12, M15, %f12 ! FGA Group f12 available
123 st %f12, [%g2 + 0x1c] ! LSU
124 cmp %o1, %o2 ! IEU1
125 bne 1b ! CTI
126 add %g2, 0x20, %g2 ! IEU0 Group
127
/* Even count: everything is done; odd count: fall into the tail. */
128 cmp %o1, %g3
129 be 3f
130 nop
131
/* Tail: one remaining element. */
132 2: ld [%g1 + 0x00], %f0 ! LSU Group
133 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
134 fmuls %f0, M1, %f2 ! FGM Group
135 fmuls %f0, M2, %f3 ! FGM Group
136 fmuls %f0, M3, %f4 ! FGM Group
137 fadds %f1, M12, %f1 ! FGA Group
138 st %f1, [%g2 + 0x00] ! LSU
139 fadds %f2, M13, %f2 ! FGA Group
140 st %f2, [%g2 + 0x04] ! LSU
141 fadds %f3, M14, %f3 ! FGA Group
142 st %f3, [%g2 + 0x08] ! LSU
143 fadds %f4, M15, %f4 ! FGA Group
144 st %f4, [%g2 + 0x0c] ! LSU
145
146 3:
147 ba __set_v4f_4
148 nop
149
/*
 * _mesa_sparc_transform_points1_identity
 *
 * In:  %o0 = destination vector descriptor, %o2 = source vector descriptor
 *      (%o1 is not read; it is overwritten and used as the element counter).
 * If %o0 == %o2 the routine returns at label 4 without writing memory.
 * Otherwise the first float of every source element (source pointer
 * advancing by the source stride) is copied to offset 0 of successive
 * 16-byte output slots, the source count is copied to the destination, and
 * size/flags are set through the branch to __set_v4f_1.
 * Loop 1 copies two elements per pass; label 2 copies a single leftover.
 */
150 .globl _mesa_sparc_transform_points1_identity
151 _mesa_sparc_transform_points1_identity:
152 cmp %o0, %o2
153 be 4f
154 ld [%o2 + V4F_STRIDE], %o5
155 LDPTR [%o2 + V4F_START], %g1
156 LDPTR [%o0 + V4F_START], %g2
157 ld [%o2 + V4F_COUNT], %g3
158
/* count < 1: skip to exit; count == 1: only the single-element tail. */
159 cmp %g3, 1
160 st %g3, [%o0 + V4F_COUNT]
161 bl 3f
162 clr %o1
163
164 be 2f
165 andn %g3, 1, %o2
166
167 1: ld [%g1 + 0x00], %f0 ! LSU Group
168 add %g1, %o5, %g1 ! IEU0
169 ld [%g1 + 0x00], %f1 ! LSU Group
170 add %o1, 2, %o1 ! IEU0
171 add %g1, %o5, %g1 ! IEU1
172 st %f0, [%g2 + 0x00] ! LSU Group
173 cmp %o1, %o2 ! IEU1
174 st %f1, [%g2 + 0x10] ! LSU Group
175 bne 1b ! CTI
176 add %g2, 0x20, %g2 ! IEU0
177
178 cmp %o1, %g3
179 be 3f
180 nop
181
/* The addx below writes only %g0, so its result is discarded; presumably
 * it is a scheduling filler between the load and the store. */
182 2: ld [%g1 + 0x00], %f0
183 addx %g0, %g0, %g0
184 st %f0, [%g2 + 0x00]
185
186 3:
187 ba __set_v4f_1
188 nop
189
190 4: retl
191 nop
192
/*
 * _mesa_sparc_transform_points1_2d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read and two floats are written
 * to the start of a 16-byte output slot:  x*M0+M12, x*M1+M13.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_2.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
193 .globl _mesa_sparc_transform_points1_2d
194 _mesa_sparc_transform_points1_2d:
195 ld [%o2 + V4F_STRIDE], %o5
196 LDPTR [%o2 + V4F_START], %g1
197 LDPTR [%o0 + V4F_START], %g2
198 ld [%o2 + V4F_COUNT], %g3
199
200 LDMATRIX_0_1_12_13(%o1)
201
/* count < 1: skip to exit; count == 1: only the single-element tail. */
202 cmp %g3, 1
203 st %g3, [%o0 + V4F_COUNT]
204 bl 3f
205 clr %o1
206
207 be 2f
208 andn %g3, 1, %o2
209
/* Main loop: two elements per pass. */
210 1: ld [%g1 + 0x00], %f0 ! LSU Group
211 add %g1, %o5, %g1 ! IEU0
212 ld [%g1 + 0x00], %f8 ! LSU Group
213 add %o1, 2, %o1 ! IEU0
214 add %g1, %o5, %g1 ! IEU1
215 fmuls %f0, M0, %f1 ! FGM Group
216 fmuls %f0, M1, %f2 ! FGM Group
217 fmuls %f8, M0, %f9 ! FGM Group
218 fmuls %f8, M1, %f10 ! FGM Group
219 fadds %f1, M12, %f3 ! FGA Group f1 available
220 st %f3, [%g2 + 0x00] ! LSU
221 fadds %f2, M13, %f4 ! FGA Group f2 available
222 st %f4, [%g2 + 0x04] ! LSU
223 fadds %f9, M12, %f11 ! FGA Group f9 available
224 st %f11, [%g2 + 0x10] ! LSU
225 fadds %f10, M13, %f12 ! FGA Group f10 available
226 st %f12, [%g2 + 0x14] ! LSU
227 cmp %o1, %o2 ! IEU1
228 bne 1b ! CTI
229 add %g2, 0x20, %g2 ! IEU0 Group
230
231 cmp %o1, %g3
232 be 3f
233 nop
234
/* Tail: one remaining element. */
235 2: ld [%g1 + 0x00], %f0
236 fmuls %f0, M0, %f1
237 fmuls %f0, M1, %f2
238 fadds %f1, M12, %f3
239 st %f3, [%g2 + 0x00]
240 fadds %f2, M13, %f4
241 st %f4, [%g2 + 0x04]
242
243 3:
244 ba __set_v4f_2
245 nop
246
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read and two floats are written
 * to the start of a 16-byte output slot:  x*M0+M12, then M13 unchanged.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_2.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
247 .globl _mesa_sparc_transform_points1_2d_no_rot
248 _mesa_sparc_transform_points1_2d_no_rot:
249 ld [%o2 + V4F_STRIDE], %o5
250 LDPTR [%o2 + V4F_START], %g1
251 LDPTR [%o0 + V4F_START], %g2
252 ld [%o2 + V4F_COUNT], %g3
253
254 LDMATRIX_0_12_13(%o1)
255
/* count < 1: skip to exit; count == 1: only the single-element tail. */
256 cmp %g3, 1
257 st %g3, [%o0 + V4F_COUNT]
258 bl 3f
259 clr %o1
260
261 be 2f
262 andn %g3, 1, %o2
263
/* Main loop: two elements per pass. */
264 1: ld [%g1 + 0x00], %f0 ! LSU Group
265 add %g1, %o5, %g1 ! IEU0
266 ld [%g1 + 0x00], %f4 ! LSU Group
267 add %o1, 2, %o1 ! IEU0
268 add %g1, %o5, %g1 ! IEU1
269 fmuls %f0, M0, %f1 ! FGM Group
270 fmuls %f4, M0, %f5 ! FGM Group
271 fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
272 st %f3, [%g2 + 0x00] ! LSU
273 st M13, [%g2 + 0x04] ! LSU Group, f5 available
274 fadds %f5, M12, %f6 ! FGA
275 st %f6, [%g2 + 0x10] ! LSU Group
276 st M13, [%g2 + 0x14] ! LSU Group
277 cmp %o1, %o2 ! IEU1
278 bne 1b ! CTI
279 add %g2, 0x20, %g2 ! IEU0 Group
280
281 cmp %o1, %g3
282 be 3f
283 nop
284
/* Tail: one remaining element. */
285 2: ld [%g1 + 0x00], %f0
286 fmuls %f0, M0, %f1
287 fadds %f1, M12, %f3
288 st %f3, [%g2 + 0x00]
289 st M13, [%g2 + 0x04]
290
291 3:
292 ba __set_v4f_2
293 nop
294
/*
 * _mesa_sparc_transform_points1_3d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read and three floats are written
 * to a 16-byte output slot:  x*M0+M12, x*M1+M13, x*M2+M14.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
295 .globl _mesa_sparc_transform_points1_3d
296 _mesa_sparc_transform_points1_3d:
297 ld [%o2 + V4F_STRIDE], %o5
298 LDPTR [%o2 + V4F_START], %g1
299 LDPTR [%o0 + V4F_START], %g2
300 ld [%o2 + V4F_COUNT], %g3
301
302 LDMATRIX_0_1_2_12_13_14(%o1)
303
/* count < 1: skip to exit; count == 1: only the single-element tail. */
304 cmp %g3, 1
305 st %g3, [%o0 + V4F_COUNT]
306 bl 3f
307 clr %o1
308
309 be 2f
310 andn %g3, 1, %o2
311
/* Main loop: two elements per pass. */
312 1: ld [%g1 + 0x00], %f0 ! LSU Group
313 add %g1, %o5, %g1 ! IEU0
314 ld [%g1 + 0x00], %f4 ! LSU Group
315 add %o1, 2, %o1 ! IEU0
316 add %g1, %o5, %g1 ! IEU1
317 fmuls %f0, M0, %f1 ! FGM Group
318 fmuls %f0, M1, %f2 ! FGM Group
319 fmuls %f0, M2, %f3 ! FGM Group
320 fmuls %f4, M0, %f5 ! FGM Group
321 fadds %f1, M12, %f1 ! FGA Group, f1 available
322 st %f1, [%g2 + 0x00] ! LSU
323 fmuls %f4, M1, %f6 ! FGM
324 fadds %f2, M13, %f2 ! FGA Group, f2 available
325 st %f2, [%g2 + 0x04] ! LSU
326 fmuls %f4, M2, %f7 ! FGM
327 fadds %f3, M14, %f3 ! FGA Group, f3 available
328 st %f3, [%g2 + 0x08] ! LSU
329 fadds %f5, M12, %f5 ! FGA Group, f5 available
330 st %f5, [%g2 + 0x10] ! LSU
331 fadds %f6, M13, %f6 ! FGA Group, f6 available
332 st %f6, [%g2 + 0x14] ! LSU
333 fadds %f7, M14, %f7 ! FGA Group, f7 available
334 st %f7, [%g2 + 0x18] ! LSU
335 cmp %o1, %o2 ! IEU1
336 bne 1b ! CTI
337 add %g2, 0x20, %g2 ! IEU0 Group
338
339 cmp %o1, %g3
340 be 3f
341 nop
342
/* Tail: one remaining element. */
343 2: ld [%g1 + 0x00], %f0
344 fmuls %f0, M0, %f1
345 fmuls %f0, M1, %f2
346 fmuls %f0, M2, %f3
347 fadds %f1, M12, %f1
348 st %f1, [%g2 + 0x00]
349 fadds %f2, M13, %f2
350 st %f2, [%g2 + 0x04]
351 fadds %f3, M14, %f3
352 st %f3, [%g2 + 0x08]
353
354 3:
355 ba __set_v4f_3
356 nop
357
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read and three floats are written
 * to a 16-byte output slot:  x*M0+M12, then M13 and M14 unchanged.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
358 .globl _mesa_sparc_transform_points1_3d_no_rot
359 _mesa_sparc_transform_points1_3d_no_rot:
360 ld [%o2 + V4F_STRIDE], %o5
361 LDPTR [%o2 + V4F_START], %g1
362 LDPTR [%o0 + V4F_START], %g2
363 ld [%o2 + V4F_COUNT], %g3
364
365 LDMATRIX_0_12_13_14(%o1)
366
/* count < 1: skip to exit; count == 1: only the single-element tail. */
367 cmp %g3, 1
368 st %g3, [%o0 + V4F_COUNT]
369 bl 3f
370 clr %o1
371
372 be 2f
373 andn %g3, 1, %o2
374
/* Main loop: two elements per pass. */
375 1: ld [%g1 + 0x00], %f0 ! LSU Group
376 add %g1, %o5, %g1 ! IEU0
377 ld [%g1 + 0x00], %f2 ! LSU Group
378 add %o1, 2, %o1 ! IEU0
379 add %g1, %o5, %g1 ! IEU1
380 fmuls %f0, M0, %f1 ! FGM Group
381 fmuls %f2, M0, %f3 ! FGM Group
382 fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
383 st %f1, [%g2 + 0x00] ! LSU
384 fadds %f3, M12, %f3 ! FGA Group, f3 available
385 st M13, [%g2 + 0x04] ! LSU
386 st M14, [%g2 + 0x08] ! LSU Group
387 st %f3, [%g2 + 0x10] ! LSU Group
388 st M13, [%g2 + 0x14] ! LSU Group
389 st M14, [%g2 + 0x18] ! LSU Group
390 cmp %o1, %o2 ! IEU1
391 bne 1b ! CTI
392 add %g2, 0x20, %g2 ! IEU0 Group
393
394 cmp %o1, %g3
395 be 3f
396 nop
397
/* Tail: one remaining element. */
398 2: ld [%g1 + 0x00], %f0
399 fmuls %f0, M0, %f1
400 fadds %f1, M12, %f1
401 st %f1, [%g2 + 0x00]
402 st M13, [%g2 + 0x04]
403 st M14, [%g2 + 0x08]
404
405 3:
406 ba __set_v4f_3
407 nop
408
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element one float x is read and four words are written
 * to a 16-byte output slot:  x*M0, zero, M14, zero.  The zero words are
 * integer stores of %g0, i.e. an all-zero bit pattern.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
409 .globl _mesa_sparc_transform_points1_perspective
410 _mesa_sparc_transform_points1_perspective:
411 ld [%o2 + V4F_STRIDE], %o5
412 LDPTR [%o2 + V4F_START], %g1
413 LDPTR [%o0 + V4F_START], %g2
414 ld [%o2 + V4F_COUNT], %g3
415
416 LDMATRIX_0_14(%o1)
417
/* count < 1: skip to exit; count == 1: only the single-element tail. */
418 cmp %g3, 1
419 st %g3, [%o0 + V4F_COUNT]
420 bl 3f
421 clr %o1
422
423 be 2f
424 andn %g3, 1, %o2
425
/* Main loop: two elements per pass. */
426 1: ld [%g1 + 0x00], %f0 ! LSU Group
427 add %g1, %o5, %g1 ! IEU0
428 ld [%g1 + 0x00], %f2 ! LSU Group
429 add %o1, 2, %o1 ! IEU0
430 add %g1, %o5, %g1 ! IEU1
431 fmuls %f0, M0, %f1 ! FGM Group
432 st %f1, [%g2 + 0x00] ! LSU
433 fmuls %f2, M0, %f3 ! FGM Group
434 st %g0, [%g2 + 0x04] ! LSU
435 st M14, [%g2 + 0x08] ! LSU Group
436 st %g0, [%g2 + 0x0c] ! LSU Group
437 st %f3, [%g2 + 0x10] ! LSU Group
438 st %g0, [%g2 + 0x14] ! LSU Group
439 st M14, [%g2 + 0x18] ! LSU Group
440 st %g0, [%g2 + 0x1c] ! LSU Group
441 cmp %o1, %o2 ! IEU1
442 bne 1b ! CTI
443 add %g2, 0x20, %g2 ! IEU0 Group
444
445 cmp %o1, %g3
446 be 3f
447 nop
448
/* Tail: one remaining element. */
449 2: ld [%g1 + 0x00], %f0
450 fmuls %f0, M0, %f1
451 st %f1, [%g2 + 0x00]
452 st %g0, [%g2 + 0x04]
453 st M14, [%g2 + 0x08]
454 st %g0, [%g2 + 0x0c]
455
456 3:
457 ba __set_v4f_4
458 nop
459
/*
 * _mesa_sparc_transform_points2_general
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element two floats (x, y) are read and four floats are
 * written to a 16-byte output slot:
 *   x*M0+M12+y*M4, x*M1+M13+y*M5, x*M2+M14+y*M6, x*M3+M15+y*M7.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
460 .globl _mesa_sparc_transform_points2_general
461 _mesa_sparc_transform_points2_general:
462 ld [%o2 + V4F_STRIDE], %o5
463 LDPTR [%o2 + V4F_START], %g1
464 LDPTR [%o0 + V4F_START], %g2
465 ld [%o2 + V4F_COUNT], %g3
466
467 LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
468
469 cmp %g3, 0
470 st %g3, [%o0 + V4F_COUNT]
471 be 2f
472 clr %o1
473
474 1: ld [%g1 + 0x00], %f0 ! LSU Group
475 ld [%g1 + 0x04], %f1 ! LSU Group
476 add %o1, 1, %o1 ! IEU0
477 add %g1, %o5, %g1 ! IEU1
478 fmuls %f0, M0, %f2 ! FGM Group
479 fmuls %f0, M1, %f3 ! FGM Group
480 fmuls %f0, M2, %f4 ! FGM Group
481 fmuls %f0, M3, %f5 ! FGM Group
482 fadds %f2, M12, %f2 ! FGA Group f2 available
483 fmuls %f1, M4, %f6 ! FGM
484 fadds %f3, M13, %f3 ! FGA Group f3 available
485 fmuls %f1, M5, %f7 ! FGM
486 fadds %f4, M14, %f4 ! FGA Group f4 available
487 fmuls %f1, M6, %f8 ! FGM
488 fadds %f5, M15, %f5 ! FGA Group f5 available
489 fmuls %f1, M7, %f9 ! FGM
490 fadds %f2, %f6, %f2 ! FGA Group f6 available
491 st %f2, [%g2 + 0x00] ! LSU
492 fadds %f3, %f7, %f3 ! FGA Group f7 available
493 st %f3, [%g2 + 0x04] ! LSU
494 fadds %f4, %f8, %f4 ! FGA Group f8 available
495 st %f4, [%g2 + 0x08] ! LSU
496 fadds %f5, %f9, %f5 ! FGA Group f9 available
497 st %f5, [%g2 + 0x0c] ! LSU
498 cmp %o1, %g3 ! IEU1
499 bne 1b ! CTI
500 add %g2, 0x10, %g2 ! IEU0 Group
501 2:
502 ba __set_v4f_4
503 nop
504
/*
 * _mesa_sparc_transform_points2_identity
 *
 * In:  %o0 = destination vector descriptor, %o2 = source vector descriptor
 *      (%o1 is not read; it is overwritten and used as the element counter).
 * If %o2 == %o0 the routine returns at label 3 without writing memory.
 * Otherwise the first two floats of every source element (source pointer
 * advancing by the source stride) are copied to the start of successive
 * 16-byte output slots, the source count is copied to the destination, and
 * size/flags are set via __set_v4f_2.  A count of 0 skips the loop.
 */
505 .globl _mesa_sparc_transform_points2_identity
506 _mesa_sparc_transform_points2_identity:
507 cmp %o2, %o0
508 be 3f
509 ld [%o2 + V4F_STRIDE], %o5
510 LDPTR [%o2 + V4F_START], %g1
511 LDPTR [%o0 + V4F_START], %g2
512 ld [%o2 + V4F_COUNT], %g3
513
514 cmp %g3, 0
515 st %g3, [%o0 + V4F_COUNT]
516 be 2f
517 clr %o1
518
519 1: ld [%g1 + 0x00], %f0 ! LSU Group
520 add %o1, 1, %o1 ! IEU0
521 ld [%g1 + 0x04], %f1 ! LSU Group
522 add %g1, %o5, %g1 ! IEU0
523 cmp %o1, %g3 ! IEU1
524 st %f0, [%g2 + 0x00] ! LSU Group
525 st %f1, [%g2 + 0x04] ! LSU Group
526 bne 1b ! CTI
527 add %g2, 0x10, %g2 ! IEU0
528 2:
529 ba __set_v4f_2
530 nop
531
532 3: retl
533 nop
534
/*
 * _mesa_sparc_transform_points2_2d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element two floats (x, y) are read and two floats are
 * written to the start of a 16-byte output slot:
 *   x*M0+M12+y*M4, x*M1+M13+y*M5.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_2.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
535 .globl _mesa_sparc_transform_points2_2d
536 _mesa_sparc_transform_points2_2d:
537 ld [%o2 + V4F_STRIDE], %o5
538 LDPTR [%o2 + V4F_START], %g1
539 LDPTR [%o0 + V4F_START], %g2
540 ld [%o2 + V4F_COUNT], %g3
541
542 LDMATRIX_0_1_4_5_12_13(%o1)
543
/* count < 1: skip to exit; count == 1: only the single-element tail. */
544 cmp %g3, 1
545 st %g3, [%o0 + V4F_COUNT]
546 bl 3f
547 clr %o1
548
549 be 2f
550 andn %g3, 1, %o2
551
/* Main loop: two elements per pass. */
552 1: ld [%g1 + 0x00], %f0 ! LSU Group
553 ld [%g1 + 0x04], %f1 ! LSU Group
554 add %o1, 2, %o1 ! IEU0
555 add %g1, %o5, %g1 ! IEU1
556 fmuls %f0, M0, %f2 ! FGM
557 ld [%g1 + 0x00], %f8 ! LSU Group
558 fmuls %f0, M1, %f3 ! FGM
559 ld [%g1 + 0x04], %f9 ! LSU Group
560 fmuls %f1, M4, %f6 ! FGM
561 fmuls %f1, M5, %f7 ! FGM Group
562 add %g1, %o5, %g1 ! IEU0
563 fmuls %f8, M0, %f10 ! FGM Group f2 available
564 fadds %f2, M12, %f2 ! FGA
565 fmuls %f8, M1, %f11 ! FGM Group f3 available
566 fadds %f3, M13, %f3 ! FGA
567 fmuls %f9, M4, %f12 ! FGM Group
568 fmuls %f9, M5, %f13 ! FGM Group
569 fadds %f10, M12, %f10 ! FGA Group f2, f10 available
570 fadds %f2, %f6, %f2 ! FGA Group f3, f11 available
571 st %f2, [%g2 + 0x00] ! LSU
572 fadds %f11, M13, %f11 ! FGA Group f12 available
573 fadds %f3, %f7, %f3 ! FGA Group f13 available
574 st %f3, [%g2 + 0x04] ! LSU
575 fadds %f10, %f12, %f10 ! FGA Group f10 available
576 st %f10, [%g2 + 0x10] ! LSU
577 fadds %f11, %f13, %f11 ! FGA Group f11 available
578 st %f11, [%g2 + 0x14] ! LSU
579 cmp %o1, %o2 ! IEU1
580 bne 1b ! CTI
581 add %g2, 0x20, %g2 ! IEU0 Group
582
583 cmp %o1, %g3
584 be 3f
585 nop
586
/* Tail: one remaining element. */
587 2: ld [%g1 + 0x00], %f0 ! LSU Group
588 ld [%g1 + 0x04], %f1 ! LSU Group
589 fmuls %f0, M0, %f2 ! FGM Group
590 fmuls %f0, M1, %f3 ! FGM Group
591 fmuls %f1, M4, %f6 ! FGM Group
592 fmuls %f1, M5, %f7 ! FGM Group
593 fadds %f2, M12, %f2 ! FGA Group f2 available
594 fadds %f3, M13, %f3 ! FGA Group f3 available
595 fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available
596 st %f2, [%g2 + 0x00] ! LSU
597 fadds %f3, %f7, %f3 ! FGA Group f3 available
598 st %f3, [%g2 + 0x04] ! LSU
599
600 3:
601 ba __set_v4f_2
602 nop
603
/*
 * _mesa_sparc_transform_points2_2d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element two floats (x, y) are read and two floats are
 * written to the start of a 16-byte output slot:  x*M0+M12, y*M5+M13.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_2.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
604 .globl _mesa_sparc_transform_points2_2d_no_rot
605 _mesa_sparc_transform_points2_2d_no_rot:
606 ld [%o2 + V4F_STRIDE], %o5
607 LDPTR [%o2 + V4F_START], %g1
608 LDPTR [%o0 + V4F_START], %g2
609 ld [%o2 + V4F_COUNT], %g3
610
611 LDMATRIX_0_5_12_13(%o1)
612
/* count < 1: skip to exit; count == 1: only the single-element tail. */
613 cmp %g3, 1
614 st %g3, [%o0 + V4F_COUNT]
615 bl 3f
616 clr %o1
617
618 be 2f
619 andn %g3, 1, %o2
620
/* Main loop: two elements per pass. */
621 1: ld [%g1 + 0x00], %f0 ! LSU Group
622 ld [%g1 + 0x04], %f1 ! LSU Group
623 add %o1, 2, %o1 ! IEU0
624 add %g1, %o5, %g1 ! IEU1
625 ld [%g1 + 0x00], %f4 ! LSU Group
626 fmuls %f0, M0, %f2 ! FGM
627 ld [%g1 + 0x04], %f5 ! LSU Group
628 fmuls %f1, M5, %f3 ! FGM
629 fmuls %f4, M0, %f6 ! FGM Group
630 add %g1, %o5, %g1 ! IEU0
631 fmuls %f5, M5, %f7 ! FGM Group
632 fadds %f2, M12, %f2 ! FGA Group f2 available
633 st %f2, [%g2 + 0x00] ! LSU
634 fadds %f3, M13, %f3 ! FGA Group f3 available
635 st %f3, [%g2 + 0x04] ! LSU
636 fadds %f6, M12, %f6 ! FGA Group f6 available
637 st %f6, [%g2 + 0x10] ! LSU
638 fadds %f7, M13, %f7 ! FGA Group f7 available
639 st %f7, [%g2 + 0x14] ! LSU
640 cmp %o1, %o2 ! IEU1
641 bne 1b ! CTI
642 add %g2, 0x20, %g2 ! IEU0 Group
643
644 cmp %o1, %g3
645 be 3f
646 nop
647
/* Tail: one remaining element. */
648 2: ld [%g1 + 0x00], %f0 ! LSU Group
649 ld [%g1 + 0x04], %f1 ! LSU Group
650 fmuls %f0, M0, %f2 ! FGM Group
651 fmuls %f1, M5, %f3 ! FGM Group
652 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
653 st %f2, [%g2 + 0x00] ! LSU
654 fadds %f3, M13, %f3 ! FGA Group f3 available
655 st %f3, [%g2 + 0x04] ! LSU
656
657 3:
658 ba __set_v4f_2
659 nop
660
661 /* orig: 12 cycles */
/*
 * _mesa_sparc_transform_points2_3d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element two floats (x, y) are read and three floats are
 * written to a 16-byte output slot:
 *   x*M0+M12+y*M4, x*M1+M13+y*M5, x*M2+M14+y*M6.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  Loop 1 handles two elements per pass, label 2 a single
 * leftover element.  %o1 becomes the element counter after the matrix load.
 * In loop 1, %f0 is reused as a scratch product once the first element's x
 * has been consumed.
 *
 * The two START pointers are fetched with LDPTR, like every other routine
 * in this file: a plain `ld` reads only 4 bytes, which is not the whole
 * pointer in the __sparc_v9__ layout where LDPTR is ldx.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
662 .globl _mesa_sparc_transform_points2_3d
663 _mesa_sparc_transform_points2_3d:
664 ld [%o2 + V4F_STRIDE], %o5
665 LDPTR [%o2 + V4F_START], %g1
666 LDPTR [%o0 + V4F_START], %g2
667 ld [%o2 + V4F_COUNT], %g3
668
669 LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
670
/* count < 1: skip to exit; count == 1: only the single-element tail. */
671 cmp %g3, 1
672 st %g3, [%o0 + V4F_COUNT]
673 bl 3f
674 clr %o1
675
676 be 2f
677 andn %g3, 1, %o2
678
/* Main loop: two elements per pass. */
679 1: ld [%g1 + 0x00], %f0 ! LSU Group
680 ld [%g1 + 0x04], %f1 ! LSU Group
681 add %o1, 2, %o1 ! IEU0
682 add %g1, %o5, %g1 ! IEU1
683 ld [%g1 + 0x00], %f9 ! LSU Group
684 fmuls %f0, M0, %f2 ! FGM
685 ld [%g1 + 0x04], %f10 ! LSU Group
686 fmuls %f0, M1, %f3 ! FGM
687 fmuls %f0, M2, %f4 ! FGM Group
688 add %g1, %o5, %g1 ! IEU0
689 fmuls %f1, M4, %f6 ! FGM Group
690 fmuls %f1, M5, %f7 ! FGM Group f2 available
691 fadds %f2, M12, %f2 ! FGA
692 fmuls %f1, M6, %f8 ! FGM Group f3 available
693 fadds %f3, M13, %f3 ! FGA
694 fmuls %f9, M0, %f11 ! FGM Group f4 available
695 fadds %f4, M14, %f4 ! FGA
696 fmuls %f9, M1, %f12 ! FGM Group f6 available
697 fmuls %f9, M2, %f13 ! FGM Group f2, f7 available
698 fadds %f2, %f6, %f2 ! FGA
699 st %f2, [%g2 + 0x00] ! LSU
700 fmuls %f10, M4, %f14 ! FGM Group f3, f8 available
701 fadds %f3, %f7, %f3 ! FGA
702 st %f3, [%g2 + 0x04] ! LSU
703 fmuls %f10, M5, %f15 ! FGM Group f4, f11 available
704 fadds %f11, M12, %f11 ! FGA
705 fmuls %f10, M6, %f0 ! FGM Group f12 available
706 fadds %f12, M13, %f12 ! FGA
707 fadds %f13, M14, %f13 ! FGA Group f13 available
708 fadds %f4, %f8, %f4 ! FGA Group f14 available
709 st %f4, [%g2 + 0x08] ! LSU
710 fadds %f11, %f14, %f11 ! FGA Group f15, f11 available
711 st %f11, [%g2 + 0x10] ! LSU
712 fadds %f12, %f15, %f12 ! FGA Group f0, f12 available
713 st %f12, [%g2 + 0x14] ! LSU
714 fadds %f13, %f0, %f13 ! FGA Group f13 available
715 st %f13, [%g2 + 0x18] ! LSU
716
717 cmp %o1, %o2 ! IEU1
718 bne 1b ! CTI
719 add %g2, 0x20, %g2 ! IEU0 Group
720
721 cmp %o1, %g3
722 be 3f
723 nop
724
/* Tail: one remaining element. */
725 2: ld [%g1 + 0x00], %f0 ! LSU Group
726 ld [%g1 + 0x04], %f1 ! LSU Group
727 fmuls %f0, M0, %f2 ! FGM Group
728 fmuls %f0, M1, %f3 ! FGM Group
729 fmuls %f0, M2, %f4 ! FGM Group
730 fmuls %f1, M4, %f6 ! FGM Group
731 fmuls %f1, M5, %f7 ! FGM Group f2 available
732 fadds %f2, M12, %f2 ! FGA
733 fmuls %f1, M6, %f8 ! FGM Group f3 available
734 fadds %f3, M13, %f3 ! FGA
735 fadds %f4, M14, %f4 ! FGA Group f4 available
736 fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available
737 st %f2, [%g2 + 0x00] ! LSU
738 fadds %f3, %f7, %f3 ! FGA Group f3, f8 available
739 st %f3, [%g2 + 0x04] ! LSU
740 fadds %f4, %f8, %f4 ! FGA Group f4 available
741 st %f4, [%g2 + 0x08] ! LSU
742
743 3:
744 ba __set_v4f_3
745 nop
746
/*
 * _mesa_sparc_transform_points2_3d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro and read again at label 3), %o2 = source
 *      vector descriptor.
 * For each source element two floats (x, y) are read and three floats are
 * written to a 16-byte output slot:  x*M0+M12, y*M5+M13, then M14 unchanged.
 * The source count is copied to the destination.  Loop 1 handles two
 * elements per pass, label 2 a single leftover element.
 * Unlike the sibling routines the element counter lives in %o3, so %o1
 * still holds the matrix pointer at label 3.  There, the word at byte
 * offset 14*4 of the matrix is reloaded as an integer and compared with 0:
 * a non-zero bit pattern exits through __set_v4f_3, an all-zero pattern
 * through __set_v4f_2.  Because this is an integer compare on the raw
 * bits, only the all-zero pattern (+0.0) counts as zero.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
747 .globl _mesa_sparc_transform_points2_3d_no_rot
748 _mesa_sparc_transform_points2_3d_no_rot:
749 ld [%o2 + V4F_STRIDE], %o5
750 LDPTR [%o2 + V4F_START], %g1
751 LDPTR [%o0 + V4F_START], %g2
752 ld [%o2 + V4F_COUNT], %g3
753
754 LDMATRIX_0_5_12_13_14(%o1)
755
/* count < 1: skip to label 3; count == 1: only the single-element tail. */
756 cmp %g3, 1
757 st %g3, [%o0 + V4F_COUNT]
758 bl 3f
759 clr %o3
760
761 be 2f
762 andn %g3, 1, %o2
763
/* Main loop: two elements per pass. */
764 1: ld [%g1 + 0x00], %f0 ! LSU Group
765 ld [%g1 + 0x04], %f1 ! LSU Group
766 add %o3, 2, %o3 ! IEU0
767 add %g1, %o5, %g1 ! IEU1
768 ld [%g1 + 0x00], %f4 ! LSU Group
769 fmuls %f0, M0, %f2 ! FGM
770 ld [%g1 + 0x04], %f5 ! LSU Group
771 fmuls %f1, M5, %f3 ! FGM
772 fmuls %f4, M0, %f6 ! FGM Group
773 add %g1, %o5, %g1 ! IEU0
774 fmuls %f5, M5, %f7 ! FGM Group
775 fadds %f2, M12, %f2 ! FGA Group f2 available
776 st %f2, [%g2 + 0x00] ! LSU
777 fadds %f3, M13, %f3 ! FGA Group f3 available
778 st %f3, [%g2 + 0x04] ! LSU
779 fadds %f6, M12, %f6 ! FGA Group f6 available
780 st M14, [%g2 + 0x08] ! LSU
781 fadds %f7, M13, %f7 ! FGA Group f7 available
782 st %f6, [%g2 + 0x10] ! LSU
783 st %f7, [%g2 + 0x14] ! LSU Group
784 st M14, [%g2 + 0x18] ! LSU Group
785 cmp %o3, %o2 ! IEU1
786 bne 1b ! CTI
787 add %g2, 0x20, %g2 ! IEU0 Group
788
789 cmp %o3, %g3
790 be 3f
791 nop
792
/* Tail: one remaining element. */
793 2: ld [%g1 + 0x00], %f0 ! LSU Group
794 ld [%g1 + 0x04], %f1 ! LSU Group
795 fmuls %f0, M0, %f2 ! FGM Group
796 fmuls %f1, M5, %f3 ! FGM Group
797 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
798 st %f2, [%g2 + 0x00] ! LSU
799 fadds %f3, M13, %f3 ! FGA Group f3 available
800 st %f3, [%g2 + 0x04] ! LSU
801 st M14, [%g2 + 0x08] ! LSU Group
802
/* Pick the result size from the raw bits of matrix word 14. */
803 3: ld [%o1 + (14 * 0x4)], %g3
804 cmp %g3, 0
805 bne __set_v4f_3
806 nop
807 ba __set_v4f_2
808 nop
809
/*
 * _mesa_sparc_transform_points2_perspective
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element two floats (x, y) are read and four words are
 * written to a 16-byte output slot:  x*M0, y*M5, M14, zero (an integer
 * store of %g0, i.e. an all-zero bit pattern).
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
810 .globl _mesa_sparc_transform_points2_perspective
811 _mesa_sparc_transform_points2_perspective:
812 ld [%o2 + V4F_STRIDE], %o5
813 LDPTR [%o2 + V4F_START], %g1
814 LDPTR [%o0 + V4F_START], %g2
815 ld [%o2 + V4F_COUNT], %g3
816
817 LDMATRIX_0_5_14(%o1)
818
819 cmp %g3, 0
820 st %g3, [%o0 + V4F_COUNT]
821 be 2f
822 clr %o1
823
824 1: ld [%g1 + 0x00], %f0
825 ld [%g1 + 0x04], %f1
826 add %o1, 1, %o1
827 add %g1, %o5, %g1
828 fmuls %f0, M0, %f2
829 st %f2, [%g2 + 0x00]
830 fmuls %f1, M5, %f3
831 st %f3, [%g2 + 0x04]
832 st M14, [%g2 + 0x08]
833 st %g0, [%g2 + 0x0c]
834 cmp %o1, %g3
835 bne 1b
836 add %g2, 0x10, %g2
837 2:
838 ba __set_v4f_4
839 nop
840
/*
 * _mesa_sparc_transform_points3_general
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and four floats
 * are written to a 16-byte output slot; output k (k = 0..3) is
 *   x*M(k) + y*M(4+k) + M(12+k) + z*M(8+k).
 * %f7..%f10 first hold the y products and are then reused for the z
 * products once the y products have been accumulated.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
841 .globl _mesa_sparc_transform_points3_general
842 _mesa_sparc_transform_points3_general:
843 ld [%o2 + V4F_STRIDE], %o5
844 LDPTR [%o2 + V4F_START], %g1
845 LDPTR [%o0 + V4F_START], %g2
846 ld [%o2 + V4F_COUNT], %g3
847
848 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
849
850 cmp %g3, 0
851 st %g3, [%o0 + V4F_COUNT]
852 be 2f
853 clr %o1
854
855 1: ld [%g1 + 0x00], %f0 ! LSU Group
856 ld [%g1 + 0x04], %f1 ! LSU Group
857 ld [%g1 + 0x08], %f2 ! LSU Group
858 add %o1, 1, %o1 ! IEU0
859 add %g1, %o5, %g1 ! IEU1
860 fmuls %f0, M0, %f3 ! FGM
861 fmuls %f1, M4, %f7 ! FGM Group
862 fmuls %f0, M1, %f4 ! FGM Group
863 fmuls %f1, M5, %f8 ! FGM Group
864 fmuls %f0, M2, %f5 ! FGM Group f3 available
865 fmuls %f1, M6, %f9 ! FGM Group f7 available
866 fadds %f3, %f7, %f3 ! FGA
867 fmuls %f0, M3, %f6 ! FGM Group f4 available
868 fmuls %f1, M7, %f10 ! FGM Group f8 available
869 fadds %f4, %f8, %f4 ! FGA
870 fmuls %f2, M8, %f7 ! FGM Group f5 available
871 fmuls %f2, M9, %f8 ! FGM Group f9,f3 available
872 fadds %f5, %f9, %f5 ! FGA
873 fmuls %f2, M10, %f9 ! FGM Group f6 available
874 fadds %f6, %f10, %f6 ! FGA Group f10,f4 available
875 fmuls %f2, M11, %f10 ! FGM
876 fadds %f3, M12, %f3 ! FGA Group f7 available
877 fadds %f4, M13, %f4 ! FGA Group f8,f5 available
878 fadds %f5, M14, %f5 ! FGA Group f9 available
879 fadds %f6, M15, %f6 ! FGA Group f10,f6 available
880 fadds %f3, %f7, %f3 ! FGA Group f3 available
881 st %f3, [%g2 + 0x00] ! LSU
882 fadds %f4, %f8, %f4 ! FGA Group f4 available
883 st %f4, [%g2 + 0x04] ! LSU
884 fadds %f5, %f9, %f5 ! FGA Group f5 available
885 st %f5, [%g2 + 0x08] ! LSU
886 fadds %f6, %f10, %f6 ! FGA Group f6 available
887 st %f6, [%g2 + 0x0c] ! LSU
888 cmp %o1, %g3 ! IEU1
889 bne 1b ! CTI
890 add %g2, 0x10, %g2 ! IEU0 Group
891 2:
892 ba __set_v4f_4
893 nop
894
/*
 * _mesa_sparc_transform_points3_identity
 *
 * In:  %o0 = destination vector descriptor, %o2 = source vector descriptor
 *      (%o1 is not read; it is overwritten and used as the element counter).
 * Copies the first three floats of every source element (source pointer
 * advancing by the source stride) to the start of successive 16-byte
 * output slots, copies the source count to the destination, and sets
 * size/flags via __set_v4f_3.  A count of 0 skips the loop.
 * Unlike the points1/points2 identity routines, there is no early return
 * when %o0 == %o2.
 */
895 .globl _mesa_sparc_transform_points3_identity
896 _mesa_sparc_transform_points3_identity:
897 ld [%o2 + V4F_STRIDE], %o5
898 LDPTR [%o2 + V4F_START], %g1
899 LDPTR [%o0 + V4F_START], %g2
900 ld [%o2 + V4F_COUNT], %g3
901
902 cmp %g3, 0
903 st %g3, [%o0 + V4F_COUNT]
904 be 2f
905 clr %o1
906
907 1: ld [%g1 + 0x00], %f0
908 ld [%g1 + 0x04], %f1
909 ld [%g1 + 0x08], %f2
910 add %o1, 1, %o1
911 add %g1, %o5, %g1
912 cmp %o1, %g3
913 st %f0, [%g2 + 0x00]
914 st %f1, [%g2 + 0x04]
915 st %f2, [%g2 + 0x08]
916 bne 1b
917 add %g2, 0x10, %g2
918 2:
919 ba __set_v4f_3
920 nop
921
/*
 * _mesa_sparc_transform_points3_2d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and three floats
 * are written to a 16-byte output slot:
 *   x*M0+M12+y*M4, x*M1+M13+y*M5, then z copied unchanged.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
922 .globl _mesa_sparc_transform_points3_2d
923 _mesa_sparc_transform_points3_2d:
924 ld [%o2 + V4F_STRIDE], %o5
925 LDPTR [%o2 + V4F_START], %g1
926 LDPTR [%o0 + V4F_START], %g2
927 ld [%o2 + V4F_COUNT], %g3
928
929 LDMATRIX_0_1_4_5_12_13(%o1)
930
931 cmp %g3, 0
932 st %g3, [%o0 + V4F_COUNT]
933 be 2f
934 clr %o1
935
936 1: ld [%g1 + 0x00], %f0 ! LSU Group
937 ld [%g1 + 0x04], %f1 ! LSU Group
938 ld [%g1 + 0x08], %f2 ! LSU Group
939 add %o1, 1, %o1 ! IEU0
940 add %g1, %o5, %g1 ! IEU1
941 fmuls %f0, M0, %f3 ! FGM
942 fmuls %f0, M1, %f4 ! FGM Group
943 fmuls %f1, M4, %f6 ! FGM Group
944 fmuls %f1, M5, %f7 ! FGM Group
945 fadds %f3, M12, %f3 ! FGA Group f3 available
946 fadds %f4, M13, %f4 ! FGA Group f4 available
947 fadds %f3, %f6, %f3 ! FGA Group f6 available
948 st %f3, [%g2 + 0x00] ! LSU
949 fadds %f4, %f7, %f4 ! FGA Group f7 available
950 st %f4, [%g2 + 0x04] ! LSU
951 st %f2, [%g2 + 0x08] ! LSU Group
952 cmp %o1, %g3 ! IEU1
953 bne 1b ! CTI
954 add %g2, 0x10, %g2 ! IEU0 Group
955 2:
956 ba __set_v4f_3
957 nop
958
/*
 * _mesa_sparc_transform_points3_2d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and three floats
 * are written to a 16-byte output slot:
 *   x*M0+M12, y*M5+M13, then z copied unchanged.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
959 .globl _mesa_sparc_transform_points3_2d_no_rot
960 _mesa_sparc_transform_points3_2d_no_rot:
961 ld [%o2 + V4F_STRIDE], %o5
962 LDPTR [%o2 + V4F_START], %g1
963 LDPTR [%o0 + V4F_START], %g2
964 ld [%o2 + V4F_COUNT], %g3
965
966 LDMATRIX_0_5_12_13(%o1)
967
968 cmp %g3, 0
969 st %g3, [%o0 + V4F_COUNT]
970 be 2f
971 clr %o1
972
973 1: ld [%g1 + 0x00], %f0 ! LSU Group
974 ld [%g1 + 0x04], %f1 ! LSU Group
975 ld [%g1 + 0x08], %f2 ! LSU Group
976 add %o1, 1, %o1 ! IEU0
977 add %g1, %o5, %g1 ! IEU1
978 fmuls %f0, M0, %f3 ! FGM
979 fmuls %f1, M5, %f4 ! FGM Group
980 st %f2, [%g2 + 0x08] ! LSU
981 fadds %f3, M12, %f3 ! FGA Group
982 st %f3, [%g2 + 0x00] ! LSU
983 fadds %f4, M13, %f4 ! FGA Group
984 st %f4, [%g2 + 0x04] ! LSU
985 cmp %o1, %g3 ! IEU1
986 bne 1b ! CTI
987 add %g2, 0x10, %g2 ! IEU0 Group
988 2:
989 ba __set_v4f_3
990 nop
991
/*
 * _mesa_sparc_transform_points3_3d
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and three floats
 * are written to a 16-byte output slot; output k (k = 0..2) is
 *   x*M(k) + y*M(4+k) + z*M(8+k) + M(12+k).
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
992 .globl _mesa_sparc_transform_points3_3d
993 _mesa_sparc_transform_points3_3d:
994 ld [%o2 + V4F_STRIDE], %o5
995 LDPTR [%o2 + V4F_START], %g1
996 LDPTR [%o0 + V4F_START], %g2
997 ld [%o2 + V4F_COUNT], %g3
998
999 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1000
1001 cmp %g3, 0
1002 st %g3, [%o0 + V4F_COUNT]
1003 be 2f
1004 clr %o1
1005
1006 1: ld [%g1 + 0x00], %f0 ! LSU Group
1007 ld [%g1 + 0x04], %f1 ! LSU Group
1008 ld [%g1 + 0x08], %f2 ! LSU Group
1009 add %o1, 1, %o1 ! IEU0
1010 add %g1, %o5, %g1 ! IEU1
1011 fmuls %f0, M0, %f3 ! FGM
1012 fmuls %f1, M4, %f6 ! FGM Group
1013 fmuls %f0, M1, %f4 ! FGM Group
1014 fmuls %f1, M5, %f7 ! FGM Group
1015 fmuls %f0, M2, %f5 ! FGM Group f3 available
1016 fmuls %f1, M6, %f8 ! FGM Group f6 available
1017 fadds %f3, %f6, %f3 ! FGA
1018 fmuls %f2, M8, %f9 ! FGM Group f4 available
1019 fmuls %f2, M9, %f10 ! FGM Group f7 available
1020 fadds %f4, %f7, %f4 ! FGA
1021 fmuls %f2, M10, %f11 ! FGM Group f5 available
1022 fadds %f5, %f8, %f5 ! FGA Group f8, f3 available
1023 fadds %f3, %f9, %f3 ! FGA Group f9 available
1024 fadds %f4, %f10, %f4 ! FGA Group f10, f4 available
1025 fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available
1026 fadds %f3, M12, %f3 ! FGA Group f3 available
1027 st %f3, [%g2 + 0x00] ! LSU
1028 fadds %f4, M13, %f4 ! FGA Group f4 available
1029 st %f4, [%g2 + 0x04] ! LSU
1030 fadds %f5, M14, %f5 ! FGA Group f5 available
1031 st %f5, [%g2 + 0x08] ! LSU
1032 cmp %o1, %g3 ! IEU1
1033 bne 1b ! CTI
1034 add %g2, 0x10, %g2 ! IEU0 Group
1035 2:
1036 ba __set_v4f_3
1037 nop
1038
/*
 * _mesa_sparc_transform_points3_3d_no_rot
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and three floats
 * are written to a 16-byte output slot:
 *   x*M0+M12, y*M5+M13, z*M10+M14.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * loop-exit compare is issued early in the body; only FP arithmetic and
 * stores sit between it and the bne that consumes its result.
 * The source count is copied to the destination; size/flags are set via
 * __set_v4f_3.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
1039 .globl _mesa_sparc_transform_points3_3d_no_rot
1040 _mesa_sparc_transform_points3_3d_no_rot:
1041 ld [%o2 + V4F_STRIDE], %o5
1042 LDPTR [%o2 + V4F_START], %g1
1043 LDPTR [%o0 + V4F_START], %g2
1044 ld [%o2 + V4F_COUNT], %g3
1045
1046 LDMATRIX_0_5_10_12_13_14(%o1)
1047
1048 cmp %g3, 0
1049 st %g3, [%o0 + V4F_COUNT]
1050 be 2f
1051 clr %o1
1052
1053 1: ld [%g1 + 0x00], %f0 ! LSU Group
1054 ld [%g1 + 0x04], %f1 ! LSU Group
1055 ld [%g1 + 0x08], %f2 ! LSU Group
1056 add %o1, 1, %o1 ! IEU0
1057 add %g1, %o5, %g1 ! IEU1
1058 cmp %o1, %g3 ! IEU1 Group
1059 fmuls %f0, M0, %f3 ! FGM
1060 fmuls %f1, M5, %f4 ! FGM Group
1061 fmuls %f2, M10, %f5 ! FGM Group
1062 fadds %f3, M12, %f3 ! FGA Group, stall, f3 available
1063 st %f3, [%g2 + 0x00] ! LSU
1064 fadds %f4, M13, %f4 ! FGA Group, f4 available
1065 st %f4, [%g2 + 0x04] ! LSU
1066 fadds %f5, M14, %f5 ! FGA Group, f5 available
1067 st %f5, [%g2 + 0x08] ! LEU
1068 bne 1b ! CTI
1069 add %g2, 0x10, %g2 ! IEU0 Group
1070 2:
1071 ba __set_v4f_3
1072 nop
1073
/*
 * _mesa_sparc_transform_points3_perspective
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element three floats (x, y, z) are read and four floats
 * are written to a 16-byte output slot:
 *   x*M0+z*M8, y*M5+z*M9, z*M10+M14, -z.
 * %f6 first holds z*M8 and is reused for the negated z (fnegs) at the end.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
1074 .globl _mesa_sparc_transform_points3_perspective
1075 _mesa_sparc_transform_points3_perspective:
1076 ld [%o2 + V4F_STRIDE], %o5
1077 LDPTR [%o2 + V4F_START], %g1
1078 LDPTR [%o0 + V4F_START], %g2
1079 ld [%o2 + V4F_COUNT], %g3
1080
1081 LDMATRIX_0_5_8_9_10_14(%o1)
1082
1083 cmp %g3, 0
1084 st %g3, [%o0 + V4F_COUNT]
1085 be 2f
1086 clr %o1
1087
1088 1: ld [%g1 + 0x00], %f0 ! LSU Group
1089 ld [%g1 + 0x04], %f1 ! LSU Group
1090 ld [%g1 + 0x08], %f2 ! LSU Group
1091 add %o1, 1, %o1 ! IEU0
1092 add %g1, %o5, %g1 ! IEU1
1093 fmuls %f0, M0, %f3 ! FGM
1094 fmuls %f2, M8, %f6 ! FGM Group
1095 fmuls %f1, M5, %f4 ! FGM Group
1096 fmuls %f2, M9, %f7 ! FGM Group
1097 fmuls %f2, M10, %f5 ! FGM Group f3 available
1098 fadds %f3, %f6, %f3 ! FGA Group f6 available
1099 st %f3, [%g2 + 0x00] ! LSU
1100 fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available
1101 st %f4, [%g2 + 0x04] ! LSU
1102 fadds %f5, M14, %f5 ! FGA Group
1103 st %f5, [%g2 + 0x08] ! LSU
1104 fnegs %f2, %f6 ! FGA Group
1105 st %f6, [%g2 + 0x0c] ! LSU
1106 cmp %o1, %g3 ! IEU1
1107 bne 1b ! CTI
1108 add %g2, 0x10, %g2 ! IEU0 Group
1109 2:
1110 ba __set_v4f_4
1111 nop
1112
/*
 * _mesa_sparc_transform_points4_general
 *
 * In:  %o0 = destination vector descriptor, %o1 = matrix pointer (handed
 *      to the LDMATRIX macro), %o2 = source vector descriptor.
 * For each source element four floats (x, y, z, w) are read and four
 * floats are written to a 16-byte output slot; output k (k = 0..3) is
 *   x*M(k) + y*M(4+k) + z*M(8+k) + w*M(12+k).
 * %f8..%f11 first hold the y products and are reused for the w products
 * once the y products have been accumulated.
 * One element is processed per pass; a count of 0 skips the loop.  The
 * source count is copied to the destination; size/flags are set via
 * __set_v4f_4.  %o1 becomes the element counter after the matrix load.
 * NOTE(review): M<n> / LDMATRIX_* are defined in sparc_matrix.h (not
 * visible here); presumably M<n> is the FP register for matrix element n.
 */
1113 .globl _mesa_sparc_transform_points4_general
1114 _mesa_sparc_transform_points4_general:
1115 ld [%o2 + V4F_STRIDE], %o5
1116 LDPTR [%o2 + V4F_START], %g1
1117 LDPTR [%o0 + V4F_START], %g2
1118 ld [%o2 + V4F_COUNT], %g3
1119
1120 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
1121
1122 cmp %g3, 0
1123 st %g3, [%o0 + V4F_COUNT]
1124 be 2f
1125 clr %o1
1126
1127 1: ld [%g1 + 0x00], %f0 ! LSU Group
1128 ld [%g1 + 0x04], %f1 ! LSU Group
1129 ld [%g1 + 0x08], %f2 ! LSU Group
1130 ld [%g1 + 0x0c], %f3 ! LSU Group
1131 add %o1, 1, %o1 ! IEU0
1132 add %g1, %o5, %g1 ! IEU1
1133 fmuls %f0, M0, %f4 ! FGM Group
1134 fmuls %f1, M4, %f8 ! FGM Group
1135 fmuls %f0, M1, %f5 ! FGM Group
1136 fmuls %f1, M5, %f9 ! FGM Group
1137 fmuls %f0, M2, %f6 ! FGM Group f4 available
1138 fmuls %f1, M6, %f10 ! FGM Group f8 available
1139 fadds %f4, %f8, %f4 ! FGA
1140 fmuls %f0, M3, %f7 ! FGM Group f5 available
1141 fmuls %f1, M7, %f11 ! FGM Group f9 available
1142 fadds %f5, %f9, %f5 ! FGA
1143 fmuls %f2, M8, %f12 ! FGM Group f6 available
1144 fmuls %f2, M9, %f13 ! FGM Group f10, f4 available
1145 fadds %f6, %f10, %f6 ! FGA
1146 fmuls %f2, M10, %f14 ! FGM Group f7 available
1147 fmuls %f2, M11, %f15 ! FGM Group f11, f5 available
1148 fadds %f7, %f11, %f7 ! FGA
1149 fmuls %f3, M12, %f8 ! FGM Group f12 available
1150 fadds %f4, %f12, %f4 ! FGA
1151 fmuls %f3, M13, %f9 ! FGM Group f13, f6 available
1152 fadds %f5, %f13, %f5 ! FGA
1153 fmuls %f3, M14, %f10 ! FGM Group f14 available
1154 fadds %f6, %f14, %f6 ! FGA
1155 fmuls %f3, M15, %f11 ! FGM Group f15, f7 available
1156 fadds %f7, %f15, %f7 ! FGA
1157 fadds %f4, %f8, %f4 ! FGA Group f8, f4 available
1158 st %f4, [%g2 + 0x00] ! LSU
1159 fadds %f5, %f9, %f5 ! FGA Group f9, f5 available
1160 st %f5, [%g2 + 0x04] ! LSU
1161 fadds %f6, %f10, %f6 ! FGA Group f10, f6 available
1162 st %f6, [%g2 + 0x08] ! LSU
1163 fadds %f7, %f11, %f7 ! FGA Group f11, f7 available
1164 st %f7, [%g2 + 0x0c] ! LSU
1165 cmp %o1, %g3 ! IEU1
1166 bne 1b ! CTI
1167 add %g2, 0x10, %g2 ! IEU0 Group
1168 2:
1169 ba __set_v4f_4
1170 nop
1171
/* _mesa_sparc_transform_points4_identity
 *
 * Leaf routine: copies every 4-float source element unchanged to the
 * destination.  No matrix is loaded; %o1 is used only as the element
 * counter.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1172 .globl _mesa_sparc_transform_points4_identity
1173 _mesa_sparc_transform_points4_identity:
1174 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1175 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1176 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1177 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1178
1179 cmp %g3, 0
1180 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1181 be 2f ! empty source: skip the loop
1182 clr %o1 ! delay slot: element counter = 0
1183
1184 1: ld [%g1 + 0x00], %f0 ! fetch the four source floats
1185 ld [%g1 + 0x04], %f1
1186 ld [%g1 + 0x08], %f2
1187 add %o1, 1, %o1 ! one more element handled
1188 ld [%g1 + 0x0c], %f3
1189 add %g1, %o5, %g1 ! step source pointer by the stride
1190 st %f0, [%g2 + 0x00] ! write them out unchanged
1191 st %f1, [%g2 + 0x04]
1192 st %f2, [%g2 + 0x08]
1193 cmp %o1, %g3 ! all elements done?
1194 st %f3, [%g2 + 0x0c]
1195 bne 1b ! loop until counter equals count
1196 add %g2, 0x10, %g2 ! delay slot: step destination by 16 bytes
1197 2:
1198 ba __set_v4f_4
1199 nop
1200
/* _mesa_sparc_transform_points4_2d
 *
 * Leaf routine: transforms the first two components of every 4-float
 * source element and copies the last two through unchanged.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o1  handed to the LDMATRIX macro on entry, then reused as the
 *        element counter
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * Per element, with in[] = %f0..%f3:
 *   out[0] = in[0]*M0 + in[1]*M4 + in[3]*M12
 *   out[1] = in[0]*M1 + in[1]*M5 + in[3]*M13
 *   out[2] = in[2]
 *   out[3] = in[3]
 *
 * NOTE(review): the LDMATRIX macro and the M<n> names live in
 * sparc_matrix.h, which is not shown here; presumably M<n> is the FP
 * register holding matrix entry n -- confirm against that header.
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1201 .globl _mesa_sparc_transform_points4_2d
1202 _mesa_sparc_transform_points4_2d:
1203 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1204 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1205 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1206 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1207
1208 LDMATRIX_0_1_4_5_12_13(%o1)
1209
1210 cmp %g3, 0
1211 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1212 be 2f ! empty source: skip the loop
1213 clr %o1 ! delay slot: element counter = 0
1214
1215 1: ld [%g1 + 0x00], %f0 ! LSU Group
1216 ld [%g1 + 0x04], %f1 ! LSU Group
1217 ld [%g1 + 0x08], %f2 ! LSU Group
1218 ld [%g1 + 0x0c], %f3 ! LSU Group
1219 add %o1, 1, %o1 ! IEU0
1220 add %g1, %o5, %g1 ! IEU1
1221 fmuls %f0, M0, %f4 ! FGM
1222 fmuls %f1, M4, %f8 ! FGM Group
1223 fmuls %f0, M1, %f5 ! FGM Group
1224 fmuls %f1, M5, %f9 ! FGM Group f4 available
1225 fmuls %f3, M12, %f12 ! FGM Group
1226 fmuls %f3, M13, %f13 ! FGM Group f8 available
1227 fadds %f4, %f8, %f4 ! FGA
1228 fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available
1229 fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail
1230 st %f4, [%g2 + 0x00] ! LSU
1231 fadds %f5, %f13, %f5 ! FGA Group f5 available
1232 st %f5, [%g2 + 0x04] ! LSU
1233 st %f2, [%g2 + 0x08] ! LSU Group
1234 st %f3, [%g2 + 0x0c] ! LSU Group
1235 cmp %o1, %g3 ! IEU1
1236 bne 1b ! CTI
1237 add %g2, 0x10, %g2 ! IEU0 Group
1238 2:
1239 ba __set_v4f_4
1240 nop
1241
/* _mesa_sparc_transform_points4_2d_no_rot
 *
 * Leaf routine: scales the first two components of every 4-float source
 * element, adds a term weighted by the fourth component, and copies the
 * last two components through unchanged.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o1  handed to the LDMATRIX macro on entry, then reused as the
 *        element counter
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * Per element, with in[] = %f0..%f3:
 *   out[0] = in[0]*M0 + in[3]*M12
 *   out[1] = in[1]*M5 + in[3]*M13
 *   out[2] = in[2]
 *   out[3] = in[3]
 *
 * NOTE(review): the LDMATRIX macro and the M<n> names live in
 * sparc_matrix.h, which is not shown here; presumably M<n> is the FP
 * register holding matrix entry n -- confirm against that header.  The
 * macro invoked here also names entries 1 and 4, which this loop never
 * reads.
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1242 .globl _mesa_sparc_transform_points4_2d_no_rot
1243 _mesa_sparc_transform_points4_2d_no_rot:
1244 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1245 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1246 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1247 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1248
1249 LDMATRIX_0_1_4_5_12_13(%o1)
1250
1251 cmp %g3, 0
1252 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1253 be 2f ! empty source: skip the loop
1254 clr %o1 ! delay slot: element counter = 0
1255
1256 1: ld [%g1 + 0x00], %f0 ! fetch the four source floats
1257 ld [%g1 + 0x04], %f1
1258 ld [%g1 + 0x08], %f2
1259 ld [%g1 + 0x0c], %f3
1260 add %o1, 1, %o1 ! one more element handled
1261 add %g1, %o5, %g1 ! step source pointer by the stride
1262 fmuls %f0, M0, %f4 ! first-component scale term
1263 fmuls %f3, M12, %f8 ! first-component term from fourth input
1264 fmuls %f1, M5, %f5 ! second-component scale term
1265 fmuls %f3, M13, %f9 ! second-component term from fourth input
1266 fadds %f4, %f8, %f4
1267 st %f4, [%g2 + 0x00]
1268 fadds %f5, %f9, %f5
1269 st %f5, [%g2 + 0x04]
1270 st %f2, [%g2 + 0x08] ! third component copied as is
1271 st %f3, [%g2 + 0x0c] ! fourth component copied as is
1272 cmp %o1, %g3 ! all elements done?
1273 bne 1b ! loop until counter equals count
1274 add %g2, 0x10, %g2 ! delay slot: step destination by 16 bytes
1275 2:
1276 ba __set_v4f_4
1277 nop
1278
/* _mesa_sparc_transform_points4_3d
 *
 * Leaf routine: transforms the first three components of every 4-float
 * source element with twelve matrix entries and copies the fourth
 * component through unchanged.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o1  handed to the LDMATRIX macro on entry, then reused as the
 *        element counter
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * Per element, with in[] = %f0..%f3:
 *   out[0] = in[0]*M0 + in[1]*M4 + in[2]*M8  + in[3]*M12
 *   out[1] = in[0]*M1 + in[1]*M5 + in[2]*M9  + in[3]*M13
 *   out[2] = in[0]*M2 + in[1]*M6 + in[2]*M10 + in[3]*M14
 *   out[3] = in[3]
 *
 * NOTE(review): the LDMATRIX macro and the M<n> names live in
 * sparc_matrix.h, which is not shown here; presumably M<n> is the FP
 * register holding matrix entry n -- confirm against that header.
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1279 .globl _mesa_sparc_transform_points4_3d
1280 _mesa_sparc_transform_points4_3d:
1281 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1282 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1283 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1284 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1285
1286 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1287
1288 cmp %g3, 0
1289 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1290 be 2f ! empty source: skip the loop
1291 clr %o1 ! delay slot: element counter = 0
1292
1293 1: ld [%g1 + 0x00], %f0 ! LSU Group
1294 ld [%g1 + 0x04], %f1 ! LSU Group
1295 ld [%g1 + 0x08], %f2 ! LSU Group
1296 ld [%g1 + 0x0c], %f3 ! LSU Group
1297 add %o1, 1, %o1 ! IEU0
1298 add %g1, %o5, %g1 ! IEU1
1299 fmuls %f0, M0, %f4 ! FGM
1300 fmuls %f1, M4, %f7 ! FGM Group
1301 fmuls %f0, M1, %f5 ! FGM Group
1302 fmuls %f1, M5, %f8 ! FGM Group
1303 fmuls %f0, M2, %f6 ! FGM Group f4 available
1304 fmuls %f1, M6, %f9 ! FGM Group f7 available
1305 fadds %f4, %f7, %f4 ! FGA
1306 fmuls %f2, M8, %f10 ! FGM Group f5 available
1307 fmuls %f2, M9, %f11 ! FGM Group f8 available
1308 fadds %f5, %f8, %f5 ! FGA
1309 fmuls %f2, M10, %f12 ! FGM Group f6 available
1310 fmuls %f3, M12, %f13 ! FGM Group f9, f4 available
1311 fadds %f6, %f9, %f6 ! FGA
1312 fmuls %f3, M13, %f14 ! FGM Group f10 available
1313 fadds %f4, %f10, %f4 ! FGA
1314 fmuls %f3, M14, %f15 ! FGM Group f11, f5 available
1315 fadds %f5, %f11, %f5 ! FGA
1316 fadds %f6, %f12, %f6 ! FGA Group stall, f12, f13, f6 available
1317 fadds %f4, %f13, %f4 ! FGA Group f14, f4 available
1318 st %f4, [%g2 + 0x00] ! LSU
1319 fadds %f5, %f14, %f5 ! FGA Group f15, f5 available
1320 st %f5, [%g2 + 0x04] ! LSU
1321 fadds %f6, %f15, %f6 ! FGA Group f6 available
1322 st %f6, [%g2 + 0x08] ! LSU
1323 st %f3, [%g2 + 0x0c] ! LSU Group
1324 cmp %o1, %g3 ! IEU1
1325 bne 1b ! CTI
1326 add %g2, 0x10, %g2 ! IEU0 Group
1327 2:
1328 ba __set_v4f_4
1329 nop
1330
/* _mesa_sparc_transform_points4_3d_no_rot
 *
 * Leaf routine: scales the first three components of every 4-float
 * source element, adds a term weighted by the fourth component, and
 * copies the fourth component through unchanged.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o1  handed to the LDMATRIX macro on entry, then reused as the
 *        element counter
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * Per element, with in[] = %f0..%f3:
 *   out[0] = in[0]*M0  + in[3]*M12
 *   out[1] = in[1]*M5  + in[3]*M13
 *   out[2] = in[2]*M10 + in[3]*M14
 *   out[3] = in[3]
 *
 * NOTE(review): the LDMATRIX macro and the M<n> names live in
 * sparc_matrix.h, which is not shown here; presumably M<n> is the FP
 * register holding matrix entry n -- confirm against that header.
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1331 .globl _mesa_sparc_transform_points4_3d_no_rot
1332 _mesa_sparc_transform_points4_3d_no_rot:
1333 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1334 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1335 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1336 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1337
1338 LDMATRIX_0_5_10_12_13_14(%o1)
1339
1340 cmp %g3, 0
1341 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1342 be 2f ! empty source: skip the loop
1343 clr %o1 ! delay slot: element counter = 0
1344
1345 1: ld [%g1 + 0x00], %f0 ! LSU Group
1346 ld [%g1 + 0x04], %f1 ! LSU Group
1347 ld [%g1 + 0x08], %f2 ! LSU Group
1348 ld [%g1 + 0x0c], %f3 ! LSU Group
1349 add %o1, 1, %o1 ! IEU0
1350 add %g1, %o5, %g1 ! IEU1
1351 fmuls %f0, M0, %f4 ! FGM
1352 fmuls %f3, M12, %f7 ! FGM Group
1353 fmuls %f1, M5, %f5 ! FGM Group
1354 fmuls %f3, M13, %f8 ! FGM Group
1355 fmuls %f2, M10, %f6 ! FGM Group f4 available
1356 fmuls %f3, M14, %f9 ! FGM Group f7 available
1357 fadds %f4, %f7, %f4 ! FGA
1358 st %f4, [%g2 + 0x00] ! LSU
1359 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1360 st %f5, [%g2 + 0x04] ! LSU
1361 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1362 st %f6, [%g2 + 0x08] ! LSU
1363 st %f3, [%g2 + 0x0c] ! LSU Group
1364 cmp %o1, %g3 ! IEU1
1365 bne 1b ! CTI
1366 add %g2, 0x10, %g2 ! IEU0 Group
1367 2:
1368 ba __set_v4f_4
1369 nop
1370
/* _mesa_sparc_transform_points4_perspective
 *
 * Leaf routine: applies a six-entry matrix to every 4-float source
 * element; the fourth output component is the negated third input
 * component.
 *
 *   %o0  destination vector struct (V4F_COUNT is written, V4F_START
 *        supplies the output pointer)
 *   %o1  handed to the LDMATRIX macro on entry, then reused as the
 *        element counter
 *   %o2  source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT are read)
 *
 * Per element, with in[] = %f0..%f3:
 *   out[0] = in[0]*M0  + in[2]*M8
 *   out[1] = in[1]*M5  + in[2]*M9
 *   out[2] = in[2]*M10 + in[3]*M14
 *   out[3] = -in[2]
 *
 * NOTE(review): the LDMATRIX macro and the M<n> names live in
 * sparc_matrix.h, which is not shown here; presumably M<n> is the FP
 * register holding matrix entry n -- confirm against that header.
 *
 * The source pointer advances by the stride read from the source struct;
 * the destination pointer always advances by 0x10 bytes.  The routine
 * finishes by branching to __set_v4f_4, which records size 4 in the
 * destination struct, ORs VEC_SIZE_4 into its flags and returns.
 */
1371 .globl _mesa_sparc_transform_points4_perspective
1372 _mesa_sparc_transform_points4_perspective:
1373 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = byte step applied to the source pointer
1374 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source element pointer
1375 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination element pointer
1376 ld [%o2 + V4F_COUNT], %g3 ! %g3 = number of elements
1377
1378 LDMATRIX_0_5_8_9_10_14(%o1)
1379
1380 cmp %g3, 0
1381 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1382 be 2f ! empty source: skip the loop
1383 clr %o1 ! delay slot: element counter = 0
1384
1385 1: ld [%g1 + 0x00], %f0 ! LSU Group
1386 ld [%g1 + 0x04], %f1 ! LSU Group
1387 ld [%g1 + 0x08], %f2 ! LSU Group
1388 ld [%g1 + 0x0c], %f3 ! LSU Group
1389 add %o1, 1, %o1 ! IEU0
1390 add %g1, %o5, %g1 ! IEU1
1391 fmuls %f0, M0, %f4 ! FGM
1392 fmuls %f2, M8, %f7 ! FGM Group
1393 fmuls %f1, M5, %f5 ! FGM Group
1394 fmuls %f2, M9, %f8 ! FGM Group
1395 fmuls %f2, M10, %f6 ! FGM Group f4 available
1396 fmuls %f3, M14, %f9 ! FGM Group f7 available
1397 fadds %f4, %f7, %f4 ! FGA
1398 st %f4, [%g2 + 0x00] ! LSU
1399 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1400 st %f5, [%g2 + 0x04] ! LSU
1401 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1402 st %f6, [%g2 + 0x08] ! LSU
1403 fnegs %f2, %f7 ! FGA Group
1404 st %f7, [%g2 + 0x0c] ! LSU
1405 cmp %o1, %g3 ! IEU1
1406 bne 1b ! CTI
1407 add %g2, 0x10, %g2 ! IEU0 Group
1408 2:
1409 ba __set_v4f_4
1410 nop