Fix VERT_SET_RGBA, VERT_SET_SPEC macros to account for change to floating
[mesa.git] / src / mesa / sparc / xform.S
1 /* $Id: xform.S,v 1.2 2001/06/05 23:54:01 davem69 Exp $ */
2
3 /* TODO
4 *
5 * 1) It would be nice if load/store double could be used
6 * at least for the matrix parts. I think for the matrices
7 * it is safe, but for the vertices it probably is not due to
8 * things like glInterleavedArrays etc.
9 *
10 * UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
11 *
12 * 2) One extremely slick trick would be if we could enclose
13 * groups of xform calls on the same vertices such that
14 * we just load the matrix into f16-->f31 before the calls
15 * and then we would not have to do them here. This may be
16 * tricky and not much of a gain though.
17 */
18
19 #include "sparc_matrix.h"
20
21 .text
22 .align 64
23
/*
 * __set_v4f_1: tail helper, reached with `ba` from the transform routines.
 * In:  %o0 = vector whose V4F_SIZE / V4F_FLAGS words are updated.
 * Effect: V4F_SIZE = 1, V4F_FLAGS |= VEC_SIZE_1, then returns to the
 *         original caller with retl.
 * Clobbers: %g1, %g2.
 */
24 __set_v4f_1:
25 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
26 mov 1, %g1
27 st %g1, [%o0 + V4F_SIZE] ! size = 1
28 or %g2, VEC_SIZE_1, %g2
29 retl
30 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_2: tail helper, reached with `ba` from the transform routines.
 * In:  %o0 = vector whose V4F_SIZE / V4F_FLAGS words are updated.
 * Effect: V4F_SIZE = 2, V4F_FLAGS |= VEC_SIZE_2, then returns to the
 *         original caller with retl.
 * Clobbers: %g1, %g2.
 */
31 __set_v4f_2:
32 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
33 mov 2, %g1
34 st %g1, [%o0 + V4F_SIZE] ! size = 2
35 or %g2, VEC_SIZE_2, %g2
36 retl
37 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_3: tail helper, reached with `ba` from the transform routines.
 * In:  %o0 = vector whose V4F_SIZE / V4F_FLAGS words are updated.
 * Effect: V4F_SIZE = 3, V4F_FLAGS |= VEC_SIZE_3, then returns to the
 *         original caller with retl.
 * Clobbers: %g1, %g2.
 */
38 __set_v4f_3:
39 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
40 mov 3, %g1
41 st %g1, [%o0 + V4F_SIZE] ! size = 3
42 or %g2, VEC_SIZE_3, %g2
43 retl
44 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_4: tail helper, reached with `ba` from the transform routines.
 * In:  %o0 = vector whose V4F_SIZE / V4F_FLAGS words are updated.
 * Effect: V4F_SIZE = 4, V4F_FLAGS |= VEC_SIZE_4, then returns to the
 *         original caller with retl.
 * Clobbers: %g1, %g2.
 */
45 __set_v4f_4:
46 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
47 mov 4, %g1
48 st %g1, [%o0 + V4F_SIZE] ! size = 4
49 or %g2, VEC_SIZE_4, %g2
50 retl
51 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
52
53 /* First the raw versions. */
54
/*
 * _mesa_sparc_transform_points1_general
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.  The V4F_* offsets, LDPTR and the Mn operand
 *      names are not defined in this file (presumably sparc_matrix.h).
 * Per vertex, with x = first source float:
 *      out[0] = x*M0 + M12    out[1] = x*M1 + M13
 *      out[2] = x*M2 + M14    out[3] = x*M3 + M15
 * The source cursor advances by V4F_STRIDE, the destination by 0x10 per
 * vertex.  Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f4, %f8-%f12.
 */
55 .globl _mesa_sparc_transform_points1_general
56 _mesa_sparc_transform_points1_general:
57 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (added to byte cursor)
58 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
59 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
60 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
61
62 LDMATRIX_0_1_2_3_12_13_14_15(%o1)
63
64 cmp %g3, 1
65 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
66 bl 3f ! count < 1: nothing to transform
67 clr %o1 ! delay slot: %o1 = vertices done = 0
68
69 be 2f ! count == 1: only the single-vertex tail
70 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
71
72 1: ld [%g1 + 0x00], %f0 ! LSU Group
73 add %g1, %o5, %g1 ! IEU0
74 ld [%g1 + 0x00], %f8 ! LSU Group
75 add %o1, 2, %o1 ! IEU0
76 add %g1, %o5, %g1 ! IEU1
77 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
78 fmuls %f0, M1, %f2 ! FGM Group
79 fmuls %f0, M2, %f3 ! FGM Group
80 fmuls %f0, M3, %f4 ! FGM Group
81 fmuls %f8, M0, %f9 ! FGM Group f1 available
82 fadds %f1, M12, %f1 ! FGA
83 st %f1, [%g2 + 0x00] ! LSU
84 fmuls %f8, M1, %f10 ! FGM Group f2 available
85 fadds %f2, M13, %f2 ! FGA
86 st %f2, [%g2 + 0x04] ! LSU
87 fmuls %f8, M2, %f11 ! FGM Group f3 available
88 fadds %f3, M14, %f3 ! FGA
89 st %f3, [%g2 + 0x08] ! LSU
90 fmuls %f8, M3, %f12 ! FGM Group f4 available
91 fadds %f4, M15, %f4 ! FGA
92 st %f4, [%g2 + 0x0c] ! LSU
93 fadds %f9, M12, %f9 ! FGA Group f9 available
94 st %f9, [%g2 + 0x10] ! LSU
95 fadds %f10, M13, %f10 ! FGA Group f10 available
96 st %f10, [%g2 + 0x14] ! LSU
97 fadds %f11, M14, %f11 ! FGA Group f11 available
98 st %f11, [%g2 + 0x18] ! LSU
99 fadds %f12, M15, %f12 ! FGA Group f12 available
100 st %f12, [%g2 + 0x1c] ! LSU
101 cmp %o1, %o2 ! IEU1
102 bne 1b ! CTI
103 add %g2, 0x20, %g2 ! IEU0 Group
104
105 cmp %o1, %g3 ! count was even: every vertex already done
106 be 3f
107 nop
108
109 2: ld [%g1 + 0x00], %f0 ! LSU Group
110 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
111 fmuls %f0, M1, %f2 ! FGM Group
112 fmuls %f0, M2, %f3 ! FGM Group
113 fmuls %f0, M3, %f4 ! FGM Group
114 fadds %f1, M12, %f1 ! FGA Group
115 st %f1, [%g2 + 0x00] ! LSU
116 fadds %f2, M13, %f2 ! FGA Group
117 st %f2, [%g2 + 0x04] ! LSU
118 fadds %f3, M14, %f3 ! FGA Group
119 st %f3, [%g2 + 0x08] ! LSU
120 fadds %f4, M15, %f4 ! FGA Group
121 st %f4, [%g2 + 0x0c] ! LSU
122
123 3:
124 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
125 nop
126
/*
 * _mesa_sparc_transform_points1_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read,
 *      only reused as a counter).
 * If %o0 == %o2 the routine returns at label 4 without touching the
 * vector.  Otherwise the first float of every source vertex is copied to
 * word 0 of the matching destination element (source advances by
 * V4F_STRIDE, destination by 0x10).  Loop 1 copies two vertices per pass,
 * label 2 copies the odd one.
 * Exit: tail-jumps to __set_v4f_1 (or plain retl in the aliased case).
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0, %f1.
 */
127 .globl _mesa_sparc_transform_points1_identity
128 _mesa_sparc_transform_points1_identity:
129 cmp %o0, %o2 ! same vector object for source and destination?
130 be 4f ! yes: nothing to copy
131 ld [%o2 + V4F_STRIDE], %o5 ! delay slot: %o5 = source stride
132 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
133 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
134 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
135
136 cmp %g3, 1
137 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
138 bl 3f ! count < 1: nothing to copy
139 clr %o1 ! delay slot: %o1 = vertices done = 0
140
141 be 2f ! count == 1: only the single-vertex tail
142 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
143
144 1: ld [%g1 + 0x00], %f0 ! LSU Group
145 add %g1, %o5, %g1 ! IEU0
146 ld [%g1 + 0x00], %f1 ! LSU Group
147 add %o1, 2, %o1 ! IEU0
148 add %g1, %o5, %g1 ! IEU1
149 st %f0, [%g2 + 0x00] ! LSU Group
150 cmp %o1, %o2 ! IEU1
151 st %f1, [%g2 + 0x10] ! LSU Group
152 bne 1b ! CTI
153 add %g2, 0x20, %g2 ! IEU0
154
155 cmp %o1, %g3 ! count was even: every vertex already done
156 be 3f
157 nop
158
159 2: ld [%g1 + 0x00], %f0
160 addx %g0, %g0, %g0 ! result goes to %g0, so no register changes; presumably a filler op
161 st %f0, [%g2 + 0x00]
162
163 3:
164 ba __set_v4f_1 ! tail-jump: helper sets size 1 and returns
165 nop
166
167 4: retl ! aliased case: return with the vector untouched
168 nop
169
/*
 * _mesa_sparc_transform_points1_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x = first source float:
 *      out[0] = x*M0 + M12    out[1] = x*M1 + M13
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_2.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f4, %f8-%f12.
 */
170 .globl _mesa_sparc_transform_points1_2d
171 _mesa_sparc_transform_points1_2d:
172 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
173 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
174 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
175 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
176
177 LDMATRIX_0_1_12_13(%o1)
178
179 cmp %g3, 1
180 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
181 bl 3f ! count < 1: nothing to transform
182 clr %o1 ! delay slot: %o1 = vertices done = 0
183
184 be 2f ! count == 1: only the single-vertex tail
185 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
186
187 1: ld [%g1 + 0x00], %f0 ! LSU Group
188 add %g1, %o5, %g1 ! IEU0
189 ld [%g1 + 0x00], %f8 ! LSU Group
190 add %o1, 2, %o1 ! IEU0
191 add %g1, %o5, %g1 ! IEU1
192 fmuls %f0, M0, %f1 ! FGM Group
193 fmuls %f0, M1, %f2 ! FGM Group
194 fmuls %f8, M0, %f9 ! FGM Group
195 fmuls %f8, M1, %f10 ! FGM Group
196 fadds %f1, M12, %f3 ! FGA Group f1 available
197 st %f3, [%g2 + 0x00] ! LSU
198 fadds %f2, M13, %f4 ! FGA Group f2 available
199 st %f4, [%g2 + 0x04] ! LSU
200 fadds %f9, M12, %f11 ! FGA Group f9 available
201 st %f11, [%g2 + 0x10] ! LSU
202 fadds %f10, M13, %f12 ! FGA Group f10 available
203 st %f12, [%g2 + 0x14] ! LSU
204 cmp %o1, %o2 ! IEU1
205 bne 1b ! CTI
206 add %g2, 0x20, %g2 ! IEU0 Group
207
208 cmp %o1, %g3 ! count was even: every vertex already done
209 be 3f
210 nop
211
212 2: ld [%g1 + 0x00], %f0
213 fmuls %f0, M0, %f1
214 fmuls %f0, M1, %f2
215 fadds %f1, M12, %f3
216 st %f3, [%g2 + 0x00]
217 fadds %f2, M13, %f4
218 st %f4, [%g2 + 0x04]
219
220 3:
221 ba __set_v4f_2 ! tail-jump: helper sets size 2 and returns
222 nop
223
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x = first source float:
 *      out[0] = x*M0 + M12    out[1] = M13 (stored unchanged)
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_2.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0, %f1, %f3-%f6.
 */
224 .globl _mesa_sparc_transform_points1_2d_no_rot
225 _mesa_sparc_transform_points1_2d_no_rot:
226 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
227 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
228 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
229 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
230
231 LDMATRIX_0_12_13(%o1)
232
233 cmp %g3, 1
234 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
235 bl 3f ! count < 1: nothing to transform
236 clr %o1 ! delay slot: %o1 = vertices done = 0
237
238 be 2f ! count == 1: only the single-vertex tail
239 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
240
241 1: ld [%g1 + 0x00], %f0 ! LSU Group
242 add %g1, %o5, %g1 ! IEU0
243 ld [%g1 + 0x00], %f4 ! LSU Group
244 add %o1, 2, %o1 ! IEU0
245 add %g1, %o5, %g1 ! IEU1
246 fmuls %f0, M0, %f1 ! FGM Group
247 fmuls %f4, M0, %f5 ! FGM Group
248 fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
249 st %f3, [%g2 + 0x00] ! LSU
250 st M13, [%g2 + 0x04] ! LSU Group, f5 available
251 fadds %f5, M12, %f6 ! FGA
252 st %f6, [%g2 + 0x10] ! LSU Group
253 st M13, [%g2 + 0x14] ! LSU Group
254 cmp %o1, %o2 ! IEU1
255 bne 1b ! CTI
256 add %g2, 0x20, %g2 ! IEU0 Group
257
258 cmp %o1, %g3 ! count was even: every vertex already done
259 be 3f
260 nop
261
262 2: ld [%g1 + 0x00], %f0
263 fmuls %f0, M0, %f1
264 fadds %f1, M12, %f3
265 st %f3, [%g2 + 0x00]
266 st M13, [%g2 + 0x04]
267
268 3:
269 ba __set_v4f_2 ! tail-jump: helper sets size 2 and returns
270 nop
271
/*
 * _mesa_sparc_transform_points1_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x = first source float:
 *      out[0] = x*M0 + M12   out[1] = x*M1 + M13   out[2] = x*M2 + M14
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f7.
 */
272 .globl _mesa_sparc_transform_points1_3d
273 _mesa_sparc_transform_points1_3d:
274 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
275 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
276 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
277 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
278
279 LDMATRIX_0_1_2_12_13_14(%o1)
280
281 cmp %g3, 1
282 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
283 bl 3f ! count < 1: nothing to transform
284 clr %o1 ! delay slot: %o1 = vertices done = 0
285
286 be 2f ! count == 1: only the single-vertex tail
287 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
288
289 1: ld [%g1 + 0x00], %f0 ! LSU Group
290 add %g1, %o5, %g1 ! IEU0
291 ld [%g1 + 0x00], %f4 ! LSU Group
292 add %o1, 2, %o1 ! IEU0
293 add %g1, %o5, %g1 ! IEU1
294 fmuls %f0, M0, %f1 ! FGM Group
295 fmuls %f0, M1, %f2 ! FGM Group
296 fmuls %f0, M2, %f3 ! FGM Group
297 fmuls %f4, M0, %f5 ! FGM Group
298 fadds %f1, M12, %f1 ! FGA Group, f1 available
299 st %f1, [%g2 + 0x00] ! LSU
300 fmuls %f4, M1, %f6 ! FGM
301 fadds %f2, M13, %f2 ! FGA Group, f2 available
302 st %f2, [%g2 + 0x04] ! LSU
303 fmuls %f4, M2, %f7 ! FGM
304 fadds %f3, M14, %f3 ! FGA Group, f3 available
305 st %f3, [%g2 + 0x08] ! LSU
306 fadds %f5, M12, %f5 ! FGA Group, f5 available
307 st %f5, [%g2 + 0x10] ! LSU
308 fadds %f6, M13, %f6 ! FGA Group, f6 available
309 st %f6, [%g2 + 0x14] ! LSU
310 fadds %f7, M14, %f7 ! FGA Group, f7 available
311 st %f7, [%g2 + 0x18] ! LSU
312 cmp %o1, %o2 ! IEU1
313 bne 1b ! CTI
314 add %g2, 0x20, %g2 ! IEU0 Group
315
316 cmp %o1, %g3 ! count was even: every vertex already done
317 be 3f
318 nop
319
320 2: ld [%g1 + 0x00], %f0
321 fmuls %f0, M0, %f1
322 fmuls %f0, M1, %f2
323 fmuls %f0, M2, %f3
324 fadds %f1, M12, %f1
325 st %f1, [%g2 + 0x00]
326 fadds %f2, M13, %f2
327 st %f2, [%g2 + 0x04]
328 fadds %f3, M14, %f3
329 st %f3, [%g2 + 0x08]
330
331 3:
332 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
333 nop
334
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x = first source float:
 *      out[0] = x*M0 + M12   out[1] = M13   out[2] = M14
 * (M13 and M14 are stored unchanged.)
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f3.
 */
335 .globl _mesa_sparc_transform_points1_3d_no_rot
336 _mesa_sparc_transform_points1_3d_no_rot:
337 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
338 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
339 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
340 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
341
342 LDMATRIX_0_12_13_14(%o1)
343
344 cmp %g3, 1
345 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
346 bl 3f ! count < 1: nothing to transform
347 clr %o1 ! delay slot: %o1 = vertices done = 0
348
349 be 2f ! count == 1: only the single-vertex tail
350 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
351
352 1: ld [%g1 + 0x00], %f0 ! LSU Group
353 add %g1, %o5, %g1 ! IEU0
354 ld [%g1 + 0x00], %f2 ! LSU Group
355 add %o1, 2, %o1 ! IEU0
356 add %g1, %o5, %g1 ! IEU1
357 fmuls %f0, M0, %f1 ! FGM Group
358 fmuls %f2, M0, %f3 ! FGM Group
359 fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
360 st %f1, [%g2 + 0x00] ! LSU
361 fadds %f3, M12, %f3 ! FGA Group, f3 available
362 st M13, [%g2 + 0x04] ! LSU
363 st M14, [%g2 + 0x08] ! LSU Group
364 st %f3, [%g2 + 0x10] ! LSU Group
365 st M13, [%g2 + 0x14] ! LSU Group
366 st M14, [%g2 + 0x18] ! LSU Group
367 cmp %o1, %o2 ! IEU1
368 bne 1b ! CTI
369 add %g2, 0x20, %g2 ! IEU0 Group
370
371 cmp %o1, %g3 ! count was even: every vertex already done
372 be 3f
373 nop
374
375 2: ld [%g1 + 0x00], %f0
376 fmuls %f0, M0, %f1
377 fadds %f1, M12, %f1
378 st %f1, [%g2 + 0x00]
379 st M13, [%g2 + 0x04]
380 st M14, [%g2 + 0x08]
381
382 3:
383 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
384 nop
385
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x = first source float:
 *      out[0] = x*M0   out[1] = 0   out[2] = M14   out[3] = 0
 * The zeros are written as an all-zero word from %g0.
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f3.
 */
386 .globl _mesa_sparc_transform_points1_perspective
387 _mesa_sparc_transform_points1_perspective:
388 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
389 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
390 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
391 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
392
393 LDMATRIX_0_14(%o1)
394
395 cmp %g3, 1
396 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
397 bl 3f ! count < 1: nothing to transform
398 clr %o1 ! delay slot: %o1 = vertices done = 0
399
400 be 2f ! count == 1: only the single-vertex tail
401 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
402
403 1: ld [%g1 + 0x00], %f0 ! LSU Group
404 add %g1, %o5, %g1 ! IEU0
405 ld [%g1 + 0x00], %f2 ! LSU Group
406 add %o1, 2, %o1 ! IEU0
407 add %g1, %o5, %g1 ! IEU1
408 fmuls %f0, M0, %f1 ! FGM Group
409 st %f1, [%g2 + 0x00] ! LSU
410 fmuls %f2, M0, %f3 ! FGM Group
411 st %g0, [%g2 + 0x04] ! LSU
412 st M14, [%g2 + 0x08] ! LSU Group
413 st %g0, [%g2 + 0x0c] ! LSU Group
414 st %f3, [%g2 + 0x10] ! LSU Group
415 st %g0, [%g2 + 0x14] ! LSU Group
416 st M14, [%g2 + 0x18] ! LSU Group
417 st %g0, [%g2 + 0x1c] ! LSU Group
418 cmp %o1, %o2 ! IEU1
419 bne 1b ! CTI
420 add %g2, 0x20, %g2 ! IEU0 Group
421
422 cmp %o1, %g3 ! count was even: every vertex already done
423 be 3f
424 nop
425
426 2: ld [%g1 + 0x00], %f0
427 fmuls %f0, M0, %f1
428 st %f1, [%g2 + 0x00]
429 st %g0, [%g2 + 0x04]
430 st M14, [%g2 + 0x08]
431 st %g0, [%g2 + 0x0c]
432
433 3:
434 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
435 nop
436
/*
 * _mesa_sparc_transform_points2_general
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[i] = x*M(i) + M(12+i) + y*M(4+i)      for i = 0..3
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f9.
 */
437 .globl _mesa_sparc_transform_points2_general
438 _mesa_sparc_transform_points2_general:
439 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
440 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
441 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
442 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
443
444 LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
445
446 cmp %g3, 0
447 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
448 be 2f ! empty input: skip the loop
449 clr %o1 ! delay slot: %o1 = vertices done = 0
450
451 1: ld [%g1 + 0x00], %f0 ! LSU Group
452 ld [%g1 + 0x04], %f1 ! LSU Group
453 add %o1, 1, %o1 ! IEU0
454 add %g1, %o5, %g1 ! IEU1
455 fmuls %f0, M0, %f2 ! FGM Group
456 fmuls %f0, M1, %f3 ! FGM Group
457 fmuls %f0, M2, %f4 ! FGM Group
458 fmuls %f0, M3, %f5 ! FGM Group
459 fadds %f2, M12, %f2 ! FGA Group f2 available
460 fmuls %f1, M4, %f6 ! FGM
461 fadds %f3, M13, %f3 ! FGA Group f3 available
462 fmuls %f1, M5, %f7 ! FGM
463 fadds %f4, M14, %f4 ! FGA Group f4 available
464 fmuls %f1, M6, %f8 ! FGM
465 fadds %f5, M15, %f5 ! FGA Group f5 available
466 fmuls %f1, M7, %f9 ! FGM
467 fadds %f2, %f6, %f2 ! FGA Group f6 available
468 st %f2, [%g2 + 0x00] ! LSU
469 fadds %f3, %f7, %f3 ! FGA Group f7 available
470 st %f3, [%g2 + 0x04] ! LSU
471 fadds %f4, %f8, %f4 ! FGA Group f8 available
472 st %f4, [%g2 + 0x08] ! LSU
473 fadds %f5, %f9, %f5 ! FGA Group f9 available
474 st %f5, [%g2 + 0x0c] ! LSU
475 cmp %o1, %g3 ! IEU1
476 bne 1b ! CTI
477 add %g2, 0x10, %g2 ! IEU0 Group
478 2:
479 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
480 nop
481
/*
 * _mesa_sparc_transform_points2_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read,
 *      only reused as a counter).
 * If %o2 == %o0 the routine returns at label 3 without touching the
 * vector.  Otherwise the first two floats of every source vertex are
 * copied to words 0 and 1 of the matching destination element (source
 * advances by V4F_STRIDE, destination by 0x10).
 * Exit: tail-jumps to __set_v4f_2 (or plain retl in the aliased case).
 * Scratch: %g1-%g3, %o1, %o5, %f0, %f1.
 */
482 .globl _mesa_sparc_transform_points2_identity
483 _mesa_sparc_transform_points2_identity:
484 cmp %o2, %o0 ! same vector object for source and destination?
485 be 3f ! yes: nothing to copy
486 ld [%o2 + V4F_STRIDE], %o5 ! delay slot: %o5 = source stride
487 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
488 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
489 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
490
491 cmp %g3, 0
492 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
493 be 2f ! empty input: skip the loop
494 clr %o1 ! delay slot: %o1 = vertices done = 0
495
496 1: ld [%g1 + 0x00], %f0 ! LSU Group
497 add %o1, 1, %o1 ! IEU0
498 ld [%g1 + 0x04], %f1 ! LSU Group
499 add %g1, %o5, %g1 ! IEU0
500 cmp %o1, %g3 ! IEU1
501 st %f0, [%g2 + 0x00] ! LSU Group
502 st %f1, [%g2 + 0x04] ! LSU Group
503 bne 1b ! CTI
504 add %g2, 0x10, %g2 ! IEU0
505 2:
506 ba __set_v4f_2 ! tail-jump: helper sets size 2 and returns
507 nop
508
509 3: retl ! aliased case: return with the vector untouched
510 nop
511
/*
 * _mesa_sparc_transform_points2_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[0] = x*M0 + M12 + y*M4
 *      out[1] = x*M1 + M13 + y*M5
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_2.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f3, %f6-%f13.
 */
512 .globl _mesa_sparc_transform_points2_2d
513 _mesa_sparc_transform_points2_2d:
514 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
515 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
516 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
517 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
518
519 LDMATRIX_0_1_4_5_12_13(%o1)
520
521 cmp %g3, 1
522 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
523 bl 3f ! count < 1: nothing to transform
524 clr %o1 ! delay slot: %o1 = vertices done = 0
525
526 be 2f ! count == 1: only the single-vertex tail
527 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
528
529 1: ld [%g1 + 0x00], %f0 ! LSU Group
530 ld [%g1 + 0x04], %f1 ! LSU Group
531 add %o1, 2, %o1 ! IEU0
532 add %g1, %o5, %g1 ! IEU1
533 fmuls %f0, M0, %f2 ! FGM
534 ld [%g1 + 0x00], %f8 ! LSU Group
535 fmuls %f0, M1, %f3 ! FGM
536 ld [%g1 + 0x04], %f9 ! LSU Group
537 fmuls %f1, M4, %f6 ! FGM
538 fmuls %f1, M5, %f7 ! FGM Group
539 add %g1, %o5, %g1 ! IEU0
540 fmuls %f8, M0, %f10 ! FGM Group f2 available
541 fadds %f2, M12, %f2 ! FGA
542 fmuls %f8, M1, %f11 ! FGM Group f3 available
543 fadds %f3, M13, %f3 ! FGA
544 fmuls %f9, M4, %f12 ! FGM Group
545 fmuls %f9, M5, %f13 ! FGM Group
546 fadds %f10, M12, %f10 ! FGA Group f2, f10 available
547 fadds %f2, %f6, %f2 ! FGA Group f3, f11 available
548 st %f2, [%g2 + 0x00] ! LSU
549 fadds %f11, M13, %f11 ! FGA Group f12 available
550 fadds %f3, %f7, %f3 ! FGA Group f13 available
551 st %f3, [%g2 + 0x04] ! LSU
552 fadds %f10, %f12, %f10 ! FGA Group f10 available
553 st %f10, [%g2 + 0x10] ! LSU
554 fadds %f11, %f13, %f11 ! FGA Group f11 available
555 st %f11, [%g2 + 0x14] ! LSU
556 cmp %o1, %o2 ! IEU1
557 bne 1b ! CTI
558 add %g2, 0x20, %g2 ! IEU0 Group
559
560 cmp %o1, %g3 ! count was even: every vertex already done
561 be 3f
562 nop
563
564 2: ld [%g1 + 0x00], %f0 ! LSU Group
565 ld [%g1 + 0x04], %f1 ! LSU Group
566 fmuls %f0, M0, %f2 ! FGM Group
567 fmuls %f0, M1, %f3 ! FGM Group
568 fmuls %f1, M4, %f6 ! FGM Group
569 fmuls %f1, M5, %f7 ! FGM Group
570 fadds %f2, M12, %f2 ! FGA Group f2 available
571 fadds %f3, M13, %f3 ! FGA Group f3 available
572 fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available
573 st %f2, [%g2 + 0x00] ! LSU
574 fadds %f3, %f7, %f3 ! FGA Group f3 available
575 st %f3, [%g2 + 0x04] ! LSU
576
577 3:
578 ba __set_v4f_2 ! tail-jump: helper sets size 2 and returns
579 nop
580
/*
 * _mesa_sparc_transform_points2_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[0] = x*M0 + M12    out[1] = y*M5 + M13
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * Exit: tail-jumps to __set_v4f_2.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f7.
 */
581 .globl _mesa_sparc_transform_points2_2d_no_rot
582 _mesa_sparc_transform_points2_2d_no_rot:
583 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
584 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
585 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
586 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
587
588 LDMATRIX_0_5_12_13(%o1)
589
590 cmp %g3, 1
591 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
592 bl 3f ! count < 1: nothing to transform
593 clr %o1 ! delay slot: %o1 = vertices done = 0
594
595 be 2f ! count == 1: only the single-vertex tail
596 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
597
598 1: ld [%g1 + 0x00], %f0 ! LSU Group
599 ld [%g1 + 0x04], %f1 ! LSU Group
600 add %o1, 2, %o1 ! IEU0
601 add %g1, %o5, %g1 ! IEU1
602 ld [%g1 + 0x00], %f4 ! LSU Group
603 fmuls %f0, M0, %f2 ! FGM
604 ld [%g1 + 0x04], %f5 ! LSU Group
605 fmuls %f1, M5, %f3 ! FGM
606 fmuls %f4, M0, %f6 ! FGM Group
607 add %g1, %o5, %g1 ! IEU0
608 fmuls %f5, M5, %f7 ! FGM Group
609 fadds %f2, M12, %f2 ! FGA Group f2 available
610 st %f2, [%g2 + 0x00] ! LSU
611 fadds %f3, M13, %f3 ! FGA Group f3 available
612 st %f3, [%g2 + 0x04] ! LSU
613 fadds %f6, M12, %f6 ! FGA Group f6 available
614 st %f6, [%g2 + 0x10] ! LSU
615 fadds %f7, M13, %f7 ! FGA Group f7 available
616 st %f7, [%g2 + 0x14] ! LSU
617 cmp %o1, %o2 ! IEU1
618 bne 1b ! CTI
619 add %g2, 0x20, %g2 ! IEU0 Group
620
621 cmp %o1, %g3 ! count was even: every vertex already done
622 be 3f
623 nop
624
625 2: ld [%g1 + 0x00], %f0 ! LSU Group
626 ld [%g1 + 0x04], %f1 ! LSU Group
627 fmuls %f0, M0, %f2 ! FGM Group
628 fmuls %f1, M5, %f3 ! FGM Group
629 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
630 st %f2, [%g2 + 0x00] ! LSU
631 fadds %f3, M13, %f3 ! FGA Group f3 available
632 st %f3, [%g2 + 0x04] ! LSU
633
634 3:
635 ba __set_v4f_2 ! tail-jump: helper sets size 2 and returns
636 nop
637
638 /* orig: 12 cycles */
/*
 * _mesa_sparc_transform_points2_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[0] = x*M0 + M12 + y*M4
 *      out[1] = x*M1 + M13 + y*M5
 *      out[2] = x*M2 + M14 + y*M6
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.  The second
 * vertex of a pair reuses %f0 for its y*M6 product, which is safe because
 * the next pass reloads %f0 first.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o2, %o5, %f0-%f4, %f6-%f15.
 *
 * The two V4F_START loads use LDPTR like every other routine in this file.
 * A plain 32-bit `ld` would fetch only half of each pointer on a 64-bit
 * build.
 */
639 .globl _mesa_sparc_transform_points2_3d
640 _mesa_sparc_transform_points2_3d:
641 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
642 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor (pointer-width load)
643 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor (pointer-width load)
644 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
645
646 LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
647
648 cmp %g3, 1
649 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
650 bl 3f ! count < 1: nothing to transform
651 clr %o1 ! delay slot: %o1 = vertices done = 0
652
653 be 2f ! count == 1: only the single-vertex tail
654 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
655
656 1: ld [%g1 + 0x00], %f0 ! LSU Group
657 ld [%g1 + 0x04], %f1 ! LSU Group
658 add %o1, 2, %o1 ! IEU0
659 add %g1, %o5, %g1 ! IEU1
660 ld [%g1 + 0x00], %f9 ! LSU Group
661 fmuls %f0, M0, %f2 ! FGM
662 ld [%g1 + 0x04], %f10 ! LSU Group
663 fmuls %f0, M1, %f3 ! FGM
664 fmuls %f0, M2, %f4 ! FGM Group
665 add %g1, %o5, %g1 ! IEU0
666 fmuls %f1, M4, %f6 ! FGM Group
667 fmuls %f1, M5, %f7 ! FGM Group f2 available
668 fadds %f2, M12, %f2 ! FGA
669 fmuls %f1, M6, %f8 ! FGM Group f3 available
670 fadds %f3, M13, %f3 ! FGA
671 fmuls %f9, M0, %f11 ! FGM Group f4 available
672 fadds %f4, M14, %f4 ! FGA
673 fmuls %f9, M1, %f12 ! FGM Group f6 available
674 fmuls %f9, M2, %f13 ! FGM Group f2, f7 available
675 fadds %f2, %f6, %f2 ! FGA
676 st %f2, [%g2 + 0x00] ! LSU
677 fmuls %f10, M4, %f14 ! FGM Group f3, f8 available
678 fadds %f3, %f7, %f3 ! FGA
679 st %f3, [%g2 + 0x04] ! LSU
680 fmuls %f10, M5, %f15 ! FGM Group f4, f11 available
681 fadds %f11, M12, %f11 ! FGA
682 fmuls %f10, M6, %f0 ! FGM Group f12 available
683 fadds %f12, M13, %f12 ! FGA
684 fadds %f13, M14, %f13 ! FGA Group f13 available
685 fadds %f4, %f8, %f4 ! FGA Group f14 available
686 st %f4, [%g2 + 0x08] ! LSU
687 fadds %f11, %f14, %f11 ! FGA Group f15, f11 available
688 st %f11, [%g2 + 0x10] ! LSU
689 fadds %f12, %f15, %f12 ! FGA Group f0, f12 available
690 st %f12, [%g2 + 0x14] ! LSU
691 fadds %f13, %f0, %f13 ! FGA Group f13 available
692 st %f13, [%g2 + 0x18] ! LSU
693
694 cmp %o1, %o2 ! IEU1
695 bne 1b ! CTI
696 add %g2, 0x20, %g2 ! IEU0 Group
697
698 cmp %o1, %g3 ! count was even: every vertex already done
699 be 3f
700 nop
701
702 2: ld [%g1 + 0x00], %f0 ! LSU Group
703 ld [%g1 + 0x04], %f1 ! LSU Group
704 fmuls %f0, M0, %f2 ! FGM Group
705 fmuls %f0, M1, %f3 ! FGM Group
706 fmuls %f0, M2, %f4 ! FGM Group
707 fmuls %f1, M4, %f6 ! FGM Group
708 fmuls %f1, M5, %f7 ! FGM Group f2 available
709 fadds %f2, M12, %f2 ! FGA
710 fmuls %f1, M6, %f8 ! FGM Group f3 available
711 fadds %f3, M13, %f3 ! FGA
712 fadds %f4, M14, %f4 ! FGA Group f4 available
713 fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available
714 st %f2, [%g2 + 0x00] ! LSU
715 fadds %f3, %f7, %f3 ! FGA Group f3, f8 available
716 st %f3, [%g2 + 0x04] ! LSU
717 fadds %f4, %f8, %f4 ! FGA Group f4 available
718 st %f4, [%g2 + 0x08] ! LSU
719
720 3:
721 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
722 nop
723
/*
 * _mesa_sparc_transform_points2_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[0] = x*M0 + M12   out[1] = y*M5 + M13   out[2] = M14
 * Source advances by V4F_STRIDE, destination by 0x10 per vertex.
 * Loop 1 handles two vertices per pass, label 2 the odd one.
 * The vertex counter lives in %o3 rather than %o1, so %o1 still holds the
 * matrix base at label 3, where matrix word 14 is reloaded as an integer.
 * Exit: tail-jumps to __set_v4f_3 if that word is non-zero, otherwise to
 *       __set_v4f_2.
 * NOTE(review): the test is on the raw bit pattern, so a matrix entry of
 * -0.0 (0x80000000) selects size 3; confirm that is intended.
 * Scratch: %g1-%g3, %o2, %o3, %o5, %f0-%f7.
 */
724 .globl _mesa_sparc_transform_points2_3d_no_rot
725 _mesa_sparc_transform_points2_3d_no_rot:
726 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
727 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
728 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
729 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
730
731 LDMATRIX_0_5_12_13_14(%o1)
732
733 cmp %g3, 1
734 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
735 bl 3f ! count < 1: nothing to transform
736 clr %o3 ! delay slot: %o3 = vertices done = 0
737
738 be 2f ! count == 1: only the single-vertex tail
739 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
740
741 1: ld [%g1 + 0x00], %f0 ! LSU Group
742 ld [%g1 + 0x04], %f1 ! LSU Group
743 add %o3, 2, %o3 ! IEU0
744 add %g1, %o5, %g1 ! IEU1
745 ld [%g1 + 0x00], %f4 ! LSU Group
746 fmuls %f0, M0, %f2 ! FGM
747 ld [%g1 + 0x04], %f5 ! LSU Group
748 fmuls %f1, M5, %f3 ! FGM
749 fmuls %f4, M0, %f6 ! FGM Group
750 add %g1, %o5, %g1 ! IEU0
751 fmuls %f5, M5, %f7 ! FGM Group
752 fadds %f2, M12, %f2 ! FGA Group f2 available
753 st %f2, [%g2 + 0x00] ! LSU
754 fadds %f3, M13, %f3 ! FGA Group f3 available
755 st %f3, [%g2 + 0x04] ! LSU
756 fadds %f6, M12, %f6 ! FGA Group f6 available
757 st M14, [%g2 + 0x08] ! LSU
758 fadds %f7, M13, %f7 ! FGA Group f7 available
759 st %f6, [%g2 + 0x10] ! LSU
760 st %f7, [%g2 + 0x14] ! LSU Group
761 st M14, [%g2 + 0x18] ! LSU Group
762 cmp %o3, %o2 ! IEU1
763 bne 1b ! CTI
764 add %g2, 0x20, %g2 ! IEU0 Group
765
766 cmp %o3, %g3 ! count was even: every vertex already done
767 be 3f
768 nop
769
770 2: ld [%g1 + 0x00], %f0 ! LSU Group
771 ld [%g1 + 0x04], %f1 ! LSU Group
772 fmuls %f0, M0, %f2 ! FGM Group
773 fmuls %f1, M5, %f3 ! FGM Group
774 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
775 st %f2, [%g2 + 0x00] ! LSU
776 fadds %f3, M13, %f3 ! FGA Group f3 available
777 st %f3, [%g2 + 0x04] ! LSU
778 st M14, [%g2 + 0x08] ! LSU Group
779
780 3: ld [%o1 + (14 * 0x4)], %g3 ! %g3 = raw bits of matrix word 14
781 cmp %g3, 0
782 bne __set_v4f_3 ! non-zero: result has 3 components
783 nop
784 ba __set_v4f_2 ! zero: result has 2 components
785 nop
786
/*
 * _mesa_sparc_transform_points2_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y = first two source floats:
 *      out[0] = x*M0   out[1] = y*M5   out[2] = M14   out[3] = 0
 * The zero is written as an all-zero word from %g0.
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f3.
 */
787 .globl _mesa_sparc_transform_points2_perspective
788 _mesa_sparc_transform_points2_perspective:
789 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
790 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
791 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
792 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
793
794 LDMATRIX_0_5_14(%o1)
795
796 cmp %g3, 0
797 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
798 be 2f ! empty input: skip the loop
799 clr %o1 ! delay slot: %o1 = vertices done = 0
800
801 1: ld [%g1 + 0x00], %f0 ! x
802 ld [%g1 + 0x04], %f1 ! y
803 add %o1, 1, %o1
804 add %g1, %o5, %g1 ! step source cursor
805 fmuls %f0, M0, %f2
806 st %f2, [%g2 + 0x00]
807 fmuls %f1, M5, %f3
808 st %f3, [%g2 + 0x04]
809 st M14, [%g2 + 0x08]
810 st %g0, [%g2 + 0x0c] ! fourth component = zero word
811 cmp %o1, %g3
812 bne 1b
813 add %g2, 0x10, %g2 ! delay slot: step destination cursor
814 2:
815 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
816 nop
817
/*
 * _mesa_sparc_transform_points3_general
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[i] = x*M(i) + y*M(4+i) + M(12+i) + z*M(8+i)     for i = 0..3
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.  %f7-%f10 are reused: first for the y products, then for the
 * z products.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f10.
 */
818 .globl _mesa_sparc_transform_points3_general
819 _mesa_sparc_transform_points3_general:
820 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
821 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
822 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
823 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
824
825 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
826
827 cmp %g3, 0
828 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
829 be 2f ! empty input: skip the loop
830 clr %o1 ! delay slot: %o1 = vertices done = 0
831
832 1: ld [%g1 + 0x00], %f0 ! LSU Group
833 ld [%g1 + 0x04], %f1 ! LSU Group
834 ld [%g1 + 0x08], %f2 ! LSU Group
835 add %o1, 1, %o1 ! IEU0
836 add %g1, %o5, %g1 ! IEU1
837 fmuls %f0, M0, %f3 ! FGM
838 fmuls %f1, M4, %f7 ! FGM Group
839 fmuls %f0, M1, %f4 ! FGM Group
840 fmuls %f1, M5, %f8 ! FGM Group
841 fmuls %f0, M2, %f5 ! FGM Group f3 available
842 fmuls %f1, M6, %f9 ! FGM Group f7 available
843 fadds %f3, %f7, %f3 ! FGA
844 fmuls %f0, M3, %f6 ! FGM Group f4 available
845 fmuls %f1, M7, %f10 ! FGM Group f8 available
846 fadds %f4, %f8, %f4 ! FGA
847 fmuls %f2, M8, %f7 ! FGM Group f5 available
848 fmuls %f2, M9, %f8 ! FGM Group f9,f3 available
849 fadds %f5, %f9, %f5 ! FGA
850 fmuls %f2, M10, %f9 ! FGM Group f6 available
851 fadds %f6, %f10, %f6 ! FGA Group f10,f4 available
852 fmuls %f2, M11, %f10 ! FGM
853 fadds %f3, M12, %f3 ! FGA Group f7 available
854 fadds %f4, M13, %f4 ! FGA Group f8,f5 available
855 fadds %f5, M14, %f5 ! FGA Group f9 available
856 fadds %f6, M15, %f6 ! FGA Group f10,f6 available
857 fadds %f3, %f7, %f3 ! FGA Group f3 available
858 st %f3, [%g2 + 0x00] ! LSU
859 fadds %f4, %f8, %f4 ! FGA Group f4 available
860 st %f4, [%g2 + 0x04] ! LSU
861 fadds %f5, %f9, %f5 ! FGA Group f5 available
862 st %f5, [%g2 + 0x08] ! LSU
863 fadds %f6, %f10, %f6 ! FGA Group f6 available
864 st %f6, [%g2 + 0x0c] ! LSU
865 cmp %o1, %g3 ! IEU1
866 bne 1b ! CTI
867 add %g2, 0x10, %g2 ! IEU0 Group
868 2:
869 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
870 nop
871
/*
 * _mesa_sparc_transform_points3_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read,
 *      only reused as a counter).
 * Copies the first three floats of every source vertex to words 0..2 of
 * the matching destination element (source advances by V4F_STRIDE,
 * destination by 0x10).
 * NOTE(review): unlike _mesa_sparc_transform_points1_identity and
 * _mesa_sparc_transform_points2_identity there is no early return when
 * %o0 == %o2; confirm callers never pass the same vector twice with a
 * source stride other than 0x10.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f2.
 */
872 .globl _mesa_sparc_transform_points3_identity
873 _mesa_sparc_transform_points3_identity:
874 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
875 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
876 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
877 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
878
879 cmp %g3, 0
880 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
881 be 2f ! empty input: skip the loop
882 clr %o1 ! delay slot: %o1 = vertices done = 0
883
884 1: ld [%g1 + 0x00], %f0
885 ld [%g1 + 0x04], %f1
886 ld [%g1 + 0x08], %f2
887 add %o1, 1, %o1
888 add %g1, %o5, %g1 ! step source cursor
889 cmp %o1, %g3 ! set cc early; the stores below leave it intact
890 st %f0, [%g2 + 0x00]
891 st %f1, [%g2 + 0x04]
892 st %f2, [%g2 + 0x08]
893 bne 1b
894 add %g2, 0x10, %g2 ! delay slot: step destination cursor
895 2:
896 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
897 nop
898
/*
 * _mesa_sparc_transform_points3_2d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[0] = x*M0 + M12 + y*M4
 *      out[1] = x*M1 + M13 + y*M5
 *      out[2] = z (copied unchanged)
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f4, %f6, %f7.
 */
899 .globl _mesa_sparc_transform_points3_2d
900 _mesa_sparc_transform_points3_2d:
901 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
902 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
903 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
904 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
905
906 LDMATRIX_0_1_4_5_12_13(%o1)
907
908 cmp %g3, 0
909 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
910 be 2f ! empty input: skip the loop
911 clr %o1 ! delay slot: %o1 = vertices done = 0
912
913 1: ld [%g1 + 0x00], %f0 ! LSU Group
914 ld [%g1 + 0x04], %f1 ! LSU Group
915 ld [%g1 + 0x08], %f2 ! LSU Group
916 add %o1, 1, %o1 ! IEU0
917 add %g1, %o5, %g1 ! IEU1
918 fmuls %f0, M0, %f3 ! FGM
919 fmuls %f0, M1, %f4 ! FGM Group
920 fmuls %f1, M4, %f6 ! FGM Group
921 fmuls %f1, M5, %f7 ! FGM Group
922 fadds %f3, M12, %f3 ! FGA Group f3 available
923 fadds %f4, M13, %f4 ! FGA Group f4 available
924 fadds %f3, %f6, %f3 ! FGA Group f6 available
925 st %f3, [%g2 + 0x00] ! LSU
926 fadds %f4, %f7, %f4 ! FGA Group f7 available
927 st %f4, [%g2 + 0x04] ! LSU
928 st %f2, [%g2 + 0x08] ! LSU Group
929 cmp %o1, %g3 ! IEU1
930 bne 1b ! CTI
931 add %g2, 0x10, %g2 ! IEU0 Group
932 2:
933 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
934 nop
935
/*
 * _mesa_sparc_transform_points3_2d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[0] = x*M0 + M12   out[1] = y*M5 + M13   out[2] = z (copied)
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f4.
 */
936 .globl _mesa_sparc_transform_points3_2d_no_rot
937 _mesa_sparc_transform_points3_2d_no_rot:
938 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
939 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
940 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
941 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
942
943 LDMATRIX_0_5_12_13(%o1)
944
945 cmp %g3, 0
946 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
947 be 2f ! empty input: skip the loop
948 clr %o1 ! delay slot: %o1 = vertices done = 0
949
950 1: ld [%g1 + 0x00], %f0 ! LSU Group
951 ld [%g1 + 0x04], %f1 ! LSU Group
952 ld [%g1 + 0x08], %f2 ! LSU Group
953 add %o1, 1, %o1 ! IEU0
954 add %g1, %o5, %g1 ! IEU1
955 fmuls %f0, M0, %f3 ! FGM
956 fmuls %f1, M5, %f4 ! FGM Group
957 st %f2, [%g2 + 0x08] ! LSU
958 fadds %f3, M12, %f3 ! FGA Group
959 st %f3, [%g2 + 0x00] ! LSU
960 fadds %f4, M13, %f4 ! FGA Group
961 st %f4, [%g2 + 0x04] ! LSU
962 cmp %o1, %g3 ! IEU1
963 bne 1b ! CTI
964 add %g2, 0x10, %g2 ! IEU0 Group
965 2:
966 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
967 nop
968
/*
 * _mesa_sparc_transform_points3_3d
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + M(12+i)     for i = 0..2
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f11.
 */
969 .globl _mesa_sparc_transform_points3_3d
970 _mesa_sparc_transform_points3_3d:
971 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
972 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
973 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
974 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
975
976 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
977
978 cmp %g3, 0
979 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
980 be 2f ! empty input: skip the loop
981 clr %o1 ! delay slot: %o1 = vertices done = 0
982
983 1: ld [%g1 + 0x00], %f0 ! LSU Group
984 ld [%g1 + 0x04], %f1 ! LSU Group
985 ld [%g1 + 0x08], %f2 ! LSU Group
986 add %o1, 1, %o1 ! IEU0
987 add %g1, %o5, %g1 ! IEU1
988 fmuls %f0, M0, %f3 ! FGM
989 fmuls %f1, M4, %f6 ! FGM Group
990 fmuls %f0, M1, %f4 ! FGM Group
991 fmuls %f1, M5, %f7 ! FGM Group
992 fmuls %f0, M2, %f5 ! FGM Group f3 available
993 fmuls %f1, M6, %f8 ! FGM Group f6 available
994 fadds %f3, %f6, %f3 ! FGA
995 fmuls %f2, M8, %f9 ! FGM Group f4 available
996 fmuls %f2, M9, %f10 ! FGM Group f7 available
997 fadds %f4, %f7, %f4 ! FGA
998 fmuls %f2, M10, %f11 ! FGM Group f5 available
999 fadds %f5, %f8, %f5 ! FGA Group f8, f3 available
1000 fadds %f3, %f9, %f3 ! FGA Group f9 available
1001 fadds %f4, %f10, %f4 ! FGA Group f10, f4 available
1002 fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available
1003 fadds %f3, M12, %f3 ! FGA Group f3 available
1004 st %f3, [%g2 + 0x00] ! LSU
1005 fadds %f4, M13, %f4 ! FGA Group f4 available
1006 st %f4, [%g2 + 0x04] ! LSU
1007 fadds %f5, M14, %f5 ! FGA Group f5 available
1008 st %f5, [%g2 + 0x08] ! LSU
1009 cmp %o1, %g3 ! IEU1
1010 bne 1b ! CTI
1011 add %g2, 0x10, %g2 ! IEU0 Group
1012 2:
1013 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
1014 nop
1015
/*
 * _mesa_sparc_transform_points3_3d_no_rot
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[0] = x*M0 + M12   out[1] = y*M5 + M13   out[2] = z*M10 + M14
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.  The loop-end compare is issued right after the counter
 * update; only FP ops and stores sit between it and the bne, so the
 * integer condition codes are still valid at the branch.
 * Exit: tail-jumps to __set_v4f_3.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f5.
 */
1016 .globl _mesa_sparc_transform_points3_3d_no_rot
1017 _mesa_sparc_transform_points3_3d_no_rot:
1018 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
1019 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1020 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1021 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1022
1023 LDMATRIX_0_5_10_12_13_14(%o1)
1024
1025 cmp %g3, 0
1026 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1027 be 2f ! empty input: skip the loop
1028 clr %o1 ! delay slot: %o1 = vertices done = 0
1029
1030 1: ld [%g1 + 0x00], %f0 ! LSU Group
1031 ld [%g1 + 0x04], %f1 ! LSU Group
1032 ld [%g1 + 0x08], %f2 ! LSU Group
1033 add %o1, 1, %o1 ! IEU0
1034 add %g1, %o5, %g1 ! IEU1
1035 cmp %o1, %g3 ! IEU1 Group
1036 fmuls %f0, M0, %f3 ! FGM
1037 fmuls %f1, M5, %f4 ! FGM Group
1038 fmuls %f2, M10, %f5 ! FGM Group
1039 fadds %f3, M12, %f3 ! FGA Group, stall, f3 available
1040 st %f3, [%g2 + 0x00] ! LSU
1041 fadds %f4, M13, %f4 ! FGA Group, f4 available
1042 st %f4, [%g2 + 0x04] ! LSU
1043 fadds %f5, M14, %f5 ! FGA Group, f5 available
1044 st %f5, [%g2 + 0x08] ! LSU
1045 bne 1b ! CTI
1046 add %g2, 0x10, %g2 ! IEU0 Group
1047 2:
1048 ba __set_v4f_3 ! tail-jump: helper sets size 3 and returns
1049 nop
1050
/*
 * _mesa_sparc_transform_points3_perspective
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z = first three source floats:
 *      out[0] = x*M0 + z*M8
 *      out[1] = y*M5 + z*M9
 *      out[2] = z*M10 + M14
 *      out[3] = -z
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.  %f6 is reused: first for z*M8, then for the negated z.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f7.
 */
1051 .globl _mesa_sparc_transform_points3_perspective
1052 _mesa_sparc_transform_points3_perspective:
1053 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
1054 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1055 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1056 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1057
1058 LDMATRIX_0_5_8_9_10_14(%o1)
1059
1060 cmp %g3, 0
1061 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1062 be 2f ! empty input: skip the loop
1063 clr %o1 ! delay slot: %o1 = vertices done = 0
1064
1065 1: ld [%g1 + 0x00], %f0 ! LSU Group
1066 ld [%g1 + 0x04], %f1 ! LSU Group
1067 ld [%g1 + 0x08], %f2 ! LSU Group
1068 add %o1, 1, %o1 ! IEU0
1069 add %g1, %o5, %g1 ! IEU1
1070 fmuls %f0, M0, %f3 ! FGM
1071 fmuls %f2, M8, %f6 ! FGM Group
1072 fmuls %f1, M5, %f4 ! FGM Group
1073 fmuls %f2, M9, %f7 ! FGM Group
1074 fmuls %f2, M10, %f5 ! FGM Group f3 available
1075 fadds %f3, %f6, %f3 ! FGA Group f6 available
1076 st %f3, [%g2 + 0x00] ! LSU
1077 fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available
1078 st %f4, [%g2 + 0x04] ! LSU
1079 fadds %f5, M14, %f5 ! FGA Group
1080 st %f5, [%g2 + 0x08] ! LSU
1081 fnegs %f2, %f6 ! FGA Group
1082 st %f6, [%g2 + 0x0c] ! LSU
1083 cmp %o1, %g3 ! IEU1
1084 bne 1b ! CTI
1085 add %g2, 0x10, %g2 ! IEU0 Group
1086 2:
1087 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
1088 nop
1089
/*
 * _mesa_sparc_transform_points4_general
 *
 * In:  %o0 = destination vector, %o1 = matrix base passed to LDMATRIX,
 *      %o2 = source vector.
 * Per vertex, with x, y, z, w = the four source floats:
 *      out[i] = x*M(i) + y*M(4+i) + z*M(8+i) + w*M(12+i)    for i = 0..3
 * One vertex per loop pass; source advances by V4F_STRIDE, destination
 * by 0x10.  %f8-%f11 are reused: first for the y products, then for the
 * w products.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f15.
 */
1090 .globl _mesa_sparc_transform_points4_general
1091 _mesa_sparc_transform_points4_general:
1092 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
1093 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1094 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1095 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1096
1097 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
1098
1099 cmp %g3, 0
1100 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1101 be 2f ! empty input: skip the loop
1102 clr %o1 ! delay slot: %o1 = vertices done = 0
1103
1104 1: ld [%g1 + 0x00], %f0 ! LSU Group
1105 ld [%g1 + 0x04], %f1 ! LSU Group
1106 ld [%g1 + 0x08], %f2 ! LSU Group
1107 ld [%g1 + 0x0c], %f3 ! LSU Group
1108 add %o1, 1, %o1 ! IEU0
1109 add %g1, %o5, %g1 ! IEU1
1110 fmuls %f0, M0, %f4 ! FGM Group
1111 fmuls %f1, M4, %f8 ! FGM Group
1112 fmuls %f0, M1, %f5 ! FGM Group
1113 fmuls %f1, M5, %f9 ! FGM Group
1114 fmuls %f0, M2, %f6 ! FGM Group f4 available
1115 fmuls %f1, M6, %f10 ! FGM Group f8 available
1116 fadds %f4, %f8, %f4 ! FGA
1117 fmuls %f0, M3, %f7 ! FGM Group f5 available
1118 fmuls %f1, M7, %f11 ! FGM Group f9 available
1119 fadds %f5, %f9, %f5 ! FGA
1120 fmuls %f2, M8, %f12 ! FGM Group f6 available
1121 fmuls %f2, M9, %f13 ! FGM Group f10, f4 available
1122 fadds %f6, %f10, %f6 ! FGA
1123 fmuls %f2, M10, %f14 ! FGM Group f7 available
1124 fmuls %f2, M11, %f15 ! FGM Group f11, f5 available
1125 fadds %f7, %f11, %f7 ! FGA
1126 fmuls %f3, M12, %f8 ! FGM Group f12 available
1127 fadds %f4, %f12, %f4 ! FGA
1128 fmuls %f3, M13, %f9 ! FGM Group f13, f6 available
1129 fadds %f5, %f13, %f5 ! FGA
1130 fmuls %f3, M14, %f10 ! FGM Group f14 available
1131 fadds %f6, %f14, %f6 ! FGA
1132 fmuls %f3, M15, %f11 ! FGM Group f15, f7 available
1133 fadds %f7, %f15, %f7 ! FGA
1134 fadds %f4, %f8, %f4 ! FGA Group f8, f4 available
1135 st %f4, [%g2 + 0x00] ! LSU
1136 fadds %f5, %f9, %f5 ! FGA Group f9, f5 available
1137 st %f5, [%g2 + 0x04] ! LSU
1138 fadds %f6, %f10, %f6 ! FGA Group f10, f6 available
1139 st %f6, [%g2 + 0x08] ! LSU
1140 fadds %f7, %f11, %f7 ! FGA Group f11, f7 available
1141 st %f7, [%g2 + 0x0c] ! LSU
1142 cmp %o1, %g3 ! IEU1
1143 bne 1b ! CTI
1144 add %g2, 0x10, %g2 ! IEU0 Group
1145 2:
1146 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
1147 nop
1148
/*
 * _mesa_sparc_transform_points4_identity
 *
 * In:  %o0 = destination vector, %o2 = source vector (%o1 is not read,
 *      only reused as a counter).
 * Copies all four floats of every source vertex to the matching
 * destination element (source advances by V4F_STRIDE, destination by
 * 0x10).
 * NOTE(review): unlike _mesa_sparc_transform_points1_identity and
 * _mesa_sparc_transform_points2_identity there is no early return when
 * %o0 == %o2; confirm callers never pass the same vector twice with a
 * source stride other than 0x10.
 * Exit: tail-jumps to __set_v4f_4.
 * Scratch: %g1-%g3, %o1, %o5, %f0-%f3.
 */
1149 .globl _mesa_sparc_transform_points4_identity
1150 _mesa_sparc_transform_points4_identity:
1151 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride
1152 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1153 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1154 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1155
1156 cmp %g3, 0
1157 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1158 be 2f ! empty input: skip the loop
1159 clr %o1 ! delay slot: %o1 = vertices done = 0
1160
1161 1: ld [%g1 + 0x00], %f0
1162 ld [%g1 + 0x04], %f1
1163 ld [%g1 + 0x08], %f2
1164 add %o1, 1, %o1
1165 ld [%g1 + 0x0c], %f3
1166 add %g1, %o5, %g1 ! step source cursor
1167 st %f0, [%g2 + 0x00]
1168 st %f1, [%g2 + 0x04]
1169 st %f2, [%g2 + 0x08]
1170 cmp %o1, %g3
1171 st %f3, [%g2 + 0x0c]
1172 bne 1b
1173 add %g2, 0x10, %g2 ! delay slot: step destination cursor
1174 2:
1175 ba __set_v4f_4 ! tail-jump: helper sets size 4 and returns
1176 nop
1177
/*
 * _mesa_sparc_transform_points4_2d
 *
 * For each source vertex (x, y, z, w) = (%f0, %f1, %f2, %f3) stores:
 *   out[0] = x*M0 + y*M4 + w*M12
 *   out[1] = x*M1 + y*M5 + w*M13
 *   out[2] = z   (copied)
 *   out[3] = w   (copied)
 *
 * In:   %o0 = destination vector struct (V4F_START read, V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13; presumably the
 *             matrix pointer -- confirm in sparc_matrix.h.  Reused as the
 *             processed-vertex counter once the macro has run.
 *       %o2 = source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = vertex count,
 *       %o5 = source stride, %f0-%f5, %f8, %f9, %f12, %f13 as temporaries.
 * Exit: branches to __set_v4f_4, which stores size 4 and ORs VEC_SIZE_4
 *       into the flags of the struct at %o0 and then returns with retl.
 *
 * M0, M1, M4, M5, M12, M13 and LDPTR come from outside this part of the
 * file; presumably register aliases and a pointer-width load macro from
 * sparc_matrix.h -- confirm there.
 */
1178 .globl _mesa_sparc_transform_points4_2d
1179 _mesa_sparc_transform_points4_2d:
1180 ld [%o2 + V4F_STRIDE], %o5
1181 LDPTR [%o2 + V4F_START], %g1
1182 LDPTR [%o0 + V4F_START], %g2
1183 ld [%o2 + V4F_COUNT], %g3
1184
1185 LDMATRIX_0_1_4_5_12_13(%o1)
1186
/* Publish the count to the destination; skip the loop when it is zero. */
1187 cmp %g3, 0
1188 st %g3, [%o0 + V4F_COUNT]
1189 be 2f
/* Branch delay slot: runs on both paths; %o1 becomes the loop counter. */
1190 clr %o1
1191
/*
 * Per-vertex loop.  The whole vertex is loaded before any store.  The
 * source cursor advances by the stride in %o5 (added directly to the
 * address); the destination cursor advances by a fixed 0x10 bytes.
 */
1192 1: ld [%g1 + 0x00], %f0 ! LSU Group
1193 ld [%g1 + 0x04], %f1 ! LSU Group
1194 ld [%g1 + 0x08], %f2 ! LSU Group
1195 ld [%g1 + 0x0c], %f3 ! LSU Group
1196 add %o1, 1, %o1 ! IEU0
1197 add %g1, %o5, %g1 ! IEU1
/* Six partial products, then two sums per output component. */
1198 fmuls %f0, M0, %f4 ! FGM
1199 fmuls %f1, M4, %f8 ! FGM Group
1200 fmuls %f0, M1, %f5 ! FGM Group
1201 fmuls %f1, M5, %f9 ! FGM Group f4 available
1202 fmuls %f3, M12, %f12 ! FGM Group
1203 fmuls %f3, M13, %f13 ! FGM Group f8 available
1204 fadds %f4, %f8, %f4 ! FGA
1205 fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available
1206 fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail
1207 st %f4, [%g2 + 0x00] ! LSU
1208 fadds %f5, %f13, %f5 ! FGA Group f5 available
1209 st %f5, [%g2 + 0x04] ! LSU
/* z and w are stored exactly as loaded. */
1210 st %f2, [%g2 + 0x08] ! LSU Group
1211 st %f3, [%g2 + 0x0c] ! LSU Group
1212 cmp %o1, %g3 ! IEU1
1213 bne 1b ! CTI
/* Branch delay slot: step the destination cursor to the next vertex. */
1214 add %g2, 0x10, %g2 ! IEU0 Group
1215 2:
/* Tail branch; the nop fills the delay slot. */
1216 ba __set_v4f_4
1217 nop
1218
/*
 * _mesa_sparc_transform_points4_2d_no_rot
 *
 * For each source vertex (x, y, z, w) = (%f0, %f1, %f2, %f3) stores:
 *   out[0] = x*M0 + w*M12
 *   out[1] = y*M5 + w*M13
 *   out[2] = z   (copied)
 *   out[3] = w   (copied)
 *
 * In:   %o0 = destination vector struct (V4F_START read, V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13; presumably the
 *             matrix pointer -- confirm in sparc_matrix.h.  Reused as the
 *             processed-vertex counter once the macro has run.
 *       %o2 = source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = vertex count,
 *       %o5 = source stride, %f0-%f5, %f8, %f9 as temporaries.
 * Exit: branches to __set_v4f_4, which stores size 4 and ORs VEC_SIZE_4
 *       into the flags of the struct at %o0 and then returns with retl.
 *
 * NOTE(review): only M0, M5, M12 and M13 are referenced below, while the
 * load macro's name suggests elements 1 and 4 are fetched as well.  A
 * narrower load macro could be used if sparc_matrix.h provides one --
 * confirm before changing.
 */
1219 .globl _mesa_sparc_transform_points4_2d_no_rot
1220 _mesa_sparc_transform_points4_2d_no_rot:
1221 ld [%o2 + V4F_STRIDE], %o5
1222 LDPTR [%o2 + V4F_START], %g1
1223 LDPTR [%o0 + V4F_START], %g2
1224 ld [%o2 + V4F_COUNT], %g3
1225
1226 LDMATRIX_0_1_4_5_12_13(%o1)
1227
/* Publish the count to the destination; skip the loop when it is zero. */
1228 cmp %g3, 0
1229 st %g3, [%o0 + V4F_COUNT]
1230 be 2f
/* Branch delay slot: runs on both paths; %o1 becomes the loop counter. */
1231 clr %o1
1232
/*
 * Per-vertex loop.  The whole vertex is loaded before any store.  The
 * source cursor advances by the stride in %o5 (added directly to the
 * address); the destination cursor advances by a fixed 0x10 bytes.
 */
1233 1: ld [%g1 + 0x00], %f0
1234 ld [%g1 + 0x04], %f1
1235 ld [%g1 + 0x08], %f2
1236 ld [%g1 + 0x0c], %f3
1237 add %o1, 1, %o1
1238 add %g1, %o5, %g1
/* Scale term and w term for out[0], then the same pair for out[1]. */
1239 fmuls %f0, M0, %f4
1240 fmuls %f3, M12, %f8
1241 fmuls %f1, M5, %f5
1242 fmuls %f3, M13, %f9
1243 fadds %f4, %f8, %f4
1244 st %f4, [%g2 + 0x00]
1245 fadds %f5, %f9, %f5
1246 st %f5, [%g2 + 0x04]
/* z and w are stored exactly as loaded. */
1247 st %f2, [%g2 + 0x08]
1248 st %f3, [%g2 + 0x0c]
1249 cmp %o1, %g3
1250 bne 1b
/* Branch delay slot: step the destination cursor to the next vertex. */
1251 add %g2, 0x10, %g2
1252 2:
/* Tail branch; the nop fills the delay slot. */
1253 ba __set_v4f_4
1254 nop
1255
/*
 * _mesa_sparc_transform_points4_3d
 *
 * For each source vertex (x, y, z, w) = (%f0, %f1, %f2, %f3) stores:
 *   out[0] = x*M0 + y*M4 + z*M8  + w*M12
 *   out[1] = x*M1 + y*M5 + z*M9  + w*M13
 *   out[2] = x*M2 + y*M6 + z*M10 + w*M14
 *   out[3] = w   (copied)
 * Each sum is accumulated left to right in %f4, %f5 and %f6.
 *
 * In:   %o0 = destination vector struct (V4F_START read, V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14;
 *             presumably the matrix pointer -- confirm in sparc_matrix.h.
 *             Reused as the processed-vertex counter after the macro.
 *       %o2 = source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = vertex count,
 *       %o5 = source stride, %f0-%f15 as temporaries.
 * Exit: branches to __set_v4f_4, which stores size 4 and ORs VEC_SIZE_4
 *       into the flags of the struct at %o0 and then returns with retl.
 *
 * The M<n> names and LDPTR come from outside this part of the file;
 * presumably register aliases and a pointer-width load macro from
 * sparc_matrix.h -- confirm there.
 */
1256 .globl _mesa_sparc_transform_points4_3d
1257 _mesa_sparc_transform_points4_3d:
1258 ld [%o2 + V4F_STRIDE], %o5
1259 LDPTR [%o2 + V4F_START], %g1
1260 LDPTR [%o0 + V4F_START], %g2
1261 ld [%o2 + V4F_COUNT], %g3
1262
1263 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1264
/* Publish the count to the destination; skip the loop when it is zero. */
1265 cmp %g3, 0
1266 st %g3, [%o0 + V4F_COUNT]
1267 be 2f
/* Branch delay slot: runs on both paths; %o1 becomes the loop counter. */
1268 clr %o1
1269
/*
 * Per-vertex loop.  The whole vertex is loaded before any store.  The
 * source cursor advances by the stride in %o5 (added directly to the
 * address); the destination cursor advances by a fixed 0x10 bytes.
 */
1270 1: ld [%g1 + 0x00], %f0 ! LSU Group
1271 ld [%g1 + 0x04], %f1 ! LSU Group
1272 ld [%g1 + 0x08], %f2 ! LSU Group
1273 ld [%g1 + 0x0c], %f3 ! LSU Group
1274 add %o1, 1, %o1 ! IEU0
1275 add %g1, %o5, %g1 ! IEU1
/*
 * Products land in %f4-%f15; the additions are interleaved with the
 * remaining multiplies.  Accumulators: %f4 = out[0], %f5 = out[1],
 * %f6 = out[2].
 */
1276 fmuls %f0, M0, %f4 ! FGM
1277 fmuls %f1, M4, %f7 ! FGM Group
1278 fmuls %f0, M1, %f5 ! FGM Group
1279 fmuls %f1, M5, %f8 ! FGM Group
1280 fmuls %f0, M2, %f6 ! FGM Group f4 available
1281 fmuls %f1, M6, %f9 ! FGM Group f7 available
1282 fadds %f4, %f7, %f4 ! FGA
1283 fmuls %f2, M8, %f10 ! FGM Group f5 available
1284 fmuls %f2, M9, %f11 ! FGM Group f8 available
1285 fadds %f5, %f8, %f5 ! FGA
1286 fmuls %f2, M10, %f12 ! FGM Group f6 available
1287 fmuls %f3, M12, %f13 ! FGM Group f9, f4 available
1288 fadds %f6, %f9, %f6 ! FGA
1289 fmuls %f3, M13, %f14 ! FGM Group f10 available
1290 fadds %f4, %f10, %f4 ! FGA
1291 fmuls %f3, M14, %f15 ! FGM Group f11, f5 available
1292 fadds %f5, %f11, %f5 ! FGA
1293 fadds %f6, %f12, %f6 ! FGA Group stall, f12, f13, f6 available
1294 fadds %f4, %f13, %f4 ! FGA Group f14, f4 available
1295 st %f4, [%g2 + 0x00] ! LSU
1296 fadds %f5, %f14, %f5 ! FGA Group f15, f5 available
1297 st %f5, [%g2 + 0x04] ! LSU
1298 fadds %f6, %f15, %f6 ! FGA Group f6 available
1299 st %f6, [%g2 + 0x08] ! LSU
/* w is stored exactly as loaded. */
1300 st %f3, [%g2 + 0x0c] ! LSU Group
1301 cmp %o1, %g3 ! IEU1
1302 bne 1b ! CTI
/* Branch delay slot: step the destination cursor to the next vertex. */
1303 add %g2, 0x10, %g2 ! IEU0 Group
1304 2:
/* Tail branch; the nop fills the delay slot. */
1305 ba __set_v4f_4
1306 nop
1307
/*
 * _mesa_sparc_transform_points4_3d_no_rot
 *
 * For each source vertex (x, y, z, w) = (%f0, %f1, %f2, %f3) stores:
 *   out[0] = x*M0  + w*M12
 *   out[1] = y*M5  + w*M13
 *   out[2] = z*M10 + w*M14
 *   out[3] = w   (copied)
 *
 * In:   %o0 = destination vector struct (V4F_START read, V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_5_10_12_13_14; presumably the
 *             matrix pointer -- confirm in sparc_matrix.h.  Reused as the
 *             processed-vertex counter once the macro has run.
 *       %o2 = source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = vertex count,
 *       %o5 = source stride, %f0-%f9 as temporaries.
 * Exit: branches to __set_v4f_4, which stores size 4 and ORs VEC_SIZE_4
 *       into the flags of the struct at %o0 and then returns with retl.
 *
 * The M<n> names and LDPTR come from outside this part of the file;
 * presumably register aliases and a pointer-width load macro from
 * sparc_matrix.h -- confirm there.
 */
1308 .globl _mesa_sparc_transform_points4_3d_no_rot
1309 _mesa_sparc_transform_points4_3d_no_rot:
1310 ld [%o2 + V4F_STRIDE], %o5
1311 LDPTR [%o2 + V4F_START], %g1
1312 LDPTR [%o0 + V4F_START], %g2
1313 ld [%o2 + V4F_COUNT], %g3
1314
1315 LDMATRIX_0_5_10_12_13_14(%o1)
1316
/* Publish the count to the destination; skip the loop when it is zero. */
1317 cmp %g3, 0
1318 st %g3, [%o0 + V4F_COUNT]
1319 be 2f
/* Branch delay slot: runs on both paths; %o1 becomes the loop counter. */
1320 clr %o1
1321
/*
 * Per-vertex loop.  The whole vertex is loaded before any store.  The
 * source cursor advances by the stride in %o5 (added directly to the
 * address); the destination cursor advances by a fixed 0x10 bytes.
 */
1322 1: ld [%g1 + 0x00], %f0 ! LSU Group
1323 ld [%g1 + 0x04], %f1 ! LSU Group
1324 ld [%g1 + 0x08], %f2 ! LSU Group
1325 ld [%g1 + 0x0c], %f3 ! LSU Group
1326 add %o1, 1, %o1 ! IEU0
1327 add %g1, %o5, %g1 ! IEU1
/* One scale product and one w product per output, then one add each. */
1328 fmuls %f0, M0, %f4 ! FGM
1329 fmuls %f3, M12, %f7 ! FGM Group
1330 fmuls %f1, M5, %f5 ! FGM Group
1331 fmuls %f3, M13, %f8 ! FGM Group
1332 fmuls %f2, M10, %f6 ! FGM Group f4 available
1333 fmuls %f3, M14, %f9 ! FGM Group f7 available
1334 fadds %f4, %f7, %f4 ! FGA
1335 st %f4, [%g2 + 0x00] ! LSU
1336 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1337 st %f5, [%g2 + 0x04] ! LSU
1338 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1339 st %f6, [%g2 + 0x08] ! LSU
/* w is stored exactly as loaded. */
1340 st %f3, [%g2 + 0x0c] ! LSU Group
1341 cmp %o1, %g3 ! IEU1
1342 bne 1b ! CTI
/* Branch delay slot: step the destination cursor to the next vertex. */
1343 add %g2, 0x10, %g2 ! IEU0 Group
1344 2:
/* Tail branch; the nop fills the delay slot. */
1345 ba __set_v4f_4
1346 nop
1347
/*
 * _mesa_sparc_transform_points4_perspective
 *
 * For each source vertex (x, y, z, w) = (%f0, %f1, %f2, %f3) stores:
 *   out[0] = x*M0  + z*M8
 *   out[1] = y*M5  + z*M9
 *   out[2] = z*M10 + w*M14
 *   out[3] = -z
 *
 * In:   %o0 = destination vector struct (V4F_START read, V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_5_8_9_10_14; presumably the
 *             matrix pointer -- confirm in sparc_matrix.h.  Reused as the
 *             processed-vertex counter once the macro has run.
 *       %o2 = source vector struct (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Uses: %g1 = source cursor, %g2 = destination cursor, %g3 = vertex count,
 *       %o5 = source stride, %f0-%f9 as temporaries.
 * Exit: branches to __set_v4f_4, which stores size 4 and ORs VEC_SIZE_4
 *       into the flags of the struct at %o0 and then returns with retl.
 *
 * The M<n> names and LDPTR come from outside this part of the file;
 * presumably register aliases and a pointer-width load macro from
 * sparc_matrix.h -- confirm there.
 */
1348 .globl _mesa_sparc_transform_points4_perspective
1349 _mesa_sparc_transform_points4_perspective:
1350 ld [%o2 + V4F_STRIDE], %o5
1351 LDPTR [%o2 + V4F_START], %g1
1352 LDPTR [%o0 + V4F_START], %g2
1353 ld [%o2 + V4F_COUNT], %g3
1354
1355 LDMATRIX_0_5_8_9_10_14(%o1)
1356
/* Publish the count to the destination; skip the loop when it is zero. */
1357 cmp %g3, 0
1358 st %g3, [%o0 + V4F_COUNT]
1359 be 2f
/* Branch delay slot: runs on both paths; %o1 becomes the loop counter. */
1360 clr %o1
1361
/*
 * Per-vertex loop.  The whole vertex is loaded before any store.  The
 * source cursor advances by the stride in %o5 (added directly to the
 * address); the destination cursor advances by a fixed 0x10 bytes.
 */
1362 1: ld [%g1 + 0x00], %f0 ! LSU Group
1363 ld [%g1 + 0x04], %f1 ! LSU Group
1364 ld [%g1 + 0x08], %f2 ! LSU Group
1365 ld [%g1 + 0x0c], %f3 ! LSU Group
1366 add %o1, 1, %o1 ! IEU0
1367 add %g1, %o5, %g1 ! IEU1
/* Two products per output component, then one add each. */
1368 fmuls %f0, M0, %f4 ! FGM
1369 fmuls %f2, M8, %f7 ! FGM Group
1370 fmuls %f1, M5, %f5 ! FGM Group
1371 fmuls %f2, M9, %f8 ! FGM Group
1372 fmuls %f2, M10, %f6 ! FGM Group f4 available
1373 fmuls %f3, M14, %f9 ! FGM Group f7 available
1374 fadds %f4, %f7, %f4 ! FGA
1375 st %f4, [%g2 + 0x00] ! LSU
1376 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1377 st %f5, [%g2 + 0x04] ! LSU
1378 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1379 st %f6, [%g2 + 0x08] ! LSU
/*
 * out[3] is the negated source z.  %f7 is free to reuse here because its
 * z*M8 product was already folded into %f4 above.
 */
1380 fnegs %f2, %f7 ! FGA Group
1381 st %f7, [%g2 + 0x0c] ! LSU
1382 cmp %o1, %g3 ! IEU1
1383 bne 1b ! CTI
/* Branch delay slot: step the destination cursor to the next vertex. */
1384 add %g2, 0x10, %g2 ! IEU0 Group
1385 2:
/* Tail branch; the nop fills the delay slot. */
1386 ba __set_v4f_4
1387 nop