gallium: replace 16BIT_TEMPS cap with 16BIT_CONSTS
[mesa.git] / src / mesa / sparc / xform.S
1
2 /* TODO
3 *
4 * 1) It would be nice if load/store double could be used
5 * at least for the matrix parts. I think for the matrices
6 * it is safe, but for the vertices it probably is not due to
7 * things like glInterleavedArrays etc.
8 *
9 * UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
10 *
11 * 2) One extremely slick trick would be if we could enclose
12 * groups of xform calls on the same vertices such that
13 * we just load the matrix into f16-->f31 before the calls
14 * and then we would not have to do them here. This may be
15 * tricky and not much of a gain though.
16 */
17
18 #include "sparc_matrix.h"
19
20 #if defined(SVR4) || defined(__SVR4) || defined(__svr4__) || defined(__arch64__)
21 /* Solaris requires this for 64-bit. */
22 .register %g2, #scratch
23 .register %g3, #scratch
24 #endif
25
26 .text
27 .align 64
28
/*
 * __set_v4f_1: file-local exit helper reached with a plain branch from the
 * transform routines.
 * In: %o0 = vector whose V4F_SIZE and V4F_FLAGS fields are updated.
 * Stores 1 to V4F_SIZE, ORs VEC_SIZE_1 into V4F_FLAGS, then returns with
 * retl (so control goes back to whoever called the branching routine).
 * Clobbers %g1 and %g2.
 */
29 __set_v4f_1:
30 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
31 mov 1, %g1
32 st %g1, [%o0 + V4F_SIZE] ! size = 1
33 or %g2, VEC_SIZE_1, %g2
34 retl
35 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_2: file-local exit helper reached with a plain branch from the
 * transform routines.
 * In: %o0 = vector whose V4F_SIZE and V4F_FLAGS fields are updated.
 * Stores 2 to V4F_SIZE, ORs VEC_SIZE_2 into V4F_FLAGS, then returns with
 * retl. Clobbers %g1 and %g2.
 */
36 __set_v4f_2:
37 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
38 mov 2, %g1
39 st %g1, [%o0 + V4F_SIZE] ! size = 2
40 or %g2, VEC_SIZE_2, %g2
41 retl
42 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_3: file-local exit helper reached with a plain branch from the
 * transform routines.
 * In: %o0 = vector whose V4F_SIZE and V4F_FLAGS fields are updated.
 * Stores 3 to V4F_SIZE, ORs VEC_SIZE_3 into V4F_FLAGS, then returns with
 * retl. Clobbers %g1 and %g2.
 */
43 __set_v4f_3:
44 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
45 mov 3, %g1
46 st %g1, [%o0 + V4F_SIZE] ! size = 3
47 or %g2, VEC_SIZE_3, %g2
48 retl
49 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
/*
 * __set_v4f_4: file-local exit helper reached with a plain branch from the
 * transform routines.
 * In: %o0 = vector whose V4F_SIZE and V4F_FLAGS fields are updated.
 * Stores 4 to V4F_SIZE, ORs VEC_SIZE_4 into V4F_FLAGS, then returns with
 * retl. Clobbers %g1 and %g2.
 */
50 __set_v4f_4:
51 ld [%o0 + V4F_FLAGS], %g2 ! %g2 = current flags word
52 mov 4, %g1
53 st %g1, [%o0 + V4F_SIZE] ! size = 4
54 or %g2, VEC_SIZE_4, %g2
55 retl
56 st %g2, [%o0 + V4F_FLAGS] ! delay slot: write back updated flags
57
58 /* First the raw versions. */
59
/*
 * _mesa_sparc_transform_points1_general
 *
 * Per input vertex reads one float x and stores four floats:
 *   out[i] = x * M<i> + M<12+i>   for i = 0..3
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_4, which returns to the caller.
 */
60 .globl _mesa_sparc_transform_points1_general
61 _mesa_sparc_transform_points1_general:
62 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
63 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
64 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
65 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
66
67 LDMATRIX_0_1_2_3_12_13_14_15(%o1)
68
69 cmp %g3, 1
70 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
71 bl 3f ! count < 1: nothing to transform
72 clr %o1 ! delay slot: vertices done = 0
73
74 be 2f ! count == 1: only the single-vertex tail
75 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
76
77 1: ld [%g1 + 0x00], %f0 ! LSU Group
78 add %g1, %o5, %g1 ! IEU0
79 ld [%g1 + 0x00], %f8 ! LSU Group
80 add %o1, 2, %o1 ! IEU0
81 add %g1, %o5, %g1 ! IEU1
82 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
83 fmuls %f0, M1, %f2 ! FGM Group
84 fmuls %f0, M2, %f3 ! FGM Group
85 fmuls %f0, M3, %f4 ! FGM Group
86 fmuls %f8, M0, %f9 ! FGM Group f1 available
87 fadds %f1, M12, %f1 ! FGA
88 st %f1, [%g2 + 0x00] ! LSU
89 fmuls %f8, M1, %f10 ! FGM Group f2 available
90 fadds %f2, M13, %f2 ! FGA
91 st %f2, [%g2 + 0x04] ! LSU
92 fmuls %f8, M2, %f11 ! FGM Group f3 available
93 fadds %f3, M14, %f3 ! FGA
94 st %f3, [%g2 + 0x08] ! LSU
95 fmuls %f8, M3, %f12 ! FGM Group f4 available
96 fadds %f4, M15, %f4 ! FGA
97 st %f4, [%g2 + 0x0c] ! LSU
98 fadds %f9, M12, %f9 ! FGA Group f9 available
99 st %f9, [%g2 + 0x10] ! LSU
100 fadds %f10, M13, %f10 ! FGA Group f10 available
101 st %f10, [%g2 + 0x14] ! LSU
102 fadds %f11, M14, %f11 ! FGA Group f11 available
103 st %f11, [%g2 + 0x18] ! LSU
104 fadds %f12, M15, %f12 ! FGA Group f12 available
105 st %f12, [%g2 + 0x1c] ! LSU
106 cmp %o1, %o2 ! IEU1
107 bne 1b ! CTI
108 add %g2, 0x20, %g2 ! IEU0 Group
109
110 cmp %o1, %g3 ! even count: no leftover vertex
111 be 3f
112 nop
113
114 2: ld [%g1 + 0x00], %f0 ! LSU Group
115 fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
116 fmuls %f0, M1, %f2 ! FGM Group
117 fmuls %f0, M2, %f3 ! FGM Group
118 fmuls %f0, M3, %f4 ! FGM Group
119 fadds %f1, M12, %f1 ! FGA Group
120 st %f1, [%g2 + 0x00] ! LSU
121 fadds %f2, M13, %f2 ! FGA Group
122 st %f2, [%g2 + 0x04] ! LSU
123 fadds %f3, M14, %f3 ! FGA Group
124 st %f3, [%g2 + 0x08] ! LSU
125 fadds %f4, M15, %f4 ! FGA Group
126 st %f4, [%g2 + 0x0c] ! LSU
127
128 3:
129 ba __set_v4f_4 ! mark destination as size 4 and return
130 nop
131
/*
 * _mesa_sparc_transform_points1_identity
 *
 * Copies the first float of every source vertex to the destination
 * (out[0] = x). If %o0 and %o2 are the same vector it returns at once
 * without touching anything.
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 *      %o1 is not read; it is reused as the vertices-done counter.
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o2 = count with bit 0 cleared.
 * Loop 1 copies two vertices per pass; label 2 copies one leftover vertex.
 * Leaves through __set_v4f_1, which returns to the caller.
 */
132 .globl _mesa_sparc_transform_points1_identity
133 _mesa_sparc_transform_points1_identity:
134 cmp %o0, %o2
135 be 4f ! source is destination: nothing to copy
136 ld [%o2 + V4F_STRIDE], %o5 ! delay slot (runs either way): %o5 = stride
137 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
138 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
139 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
140
141 cmp %g3, 1
142 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
143 bl 3f ! count < 1: nothing to copy
144 clr %o1 ! delay slot: vertices done = 0
145
146 be 2f ! count == 1: only the single-vertex tail
147 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
148
149 1: ld [%g1 + 0x00], %f0 ! LSU Group
150 add %g1, %o5, %g1 ! IEU0
151 ld [%g1 + 0x00], %f1 ! LSU Group
152 add %o1, 2, %o1 ! IEU0
153 add %g1, %o5, %g1 ! IEU1
154 st %f0, [%g2 + 0x00] ! LSU Group
155 cmp %o1, %o2 ! IEU1
156 st %f1, [%g2 + 0x10] ! LSU Group
157 bne 1b ! CTI
158 add %g2, 0x20, %g2 ! IEU0
159
160 cmp %o1, %g3 ! even count: no leftover vertex
161 be 3f
162 nop
163
164 2: ld [%g1 + 0x00], %f0
165 addx %g0, %g0, %g0 ! result goes to %g0 and is discarded; presumably filler between load and store
166 st %f0, [%g2 + 0x00]
167
168 3:
169 ba __set_v4f_1 ! mark destination as size 1 and return
170 nop
171
172 4: retl ! in-place case: return without changes
173 nop
174
/*
 * _mesa_sparc_transform_points1_2d
 *
 * Per input vertex reads one float x and stores two floats:
 *   out[0] = x * M0 + M12
 *   out[1] = x * M1 + M13
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_2, which returns to the caller.
 */
175 .globl _mesa_sparc_transform_points1_2d
176 _mesa_sparc_transform_points1_2d:
177 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
178 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
179 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
180 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
181
182 LDMATRIX_0_1_12_13(%o1)
183
184 cmp %g3, 1
185 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
186 bl 3f ! count < 1: nothing to transform
187 clr %o1 ! delay slot: vertices done = 0
188
189 be 2f ! count == 1: only the single-vertex tail
190 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
191
192 1: ld [%g1 + 0x00], %f0 ! LSU Group
193 add %g1, %o5, %g1 ! IEU0
194 ld [%g1 + 0x00], %f8 ! LSU Group
195 add %o1, 2, %o1 ! IEU0
196 add %g1, %o5, %g1 ! IEU1
197 fmuls %f0, M0, %f1 ! FGM Group
198 fmuls %f0, M1, %f2 ! FGM Group
199 fmuls %f8, M0, %f9 ! FGM Group
200 fmuls %f8, M1, %f10 ! FGM Group
201 fadds %f1, M12, %f3 ! FGA Group f1 available
202 st %f3, [%g2 + 0x00] ! LSU
203 fadds %f2, M13, %f4 ! FGA Group f2 available
204 st %f4, [%g2 + 0x04] ! LSU
205 fadds %f9, M12, %f11 ! FGA Group f9 available
206 st %f11, [%g2 + 0x10] ! LSU
207 fadds %f10, M13, %f12 ! FGA Group f10 available
208 st %f12, [%g2 + 0x14] ! LSU
209 cmp %o1, %o2 ! IEU1
210 bne 1b ! CTI
211 add %g2, 0x20, %g2 ! IEU0 Group
212
213 cmp %o1, %g3 ! even count: no leftover vertex
214 be 3f
215 nop
216
217 2: ld [%g1 + 0x00], %f0
218 fmuls %f0, M0, %f1
219 fmuls %f0, M1, %f2
220 fadds %f1, M12, %f3
221 st %f3, [%g2 + 0x00]
222 fadds %f2, M13, %f4
223 st %f4, [%g2 + 0x04]
224
225 3:
226 ba __set_v4f_2 ! mark destination as size 2 and return
227 nop
228
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * Per input vertex reads one float x and stores two floats:
 *   out[0] = x * M0 + M12
 *   out[1] = M13            (stored straight from the matrix register)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_2, which returns to the caller.
 */
229 .globl _mesa_sparc_transform_points1_2d_no_rot
230 _mesa_sparc_transform_points1_2d_no_rot:
231 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
232 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
233 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
234 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
235
236 LDMATRIX_0_12_13(%o1)
237
238 cmp %g3, 1
239 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
240 bl 3f ! count < 1: nothing to transform
241 clr %o1 ! delay slot: vertices done = 0
242
243 be 2f ! count == 1: only the single-vertex tail
244 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
245
246 1: ld [%g1 + 0x00], %f0 ! LSU Group
247 add %g1, %o5, %g1 ! IEU0
248 ld [%g1 + 0x00], %f4 ! LSU Group
249 add %o1, 2, %o1 ! IEU0
250 add %g1, %o5, %g1 ! IEU1
251 fmuls %f0, M0, %f1 ! FGM Group
252 fmuls %f4, M0, %f5 ! FGM Group
253 fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
254 st %f3, [%g2 + 0x00] ! LSU
255 st M13, [%g2 + 0x04] ! LSU Group, f5 available
256 fadds %f5, M12, %f6 ! FGA
257 st %f6, [%g2 + 0x10] ! LSU Group
258 st M13, [%g2 + 0x14] ! LSU Group
259 cmp %o1, %o2 ! IEU1
260 bne 1b ! CTI
261 add %g2, 0x20, %g2 ! IEU0 Group
262
263 cmp %o1, %g3 ! even count: no leftover vertex
264 be 3f
265 nop
266
267 2: ld [%g1 + 0x00], %f0
268 fmuls %f0, M0, %f1
269 fadds %f1, M12, %f3
270 st %f3, [%g2 + 0x00]
271 st M13, [%g2 + 0x04]
272
273 3:
274 ba __set_v4f_2 ! mark destination as size 2 and return
275 nop
276
/*
 * _mesa_sparc_transform_points1_3d
 *
 * Per input vertex reads one float x and stores three floats:
 *   out[i] = x * M<i> + M<12+i>   for i = 0..2
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_3, which returns to the caller.
 */
277 .globl _mesa_sparc_transform_points1_3d
278 _mesa_sparc_transform_points1_3d:
279 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
280 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
281 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
282 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
283
284 LDMATRIX_0_1_2_12_13_14(%o1)
285
286 cmp %g3, 1
287 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
288 bl 3f ! count < 1: nothing to transform
289 clr %o1 ! delay slot: vertices done = 0
290
291 be 2f ! count == 1: only the single-vertex tail
292 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
293
294 1: ld [%g1 + 0x00], %f0 ! LSU Group
295 add %g1, %o5, %g1 ! IEU0
296 ld [%g1 + 0x00], %f4 ! LSU Group
297 add %o1, 2, %o1 ! IEU0
298 add %g1, %o5, %g1 ! IEU1
299 fmuls %f0, M0, %f1 ! FGM Group
300 fmuls %f0, M1, %f2 ! FGM Group
301 fmuls %f0, M2, %f3 ! FGM Group
302 fmuls %f4, M0, %f5 ! FGM Group
303 fadds %f1, M12, %f1 ! FGA Group, f1 available
304 st %f1, [%g2 + 0x00] ! LSU
305 fmuls %f4, M1, %f6 ! FGM
306 fadds %f2, M13, %f2 ! FGA Group, f2 available
307 st %f2, [%g2 + 0x04] ! LSU
308 fmuls %f4, M2, %f7 ! FGM
309 fadds %f3, M14, %f3 ! FGA Group, f3 available
310 st %f3, [%g2 + 0x08] ! LSU
311 fadds %f5, M12, %f5 ! FGA Group, f5 available
312 st %f5, [%g2 + 0x10] ! LSU
313 fadds %f6, M13, %f6 ! FGA Group, f6 available
314 st %f6, [%g2 + 0x14] ! LSU
315 fadds %f7, M14, %f7 ! FGA Group, f7 available
316 st %f7, [%g2 + 0x18] ! LSU
317 cmp %o1, %o2 ! IEU1
318 bne 1b ! CTI
319 add %g2, 0x20, %g2 ! IEU0 Group
320
321 cmp %o1, %g3 ! even count: no leftover vertex
322 be 3f
323 nop
324
325 2: ld [%g1 + 0x00], %f0
326 fmuls %f0, M0, %f1
327 fmuls %f0, M1, %f2
328 fmuls %f0, M2, %f3
329 fadds %f1, M12, %f1
330 st %f1, [%g2 + 0x00]
331 fadds %f2, M13, %f2
332 st %f2, [%g2 + 0x04]
333 fadds %f3, M14, %f3
334 st %f3, [%g2 + 0x08]
335
336 3:
337 ba __set_v4f_3 ! mark destination as size 3 and return
338 nop
339
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * Per input vertex reads one float x and stores three floats:
 *   out[0] = x * M0 + M12
 *   out[1] = M13            (stored straight from the matrix register)
 *   out[2] = M14            (stored straight from the matrix register)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_3, which returns to the caller.
 */
340 .globl _mesa_sparc_transform_points1_3d_no_rot
341 _mesa_sparc_transform_points1_3d_no_rot:
342 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
343 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
344 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
345 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
346
347 LDMATRIX_0_12_13_14(%o1)
348
349 cmp %g3, 1
350 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
351 bl 3f ! count < 1: nothing to transform
352 clr %o1 ! delay slot: vertices done = 0
353
354 be 2f ! count == 1: only the single-vertex tail
355 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
356
357 1: ld [%g1 + 0x00], %f0 ! LSU Group
358 add %g1, %o5, %g1 ! IEU0
359 ld [%g1 + 0x00], %f2 ! LSU Group
360 add %o1, 2, %o1 ! IEU0
361 add %g1, %o5, %g1 ! IEU1
362 fmuls %f0, M0, %f1 ! FGM Group
363 fmuls %f2, M0, %f3 ! FGM Group
364 fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
365 st %f1, [%g2 + 0x00] ! LSU
366 fadds %f3, M12, %f3 ! FGA Group, f3 available
367 st M13, [%g2 + 0x04] ! LSU
368 st M14, [%g2 + 0x08] ! LSU Group
369 st %f3, [%g2 + 0x10] ! LSU Group
370 st M13, [%g2 + 0x14] ! LSU Group
371 st M14, [%g2 + 0x18] ! LSU Group
372 cmp %o1, %o2 ! IEU1
373 bne 1b ! CTI
374 add %g2, 0x20, %g2 ! IEU0 Group
375
376 cmp %o1, %g3 ! even count: no leftover vertex
377 be 3f
378 nop
379
380 2: ld [%g1 + 0x00], %f0
381 fmuls %f0, M0, %f1
382 fadds %f1, M12, %f1
383 st %f1, [%g2 + 0x00]
384 st M13, [%g2 + 0x04]
385 st M14, [%g2 + 0x08]
386
387 3:
388 ba __set_v4f_3 ! mark destination as size 3 and return
389 nop
390
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * Per input vertex reads one float x and stores four words:
 *   out[0] = x * M0
 *   out[1] = 0              (integer store of %g0, an all-zero word)
 *   out[2] = M14            (stored straight from the matrix register)
 *   out[3] = 0              (integer store of %g0)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_4, which returns to the caller.
 */
391 .globl _mesa_sparc_transform_points1_perspective
392 _mesa_sparc_transform_points1_perspective:
393 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
394 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
395 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
396 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
397
398 LDMATRIX_0_14(%o1)
399
400 cmp %g3, 1
401 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
402 bl 3f ! count < 1: nothing to transform
403 clr %o1 ! delay slot: vertices done = 0
404
405 be 2f ! count == 1: only the single-vertex tail
406 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
407
408 1: ld [%g1 + 0x00], %f0 ! LSU Group
409 add %g1, %o5, %g1 ! IEU0
410 ld [%g1 + 0x00], %f2 ! LSU Group
411 add %o1, 2, %o1 ! IEU0
412 add %g1, %o5, %g1 ! IEU1
413 fmuls %f0, M0, %f1 ! FGM Group
414 st %f1, [%g2 + 0x00] ! LSU
415 fmuls %f2, M0, %f3 ! FGM Group
416 st %g0, [%g2 + 0x04] ! LSU
417 st M14, [%g2 + 0x08] ! LSU Group
418 st %g0, [%g2 + 0x0c] ! LSU Group
419 st %f3, [%g2 + 0x10] ! LSU Group
420 st %g0, [%g2 + 0x14] ! LSU Group
421 st M14, [%g2 + 0x18] ! LSU Group
422 st %g0, [%g2 + 0x1c] ! LSU Group
423 cmp %o1, %o2 ! IEU1
424 bne 1b ! CTI
425 add %g2, 0x20, %g2 ! IEU0 Group
426
427 cmp %o1, %g3 ! even count: no leftover vertex
428 be 3f
429 nop
430
431 2: ld [%g1 + 0x00], %f0
432 fmuls %f0, M0, %f1
433 st %f1, [%g2 + 0x00]
434 st %g0, [%g2 + 0x04]
435 st M14, [%g2 + 0x08]
436 st %g0, [%g2 + 0x0c]
437
438 3:
439 ba __set_v4f_4 ! mark destination as size 4 and return
440 nop
441
/*
 * _mesa_sparc_transform_points2_general
 *
 * Per input vertex reads two floats (x, y) and stores four floats:
 *   out[i] = (x * M<i> + M<12+i>) + y * M<4+i>   for i = 0..3
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
442 .globl _mesa_sparc_transform_points2_general
443 _mesa_sparc_transform_points2_general:
444 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
445 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
446 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
447 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
448
449 LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
450
451 cmp %g3, 0
452 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
453 be 2f ! count == 0: skip the loop
454 clr %o1 ! delay slot: vertices done = 0
455
456 1: ld [%g1 + 0x00], %f0 ! LSU Group
457 ld [%g1 + 0x04], %f1 ! LSU Group
458 add %o1, 1, %o1 ! IEU0
459 add %g1, %o5, %g1 ! IEU1
460 fmuls %f0, M0, %f2 ! FGM Group
461 fmuls %f0, M1, %f3 ! FGM Group
462 fmuls %f0, M2, %f4 ! FGM Group
463 fmuls %f0, M3, %f5 ! FGM Group
464 fadds %f2, M12, %f2 ! FGA Group f2 available
465 fmuls %f1, M4, %f6 ! FGM
466 fadds %f3, M13, %f3 ! FGA Group f3 available
467 fmuls %f1, M5, %f7 ! FGM
468 fadds %f4, M14, %f4 ! FGA Group f4 available
469 fmuls %f1, M6, %f8 ! FGM
470 fadds %f5, M15, %f5 ! FGA Group f5 available
471 fmuls %f1, M7, %f9 ! FGM
472 fadds %f2, %f6, %f2 ! FGA Group f6 available
473 st %f2, [%g2 + 0x00] ! LSU
474 fadds %f3, %f7, %f3 ! FGA Group f7 available
475 st %f3, [%g2 + 0x04] ! LSU
476 fadds %f4, %f8, %f4 ! FGA Group f8 available
477 st %f4, [%g2 + 0x08] ! LSU
478 fadds %f5, %f9, %f5 ! FGA Group f9 available
479 st %f5, [%g2 + 0x0c] ! LSU
480 cmp %o1, %g3 ! IEU1
481 bne 1b ! CTI
482 add %g2, 0x10, %g2 ! IEU0 Group
483 2:
484 ba __set_v4f_4 ! mark destination as size 4 and return
485 nop
486
/*
 * _mesa_sparc_transform_points2_identity
 *
 * Copies the first two floats of every source vertex to the destination
 * (out[0] = x, out[1] = y). If %o2 and %o0 are the same vector it returns
 * at once without touching anything.
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 *      %o1 is not read; it is reused as the vertices-done counter.
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count.
 * One vertex per loop pass. Leaves through __set_v4f_2, which returns to
 * the caller.
 */
487 .globl _mesa_sparc_transform_points2_identity
488 _mesa_sparc_transform_points2_identity:
489 cmp %o2, %o0
490 be 3f ! source is destination: nothing to copy
491 ld [%o2 + V4F_STRIDE], %o5 ! delay slot (runs either way): %o5 = stride
492 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
493 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
494 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
495
496 cmp %g3, 0
497 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
498 be 2f ! count == 0: skip the loop
499 clr %o1 ! delay slot: vertices done = 0
500
501 1: ld [%g1 + 0x00], %f0 ! LSU Group
502 add %o1, 1, %o1 ! IEU0
503 ld [%g1 + 0x04], %f1 ! LSU Group
504 add %g1, %o5, %g1 ! IEU0
505 cmp %o1, %g3 ! IEU1
506 st %f0, [%g2 + 0x00] ! LSU Group
507 st %f1, [%g2 + 0x04] ! LSU Group
508 bne 1b ! CTI
509 add %g2, 0x10, %g2 ! IEU0
510 2:
511 ba __set_v4f_2 ! mark destination as size 2 and return
512 nop
513
514 3: retl ! in-place case: return without changes
515 nop
516
/*
 * _mesa_sparc_transform_points2_2d
 *
 * Per input vertex reads two floats (x, y) and stores two floats:
 *   out[0] = (x * M0 + M12) + y * M4
 *   out[1] = (x * M1 + M13) + y * M5
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_2, which returns to the caller.
 */
517 .globl _mesa_sparc_transform_points2_2d
518 _mesa_sparc_transform_points2_2d:
519 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
520 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
521 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
522 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
523
524 LDMATRIX_0_1_4_5_12_13(%o1)
525
526 cmp %g3, 1
527 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
528 bl 3f ! count < 1: nothing to transform
529 clr %o1 ! delay slot: vertices done = 0
530
531 be 2f ! count == 1: only the single-vertex tail
532 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
533
534 1: ld [%g1 + 0x00], %f0 ! LSU Group
535 ld [%g1 + 0x04], %f1 ! LSU Group
536 add %o1, 2, %o1 ! IEU0
537 add %g1, %o5, %g1 ! IEU1
538 fmuls %f0, M0, %f2 ! FGM
539 ld [%g1 + 0x00], %f8 ! LSU Group
540 fmuls %f0, M1, %f3 ! FGM
541 ld [%g1 + 0x04], %f9 ! LSU Group
542 fmuls %f1, M4, %f6 ! FGM
543 fmuls %f1, M5, %f7 ! FGM Group
544 add %g1, %o5, %g1 ! IEU0
545 fmuls %f8, M0, %f10 ! FGM Group f2 available
546 fadds %f2, M12, %f2 ! FGA
547 fmuls %f8, M1, %f11 ! FGM Group f3 available
548 fadds %f3, M13, %f3 ! FGA
549 fmuls %f9, M4, %f12 ! FGM Group
550 fmuls %f9, M5, %f13 ! FGM Group
551 fadds %f10, M12, %f10 ! FGA Group f2, f10 available
552 fadds %f2, %f6, %f2 ! FGA Group f3, f11 available
553 st %f2, [%g2 + 0x00] ! LSU
554 fadds %f11, M13, %f11 ! FGA Group f12 available
555 fadds %f3, %f7, %f3 ! FGA Group f13 available
556 st %f3, [%g2 + 0x04] ! LSU
557 fadds %f10, %f12, %f10 ! FGA Group f10 available
558 st %f10, [%g2 + 0x10] ! LSU
559 fadds %f11, %f13, %f11 ! FGA Group f11 available
560 st %f11, [%g2 + 0x14] ! LSU
561 cmp %o1, %o2 ! IEU1
562 bne 1b ! CTI
563 add %g2, 0x20, %g2 ! IEU0 Group
564
565 cmp %o1, %g3 ! even count: no leftover vertex
566 be 3f
567 nop
568
569 2: ld [%g1 + 0x00], %f0 ! LSU Group
570 ld [%g1 + 0x04], %f1 ! LSU Group
571 fmuls %f0, M0, %f2 ! FGM Group
572 fmuls %f0, M1, %f3 ! FGM Group
573 fmuls %f1, M4, %f6 ! FGM Group
574 fmuls %f1, M5, %f7 ! FGM Group
575 fadds %f2, M12, %f2 ! FGA Group f2 available
576 fadds %f3, M13, %f3 ! FGA Group f3 available
577 fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available
578 st %f2, [%g2 + 0x00] ! LSU
579 fadds %f3, %f7, %f3 ! FGA Group f3 available
580 st %f3, [%g2 + 0x04] ! LSU
581
582 3:
583 ba __set_v4f_2 ! mark destination as size 2 and return
584 nop
585
/*
 * _mesa_sparc_transform_points2_2d_no_rot
 *
 * Per input vertex reads two floats (x, y) and stores two floats:
 *   out[0] = x * M0 + M12
 *   out[1] = y * M5 + M13
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. Leaves through __set_v4f_2, which returns to the caller.
 */
586 .globl _mesa_sparc_transform_points2_2d_no_rot
587 _mesa_sparc_transform_points2_2d_no_rot:
588 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
589 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
590 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
591 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
592
593 LDMATRIX_0_5_12_13(%o1)
594
595 cmp %g3, 1
596 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
597 bl 3f ! count < 1: nothing to transform
598 clr %o1 ! delay slot: vertices done = 0
599
600 be 2f ! count == 1: only the single-vertex tail
601 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
602
603 1: ld [%g1 + 0x00], %f0 ! LSU Group
604 ld [%g1 + 0x04], %f1 ! LSU Group
605 add %o1, 2, %o1 ! IEU0
606 add %g1, %o5, %g1 ! IEU1
607 ld [%g1 + 0x00], %f4 ! LSU Group
608 fmuls %f0, M0, %f2 ! FGM
609 ld [%g1 + 0x04], %f5 ! LSU Group
610 fmuls %f1, M5, %f3 ! FGM
611 fmuls %f4, M0, %f6 ! FGM Group
612 add %g1, %o5, %g1 ! IEU0
613 fmuls %f5, M5, %f7 ! FGM Group
614 fadds %f2, M12, %f2 ! FGA Group f2 available
615 st %f2, [%g2 + 0x00] ! LSU
616 fadds %f3, M13, %f3 ! FGA Group f3 available
617 st %f3, [%g2 + 0x04] ! LSU
618 fadds %f6, M12, %f6 ! FGA Group f6 available
619 st %f6, [%g2 + 0x10] ! LSU
620 fadds %f7, M13, %f7 ! FGA Group f7 available
621 st %f7, [%g2 + 0x14] ! LSU
622 cmp %o1, %o2 ! IEU1
623 bne 1b ! CTI
624 add %g2, 0x20, %g2 ! IEU0 Group
625
626 cmp %o1, %g3 ! even count: no leftover vertex
627 be 3f
628 nop
629
630 2: ld [%g1 + 0x00], %f0 ! LSU Group
631 ld [%g1 + 0x04], %f1 ! LSU Group
632 fmuls %f0, M0, %f2 ! FGM Group
633 fmuls %f1, M5, %f3 ! FGM Group
634 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
635 st %f2, [%g2 + 0x00] ! LSU
636 fadds %f3, M13, %f3 ! FGA Group f3 available
637 st %f3, [%g2 + 0x04] ! LSU
638
639 3:
640 ba __set_v4f_2 ! mark destination as size 2 and return
641 nop
642
643 /* orig: 12 cycles */
/*
 * _mesa_sparc_transform_points2_3d
 *
 * Per input vertex reads two floats (x, y) and stores three floats:
 *   out[i] = (x * M<i> + M<12+i>) + y * M<4+i>   for i = 0..2
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done, %o2 = count with bit 0 cleared.
 * Loop 1 handles two vertices per pass (the second vertex reuses %f0 and
 * %f14/%f15 as temporaries); label 2 handles one leftover vertex.
 * Leaves through __set_v4f_3, which returns to the caller.
 *
 * The V4F_START fields are pointers, so they are fetched with LDPTR like
 * in every other routine of this file; a plain ld reads only 32 bits and
 * would yield a wrong cursor on 64-bit builds.
 */
644 .globl _mesa_sparc_transform_points2_3d
645 _mesa_sparc_transform_points2_3d:
646 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
647 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor (pointer-width load)
648 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor (pointer-width load)
649 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
650
651 LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
652
653 cmp %g3, 1
654 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
655 bl 3f ! count < 1: nothing to transform
656 clr %o1 ! delay slot: vertices done = 0
657
658 be 2f ! count == 1: only the single-vertex tail
659 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
660
661 1: ld [%g1 + 0x00], %f0 ! LSU Group
662 ld [%g1 + 0x04], %f1 ! LSU Group
663 add %o1, 2, %o1 ! IEU0
664 add %g1, %o5, %g1 ! IEU1
665 ld [%g1 + 0x00], %f9 ! LSU Group
666 fmuls %f0, M0, %f2 ! FGM
667 ld [%g1 + 0x04], %f10 ! LSU Group
668 fmuls %f0, M1, %f3 ! FGM
669 fmuls %f0, M2, %f4 ! FGM Group
670 add %g1, %o5, %g1 ! IEU0
671 fmuls %f1, M4, %f6 ! FGM Group
672 fmuls %f1, M5, %f7 ! FGM Group f2 available
673 fadds %f2, M12, %f2 ! FGA
674 fmuls %f1, M6, %f8 ! FGM Group f3 available
675 fadds %f3, M13, %f3 ! FGA
676 fmuls %f9, M0, %f11 ! FGM Group f4 available
677 fadds %f4, M14, %f4 ! FGA
678 fmuls %f9, M1, %f12 ! FGM Group f6 available
679 fmuls %f9, M2, %f13 ! FGM Group f2, f7 available
680 fadds %f2, %f6, %f2 ! FGA
681 st %f2, [%g2 + 0x00] ! LSU
682 fmuls %f10, M4, %f14 ! FGM Group f3, f8 available
683 fadds %f3, %f7, %f3 ! FGA
684 st %f3, [%g2 + 0x04] ! LSU
685 fmuls %f10, M5, %f15 ! FGM Group f4, f11 available
686 fadds %f11, M12, %f11 ! FGA
687 fmuls %f10, M6, %f0 ! FGM Group f12 available
688 fadds %f12, M13, %f12 ! FGA
689 fadds %f13, M14, %f13 ! FGA Group f13 available
690 fadds %f4, %f8, %f4 ! FGA Group f14 available
691 st %f4, [%g2 + 0x08] ! LSU
692 fadds %f11, %f14, %f11 ! FGA Group f15, f11 available
693 st %f11, [%g2 + 0x10] ! LSU
694 fadds %f12, %f15, %f12 ! FGA Group f0, f12 available
695 st %f12, [%g2 + 0x14] ! LSU
696 fadds %f13, %f0, %f13 ! FGA Group f13 available
697 st %f13, [%g2 + 0x18] ! LSU
698
699 cmp %o1, %o2 ! IEU1
700 bne 1b ! CTI
701 add %g2, 0x20, %g2 ! IEU0 Group
702
703 cmp %o1, %g3 ! even count: no leftover vertex
704 be 3f
705 nop
706
707 2: ld [%g1 + 0x00], %f0 ! LSU Group
708 ld [%g1 + 0x04], %f1 ! LSU Group
709 fmuls %f0, M0, %f2 ! FGM Group
710 fmuls %f0, M1, %f3 ! FGM Group
711 fmuls %f0, M2, %f4 ! FGM Group
712 fmuls %f1, M4, %f6 ! FGM Group
713 fmuls %f1, M5, %f7 ! FGM Group f2 available
714 fadds %f2, M12, %f2 ! FGA
715 fmuls %f1, M6, %f8 ! FGM Group f3 available
716 fadds %f3, M13, %f3 ! FGA
717 fadds %f4, M14, %f4 ! FGA Group f4 available
718 fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available
719 st %f2, [%g2 + 0x00] ! LSU
720 fadds %f3, %f7, %f3 ! FGA Group f3, f8 available
721 st %f3, [%g2 + 0x04] ! LSU
722 fadds %f4, %f8, %f4 ! FGA Group f4 available
723 st %f4, [%g2 + 0x08] ! LSU
724
725 3:
726 ba __set_v4f_3 ! mark destination as size 3 and return
727 nop
728
/*
 * _mesa_sparc_transform_points2_3d_no_rot
 *
 * Per input vertex reads two floats (x, y) and stores three floats:
 *   out[0] = x * M0 + M12
 *   out[1] = y * M5 + M13
 *   out[2] = M14            (stored straight from the matrix register)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o3 = vertices done, %o2 = count with bit 0 cleared.
 * Unlike the sibling routines the counter lives in %o3, so %o1 still
 * holds the matrix pointer at label 3.
 * Loop 1 handles two vertices per pass; label 2 handles one leftover
 * vertex. At exit the word at byte offset 14 * 4 from the matrix pointer
 * is compared with integer zero: nonzero leaves through __set_v4f_3,
 * zero through __set_v4f_2.
 * NOTE(review): that test is on the raw bit pattern, so a float -0.0
 * counts as nonzero - confirm this is intended.
 */
729 .globl _mesa_sparc_transform_points2_3d_no_rot
730 _mesa_sparc_transform_points2_3d_no_rot:
731 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
732 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
733 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
734 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
735
736 LDMATRIX_0_5_12_13_14(%o1)
737
738 cmp %g3, 1
739 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
740 bl 3f ! count < 1: nothing to transform
741 clr %o3 ! delay slot: vertices done = 0 (kept in %o3)
742
743 be 2f ! count == 1: only the single-vertex tail
744 andn %g3, 1, %o2 ! delay slot: %o2 = count rounded down to even
745
746 1: ld [%g1 + 0x00], %f0 ! LSU Group
747 ld [%g1 + 0x04], %f1 ! LSU Group
748 add %o3, 2, %o3 ! IEU0
749 add %g1, %o5, %g1 ! IEU1
750 ld [%g1 + 0x00], %f4 ! LSU Group
751 fmuls %f0, M0, %f2 ! FGM
752 ld [%g1 + 0x04], %f5 ! LSU Group
753 fmuls %f1, M5, %f3 ! FGM
754 fmuls %f4, M0, %f6 ! FGM Group
755 add %g1, %o5, %g1 ! IEU0
756 fmuls %f5, M5, %f7 ! FGM Group
757 fadds %f2, M12, %f2 ! FGA Group f2 available
758 st %f2, [%g2 + 0x00] ! LSU
759 fadds %f3, M13, %f3 ! FGA Group f3 available
760 st %f3, [%g2 + 0x04] ! LSU
761 fadds %f6, M12, %f6 ! FGA Group f6 available
762 st M14, [%g2 + 0x08] ! LSU
763 fadds %f7, M13, %f7 ! FGA Group f7 available
764 st %f6, [%g2 + 0x10] ! LSU
765 st %f7, [%g2 + 0x14] ! LSU Group
766 st M14, [%g2 + 0x18] ! LSU Group
767 cmp %o3, %o2 ! IEU1
768 bne 1b ! CTI
769 add %g2, 0x20, %g2 ! IEU0 Group
770
771 cmp %o3, %g3 ! even count: no leftover vertex
772 be 3f
773 nop
774
775 2: ld [%g1 + 0x00], %f0 ! LSU Group
776 ld [%g1 + 0x04], %f1 ! LSU Group
777 fmuls %f0, M0, %f2 ! FGM Group
778 fmuls %f1, M5, %f3 ! FGM Group
779 fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
780 st %f2, [%g2 + 0x00] ! LSU
781 fadds %f3, M13, %f3 ! FGA Group f3 available
782 st %f3, [%g2 + 0x04] ! LSU
783 st M14, [%g2 + 0x08] ! LSU Group
784
785 3: ld [%o1 + (14 * 0x4)], %g3 ! raw word at matrix + 56 (count in %g3 no longer needed)
786 cmp %g3, 0
787 bne __set_v4f_3 ! nonzero word: report size 3
788 nop
789 ba __set_v4f_2 ! zero word: report size 2
790 nop
791
/*
 * _mesa_sparc_transform_points2_perspective
 *
 * Per input vertex reads two floats (x, y) and stores four words:
 *   out[0] = x * M0
 *   out[1] = y * M5
 *   out[2] = M14            (stored straight from the matrix register)
 *   out[3] = 0              (integer store of %g0, an all-zero word)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
792 .globl _mesa_sparc_transform_points2_perspective
793 _mesa_sparc_transform_points2_perspective:
794 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
795 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
796 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
797 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
798
799 LDMATRIX_0_5_14(%o1)
800
801 cmp %g3, 0
802 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
803 be 2f ! count == 0: skip the loop
804 clr %o1 ! delay slot: vertices done = 0
805
806 1: ld [%g1 + 0x00], %f0 ! x
807 ld [%g1 + 0x04], %f1 ! y
808 add %o1, 1, %o1
809 add %g1, %o5, %g1 ! advance to next source vertex
810 fmuls %f0, M0, %f2
811 st %f2, [%g2 + 0x00]
812 fmuls %f1, M5, %f3
813 st %f3, [%g2 + 0x04]
814 st M14, [%g2 + 0x08]
815 st %g0, [%g2 + 0x0c]
816 cmp %o1, %g3
817 bne 1b
818 add %g2, 0x10, %g2 ! delay slot: advance destination cursor
819 2:
820 ba __set_v4f_4 ! mark destination as size 4 and return
821 nop
822
/*
 * _mesa_sparc_transform_points3_general
 *
 * Per input vertex reads three floats (x, y, z) and stores four floats:
 *   out[i] = ((x * M<i> + y * M<4+i>) + M<12+i>) + z * M<8+i>   for i = 0..3
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done. %f7..%f10 are reused: first for the y
 *      products, then for the z products.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
823 .globl _mesa_sparc_transform_points3_general
824 _mesa_sparc_transform_points3_general:
825 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
826 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
827 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
828 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
829
830 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
831
832 cmp %g3, 0
833 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
834 be 2f ! count == 0: skip the loop
835 clr %o1 ! delay slot: vertices done = 0
836
837 1: ld [%g1 + 0x00], %f0 ! LSU Group
838 ld [%g1 + 0x04], %f1 ! LSU Group
839 ld [%g1 + 0x08], %f2 ! LSU Group
840 add %o1, 1, %o1 ! IEU0
841 add %g1, %o5, %g1 ! IEU1
842 fmuls %f0, M0, %f3 ! FGM
843 fmuls %f1, M4, %f7 ! FGM Group
844 fmuls %f0, M1, %f4 ! FGM Group
845 fmuls %f1, M5, %f8 ! FGM Group
846 fmuls %f0, M2, %f5 ! FGM Group f3 available
847 fmuls %f1, M6, %f9 ! FGM Group f7 available
848 fadds %f3, %f7, %f3 ! FGA
849 fmuls %f0, M3, %f6 ! FGM Group f4 available
850 fmuls %f1, M7, %f10 ! FGM Group f8 available
851 fadds %f4, %f8, %f4 ! FGA
852 fmuls %f2, M8, %f7 ! FGM Group f5 available
853 fmuls %f2, M9, %f8 ! FGM Group f9,f3 available
854 fadds %f5, %f9, %f5 ! FGA
855 fmuls %f2, M10, %f9 ! FGM Group f6 available
856 fadds %f6, %f10, %f6 ! FGA Group f10,f4 available
857 fmuls %f2, M11, %f10 ! FGM
858 fadds %f3, M12, %f3 ! FGA Group f7 available
859 fadds %f4, M13, %f4 ! FGA Group f8,f5 available
860 fadds %f5, M14, %f5 ! FGA Group f9 available
861 fadds %f6, M15, %f6 ! FGA Group f10,f6 available
862 fadds %f3, %f7, %f3 ! FGA Group f3 available
863 st %f3, [%g2 + 0x00] ! LSU
864 fadds %f4, %f8, %f4 ! FGA Group f4 available
865 st %f4, [%g2 + 0x04] ! LSU
866 fadds %f5, %f9, %f5 ! FGA Group f5 available
867 st %f5, [%g2 + 0x08] ! LSU
868 fadds %f6, %f10, %f6 ! FGA Group f6 available
869 st %f6, [%g2 + 0x0c] ! LSU
870 cmp %o1, %g3 ! IEU1
871 bne 1b ! CTI
872 add %g2, 0x10, %g2 ! IEU0 Group
873 2:
874 ba __set_v4f_4 ! mark destination as size 4 and return
875 nop
876
/*
 * _mesa_sparc_transform_points3_identity
 *
 * Copies the first three floats of every source vertex to the destination
 * (out[0..2] = x, y, z). There is no source == destination shortcut here,
 * unlike the points1/points2 identity routines.
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 *      %o1 is not read; it is reused as the vertices-done counter.
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count.
 * One vertex per loop pass. Leaves through __set_v4f_3, which returns to
 * the caller.
 */
877 .globl _mesa_sparc_transform_points3_identity
878 _mesa_sparc_transform_points3_identity:
879 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
880 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
881 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
882 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
883
884 cmp %g3, 0
885 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
886 be 2f ! count == 0: skip the loop
887 clr %o1 ! delay slot: vertices done = 0
888
889 1: ld [%g1 + 0x00], %f0 ! x
890 ld [%g1 + 0x04], %f1 ! y
891 ld [%g1 + 0x08], %f2 ! z
892 add %o1, 1, %o1
893 add %g1, %o5, %g1 ! advance to next source vertex
894 cmp %o1, %g3 ! loop test issued early; stores below leave the condition codes alone
895 st %f0, [%g2 + 0x00]
896 st %f1, [%g2 + 0x04]
897 st %f2, [%g2 + 0x08]
898 bne 1b
899 add %g2, 0x10, %g2 ! delay slot: advance destination cursor
900 2:
901 ba __set_v4f_3 ! mark destination as size 3 and return
902 nop
903
/*
 * _mesa_sparc_transform_points3_2d
 *
 * Per input vertex reads three floats (x, y, z) and stores three floats:
 *   out[0] = (x * M0 + M12) + y * M4
 *   out[1] = (x * M1 + M13) + y * M5
 *   out[2] = z              (copied unchanged)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. Leaves through __set_v4f_3, which returns to
 * the caller.
 */
904 .globl _mesa_sparc_transform_points3_2d
905 _mesa_sparc_transform_points3_2d:
906 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
907 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
908 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
909 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
910
911 LDMATRIX_0_1_4_5_12_13(%o1)
912
913 cmp %g3, 0
914 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
915 be 2f ! count == 0: skip the loop
916 clr %o1 ! delay slot: vertices done = 0
917
918 1: ld [%g1 + 0x00], %f0 ! LSU Group
919 ld [%g1 + 0x04], %f1 ! LSU Group
920 ld [%g1 + 0x08], %f2 ! LSU Group
921 add %o1, 1, %o1 ! IEU0
922 add %g1, %o5, %g1 ! IEU1
923 fmuls %f0, M0, %f3 ! FGM
924 fmuls %f0, M1, %f4 ! FGM Group
925 fmuls %f1, M4, %f6 ! FGM Group
926 fmuls %f1, M5, %f7 ! FGM Group
927 fadds %f3, M12, %f3 ! FGA Group f3 available
928 fadds %f4, M13, %f4 ! FGA Group f4 available
929 fadds %f3, %f6, %f3 ! FGA Group f6 available
930 st %f3, [%g2 + 0x00] ! LSU
931 fadds %f4, %f7, %f4 ! FGA Group f7 available
932 st %f4, [%g2 + 0x04] ! LSU
933 st %f2, [%g2 + 0x08] ! LSU Group
934 cmp %o1, %g3 ! IEU1
935 bne 1b ! CTI
936 add %g2, 0x10, %g2 ! IEU0 Group
937 2:
938 ba __set_v4f_3 ! mark destination as size 3 and return
939 nop
940
/*
 * _mesa_sparc_transform_points3_2d_no_rot
 *
 * Per input vertex reads three floats (x, y, z) and stores three floats:
 *   out[0] = x * M0 + M12
 *   out[1] = y * M5 + M13
 *   out[2] = z              (copied unchanged)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. Leaves through __set_v4f_3, which returns to
 * the caller.
 */
941 .globl _mesa_sparc_transform_points3_2d_no_rot
942 _mesa_sparc_transform_points3_2d_no_rot:
943 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
944 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
945 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
946 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
947
948 LDMATRIX_0_5_12_13(%o1)
949
950 cmp %g3, 0
951 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
952 be 2f ! count == 0: skip the loop
953 clr %o1 ! delay slot: vertices done = 0
954
955 1: ld [%g1 + 0x00], %f0 ! LSU Group
956 ld [%g1 + 0x04], %f1 ! LSU Group
957 ld [%g1 + 0x08], %f2 ! LSU Group
958 add %o1, 1, %o1 ! IEU0
959 add %g1, %o5, %g1 ! IEU1
960 fmuls %f0, M0, %f3 ! FGM
961 fmuls %f1, M5, %f4 ! FGM Group
962 st %f2, [%g2 + 0x08] ! LSU
963 fadds %f3, M12, %f3 ! FGA Group
964 st %f3, [%g2 + 0x00] ! LSU
965 fadds %f4, M13, %f4 ! FGA Group
966 st %f4, [%g2 + 0x04] ! LSU
967 cmp %o1, %g3 ! IEU1
968 bne 1b ! CTI
969 add %g2, 0x10, %g2 ! IEU0 Group
970 2:
971 ba __set_v4f_3 ! mark destination as size 3 and return
972 nop
973
/*
 * _mesa_sparc_transform_points3_3d
 *
 * Per input vertex reads three floats (x, y, z) and stores three floats:
 *   out[i] = ((x * M<i> + y * M<4+i>) + z * M<8+i>) + M<12+i>   for i = 0..2
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. Leaves through __set_v4f_3, which returns to
 * the caller.
 */
974 .globl _mesa_sparc_transform_points3_3d
975 _mesa_sparc_transform_points3_3d:
976 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
977 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
978 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
979 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
980
981 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
982
983 cmp %g3, 0
984 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
985 be 2f ! count == 0: skip the loop
986 clr %o1 ! delay slot: vertices done = 0
987
988 1: ld [%g1 + 0x00], %f0 ! LSU Group
989 ld [%g1 + 0x04], %f1 ! LSU Group
990 ld [%g1 + 0x08], %f2 ! LSU Group
991 add %o1, 1, %o1 ! IEU0
992 add %g1, %o5, %g1 ! IEU1
993 fmuls %f0, M0, %f3 ! FGM
994 fmuls %f1, M4, %f6 ! FGM Group
995 fmuls %f0, M1, %f4 ! FGM Group
996 fmuls %f1, M5, %f7 ! FGM Group
997 fmuls %f0, M2, %f5 ! FGM Group f3 available
998 fmuls %f1, M6, %f8 ! FGM Group f6 available
999 fadds %f3, %f6, %f3 ! FGA
1000 fmuls %f2, M8, %f9 ! FGM Group f4 available
1001 fmuls %f2, M9, %f10 ! FGM Group f7 available
1002 fadds %f4, %f7, %f4 ! FGA
1003 fmuls %f2, M10, %f11 ! FGM Group f5 available
1004 fadds %f5, %f8, %f5 ! FGA Group f8, f3 available
1005 fadds %f3, %f9, %f3 ! FGA Group f9 available
1006 fadds %f4, %f10, %f4 ! FGA Group f10, f4 available
1007 fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available
1008 fadds %f3, M12, %f3 ! FGA Group f3 available
1009 st %f3, [%g2 + 0x00] ! LSU
1010 fadds %f4, M13, %f4 ! FGA Group f4 available
1011 st %f4, [%g2 + 0x04] ! LSU
1012 fadds %f5, M14, %f5 ! FGA Group f5 available
1013 st %f5, [%g2 + 0x08] ! LSU
1014 cmp %o1, %g3 ! IEU1
1015 bne 1b ! CTI
1016 add %g2, 0x10, %g2 ! IEU0 Group
1017 2:
1018 ba __set_v4f_3 ! mark destination as size 3 and return
1019 nop
1020
/*
 * _mesa_sparc_transform_points3_3d_no_rot
 *
 * Per input vertex reads three floats (x, y, z) and stores three floats:
 *   out[0] = x * M0  + M12
 *   out[1] = y * M5  + M13
 *   out[2] = z * M10 + M14
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done.
 * One vertex per loop pass. The loop-control cmp is issued right after
 * the counter update; only FP operations and stores sit between it and
 * the bne, so the integer condition codes are still valid there.
 * Leaves through __set_v4f_3, which returns to the caller.
 */
1021 .globl _mesa_sparc_transform_points3_3d_no_rot
1022 _mesa_sparc_transform_points3_3d_no_rot:
1023 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
1024 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1025 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1026 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1027
1028 LDMATRIX_0_5_10_12_13_14(%o1)
1029
1030 cmp %g3, 0
1031 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1032 be 2f ! count == 0: skip the loop
1033 clr %o1 ! delay slot: vertices done = 0
1034
1035 1: ld [%g1 + 0x00], %f0 ! LSU Group
1036 ld [%g1 + 0x04], %f1 ! LSU Group
1037 ld [%g1 + 0x08], %f2 ! LSU Group
1038 add %o1, 1, %o1 ! IEU0
1039 add %g1, %o5, %g1 ! IEU1
1040 cmp %o1, %g3 ! IEU1 Group
1041 fmuls %f0, M0, %f3 ! FGM
1042 fmuls %f1, M5, %f4 ! FGM Group
1043 fmuls %f2, M10, %f5 ! FGM Group
1044 fadds %f3, M12, %f3 ! FGA Group, stall, f3 available
1045 st %f3, [%g2 + 0x00] ! LSU
1046 fadds %f4, M13, %f4 ! FGA Group, f4 available
1047 st %f4, [%g2 + 0x04] ! LSU
1048 fadds %f5, M14, %f5 ! FGA Group, f5 available
1049 st %f5, [%g2 + 0x08] ! LSU
1050 bne 1b ! CTI
1051 add %g2, 0x10, %g2 ! IEU0 Group
1052 2:
1053 ba __set_v4f_3 ! mark destination as size 3 and return
1054 nop
1055
/*
 * _mesa_sparc_transform_points3_perspective
 *
 * Per input vertex reads three floats (x, y, z) and stores four floats:
 *   out[0] = x * M0  + z * M8
 *   out[1] = y * M5  + z * M9
 *   out[2] = z * M10 + M14
 *   out[3] = -z             (fnegs of the loaded z)
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done. %f6 is reused: first z * M8, later -z.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
1056 .globl _mesa_sparc_transform_points3_perspective
1057 _mesa_sparc_transform_points3_perspective:
1058 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
1059 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1060 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1061 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1062
1063 LDMATRIX_0_5_8_9_10_14(%o1)
1064
1065 cmp %g3, 0
1066 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1067 be 2f ! count == 0: skip the loop
1068 clr %o1 ! delay slot: vertices done = 0
1069
1070 1: ld [%g1 + 0x00], %f0 ! LSU Group
1071 ld [%g1 + 0x04], %f1 ! LSU Group
1072 ld [%g1 + 0x08], %f2 ! LSU Group
1073 add %o1, 1, %o1 ! IEU0
1074 add %g1, %o5, %g1 ! IEU1
1075 fmuls %f0, M0, %f3 ! FGM
1076 fmuls %f2, M8, %f6 ! FGM Group
1077 fmuls %f1, M5, %f4 ! FGM Group
1078 fmuls %f2, M9, %f7 ! FGM Group
1079 fmuls %f2, M10, %f5 ! FGM Group f3 available
1080 fadds %f3, %f6, %f3 ! FGA Group f6 available
1081 st %f3, [%g2 + 0x00] ! LSU
1082 fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available
1083 st %f4, [%g2 + 0x04] ! LSU
1084 fadds %f5, M14, %f5 ! FGA Group
1085 st %f5, [%g2 + 0x08] ! LSU
1086 fnegs %f2, %f6 ! FGA Group
1087 st %f6, [%g2 + 0x0c] ! LSU
1088 cmp %o1, %g3 ! IEU1
1089 bne 1b ! CTI
1090 add %g2, 0x10, %g2 ! IEU0 Group
1091 2:
1092 ba __set_v4f_4 ! mark destination as size 4 and return
1093 nop
1094
/*
 * _mesa_sparc_transform_points4_general
 *
 * Per input vertex reads four floats (x, y, z, w) and stores four floats:
 *   out[i] = ((x * M<i> + y * M<4+i>) + z * M<8+i>) + w * M<12+i>   for i = 0..3
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o1 = matrix pointer handed to the LDMATRIX macro (defined in
 *            sparc_matrix.h; presumably it fills the M<n> FP register
 *            aliases with the named matrix elements - confirm there)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count,
 *      %o1 = vertices done. %f8..%f11 are reused: first for the y
 *      products, then for the w products.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
1095 .globl _mesa_sparc_transform_points4_general
1096 _mesa_sparc_transform_points4_general:
1097 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
1098 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1099 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1100 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1101
1102 LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
1103
1104 cmp %g3, 0
1105 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1106 be 2f ! count == 0: skip the loop
1107 clr %o1 ! delay slot: vertices done = 0
1108
1109 1: ld [%g1 + 0x00], %f0 ! LSU Group
1110 ld [%g1 + 0x04], %f1 ! LSU Group
1111 ld [%g1 + 0x08], %f2 ! LSU Group
1112 ld [%g1 + 0x0c], %f3 ! LSU Group
1113 add %o1, 1, %o1 ! IEU0
1114 add %g1, %o5, %g1 ! IEU1
1115 fmuls %f0, M0, %f4 ! FGM Group
1116 fmuls %f1, M4, %f8 ! FGM Group
1117 fmuls %f0, M1, %f5 ! FGM Group
1118 fmuls %f1, M5, %f9 ! FGM Group
1119 fmuls %f0, M2, %f6 ! FGM Group f4 available
1120 fmuls %f1, M6, %f10 ! FGM Group f8 available
1121 fadds %f4, %f8, %f4 ! FGA
1122 fmuls %f0, M3, %f7 ! FGM Group f5 available
1123 fmuls %f1, M7, %f11 ! FGM Group f9 available
1124 fadds %f5, %f9, %f5 ! FGA
1125 fmuls %f2, M8, %f12 ! FGM Group f6 available
1126 fmuls %f2, M9, %f13 ! FGM Group f10, f4 available
1127 fadds %f6, %f10, %f6 ! FGA
1128 fmuls %f2, M10, %f14 ! FGM Group f7 available
1129 fmuls %f2, M11, %f15 ! FGM Group f11, f5 available
1130 fadds %f7, %f11, %f7 ! FGA
1131 fmuls %f3, M12, %f8 ! FGM Group f12 available
1132 fadds %f4, %f12, %f4 ! FGA
1133 fmuls %f3, M13, %f9 ! FGM Group f13, f6 available
1134 fadds %f5, %f13, %f5 ! FGA
1135 fmuls %f3, M14, %f10 ! FGM Group f14 available
1136 fadds %f6, %f14, %f6 ! FGA
1137 fmuls %f3, M15, %f11 ! FGM Group f15, f7 available
1138 fadds %f7, %f15, %f7 ! FGA
1139 fadds %f4, %f8, %f4 ! FGA Group f8, f4 available
1140 st %f4, [%g2 + 0x00] ! LSU
1141 fadds %f5, %f9, %f5 ! FGA Group f9, f5 available
1142 st %f5, [%g2 + 0x04] ! LSU
1143 fadds %f6, %f10, %f6 ! FGA Group f10, f6 available
1144 st %f6, [%g2 + 0x08] ! LSU
1145 fadds %f7, %f11, %f7 ! FGA Group f11, f7 available
1146 st %f7, [%g2 + 0x0c] ! LSU
1147 cmp %o1, %g3 ! IEU1
1148 bne 1b ! CTI
1149 add %g2, 0x10, %g2 ! IEU0 Group
1150 2:
1151 ba __set_v4f_4 ! mark destination as size 4 and return
1152 nop
1153
/*
 * _mesa_sparc_transform_points4_identity
 *
 * Copies all four floats of every source vertex to the destination
 * (out[0..3] = x, y, z, w). There is no source == destination shortcut
 * here, unlike the points1/points2 identity routines.
 *
 * In:  %o0 = destination vector (V4F_START read, V4F_COUNT written)
 *      %o2 = source vector (V4F_STRIDE, V4F_START, V4F_COUNT read)
 *      %o1 is not read; it is reused as the vertices-done counter.
 * Working registers: %o5 = source stride in bytes, %g1 = source cursor,
 *      %g2 = destination cursor (advances 0x10 per vertex), %g3 = count.
 * One vertex per loop pass. Leaves through __set_v4f_4, which returns to
 * the caller.
 */
1154 .globl _mesa_sparc_transform_points4_identity
1155 _mesa_sparc_transform_points4_identity:
1156 ld [%o2 + V4F_STRIDE], %o5 ! %o5 = source stride (bytes)
1157 LDPTR [%o2 + V4F_START], %g1 ! %g1 = source cursor
1158 LDPTR [%o0 + V4F_START], %g2 ! %g2 = destination cursor
1159 ld [%o2 + V4F_COUNT], %g3 ! %g3 = vertex count
1160
1161 cmp %g3, 0
1162 st %g3, [%o0 + V4F_COUNT] ! destination count = source count
1163 be 2f ! count == 0: skip the loop
1164 clr %o1 ! delay slot: vertices done = 0
1165
1166 1: ld [%g1 + 0x00], %f0 ! x
1167 ld [%g1 + 0x04], %f1 ! y
1168 ld [%g1 + 0x08], %f2 ! z
1169 add %o1, 1, %o1
1170 ld [%g1 + 0x0c], %f3 ! w
1171 add %g1, %o5, %g1 ! advance to next source vertex
1172 st %f0, [%g2 + 0x00]
1173 st %f1, [%g2 + 0x04]
1174 st %f2, [%g2 + 0x08]
1175 cmp %o1, %g3
1176 st %f3, [%g2 + 0x0c]
1177 bne 1b
1178 add %g2, 0x10, %g2 ! delay slot: advance destination cursor
1179 2:
1180 ba __set_v4f_4 ! mark destination as size 4 and return
1181 nop
1182
/*
 * _mesa_sparc_transform_points4_2d
 *
 * For every source element in = (in0, in1, in2, in3) writes
 *     out0 = in0*M0 + in1*M4 + in3*M12
 *     out1 = in0*M1 + in1*M5 + in3*M13
 *     out2 = in2        (copied unchanged)
 *     out3 = in3        (copied unchanged)
 *
 * In:   %o0 = destination vector descriptor (V4F_START read,
 *             V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13; afterwards
 *             cleared and reused as the loop counter
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT read)
 * Regs: %o5 = source stride, %g1 = source cursor, %g2 = destination
 *       cursor, %g3 = element count, %f0-%f3 = input element,
 *       %f4/%f5/%f8/%f9/%f12/%f13 = products and partial sums.
 * Exit: branches to __set_v4f_4 (sets V4F_SIZE to 4, ORs VEC_SIZE_4
 *       into V4F_FLAGS, returns with retl).
 *
 * NOTE(review): LDMATRIX_* and the Mn names come from sparc_matrix.h,
 * which is not visible here; presumably the macro loads the listed
 * matrix elements from the matrix at %o1 into the Mn registers --
 * confirm against the header.
 * NOTE(review): the trailing '!' notes look like functional-unit /
 * issue-group scheduling annotations; keep the instruction order
 * unless the scheduling is re-derived.
 */
1183 .globl _mesa_sparc_transform_points4_2d
1184 _mesa_sparc_transform_points4_2d:
1185 ld [%o2 + V4F_STRIDE], %o5
1186 LDPTR [%o2 + V4F_START], %g1
1187 LDPTR [%o0 + V4F_START], %g2
1188 ld [%o2 + V4F_COUNT], %g3
1189
1190 LDMATRIX_0_1_4_5_12_13(%o1)
1191
/*
 * Copy the count to the destination and skip the loop when it is zero.
 * clr %o1 is in the delay slot: it runs on both paths and turns %o1
 * into the element counter now that the matrix macro has consumed it.
 */
1192 cmp %g3, 0
1193 st %g3, [%o0 + V4F_COUNT]
1194 be 2f
1195 clr %o1
1196
/* Loop body: one input element per iteration, 0x10 bytes of output. */
1197 1: ld [%g1 + 0x00], %f0 ! LSU Group
1198 ld [%g1 + 0x04], %f1 ! LSU Group
1199 ld [%g1 + 0x08], %f2 ! LSU Group
1200 ld [%g1 + 0x0c], %f3 ! LSU Group
1201 add %o1, 1, %o1 ! IEU0
1202 add %g1, %o5, %g1 ! IEU1
1203 fmuls %f0, M0, %f4 ! FGM
1204 fmuls %f1, M4, %f8 ! FGM Group
1205 fmuls %f0, M1, %f5 ! FGM Group
1206 fmuls %f1, M5, %f9 ! FGM Group f4 available
1207 fmuls %f3, M12, %f12 ! FGM Group
1208 fmuls %f3, M13, %f13 ! FGM Group f8 available
1209 fadds %f4, %f8, %f4 ! FGA
1210 fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available
1211 fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail
1212 st %f4, [%g2 + 0x00] ! LSU
1213 fadds %f5, %f13, %f5 ! FGA Group f5 available
1214 st %f5, [%g2 + 0x04] ! LSU
/* The third and fourth components pass through untransformed. */
1215 st %f2, [%g2 + 0x08] ! LSU Group
1216 st %f3, [%g2 + 0x0c] ! LSU Group
1217 cmp %o1, %g3 ! IEU1
1218 bne 1b ! CTI
1219 add %g2, 0x10, %g2 ! IEU0 Group
1220 2:
/* Tail-branch: __set_v4f_4 performs the return to the caller. */
1221 ba __set_v4f_4
1222 nop
1223
/*
 * _mesa_sparc_transform_points4_2d_no_rot
 *
 * For every source element in = (in0, in1, in2, in3) writes
 *     out0 = in0*M0 + in3*M12
 *     out1 = in1*M5 + in3*M13
 *     out2 = in2        (copied unchanged)
 *     out3 = in3        (copied unchanged)
 * Only M0, M5, M12 and M13 are used by the loop, although the macro
 * invoked below also names elements 1 and 4.
 *
 * In:   %o0 = destination vector descriptor (V4F_START read,
 *             V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_1_4_5_12_13; afterwards
 *             cleared and reused as the loop counter
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT read)
 * Regs: %o5 = source stride, %g1 = source cursor, %g2 = destination
 *       cursor, %g3 = element count, %f0-%f3 = input element,
 *       %f4/%f5/%f8/%f9 = products and sums.
 * Exit: branches to __set_v4f_4 (sets V4F_SIZE to 4, ORs VEC_SIZE_4
 *       into V4F_FLAGS, returns with retl).
 *
 * NOTE(review): LDMATRIX_* and the Mn names come from sparc_matrix.h,
 * which is not visible here; presumably the macro loads the listed
 * matrix elements from the matrix at %o1 -- confirm against the header.
 */
1224 .globl _mesa_sparc_transform_points4_2d_no_rot
1225 _mesa_sparc_transform_points4_2d_no_rot:
1226 ld [%o2 + V4F_STRIDE], %o5
1227 LDPTR [%o2 + V4F_START], %g1
1228 LDPTR [%o0 + V4F_START], %g2
1229 ld [%o2 + V4F_COUNT], %g3
1230
1231 LDMATRIX_0_1_4_5_12_13(%o1)
1232
/*
 * Copy the count to the destination and skip the loop when it is zero.
 * clr %o1 is in the delay slot, so the counter is zeroed on both paths.
 */
1233 cmp %g3, 0
1234 st %g3, [%o0 + V4F_COUNT]
1235 be 2f
1236 clr %o1
1237
/* Loop body: one input element per iteration, 0x10 bytes of output. */
1238 1: ld [%g1 + 0x00], %f0
1239 ld [%g1 + 0x04], %f1
1240 ld [%g1 + 0x08], %f2
1241 ld [%g1 + 0x0c], %f3
1242 add %o1, 1, %o1
1243 add %g1, %o5, %g1
1244 fmuls %f0, M0, %f4
1245 fmuls %f3, M12, %f8
1246 fmuls %f1, M5, %f5
1247 fmuls %f3, M13, %f9
1248 fadds %f4, %f8, %f4
1249 st %f4, [%g2 + 0x00]
1250 fadds %f5, %f9, %f5
1251 st %f5, [%g2 + 0x04]
/* The third and fourth components pass through untransformed. */
1252 st %f2, [%g2 + 0x08]
1253 st %f3, [%g2 + 0x0c]
1254 cmp %o1, %g3
1255 bne 1b
1256 add %g2, 0x10, %g2
1257 2:
/* Tail-branch: __set_v4f_4 performs the return to the caller. */
1258 ba __set_v4f_4
1259 nop
1260
/*
 * _mesa_sparc_transform_points4_3d
 *
 * For every source element in = (in0, in1, in2, in3) writes
 *     out0 = in0*M0 + in1*M4 + in2*M8  + in3*M12
 *     out1 = in0*M1 + in1*M5 + in2*M9  + in3*M13
 *     out2 = in0*M2 + in1*M6 + in2*M10 + in3*M14
 *     out3 = in3        (copied unchanged)
 *
 * In:   %o0 = destination vector descriptor (V4F_START read,
 *             V4F_COUNT written)
 *       %o1 = argument handed to
 *             LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14; afterwards cleared
 *             and reused as the loop counter
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT read)
 * Regs: %o5 = source stride, %g1 = source cursor, %g2 = destination
 *       cursor, %g3 = element count, %f0-%f3 = input element,
 *       %f4-%f6 = running sums for out0-out2, %f7-%f15 = products.
 * Exit: branches to __set_v4f_4 (sets V4F_SIZE to 4, ORs VEC_SIZE_4
 *       into V4F_FLAGS, returns with retl).
 *
 * NOTE(review): LDMATRIX_* and the Mn names come from sparc_matrix.h,
 * which is not visible here; presumably the macro loads the listed
 * matrix elements from the matrix at %o1 -- confirm against the header.
 * NOTE(review): the trailing '!' notes look like functional-unit /
 * issue-group scheduling annotations; keep the instruction order
 * unless the scheduling is re-derived.
 */
1261 .globl _mesa_sparc_transform_points4_3d
1262 _mesa_sparc_transform_points4_3d:
1263 ld [%o2 + V4F_STRIDE], %o5
1264 LDPTR [%o2 + V4F_START], %g1
1265 LDPTR [%o0 + V4F_START], %g2
1266 ld [%o2 + V4F_COUNT], %g3
1267
1268 LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1269
/*
 * Copy the count to the destination and skip the loop when it is zero.
 * clr %o1 is in the delay slot, so the counter is zeroed on both paths.
 */
1270 cmp %g3, 0
1271 st %g3, [%o0 + V4F_COUNT]
1272 be 2f
1273 clr %o1
1274
/*
 * Loop body: multiplies and adds are interleaved so that each fadds
 * consumes products issued several instructions earlier.
 */
1275 1: ld [%g1 + 0x00], %f0 ! LSU Group
1276 ld [%g1 + 0x04], %f1 ! LSU Group
1277 ld [%g1 + 0x08], %f2 ! LSU Group
1278 ld [%g1 + 0x0c], %f3 ! LSU Group
1279 add %o1, 1, %o1 ! IEU0
1280 add %g1, %o5, %g1 ! IEU1
1281 fmuls %f0, M0, %f4 ! FGM
1282 fmuls %f1, M4, %f7 ! FGM Group
1283 fmuls %f0, M1, %f5 ! FGM Group
1284 fmuls %f1, M5, %f8 ! FGM Group
1285 fmuls %f0, M2, %f6 ! FGM Group f4 available
1286 fmuls %f1, M6, %f9 ! FGM Group f7 available
1287 fadds %f4, %f7, %f4 ! FGA
1288 fmuls %f2, M8, %f10 ! FGM Group f5 available
1289 fmuls %f2, M9, %f11 ! FGM Group f8 available
1290 fadds %f5, %f8, %f5 ! FGA
1291 fmuls %f2, M10, %f12 ! FGM Group f6 available
1292 fmuls %f3, M12, %f13 ! FGM Group f9, f4 available
1293 fadds %f6, %f9, %f6 ! FGA
1294 fmuls %f3, M13, %f14 ! FGM Group f10 available
1295 fadds %f4, %f10, %f4 ! FGA
1296 fmuls %f3, M14, %f15 ! FGM Group f11, f5 available
1297 fadds %f5, %f11, %f5 ! FGA
1298 fadds %f6, %f12, %f6 ! FGA Group stall, f12, f13, f6 available
1299 fadds %f4, %f13, %f4 ! FGA Group f14, f4 available
1300 st %f4, [%g2 + 0x00] ! LSU
1301 fadds %f5, %f14, %f5 ! FGA Group f15, f5 available
1302 st %f5, [%g2 + 0x04] ! LSU
1303 fadds %f6, %f15, %f6 ! FGA Group f6 available
1304 st %f6, [%g2 + 0x08] ! LSU
/* The fourth component passes through untransformed. */
1305 st %f3, [%g2 + 0x0c] ! LSU Group
1306 cmp %o1, %g3 ! IEU1
1307 bne 1b ! CTI
1308 add %g2, 0x10, %g2 ! IEU0 Group
1309 2:
/* Tail-branch: __set_v4f_4 performs the return to the caller. */
1310 ba __set_v4f_4
1311 nop
1312
/*
 * _mesa_sparc_transform_points4_3d_no_rot
 *
 * For every source element in = (in0, in1, in2, in3) writes
 *     out0 = in0*M0  + in3*M12
 *     out1 = in1*M5  + in3*M13
 *     out2 = in2*M10 + in3*M14
 *     out3 = in3        (copied unchanged)
 *
 * In:   %o0 = destination vector descriptor (V4F_START read,
 *             V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_5_10_12_13_14; afterwards
 *             cleared and reused as the loop counter
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT read)
 * Regs: %o5 = source stride, %g1 = source cursor, %g2 = destination
 *       cursor, %g3 = element count, %f0-%f3 = input element,
 *       %f4-%f6 = sums for out0-out2, %f7-%f9 = in3 products.
 * Exit: branches to __set_v4f_4 (sets V4F_SIZE to 4, ORs VEC_SIZE_4
 *       into V4F_FLAGS, returns with retl).
 *
 * NOTE(review): LDMATRIX_* and the Mn names come from sparc_matrix.h,
 * which is not visible here; presumably the macro loads the listed
 * matrix elements from the matrix at %o1 -- confirm against the header.
 * NOTE(review): the trailing '!' notes look like functional-unit /
 * issue-group scheduling annotations; keep the instruction order
 * unless the scheduling is re-derived.
 */
1313 .globl _mesa_sparc_transform_points4_3d_no_rot
1314 _mesa_sparc_transform_points4_3d_no_rot:
1315 ld [%o2 + V4F_STRIDE], %o5
1316 LDPTR [%o2 + V4F_START], %g1
1317 LDPTR [%o0 + V4F_START], %g2
1318 ld [%o2 + V4F_COUNT], %g3
1319
1320 LDMATRIX_0_5_10_12_13_14(%o1)
1321
/*
 * Copy the count to the destination and skip the loop when it is zero.
 * clr %o1 is in the delay slot, so the counter is zeroed on both paths.
 */
1322 cmp %g3, 0
1323 st %g3, [%o0 + V4F_COUNT]
1324 be 2f
1325 clr %o1
1326
/* Loop body: one input element per iteration, 0x10 bytes of output. */
1327 1: ld [%g1 + 0x00], %f0 ! LSU Group
1328 ld [%g1 + 0x04], %f1 ! LSU Group
1329 ld [%g1 + 0x08], %f2 ! LSU Group
1330 ld [%g1 + 0x0c], %f3 ! LSU Group
1331 add %o1, 1, %o1 ! IEU0
1332 add %g1, %o5, %g1 ! IEU1
1333 fmuls %f0, M0, %f4 ! FGM
1334 fmuls %f3, M12, %f7 ! FGM Group
1335 fmuls %f1, M5, %f5 ! FGM Group
1336 fmuls %f3, M13, %f8 ! FGM Group
1337 fmuls %f2, M10, %f6 ! FGM Group f4 available
1338 fmuls %f3, M14, %f9 ! FGM Group f7 available
1339 fadds %f4, %f7, %f4 ! FGA
1340 st %f4, [%g2 + 0x00] ! LSU
1341 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1342 st %f5, [%g2 + 0x04] ! LSU
1343 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1344 st %f6, [%g2 + 0x08] ! LSU
/* The fourth component passes through untransformed. */
1345 st %f3, [%g2 + 0x0c] ! LSU Group
1346 cmp %o1, %g3 ! IEU1
1347 bne 1b ! CTI
1348 add %g2, 0x10, %g2 ! IEU0 Group
1349 2:
/* Tail-branch: __set_v4f_4 performs the return to the caller. */
1350 ba __set_v4f_4
1351 nop
1352
/*
 * _mesa_sparc_transform_points4_perspective
 *
 * For every source element in = (in0, in1, in2, in3) writes
 *     out0 = in0*M0  + in2*M8
 *     out1 = in1*M5  + in2*M9
 *     out2 = in2*M10 + in3*M14
 *     out3 = -in2
 *
 * In:   %o0 = destination vector descriptor (V4F_START read,
 *             V4F_COUNT written)
 *       %o1 = argument handed to LDMATRIX_0_5_8_9_10_14; afterwards
 *             cleared and reused as the loop counter
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT read)
 * Regs: %o5 = source stride, %g1 = source cursor, %g2 = destination
 *       cursor, %g3 = element count, %f0-%f3 = input element,
 *       %f4-%f6 = sums for out0-out2, %f7-%f9 = second-term products
 *       (%f7 is later reused for the negated in2).
 * Exit: branches to __set_v4f_4 (sets V4F_SIZE to 4, ORs VEC_SIZE_4
 *       into V4F_FLAGS, returns with retl).
 *
 * NOTE(review): LDMATRIX_* and the Mn names come from sparc_matrix.h,
 * which is not visible here; presumably the macro loads the listed
 * matrix elements from the matrix at %o1 -- confirm against the header.
 * NOTE(review): the trailing '!' notes look like functional-unit /
 * issue-group scheduling annotations; keep the instruction order
 * unless the scheduling is re-derived.
 */
1353 .globl _mesa_sparc_transform_points4_perspective
1354 _mesa_sparc_transform_points4_perspective:
1355 ld [%o2 + V4F_STRIDE], %o5
1356 LDPTR [%o2 + V4F_START], %g1
1357 LDPTR [%o0 + V4F_START], %g2
1358 ld [%o2 + V4F_COUNT], %g3
1359
1360 LDMATRIX_0_5_8_9_10_14(%o1)
1361
/*
 * Copy the count to the destination and skip the loop when it is zero.
 * clr %o1 is in the delay slot, so the counter is zeroed on both paths.
 */
1362 cmp %g3, 0
1363 st %g3, [%o0 + V4F_COUNT]
1364 be 2f
1365 clr %o1
1366
/* Loop body: one input element per iteration, 0x10 bytes of output. */
1367 1: ld [%g1 + 0x00], %f0 ! LSU Group
1368 ld [%g1 + 0x04], %f1 ! LSU Group
1369 ld [%g1 + 0x08], %f2 ! LSU Group
1370 ld [%g1 + 0x0c], %f3 ! LSU Group
1371 add %o1, 1, %o1 ! IEU0
1372 add %g1, %o5, %g1 ! IEU1
1373 fmuls %f0, M0, %f4 ! FGM
1374 fmuls %f2, M8, %f7 ! FGM Group
1375 fmuls %f1, M5, %f5 ! FGM Group
1376 fmuls %f2, M9, %f8 ! FGM Group
1377 fmuls %f2, M10, %f6 ! FGM Group f4 available
1378 fmuls %f3, M14, %f9 ! FGM Group f7 available
1379 fadds %f4, %f7, %f4 ! FGA
1380 st %f4, [%g2 + 0x00] ! LSU
1381 fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
1382 st %f5, [%g2 + 0x04] ! LSU
1383 fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
1384 st %f6, [%g2 + 0x08] ! LSU
/*
 * The fourth output is the negated third input.  %f7 is free to be
 * overwritten here because its product was already consumed by the
 * first fadds above.
 */
1385 fnegs %f2, %f7 ! FGA Group
1386 st %f7, [%g2 + 0x0c] ! LSU
1387 cmp %o1, %g3 ! IEU1
1388 bne 1b ! CTI
1389 add %g2, 0x10, %g2 ! IEU0 Group
1390 2:
/* Tail-branch: __set_v4f_4 performs the return to the caller. */
1391 ba __set_v4f_4
1392 nop