2 #include "sparc_matrix.h"
4 #if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
5 /* Solaris requires this for 64-bit. */
6 .register %g2, #scratch
7 .register %g3, #scratch
13 #define STACK_VAR_OFF (2047 + (8 * 16))
15 #define STACK_VAR_OFF (4 * 16)
18 /* Newton-Raphson approximation turns out to be slower
19  * (and less accurate) than direct fsqrts/fdivs.
 */
21 #define ONE_DOT_ZERO 0x3f800000
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_normalize_normals
 *
 * For each 3-float input normal (ux,uy,uz): multiply by the 3x3
 * upper-left of mat->inv (loaded via LDMATRIX_0_1_2_4_5_6_8_9_10),
 * then rescale the result — by 1/sqrt(tx^2+ty^2+tz^2) in the
 * "LENGTHS == NULL" loop (label 1), or using the caller-supplied
 * lengths array in the label 4 loop.
 *
 * Register map (established below):
 *   %o0 = mat->inv        %o5 = input ptr (in->start)
 *   %g1 = in->count       %g2 = in->stride (after constant setup)
 *   %g3 = output ptr      %o4 = loop index i
 *   %f12 = 1.0f           %f15 = 'scale' argument
 *
 * NOTE(review): this excerpt has gaps in the embedded original line
 * numbers; the lengths==NULL test/branch to 4f, the fmuls applying the
 * 1/sqrt (resp. lengths[i]) factor before each store, the loop-closing
 * branches (bne 1b / bne 4b) and the retl/nop epilogues appear to have
 * been elided — confirm against the complete source file.
 */
23 .globl _mesa_sparc_transform_normalize_normals
24 _mesa_sparc_transform_normalize_normals:
25 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize 1.0f and the float 'scale' arg into FP regs by
 * bouncing them through the stack scratch area (no int->FP moves). */
27 sethi %hi(ONE_DOT_ZERO), %g2
29 st %g2, [%sp + STACK_VAR_OFF+0x0]
30 st %o1, [%sp + STACK_VAR_OFF+0x4]
31 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
32 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
35 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
36 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
37 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
38 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
39 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Load the nine rotation-relevant matrix entries into M0..M10 regs. */
41 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
43 /* dest->count = in->count */
44 st %g1, [%o4 + V4F_COUNT]
/* NOTE(review): the "lengths == NULL?" check selecting label 1 vs
 * label 4 is not visible here (original lines 45-49 missing). */
50 clr %o4 ! 'i' for STRIDE_LOOP
52 1: /* LENGTHS == NULL */
53 ld [%o5 + 0x00], %f0 ! ux = from[0]
54 ld [%o5 + 0x04], %f1 ! uy = from[1]
55 ld [%o5 + 0x08], %f2 ! uz = from[2]
56 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
59 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
60 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
61 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
/* Multiplies and adds are interleaved to hide FGM/FGA latency;
 * do not reorder — the "available" notes track result readiness. */
63 fmuls %f0, M0, %f3 ! FGM Group
64 fmuls %f1, M1, %f4 ! FGM Group
65 fmuls %f0, M4, %f5 ! FGM Group
66 fmuls %f1, M5, %f6 ! FGM Group
67 fmuls %f0, M8, %f7 ! FGM Group f3 available
68 fmuls %f1, M9, %f8 ! FGM Group f4 available
69 fadds %f3, %f4, %f3 ! FGA
70 fmuls %f2, M2, %f10 ! FGM Group f5 available
71 fmuls %f2, M6, %f0 ! FGM Group f6 available
72 fadds %f5, %f6, %f5 ! FGA
73 fmuls %f2, M10, %f4 ! FGM Group f7 available
74 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
75 fadds %f3, %f10, %f3 ! FGA Group f10 available
76 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
77 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
79 /* f3=tx, f5=ty, f7=tz */
81 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
82 fmuls %f3, %f3, %f6 ! FGM Group f3 available
83 fmuls %f5, %f5, %f8 ! FGM Group f5 available
84 fmuls %f7, %f7, %f10 ! FGM Group f7 available
85 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
86 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
88 /* scale (f6) = 1.0 / sqrt(len) */
89 fsqrts %f6, %f6 ! FDIV 20 cycles
90 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the fmuls applying %f6 to tx/ty/tz before these
 * stores (per the "tx * scale" comments) are missing from this
 * excerpt (original lines 92/94/96/98). */
93 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
95 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
97 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
99 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): the bne 1b between cmp and this delay-slot-style add,
 * and the retl/nop after it, are missing (original lines 100, 102+). */
101 add %g3, 0x0c, %g3 ! advance out vector pointer
106 4: /* LENGTHS != NULL */
/* Same matrix transform as label 1, but the final per-normal scale
 * comes from lengths[i] (walked via %o3) instead of 1/sqrt(len). */
118 ld [%o5 + 0x00], %f0 ! ux = from[0]
119 ld [%o5 + 0x04], %f1 ! uy = from[1]
120 ld [%o5 + 0x08], %f2 ! uz = from[2]
121 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
122 add %o4, 1, %o4 ! i++
124 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
125 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
126 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
128 fmuls %f0, M0, %f3 ! FGM Group
129 fmuls %f1, M1, %f4 ! FGM Group
130 fmuls %f0, M4, %f5 ! FGM Group
131 fmuls %f1, M5, %f6 ! FGM Group
132 fmuls %f0, M8, %f7 ! FGM Group f3 available
133 fmuls %f1, M9, %f8 ! FGM Group f4 available
134 fadds %f3, %f4, %f3 ! FGA
135 fmuls %f2, M2, %f10 ! FGM Group f5 available
136 fmuls %f2, M6, %f0 ! FGM Group f6 available
137 fadds %f5, %f6, %f5 ! FGA
138 fmuls %f2, M10, %f4 ! FGM Group f7 available
139 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
140 fadds %f3, %f10, %f3 ! FGA Group f10 available
142 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
143 add %o3, 4, %o3 ! IEU0
144 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
146 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): the load of lengths[i] into %f13 and the fmuls
 * applying it before these stores are not visible in this excerpt. */
149 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
151 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
153 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
155 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 4b and the retl/nop epilogue are missing here. */
157 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Variant of _mesa_sparc_transform_normalize_normals for matrices with
 * no rotation component: only the diagonal entries m0/m5/m10 are used
 * (tx = ux*m0, ty = uy*m5, tz = uz*m10), then the result is normalized
 * (label 1) or scaled by lengths[i] (label 4).
 *
 * Register map: %o0 = mat->inv, %o5 = in ptr, %g1 = count,
 * %g2 = stride, %g3 = out ptr, %o4 = i, %f12 = 1.0f, %f15 = scale.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * elided lines: the LDMATRIX_0_5_10 diagonal-load macro, the
 * lengths==NULL test/branch, the scale-applying fmuls before each
 * store, the loop branches, and the retl/nop epilogues — confirm
 * against the complete source file.
 */
162 .globl _mesa_sparc_transform_normalize_normals_no_rot
163 _mesa_sparc_transform_normalize_normals_no_rot:
164 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize 1.0f and 'scale' in FP regs via the stack scratch area. */
166 sethi %hi(ONE_DOT_ZERO), %g2
168 st %g2, [%sp + STACK_VAR_OFF+0x0]
169 st %o1, [%sp + STACK_VAR_OFF+0x4]
170 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
171 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
174 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
175 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
176 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
177 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
178 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): the diagonal matrix load (LDMATRIX_0_5_10) expected
 * here is missing from this excerpt (original lines 179-181). */
182 /* dest->count = in->count */
183 st %g1, [%o4 + V4F_COUNT]
189 clr %o4 ! 'i' for STRIDE_LOOP
191 1: /* LENGTHS == NULL */
192 ld [%o5 + 0x00], %f0 ! ux = from[0]
193 ld [%o5 + 0x04], %f1 ! uy = from[1]
194 ld [%o5 + 0x08], %f2 ! uz = from[2]
195 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
196 add %o4, 1, %o4 ! i++
198 /* tx (f3) = (ux * m0)
199 * ty (f5) = (uy * m5)
200 * tz (f7) = (uz * m10)
202 fmuls %f0, M0, %f3 ! FGM Group
203 fmuls %f1, M5, %f5 ! FGM Group
204 fmuls %f2, M10, %f7 ! FGM Group
206 /* f3=tx, f5=ty, f7=tz */
208 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
209 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
210 fmuls %f5, %f5, %f8 ! FGM Group f5 available
211 fmuls %f7, %f7, %f10 ! FGM Group f7 available
212 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
213 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
215 /* scale (f6) = 1.0 / sqrt(len) */
216 fsqrts %f6, %f6 ! FDIV 20 cycles
217 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the fmuls applying %f6 to tx/ty/tz before the stores
 * (per the "tx * scale" comments) are missing from this excerpt. */
220 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
222 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
224 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
226 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b and the retl/nop epilogue are missing here. */
228 add %g3, 0x0c, %g3 ! advance out vector pointer
233 4: /* LENGTHS != NULL */
/* Diagonal transform, final scale taken from lengths[i] (via %o3). */
239 ld [%o5 + 0x00], %f0 ! ux = from[0]
240 ld [%o5 + 0x04], %f1 ! uy = from[1]
241 ld [%o5 + 0x08], %f2 ! uz = from[2]
242 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
243 add %o4, 1, %o4 ! i++
245 /* tx (f3) = (ux * m0)
246 * ty (f5) = (uy * m5)
247 * tz (f7) = (uz * m10)
249 fmuls %f0, M0, %f3 ! FGM Group
/* NOTE(review): the load of lengths[i] into %f13 (original line 250)
 * is not visible in this excerpt. */
251 fmuls %f1, M5, %f5 ! FGM Group
252 add %o3, 4, %o3 ! IEU0
253 fmuls %f2, M10, %f7 ! FGM Group
255 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): the fmuls applying %f13 before these stores are
 * missing from this excerpt. */
258 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
260 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
262 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
264 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 4b and the retl/nop epilogue are missing here. */
266 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Diagonal-only transform (tx = ux*m0, ty = uy*m5, tz = uz*m10) with a
 * uniform rescale by the 'scale' argument (%f15); no per-normal
 * normalization and no lengths array is consulted.
 *
 * Register map: %o0 = mat->inv, %o5 = in ptr, %g1 = count,
 * %g2 = stride, %g3 = out ptr, %o4 = i, %f15 = scale.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * the diagonal matrix load (LDMATRIX_0_5_10), any fmuls applying %f15
 * (per the function's "rescale" purpose), the loop branch bne 1b, and
 * the retl/nop epilogue were elided from this excerpt — confirm
 * against the complete source file.
 */
271 .globl _mesa_sparc_transform_rescale_normals_no_rot
272 _mesa_sparc_transform_rescale_normals_no_rot:
273 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize the float 'scale' arg in %f15 via the stack scratch. */
275 st %o1, [%sp + STACK_VAR_OFF+0x0]
276 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
279 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
280 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
281 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
282 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
283 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
287 /* dest->count = in->count */
288 st %g1, [%o4 + V4F_COUNT]
292 clr %o4 ! 'i' for STRIDE_LOOP
298 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
299 ld [%o5 + 0x04], %f1 ! uy = from[1]
300 ld [%o5 + 0x08], %f2 ! uz = from[2]
301 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
302 add %o4, 1, %o4 ! i++
304 /* tx (f3) = (ux * m0)
305 * ty (f5) = (uy * m5)
306 * tz (f7) = (uz * m10)
/* Each product is stored as soon as it is computed (FGM/LSU pairing). */
308 fmuls %f0, M0, %f3 ! FGM Group
309 st %f3, [%g3 + 0x00] ! LSU
310 fmuls %f1, M5, %f5 ! FGM Group
311 st %f5, [%g3 + 0x04] ! LSU
312 fmuls %f2, M10, %f7 ! FGM Group
313 st %f7, [%g3 + 0x08] ! LSU
315 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b (original line 316) and the retl/nop epilogue
 * are missing from this excerpt. */
317 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_rescale_normals
 *
 * Full 3x3 transform of each normal by mat->inv (rows 0/1/2 x columns)
 * with a uniform rescale by the 'scale' argument (%f15); the lengths
 * array is not consulted.
 *
 * Register map: %o0 = mat->inv, %o5 = in ptr, %g1 = count,
 * %g2 = stride, %g3 = out ptr, %o4 = i, %f15 = scale.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * elided lines; in particular no fmuls applying %f15 is visible before
 * the stores despite the function's "rescale" purpose, and the loop
 * branch bne 1b / retl/nop epilogue are missing — confirm against the
 * complete source file.
 */
322 .globl _mesa_sparc_transform_rescale_normals
323 _mesa_sparc_transform_rescale_normals:
324 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize the float 'scale' arg in %f15 via the stack scratch. */
326 st %o1, [%sp + STACK_VAR_OFF+0x0]
327 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
330 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
331 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
332 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
333 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
334 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Load the nine rotation-relevant matrix entries into M0..M10 regs. */
336 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
338 /* dest->count = in->count */
339 st %g1, [%o4 + V4F_COUNT]
343 clr %o4 ! 'i' for STRIDE_LOOP
355 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
356 ld [%o5 + 0x04], %f1 ! uy = from[1]
357 ld [%o5 + 0x08], %f2 ! uz = from[2]
358 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
359 add %o4, 1, %o4 ! i++
/* tx/ty/tz accumulate in f3/f5/f7; instructions are interleaved to
 * hide FGM/FGA latency — do not reorder. */
361 fmuls %f0, M0, %f3 ! FGM Group
362 fmuls %f1, M1, %f4 ! FGM Group
363 fmuls %f0, M4, %f5 ! FGM Group
364 fmuls %f1, M5, %f6 ! FGM Group
365 fmuls %f0, M8, %f7 ! FGM Group f3 available
366 fmuls %f1, M9, %f8 ! FGM Group f4 available
367 fadds %f3, %f4, %f3 ! FGA
368 fmuls %f2, M2, %f10 ! FGM Group f5 available
369 fmuls %f2, M6, %f0 ! FGM Group f6 available
370 fadds %f5, %f6, %f5 ! FGA
371 fmuls %f2, M10, %f4 ! FGM Group f7 available
372 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
373 fadds %f3, %f10, %f3 ! FGA Group f10 available
374 st %f3, [%g3 + 0x00] ! LSU
375 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
376 st %f5, [%g3 + 0x04] ! LSU
377 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
378 st %f7, [%g3 + 0x08] ! LSU
380 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b (original line 381) and the retl/nop epilogue
 * are missing from this excerpt. */
382 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_normals_no_rot
 *
 * Diagonal-only transform: out[i] = (ux*m0, uy*m5, uz*m10) from
 * mat->inv, with no normalization and no rescale (the scale and
 * lengths arguments are unused here).
 *
 * Register map: %o0 = mat->inv, %o5 = in ptr, %g1 = count,
 * %g2 = stride, %g3 = out ptr, %o4 = i.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * the diagonal matrix load (LDMATRIX_0_5_10), the loop branch bne 1b,
 * and the retl/nop epilogue were elided from this excerpt — confirm
 * against the complete source file.
 */
387 .globl _mesa_sparc_transform_normals_no_rot
388 _mesa_sparc_transform_normals_no_rot:
389 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
390 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
391 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
392 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
393 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
394 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
398 /* dest->count = in->count */
399 st %g1, [%o4 + V4F_COUNT]
403 clr %o4 ! 'i' for STRIDE_LOOP
405 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
406 ld [%o5 + 0x04], %f1 ! uy = from[1]
407 ld [%o5 + 0x08], %f2 ! uz = from[2]
408 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
409 add %o4, 1, %o4 ! i++
411 /* tx (f3) = (ux * m0)
412 * ty (f5) = (uy * m5)
413 * tz (f7) = (uz * m10)
/* Each product is stored as soon as computed (FGM/LSU pairing). */
415 fmuls %f0, M0, %f3 ! FGM Group
416 st %f3, [%g3 + 0x00] ! LSU
417 fmuls %f1, M5, %f5 ! FGM Group
418 st %f5, [%g3 + 0x04] ! LSU
419 fmuls %f2, M10, %f7 ! FGM Group
420 st %f7, [%g3 + 0x08] ! LSU
422 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b (original line 423) and the retl/nop epilogue
 * are missing from this excerpt. */
424 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_transform_normals
 *
 * Full 3x3 transform of each normal by mat->inv (rows 0/1/2 x
 * columns), with no normalization and no rescale (the scale and
 * lengths arguments are unused here).
 *
 * Register map: %o0 = mat->inv, %o5 = in ptr, %g1 = count,
 * %g2 = stride, %g3 = out ptr, %o4 = i.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * the loop branch bne 1b and the retl/nop epilogue were elided from
 * this excerpt — confirm against the complete source file.
 */
429 .globl _mesa_sparc_transform_normals
430 _mesa_sparc_transform_normals:
431 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
432 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
433 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
434 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
435 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
436 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Load the nine rotation-relevant matrix entries into M0..M10 regs. */
438 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
440 /* dest->count = in->count */
441 st %g1, [%o4 + V4F_COUNT]
445 clr %o4 ! 'i' for STRIDE_LOOP
447 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
448 ld [%o5 + 0x04], %f1 ! uy = from[1]
449 ld [%o5 + 0x08], %f2 ! uz = from[2]
450 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
451 add %o4, 1, %o4 ! i++
/* tx/ty/tz accumulate in f3/f5/f7; interleaved for FGM/FGA latency —
 * do not reorder. */
453 fmuls %f0, M0, %f3 ! FGM Group
454 fmuls %f1, M1, %f4 ! FGM Group
455 fmuls %f0, M4, %f5 ! FGM Group
456 fmuls %f1, M5, %f6 ! FGM Group
457 fmuls %f0, M8, %f7 ! FGM Group f3 available
458 fmuls %f1, M9, %f8 ! FGM Group f4 available
459 fadds %f3, %f4, %f3 ! FGA
460 fmuls %f2, M2, %f10 ! FGM Group f5 available
461 fmuls %f2, M6, %f0 ! FGM Group f6 available
462 fadds %f5, %f6, %f5 ! FGA
463 fmuls %f2, M10, %f4 ! FGM Group f7 available
464 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
465 fadds %f3, %f10, %f3 ! FGA Group f10 available
466 st %f3, [%g3 + 0x00] ! LSU
467 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
468 st %f5, [%g3 + 0x04] ! LSU
469 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
470 st %f7, [%g3 + 0x08] ! LSU
472 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b (original line 473) and the retl/nop epilogue
 * are missing from this excerpt. */
474 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_normalize_normals
 *
 * No matrix transform: each input normal is copied into f3/f5/f7 and
 * rescaled to unit length via 1/sqrt(len) (label 1, lengths==NULL) or
 * by the caller-supplied lengths array (label 4).
 *
 * Register map: %o5 = in ptr, %g1 = count, %g2 = stride (after
 * constant setup), %g3 = out ptr, %o4 = i, %f12 = 1.0f.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * elided lines: the lengths==NULL test/branch to 4f, the fmuls
 * applying the 1/sqrt (resp. %f13 = lengths[i]) factor before each
 * store, the loop branches, and the retl/nop epilogues — confirm
 * against the complete source file.
 */
479 .globl _mesa_sparc_normalize_normals
480 _mesa_sparc_normalize_normals:
481 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize 1.0f in %f12 via the stack scratch area. */
483 sethi %hi(ONE_DOT_ZERO), %g2
485 st %g2, [%sp + STACK_VAR_OFF+0x0]
486 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
489 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
490 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
491 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
492 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
494 /* dest->count = in->count */
495 st %g1, [%o4 + V4F_COUNT]
501 clr %o4 ! 'i' for STRIDE_LOOP
503 1: /* LENGTHS == NULL */
504 ld [%o5 + 0x00], %f3 ! ux = from[0]
505 ld [%o5 + 0x04], %f5 ! uy = from[1]
506 ld [%o5 + 0x08], %f7 ! uz = from[2]
507 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
508 add %o4, 1, %o4 ! i++
510 /* f3=tx, f5=ty, f7=tz */
512 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
513 fmuls %f3, %f3, %f6 ! FGM Group f3 available
514 fmuls %f5, %f5, %f8 ! FGM Group f5 available
515 fmuls %f7, %f7, %f10 ! FGM Group f7 available
516 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
517 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
519 /* scale (f6) = 1.0 / sqrt(len) */
520 fsqrts %f6, %f6 ! FDIV 20 cycles
521 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the fmuls applying %f6 to tx/ty/tz before these
 * stores (per the "tx * scale" comments) are missing here. */
524 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
526 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
528 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
530 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b and the retl/nop epilogue are missing here. */
532 add %g3, 0x0c, %g3 ! advance out vector pointer
537 4: /* LENGTHS != NULL */
/* Copy the normal; final scale taken from lengths[i] (via %o3). */
540 ld [%o5 + 0x00], %f3 ! ux = from[0]
541 ld [%o5 + 0x04], %f5 ! uy = from[1]
542 ld [%o5 + 0x08], %f7 ! uz = from[2]
543 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
544 add %o4, 1, %o4 ! i++
/* NOTE(review): the load of lengths[i] into %f13 (original line
 * ~546) is not visible in this excerpt. */
547 add %o3, 4, %o3 ! IEU0
549 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): the fmuls applying %f13 before these stores are
 * missing from this excerpt. */
552 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
554 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
556 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
558 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 4b and the retl/nop epilogue are missing here. */
560 add %g3, 0x0c, %g3 ! advance out vector pointer
/*----------------------------------------------------------------------
 * _mesa_sparc_rescale_normals
 *
 * No matrix transform: each input normal is copied into f3/f5/f7 and
 * uniformly rescaled by the 'scale' argument (%f15); the lengths
 * array is not consulted.
 *
 * Register map: %o5 = in ptr, %g1 = count, %g2 = stride, %g3 = out
 * ptr, %o4 = i, %f15 = scale.
 *
 * NOTE(review): gaps in the embedded original line numbers indicate
 * elided lines: the loop label "1:" itself, the fmuls applying %f15
 * before each store (per the "tx * scale" comments), the loop branch
 * bne 1b, and the retl/nop epilogue — confirm against the complete
 * source file.
 */
565 .globl _mesa_sparc_rescale_normals
566 _mesa_sparc_rescale_normals:
567 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Materialize the float 'scale' arg in %f15 via the stack scratch.
 * (The sethi of ONE_DOT_ZERO below appears unused in this excerpt —
 * possibly leftover from the normalize variants; confirm upstream.) */
569 sethi %hi(ONE_DOT_ZERO), %g2
571 st %o1, [%sp + STACK_VAR_OFF+0x0]
572 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
575 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
576 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
577 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
578 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
580 /* dest->count = in->count */
581 st %g1, [%o4 + V4F_COUNT]
585 clr %o4 ! 'i' for STRIDE_LOOP
/* NOTE(review): the "1:" loop label expected before this load is
 * missing from this excerpt (original line ~587). */
588 ld [%o5 + 0x00], %f3 ! ux = from[0]
589 ld [%o5 + 0x04], %f5 ! uy = from[1]
590 ld [%o5 + 0x08], %f7 ! uz = from[2]
591 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
592 add %o4, 1, %o4 ! i++
594 /* f3=tx, f5=ty, f7=tz */
/* NOTE(review): the fmuls applying %f15 before these stores are
 * missing from this excerpt. */
597 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
599 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
601 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
603 cmp %o4, %g1 ! continue if (i < count)
/* NOTE(review): bne 1b and the retl/nop epilogue are missing here. */
605 add %g3, 0x0c, %g3 ! advance out vector pointer