2 #include "sparc_matrix.h"
/* Declare %g2 and %g3 as scratch registers; the routines below use them
 * for the input stride (g2) and the output cursor (g3).
 */
4 .register %g2, #scratch
5 .register %g3, #scratch
/* STACK_VAR_OFF is the offset from %sp of the scratch slot the routines
 * use to move values from integer registers into FP registers.
 * NOTE(review): it is defined twice in a row below with no conditional
 * visible between the two definitions; presumably they belong to two
 * alternative build configurations (the 2047 term matches the SPARC V9
 * 64-bit stack bias) -- confirm against the full file.
 */
10 #define STACK_VAR_OFF (2047 + (8 * 16))
12 #define STACK_VAR_OFF (4 * 16)
15 /* Newton-Raphson approximation turns out to be slower
16 * (and less accurate) than direct fsqrts/fdivs.
! NOTE(review): no terminator for the block comment above is visible here.
! 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0; the
! routines below load it into f12 as the numerator of 1.0 / sqrt(len).
18 #define ONE_DOT_ZERO 0x3f800000
! _mesa_sparc_transform_normalize_normals
!
! Register arguments (as listed in the comment under the label):
!   o0 = mat, o1 = scale, o2 = in, o3 = lengths, o4 = dest
!
! For every input element the first three floats are multiplied by the
! matrix entries named M0..M10 (taken from mat->inv) and three floats are
! stored to the destination, whose cursor advances 0x10 bytes per element.
! Two loops are visible: label 1 (lengths == NULL), which also computes
! 1.0 / sqrt(tx^2 + ty^2 + tz^2), and label 4 (lengths != NULL).
!
! Register roles established below:
!   o5 = input cursor      g2 = input stride (added as a byte offset)
!   g3 = output cursor     g1 = element count
!   o4 = loop index i      f12 = 1.0f      f15 = scale
20 .globl _mesa_sparc_transform_normalize_normals
21 _mesa_sparc_transform_normalize_normals:
22 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Build 1.0f in g2 (sethi alone suffices because the low 10 bits of
 * ONE_DOT_ZERO are zero), then move it and the scale argument from the
 * integer registers into f12/f15 by storing to a stack slot and loading
 * the slot back with FP loads.
 * NOTE(review): no adjustment of %sp is visible around these stack
 * accesses -- confirm the slot at STACK_VAR_OFF is safe to overwrite.
 */
24 sethi %hi(ONE_DOT_ZERO), %g2
26 st %g2, [%sp + STACK_VAR_OFF+0x0]
27 st %o1, [%sp + STACK_VAR_OFF+0x4]
28 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
29 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
/* Load the working set: matrix pointer, input cursor, element count,
 * input stride and output cursor.
 */
32 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
33 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
34 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
35 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
36 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): this macro is defined outside this file; presumably it
 * loads matrix elements 0,1,2,4,5,6,8,9,10 into the FP registers that
 * the M0..M10 names refer to -- confirm in sparc_matrix.h.
 */
38 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
40 /* dest->count = in->count */
41 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is not read again after the count store, so it is reused as
 * the loop index.
 * NOTE(review): the instructions that choose between the two loops
 * (lengths == NULL vs. lengths != NULL) are not visible here.
 */
47 clr %o4 ! 'i' for STRIDE_LOOP
49 1: /* LENGTHS == NULL */
50 ld [%o5 + 0x00], %f0 ! ux = from[0]
51 ld [%o5 + 0x04], %f1 ! uy = from[1]
52 ld [%o5 + 0x08], %f2 ! uz = from[2]
53 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
/* NOTE(review): unlike the loop at label 4, no increment of the index
 * o4 is visible in this loop -- confirm against the full file.
 */
56 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
57 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
58 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
! NOTE(review): no terminator for the block comment above is visible here.
! Nine products and six sums follow, interleaved.  f0 and f4 are reused
! as temporaries once their first values have been consumed: f0 (ux) is
! overwritten by the uz-times-M6 product and f4 by the uz-times-M10 one.
60 fmuls %f0, M0, %f3 ! FGM Group
61 fmuls %f1, M1, %f4 ! FGM Group
62 fmuls %f0, M4, %f5 ! FGM Group
63 fmuls %f1, M5, %f6 ! FGM Group
64 fmuls %f0, M8, %f7 ! FGM Group f3 available
65 fmuls %f1, M9, %f8 ! FGM Group f4 available
66 fadds %f3, %f4, %f3 ! FGA
67 fmuls %f2, M2, %f10 ! FGM Group f5 available
68 fmuls %f2, M6, %f0 ! FGM Group f6 available
69 fadds %f5, %f6, %f5 ! FGA
70 fmuls %f2, M10, %f4 ! FGM Group f7 available
71 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
72 fadds %f3, %f10, %f3 ! FGA Group f10 available
73 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
74 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
76 /* f3=tx, f5=ty, f7=tz */
78 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
79 fmuls %f3, %f3, %f6 ! FGM Group f3 available
80 fmuls %f5, %f5, %f8 ! FGM Group f5 available
81 fmuls %f7, %f7, %f10 ! FGM Group f7 available
82 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
83 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
85 /* scale (f6) = 1.0 / sqrt(len) */
86 fsqrts %f6, %f6 ! FDIV 20 cycles
87 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): f6 now holds 1.0 / sqrt(len), but no multiply of
 * f3/f5/f7 by f6 is visible before the stores below even though their
 * comments say "tx * scale" -- confirm against the full file.
 */
90 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
92 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
94 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): the branch that consumes this compare is not visible;
 * presumably the pointer advance below sits in its delay slot -- confirm.
 */
96 cmp %o4, %g1 ! continue if (i < count)
98 add %g3, 0x10, %g3 ! advance out vector pointer
103 4: /* LENGTHS != NULL */
115 ld [%o5 + 0x00], %f0 ! ux = from[0]
116 ld [%o5 + 0x04], %f1 ! uy = from[1]
117 ld [%o5 + 0x08], %f2 ! uz = from[2]
118 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
119 add %o4, 1, %o4 ! i++
121 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
122 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
123 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
125 fmuls %f0, M0, %f3 ! FGM Group
126 fmuls %f1, M1, %f4 ! FGM Group
127 fmuls %f0, M4, %f5 ! FGM Group
128 fmuls %f1, M5, %f6 ! FGM Group
129 fmuls %f0, M8, %f7 ! FGM Group f3 available
130 fmuls %f1, M9, %f8 ! FGM Group f4 available
131 fadds %f3, %f4, %f3 ! FGA
132 fmuls %f2, M2, %f10 ! FGM Group f5 available
133 fmuls %f2, M6, %f0 ! FGM Group f6 available
134 fadds %f5, %f6, %f5 ! FGA
135 fmuls %f2, M10, %f4 ! FGM Group f7 available
136 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
137 fadds %f3, %f10, %f3 ! FGA Group f10 available
139 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
! The lengths pointer is stepped by 4 bytes per element.
140 add %o3, 4, %o3 ! IEU0
141 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
143 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): on this path no load into f13, no read of the scale in
 * f15 and no multiply before the stores are visible, although the
 * comments refer to f13 and to "tx * len" -- confirm against the full file.
 */
146 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
148 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
150 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
/* NOTE(review): neither the loop branch nor the return is visible after
 * this compare and pointer advance.
 */
152 cmp %o4, %g1 ! continue if (i < count)
154 add %g3, 0x10, %g3 ! advance out vector pointer
/* _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
 * o4 = dest.
 *
 * Variant that only uses the diagonal entries M0, M5 and M10: each input
 * component is multiplied by one matrix entry.  The loop at label 1
 * (lengths == NULL) additionally computes 1.0 / sqrt(tx^2 + ty^2 + tz^2);
 * the loop at label 4 (lengths != NULL) steps the lengths pointer instead.
 * Three floats are stored per element and the output cursor advances by
 * 0x10 bytes.
 *
 * Register roles: o5 = input cursor, g2 = input stride (byte offset),
 * g3 = output cursor, g1 = count, o4 = loop index, f12 = 1.0f, f15 = scale.
 */
159 .globl _mesa_sparc_transform_normalize_normals_no_rot
160 _mesa_sparc_transform_normalize_normals_no_rot:
161 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Build 1.0f with sethi (the low 10 bits of ONE_DOT_ZERO are zero) and
 * pass it and the scale through a stack slot to reach f12 and f15.
 * NOTE(review): no adjustment of %sp is visible around these accesses.
 */
163 sethi %hi(ONE_DOT_ZERO), %g2
165 st %g2, [%sp + STACK_VAR_OFF+0x0]
166 st %o1, [%sp + STACK_VAR_OFF+0x4]
167 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
168 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
171 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
172 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
173 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
174 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
175 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): no matrix load is visible in this routine although M0,
 * M5 and M10 are used below -- confirm against the full file.
 */
179 /* dest->count = in->count */
180 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on.
 * NOTE(review): the instructions selecting between label 1 and label 4
 * are not visible here.
 */
186 clr %o4 ! 'i' for STRIDE_LOOP
188 1: /* LENGTHS == NULL */
189 ld [%o5 + 0x00], %f0 ! ux = from[0]
190 ld [%o5 + 0x04], %f1 ! uy = from[1]
191 ld [%o5 + 0x08], %f2 ! uz = from[2]
192 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
193 add %o4, 1, %o4 ! i++
195 /* tx (f3) = (ux * m0)
196 * ty (f5) = (uy * m5)
197 * tz (f7) = (uz * m10)
199 fmuls %f0, M0, %f3 ! FGM Group
200 fmuls %f1, M5, %f5 ! FGM Group
201 fmuls %f2, M10, %f7 ! FGM Group
203 /* f3=tx, f5=ty, f7=tz */
205 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
206 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
207 fmuls %f5, %f5, %f8 ! FGM Group f5 available
208 fmuls %f7, %f7, %f10 ! FGM Group f7 available
209 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
210 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
212 /* scale (f6) = 1.0 / sqrt(len) */
213 fsqrts %f6, %f6 ! FDIV 20 cycles
214 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): no multiply of f3/f5/f7 by f6 is visible before the
 * stores below even though their comments say "tx * scale" -- confirm.
 */
217 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
219 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
221 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): the branch consuming this compare is not visible;
 * presumably the pointer advance sits in its delay slot -- confirm.
 */
223 cmp %o4, %g1 ! continue if (i < count)
225 add %g3, 0x10, %g3 ! advance out vector pointer
230 4: /* LENGTHS != NULL */
236 ld [%o5 + 0x00], %f0 ! ux = from[0]
237 ld [%o5 + 0x04], %f1 ! uy = from[1]
238 ld [%o5 + 0x08], %f2 ! uz = from[2]
239 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
240 add %o4, 1, %o4 ! i++
242 /* tx (f3) = (ux * m0)
243 * ty (f5) = (uy * m5)
244 * tz (f7) = (uz * m10)
246 fmuls %f0, M0, %f3 ! FGM Group
248 fmuls %f1, M5, %f5 ! FGM Group
! The lengths pointer is stepped by 4 bytes per element.
249 add %o3, 4, %o3 ! IEU0
250 fmuls %f2, M10, %f7 ! FGM Group
252 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): no load into f13, no read of f15 and no multiply before
 * the stores are visible on this path although the comments mention f13
 * and "tx * len" -- confirm against the full file.
 */
255 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
257 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
259 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
/* NOTE(review): neither the loop branch nor the return is visible here. */
261 cmp %o4, %g1 ! continue if (i < count)
263 add %g3, 0x10, %g3 ! advance out vector pointer
/* _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
 * o4 = dest.
 *
 * Single loop: each input component is multiplied by one diagonal matrix
 * entry (M0, M5, M10) and stored; the output cursor advances 0x10 bytes
 * per element and the input cursor advances by in->stride bytes.
 *
 * Register roles: o5 = input cursor, g2 = stride, g3 = output cursor,
 * g1 = count, o4 = loop index, f15 = scale.
 */
268 .globl _mesa_sparc_transform_rescale_normals_no_rot
269 _mesa_sparc_transform_rescale_normals_no_rot:
270 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move the scale argument from integer register o1 into f15 by way of a
 * stack slot.
 * NOTE(review): no adjustment of %sp is visible around these accesses,
 * and no instruction reading f15 is visible later in this routine.
 */
272 st %o1, [%sp + STACK_VAR_OFF+0x0]
273 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
276 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
277 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
278 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
279 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
280 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): no matrix load is visible in this routine although M0,
 * M5 and M10 are used below -- confirm against the full file.
 */
284 /* dest->count = in->count */
285 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on. */
289 clr %o4 ! 'i' for STRIDE_LOOP
295 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
296 ld [%o5 + 0x04], %f1 ! uy = from[1]
297 ld [%o5 + 0x08], %f2 ! uz = from[2]
298 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
299 add %o4, 1, %o4 ! i++
301 /* tx (f3) = (ux * m0)
302 * ty (f5) = (uy * m5)
303 * tz (f7) = (uz * m10)
! NOTE(review): no terminator for the block comment above is visible here.
! Each product is stored right after it is issued.
305 fmuls %f0, M0, %f3 ! FGM Group
306 st %f3, [%g3 + 0x00] ! LSU
307 fmuls %f1, M5, %f5 ! FGM Group
308 st %f5, [%g3 + 0x04] ! LSU
309 fmuls %f2, M10, %f7 ! FGM Group
310 st %f7, [%g3 + 0x08] ! LSU
! NOTE(review): neither the loop branch nor the return is visible after
! this compare and pointer advance.
312 cmp %o4, %g1 ! continue if (i < count)
314 add %g3, 0x10, %g3 ! advance out vector pointer
! _mesa_sparc_transform_rescale_normals
!
! Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
! o4 = dest.
!
! Single loop: each input element (three floats) is multiplied by the
! 3x3 set of entries M0..M10 taken from mat->inv and the three sums are
! stored; the output cursor advances 0x10 bytes per element and the input
! cursor advances by in->stride bytes.
!
! Register roles: o5 = input cursor, g2 = stride, g3 = output cursor,
! g1 = count, o4 = loop index, f15 = scale.
319 .globl _mesa_sparc_transform_rescale_normals
320 _mesa_sparc_transform_rescale_normals:
321 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move the scale argument from integer register o1 into f15 by way of a
 * stack slot.
 * NOTE(review): no adjustment of %sp is visible around these accesses,
 * and no instruction reading f15 is visible later in this routine.
 */
323 st %o1, [%sp + STACK_VAR_OFF+0x0]
324 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
327 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
328 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
329 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
330 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
331 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): this macro is defined outside this file; presumably it
 * loads matrix elements 0,1,2,4,5,6,8,9,10 into the FP registers that
 * the M0..M10 names refer to -- confirm in sparc_matrix.h.
 */
333 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
335 /* dest->count = in->count */
336 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on. */
340 clr %o4 ! 'i' for STRIDE_LOOP
352 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
353 ld [%o5 + 0x04], %f1 ! uy = from[1]
354 ld [%o5 + 0x08], %f2 ! uz = from[2]
355 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
356 add %o4, 1, %o4 ! i++
/* Nine products and six sums, interleaved:
 *   f3 = ux*M0 + uy*M1 + uz*M2
 *   f5 = ux*M4 + uy*M5 + uz*M6
 *   f7 = ux*M8 + uy*M9 + uz*M10
 * f0 and f4 are reused as temporaries once their first values have been
 * consumed.  Each result is stored as soon as its final add is issued.
 */
358 fmuls %f0, M0, %f3 ! FGM Group
359 fmuls %f1, M1, %f4 ! FGM Group
360 fmuls %f0, M4, %f5 ! FGM Group
361 fmuls %f1, M5, %f6 ! FGM Group
362 fmuls %f0, M8, %f7 ! FGM Group f3 available
363 fmuls %f1, M9, %f8 ! FGM Group f4 available
364 fadds %f3, %f4, %f3 ! FGA
365 fmuls %f2, M2, %f10 ! FGM Group f5 available
366 fmuls %f2, M6, %f0 ! FGM Group f6 available
367 fadds %f5, %f6, %f5 ! FGA
368 fmuls %f2, M10, %f4 ! FGM Group f7 available
369 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
370 fadds %f3, %f10, %f3 ! FGA Group f10 available
371 st %f3, [%g3 + 0x00] ! LSU
372 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
373 st %f5, [%g3 + 0x04] ! LSU
374 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
375 st %f7, [%g3 + 0x08] ! LSU
/* NOTE(review): neither the loop branch nor the return is visible after
 * this compare and pointer advance.
 */
377 cmp %o4, %g1 ! continue if (i < count)
379 add %g3, 0x10, %g3 ! advance out vector pointer
/* _mesa_sparc_transform_normals_no_rot
 *
 * Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
 * o4 = dest.  Neither o1 nor o3 is read in the visible code.
 *
 * Single loop: each input component is multiplied by one diagonal matrix
 * entry (M0, M5, M10) and stored; the output cursor advances 0x10 bytes
 * per element and the input cursor advances by in->stride bytes.
 *
 * Register roles: o5 = input cursor, g2 = stride, g3 = output cursor,
 * g1 = count, o4 = loop index.
 */
384 .globl _mesa_sparc_transform_normals_no_rot
385 _mesa_sparc_transform_normals_no_rot:
386 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
387 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
388 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
389 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
390 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
391 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): no matrix load is visible in this routine although M0,
 * M5 and M10 are used below -- confirm against the full file.
 */
395 /* dest->count = in->count */
396 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on. */
400 clr %o4 ! 'i' for STRIDE_LOOP
402 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
403 ld [%o5 + 0x04], %f1 ! uy = from[1]
404 ld [%o5 + 0x08], %f2 ! uz = from[2]
405 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
406 add %o4, 1, %o4 ! i++
408 /* tx (f3) = (ux * m0)
409 * ty (f5) = (uy * m5)
410 * tz (f7) = (uz * m10)
! NOTE(review): no terminator for the block comment above is visible here.
! Each product is stored right after it is issued.
412 fmuls %f0, M0, %f3 ! FGM Group
413 st %f3, [%g3 + 0x00] ! LSU
414 fmuls %f1, M5, %f5 ! FGM Group
415 st %f5, [%g3 + 0x04] ! LSU
416 fmuls %f2, M10, %f7 ! FGM Group
417 st %f7, [%g3 + 0x08] ! LSU
! NOTE(review): neither the loop branch nor the return is visible after
! this compare and pointer advance.
419 cmp %o4, %g1 ! continue if (i < count)
421 add %g3, 0x10, %g3 ! advance out vector pointer
! _mesa_sparc_transform_normals
!
! Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
! o4 = dest.  Neither o1 nor o3 is read in the visible code.
!
! Single loop: each input element (three floats) is multiplied by the
! 3x3 set of entries M0..M10 taken from mat->inv and the three sums are
! stored; the output cursor advances 0x10 bytes per element and the input
! cursor advances by in->stride bytes.
!
! Register roles: o5 = input cursor, g2 = stride, g3 = output cursor,
! g1 = count, o4 = loop index.
426 .globl _mesa_sparc_transform_normals
427 _mesa_sparc_transform_normals:
428 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
429 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
430 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
431 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
432 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
433 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* NOTE(review): this macro is defined outside this file; presumably it
 * loads matrix elements 0,1,2,4,5,6,8,9,10 into the FP registers that
 * the M0..M10 names refer to -- confirm in sparc_matrix.h.
 */
435 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
437 /* dest->count = in->count */
438 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on. */
442 clr %o4 ! 'i' for STRIDE_LOOP
444 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
445 ld [%o5 + 0x04], %f1 ! uy = from[1]
446 ld [%o5 + 0x08], %f2 ! uz = from[2]
447 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
448 add %o4, 1, %o4 ! i++
/* Nine products and six sums, interleaved:
 *   f3 = ux*M0 + uy*M1 + uz*M2
 *   f5 = ux*M4 + uy*M5 + uz*M6
 *   f7 = ux*M8 + uy*M9 + uz*M10
 * f0 and f4 are reused as temporaries once their first values have been
 * consumed.  Each result is stored as soon as its final add is issued.
 */
450 fmuls %f0, M0, %f3 ! FGM Group
451 fmuls %f1, M1, %f4 ! FGM Group
452 fmuls %f0, M4, %f5 ! FGM Group
453 fmuls %f1, M5, %f6 ! FGM Group
454 fmuls %f0, M8, %f7 ! FGM Group f3 available
455 fmuls %f1, M9, %f8 ! FGM Group f4 available
456 fadds %f3, %f4, %f3 ! FGA
457 fmuls %f2, M2, %f10 ! FGM Group f5 available
458 fmuls %f2, M6, %f0 ! FGM Group f6 available
459 fadds %f5, %f6, %f5 ! FGA
460 fmuls %f2, M10, %f4 ! FGM Group f7 available
461 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
462 fadds %f3, %f10, %f3 ! FGA Group f10 available
463 st %f3, [%g3 + 0x00] ! LSU
464 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
465 st %f5, [%g3 + 0x04] ! LSU
466 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
467 st %f7, [%g3 + 0x08] ! LSU
/* NOTE(review): neither the loop branch nor the return is visible after
 * this compare and pointer advance.
 */
469 cmp %o4, %g1 ! continue if (i < count)
471 add %g3, 0x10, %g3 ! advance out vector pointer
/* _mesa_sparc_normalize_normals
 *
 * Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
 * o4 = dest.  Neither o0 nor o1 is read in the visible code.
 *
 * No matrix is applied: the three input floats are loaded straight into
 * f3/f5/f7.  The loop at label 1 (lengths == NULL) computes
 * 1.0 / sqrt(tx^2 + ty^2 + tz^2); the loop at label 4 (lengths != NULL)
 * steps the lengths pointer instead.  Three floats are stored per element
 * and the output cursor advances by 0x10 bytes.
 *
 * Register roles: o5 = input cursor, g2 = stride (byte offset),
 * g3 = output cursor, g1 = count, o4 = loop index, f12 = 1.0f.
 */
476 .globl _mesa_sparc_normalize_normals
477 _mesa_sparc_normalize_normals:
478 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Build 1.0f with sethi (the low 10 bits of ONE_DOT_ZERO are zero) and
 * pass it through a stack slot to reach f12.  g2 is free to be reloaded
 * with the stride afterwards because the constant has been stored.
 * NOTE(review): no adjustment of %sp is visible around these accesses.
 */
480 sethi %hi(ONE_DOT_ZERO), %g2
482 st %g2, [%sp + STACK_VAR_OFF+0x0]
483 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
486 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
487 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
488 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
489 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
491 /* dest->count = in->count */
492 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on.
 * NOTE(review): the instructions selecting between label 1 and label 4
 * are not visible here.
 */
498 clr %o4 ! 'i' for STRIDE_LOOP
500 1: /* LENGTHS == NULL */
501 ld [%o5 + 0x00], %f3 ! ux = from[0]
502 ld [%o5 + 0x04], %f5 ! uy = from[1]
503 ld [%o5 + 0x08], %f7 ! uz = from[2]
504 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
505 add %o4, 1, %o4 ! i++
507 /* f3=tx, f5=ty, f7=tz */
509 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
510 fmuls %f3, %f3, %f6 ! FGM Group f3 available
511 fmuls %f5, %f5, %f8 ! FGM Group f5 available
512 fmuls %f7, %f7, %f10 ! FGM Group f7 available
513 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
514 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
516 /* scale (f6) = 1.0 / sqrt(len) */
517 fsqrts %f6, %f6 ! FDIV 20 cycles
518 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): no multiply of f3/f5/f7 by f6 is visible before the
 * stores below even though their comments say "tx * scale" -- confirm.
 */
521 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
523 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
525 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): the branch consuming this compare is not visible;
 * presumably the pointer advance sits in its delay slot -- confirm.
 */
527 cmp %o4, %g1 ! continue if (i < count)
529 add %g3, 0x10, %g3 ! advance out vector pointer
534 4: /* LENGTHS != NULL */
537 ld [%o5 + 0x00], %f3 ! ux = from[0]
538 ld [%o5 + 0x04], %f5 ! uy = from[1]
539 ld [%o5 + 0x08], %f7 ! uz = from[2]
540 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
541 add %o4, 1, %o4 ! i++
/* The lengths pointer is stepped by 4 bytes per element. */
544 add %o3, 4, %o3 ! IEU0
546 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): no load into f13 and no multiply before the stores are
 * visible on this path although the comments mention f13 and "tx * len"
 * -- confirm against the full file.
 */
549 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
551 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
553 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
/* NOTE(review): neither the loop branch nor the return is visible here. */
555 cmp %o4, %g1 ! continue if (i < count)
557 add %g3, 0x10, %g3 ! advance out vector pointer
/* _mesa_sparc_rescale_normals
 *
 * Register arguments: o0 = mat, o1 = scale, o2 = in, o3 = lengths,
 * o4 = dest.  Neither o0 nor o3 is read in the visible code.
 *
 * No matrix is applied: the three input floats are loaded into f3/f5/f7
 * and three floats are stored per element; the output cursor advances
 * 0x10 bytes per element and the input cursor by in->stride bytes.
 *
 * Register roles: o5 = input cursor, g2 = stride, g3 = output cursor,
 * g1 = count, o4 = loop index, f15 = scale.
 */
562 .globl _mesa_sparc_rescale_normals
563 _mesa_sparc_rescale_normals:
564 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* NOTE(review): in the visible code the constant built in g2 here is
 * never stored or read before g2 is overwritten with the stride below,
 * so this sethi looks redundant -- confirm against the full file.
 */
566 sethi %hi(ONE_DOT_ZERO), %g2
/* Move the scale argument from integer register o1 into f15 by way of a
 * stack slot.
 * NOTE(review): no adjustment of %sp is visible around these accesses.
 */
568 st %o1, [%sp + STACK_VAR_OFF+0x0]
569 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
572 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
573 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
574 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
575 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
577 /* dest->count = in->count */
578 st %g1, [%o4 + V4F_COUNT]
/* o4 (dest) is reused as the loop index from here on.
 * NOTE(review): no loop label is visible before the loads below.
 */
582 clr %o4 ! 'i' for STRIDE_LOOP
585 ld [%o5 + 0x00], %f3 ! ux = from[0]
586 ld [%o5 + 0x04], %f5 ! uy = from[1]
587 ld [%o5 + 0x08], %f7 ! uz = from[2]
588 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
589 add %o4, 1, %o4 ! i++
591 /* f3=tx, f5=ty, f7=tz */
/* NOTE(review): no multiply of f3/f5/f7 by f15 is visible before the
 * stores below even though their comments say "tx * scale" -- confirm.
 */
594 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
596 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
598 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): neither the loop branch nor the return is visible after
 * this compare and pointer advance.
 */
600 cmp %o4, %g1 ! continue if (i < count)
602 add %g3, 0x10, %g3 ! advance out vector pointer