1 /* $Id: norm.S,v 1.3 2004/04/26 10:10:25 alanh Exp $ */
3 #include "sparc_matrix.h"
/* NOTE(review): LDPTR, the M0..M10 operand names, the LDMATRIX_* macro and
 * the MAT_INV / V4F_* offsets used below are not defined in this file;
 * presumably they come from the header included above -- confirm. */
6 /* Solaris requires this for 64-bit. */
7 .register %g2, #scratch
8 .register %g3, #scratch
/* STACK_VAR_OFF is the offset from %sp at which the routines below park
 * 32-bit words in order to move them from integer to floating-point
 * registers.
 * NOTE(review): 2047 looks like the SPARC V9 ABI stack bias and the
 * 8 * 16 / 4 * 16 terms like a 16-register save area -- confirm against
 * the ABI. In this view both definitions appear with no #else / #endif
 * between or after them; check the complete file. */
13 #if defined(__sparc_v9__) && !defined(__linux__)
14 #define STACK_VAR_OFF (2047 + (8 * 16))
16 #define STACK_VAR_OFF (4 * 16)
19 /* Newton-Raphson approximation turns out to be slower
20 * (and less accurate) than direct fsqrts/fdivs. */
/* IEEE-754 single-precision bit pattern of 1.0f; the normalize routines
 * below load it into %f12 as the dividend for 1.0 / sqrt(len). */
22 #define ONE_DOT_ZERO 0x3f800000
24 .globl _mesa_sparc_transform_normalize_normals
25 _mesa_sparc_transform_normalize_normals:
26 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * Transform every input normal by nine coefficients of mat->inv and
 * normalize the result.
 *
 * Per element, as shown by the instructions and their comments:
 *   tx = ux*M0 + uy*M1 + uz*M2
 *   ty = ux*M4 + uy*M5 + uz*M6
 *   tz = ux*M8 + uy*M9 + uz*M10
 * The "LENGTHS == NULL" loop then computes
 * 1.0 / sqrt(tx*tx + ty*ty + tz*tz) in %f6 using fsqrts and fdivs.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride (added directly to the read pointer)
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o3  lengths pointer, advanced by 4 per element in the second loop
 *   %o4  element index i (dest itself is no longer needed by then)
 *   %f12 1.0f, %f15 scale
 *
 * NOTE(review): in this view there are no loop branches, no return, no
 * increment of %o4 in the first loop, no use of %f15, and no multiply of
 * tx/ty/tz by %f6 or by lengths[i] ahead of the stores, even though the
 * store comments describe scaled values. Verify against the full file.
 */
/* The low 10 bits of ONE_DOT_ZERO are zero, so sethi alone produces the
 * complete constant. It and the scale argument (which arrives in the
 * integer register %o1) are stored to the stack and reloaded so that they
 * end up in floating-point registers. */
28 sethi %hi(ONE_DOT_ZERO), %g2
30 st %g2, [%sp + STACK_VAR_OFF+0x0]
31 st %o1, [%sp + STACK_VAR_OFF+0x4]
32 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
33 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
/* Fetch the fields needed from mat, in and dest. %g2 is free to be reused
 * for the stride because the 1.0f bits were already stored above. */
36 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
37 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
38 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
39 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
40 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Macro defined outside this file; presumably loads the coefficients
 * referred to as M0, M1, M2, M4, M5, M6, M8, M9, M10 from the matrix at
 * %o0 -- confirm in sparc_matrix.h. */
42 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
44 /* dest->count = in->count */
45 st %g1, [%o4 + V4F_COUNT]
/* Last use of the dest pointer was the store above; %o4 now counts
 * elements. */
51 clr %o4 ! 'i' for STRIDE_LOOP
53 1: /* LENGTHS == NULL */
54 ld [%o5 + 0x00], %f0 ! ux = from[0]
55 ld [%o5 + 0x04], %f1 ! uy = from[1]
56 ld [%o5 + 0x08], %f2 ! uz = from[2]
57 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
60 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
61 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
62 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
64 fmuls %f0, M0, %f3 ! FGM Group
65 fmuls %f1, M1, %f4 ! FGM Group
66 fmuls %f0, M4, %f5 ! FGM Group
67 fmuls %f1, M5, %f6 ! FGM Group
68 fmuls %f0, M8, %f7 ! FGM Group f3 available
69 fmuls %f1, M9, %f8 ! FGM Group f4 available
70 fadds %f3, %f4, %f3 ! FGA
71 fmuls %f2, M2, %f10 ! FGM Group f5 available
72 fmuls %f2, M6, %f0 ! FGM Group f6 available
73 fadds %f5, %f6, %f5 ! FGA
74 fmuls %f2, M10, %f4 ! FGM Group f7 available
75 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
76 fadds %f3, %f10, %f3 ! FGA Group f10 available
77 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
78 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
80 /* f3=tx, f5=ty, f7=tz */
82 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
83 fmuls %f3, %f3, %f6 ! FGM Group f3 available
84 fmuls %f5, %f5, %f8 ! FGM Group f5 available
85 fmuls %f7, %f7, %f10 ! FGM Group f7 available
86 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
87 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
89 /* scale (f6) = 1.0 / sqrt(len) */
90 fsqrts %f6, %f6 ! FDIV 20 cycles
91 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the multiplies by %f6 implied by the comments on the
 * stores below are not present in this view; as listed, tx/ty/tz are
 * stored unscaled. */
94 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
96 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
98 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): no branch follows this compare in this view; presumably
 * the add below sits in the delay slot of a loop branch -- confirm. */
100 cmp %o4, %g1 ! continue if (i < count)
102 add %g3, 0x0c, %g3 ! advance out vector pointer
107 4: /* LENGTHS != NULL */
/* Second loop: same nine-coefficient transform, but no square root or
 * divide is performed; the comments indicate a caller-supplied
 * lengths[i] is used as the scale factor instead. */
119 ld [%o5 + 0x00], %f0 ! ux = from[0]
120 ld [%o5 + 0x04], %f1 ! uy = from[1]
121 ld [%o5 + 0x08], %f2 ! uz = from[2]
122 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
123 add %o4, 1, %o4 ! i++
125 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
126 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
127 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
129 fmuls %f0, M0, %f3 ! FGM Group
130 fmuls %f1, M1, %f4 ! FGM Group
131 fmuls %f0, M4, %f5 ! FGM Group
132 fmuls %f1, M5, %f6 ! FGM Group
133 fmuls %f0, M8, %f7 ! FGM Group f3 available
134 fmuls %f1, M9, %f8 ! FGM Group f4 available
135 fadds %f3, %f4, %f3 ! FGA
136 fmuls %f2, M2, %f10 ! FGM Group f5 available
137 fmuls %f2, M6, %f0 ! FGM Group f6 available
138 fadds %f5, %f6, %f5 ! FGA
139 fmuls %f2, M10, %f4 ! FGM Group f7 available
140 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
141 fadds %f3, %f10, %f3 ! FGA Group f10 available
143 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
144 add %o3, 4, %o3 ! IEU0
145 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
147 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): neither a load of lengths[i] into %f13 nor a multiply by
 * it is visible in this view; the stores below write tx/ty/tz exactly as
 * computed above. */
150 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
152 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
154 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
156 cmp %o4, %g1 ! continue if (i < count)
158 add %g3, 0x0c, %g3 ! advance out vector pointer
163 .globl _mesa_sparc_transform_normalize_normals_no_rot
164 _mesa_sparc_transform_normalize_normals_no_rot:
165 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * "No rotation" form of transform-and-normalize: only three coefficients
 * are applied to each input normal,
 *   tx = ux*M0,  ty = uy*M5,  tz = uz*M10
 * and the "LENGTHS == NULL" loop then computes
 * 1.0 / sqrt(tx*tx + ty*ty + tz*tz) in %f6 using fsqrts and fdivs.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o3  lengths pointer, advanced by 4 per element in the second loop
 *   %o4  element index i
 *   %f12 1.0f, %f15 scale
 *
 * NOTE(review): in this view nothing loads M0/M5/M10 from mat->inv even
 * though %o0 is set to it, and there are no loop branches, no return, no
 * use of %f15, and no multiply of tx/ty/tz by %f6 or lengths[i] before
 * the stores. Verify against the full file.
 */
/* sethi alone yields the full constant because the low 10 bits of
 * ONE_DOT_ZERO are zero. The constant and the scale argument are moved
 * into floating-point registers by a store to the stack and a reload. */
167 sethi %hi(ONE_DOT_ZERO), %g2
169 st %g2, [%sp + STACK_VAR_OFF+0x0]
170 st %o1, [%sp + STACK_VAR_OFF+0x4]
171 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
172 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
/* %g2 is reused for the stride below; its previous contents were already
 * written to the stack. */
175 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
176 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
177 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
178 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
179 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
183 /* dest->count = in->count */
184 st %g1, [%o4 + V4F_COUNT]
/* The dest pointer is not needed after the store above, so %o4 becomes
 * the element index. */
190 clr %o4 ! 'i' for STRIDE_LOOP
192 1: /* LENGTHS == NULL */
193 ld [%o5 + 0x00], %f0 ! ux = from[0]
194 ld [%o5 + 0x04], %f1 ! uy = from[1]
195 ld [%o5 + 0x08], %f2 ! uz = from[2]
196 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
197 add %o4, 1, %o4 ! i++
199 /* tx (f3) = (ux * m0)
200 * ty (f5) = (uy * m5)
201 * tz (f7) = (uz * m10)
203 fmuls %f0, M0, %f3 ! FGM Group
204 fmuls %f1, M5, %f5 ! FGM Group
205 fmuls %f2, M10, %f7 ! FGM Group
207 /* f3=tx, f5=ty, f7=tz */
209 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
210 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
211 fmuls %f5, %f5, %f8 ! FGM Group f5 available
212 fmuls %f7, %f7, %f10 ! FGM Group f7 available
213 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
214 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
216 /* scale (f6) = 1.0 / sqrt(len) */
217 fsqrts %f6, %f6 ! FDIV 20 cycles
218 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the multiplies by %f6 implied by the store comments are
 * not visible in this view; as listed, tx/ty/tz are stored unscaled. */
221 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
223 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
225 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): no branch is visible after this compare; presumably the
 * add below is in the delay slot of a loop branch -- confirm. */
227 cmp %o4, %g1 ! continue if (i < count)
229 add %g3, 0x0c, %g3 ! advance out vector pointer
234 4: /* LENGTHS != NULL */
/* Second loop: same three multiplies, no square root or divide; the
 * comments indicate lengths[i] supplies the scale factor instead. */
240 ld [%o5 + 0x00], %f0 ! ux = from[0]
241 ld [%o5 + 0x04], %f1 ! uy = from[1]
242 ld [%o5 + 0x08], %f2 ! uz = from[2]
243 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
244 add %o4, 1, %o4 ! i++
246 /* tx (f3) = (ux * m0)
247 * ty (f5) = (uy * m5)
248 * tz (f7) = (uz * m10)
250 fmuls %f0, M0, %f3 ! FGM Group
252 fmuls %f1, M5, %f5 ! FGM Group
253 add %o3, 4, %o3 ! IEU0
254 fmuls %f2, M10, %f7 ! FGM Group
256 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): neither a load of lengths[i] into %f13 nor a multiply by
 * it is visible in this view. */
259 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
261 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
263 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
265 cmp %o4, %g1 ! continue if (i < count)
267 add %g3, 0x0c, %g3 ! advance out vector pointer
272 .globl _mesa_sparc_transform_rescale_normals_no_rot
273 _mesa_sparc_transform_rescale_normals_no_rot:
274 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * "No rotation" rescale: for each input normal the loop computes
 *   tx = ux*M0,  ty = uy*M5,  tz = uz*M10
 * and stores the three results to the output vector.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o4  element index i
 *   %f15 scale
 *
 * NOTE(review): in this view each store directly follows the fmuls that
 * produces its value, so no multiply by %f15 (scale) appears despite the
 * routine's name. Also not visible: whatever loads M0/M5/M10 from
 * mat->inv, the loop branch after the final compare, and the return.
 * Verify against the full file.
 */
/* scale arrives in integer register %o1; it is stored to the stack and
 * reloaded to get it into a floating-point register. */
276 st %o1, [%sp + STACK_VAR_OFF+0x0]
277 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
280 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
281 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
282 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
283 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
284 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
288 /* dest->count = in->count */
289 st %g1, [%o4 + V4F_COUNT]
/* dest is not referenced again, so %o4 is reused as the element index. */
293 clr %o4 ! 'i' for STRIDE_LOOP
299 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
300 ld [%o5 + 0x04], %f1 ! uy = from[1]
301 ld [%o5 + 0x08], %f2 ! uz = from[2]
302 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
303 add %o4, 1, %o4 ! i++
305 /* tx (f3) = (ux * m0)
306 * ty (f5) = (uy * m5)
307 * tz (f7) = (uz * m10)
309 fmuls %f0, M0, %f3 ! FGM Group
310 st %f3, [%g3 + 0x00] ! LSU
311 fmuls %f1, M5, %f5 ! FGM Group
312 st %f5, [%g3 + 0x04] ! LSU
313 fmuls %f2, M10, %f7 ! FGM Group
314 st %f7, [%g3 + 0x08] ! LSU
316 cmp %o4, %g1 ! continue if (i < count)
318 add %g3, 0x0c, %g3 ! advance out vector pointer
323 .globl _mesa_sparc_transform_rescale_normals
324 _mesa_sparc_transform_rescale_normals:
325 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * Transform each input normal by nine coefficients of mat->inv and store
 * the result:
 *   f3 = ux*M0 + uy*M1 + uz*M2
 *   f5 = ux*M4 + uy*M5 + uz*M6
 *   f7 = ux*M8 + uy*M9 + uz*M10
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o4  element index i
 *   %f15 scale
 *
 * NOTE(review): in this view %f15 (scale) is loaded but never used, so no
 * rescaling step appears despite the routine's name; the loop branch and
 * the return are not visible either. Verify against the full file.
 */
/* scale arrives in integer register %o1; it is stored to the stack and
 * reloaded to get it into a floating-point register. */
327 st %o1, [%sp + STACK_VAR_OFF+0x0]
328 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
331 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
332 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
333 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
334 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
335 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Macro defined outside this file; presumably loads the coefficients
 * referred to as M0, M1, M2, M4, M5, M6, M8, M9, M10 from the matrix at
 * %o0 -- confirm in sparc_matrix.h. */
337 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
339 /* dest->count = in->count */
340 st %g1, [%o4 + V4F_COUNT]
/* dest is not referenced again, so %o4 is reused as the element index. */
344 clr %o4 ! 'i' for STRIDE_LOOP
356 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
357 ld [%o5 + 0x04], %f1 ! uy = from[1]
358 ld [%o5 + 0x08], %f2 ! uz = from[2]
359 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
360 add %o4, 1, %o4 ! i++
/* Products go to %f3..%f8, %f10, %f0 and %f4; %f0 and %f4 are recycled as
 * temporaries once their previous values have been consumed. Each of the
 * three sums is accumulated in place in %f3, %f5 and %f7. */
362 fmuls %f0, M0, %f3 ! FGM Group
363 fmuls %f1, M1, %f4 ! FGM Group
364 fmuls %f0, M4, %f5 ! FGM Group
365 fmuls %f1, M5, %f6 ! FGM Group
366 fmuls %f0, M8, %f7 ! FGM Group f3 available
367 fmuls %f1, M9, %f8 ! FGM Group f4 available
368 fadds %f3, %f4, %f3 ! FGA
369 fmuls %f2, M2, %f10 ! FGM Group f5 available
370 fmuls %f2, M6, %f0 ! FGM Group f6 available
371 fadds %f5, %f6, %f5 ! FGA
372 fmuls %f2, M10, %f4 ! FGM Group f7 available
373 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
374 fadds %f3, %f10, %f3 ! FGA Group f10 available
375 st %f3, [%g3 + 0x00] ! LSU
376 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
377 st %f5, [%g3 + 0x04] ! LSU
378 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
379 st %f7, [%g3 + 0x08] ! LSU
/* NOTE(review): no branch is visible after this compare; presumably the
 * add below is in the delay slot of a loop branch -- confirm. */
381 cmp %o4, %g1 ! continue if (i < count)
383 add %g3, 0x0c, %g3 ! advance out vector pointer
388 .globl _mesa_sparc_transform_normals_no_rot
389 _mesa_sparc_transform_normals_no_rot:
390 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * "No rotation" transform: for each input normal the loop computes
 *   tx = ux*M0,  ty = uy*M5,  tz = uz*M10
 * and stores each product to the output vector immediately. The scale
 * (%o1) and lengths (%o3) arguments are not referenced by the visible
 * code.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o4  element index i (reused after dest->count is written)
 *
 * NOTE(review): in this view nothing loads M0/M5/M10 from mat->inv even
 * though %o0 is set to it, and the loop branch after the final compare
 * and the return are not visible. Verify against the full file.
 */
391 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
392 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
393 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
394 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
395 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
399 /* dest->count = in->count */
400 st %g1, [%o4 + V4F_COUNT]
404 clr %o4 ! 'i' for STRIDE_LOOP
406 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
407 ld [%o5 + 0x04], %f1 ! uy = from[1]
408 ld [%o5 + 0x08], %f2 ! uz = from[2]
409 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
410 add %o4, 1, %o4 ! i++
412 /* tx (f3) = (ux * m0)
413 * ty (f5) = (uy * m5)
414 * tz (f7) = (uz * m10)
416 fmuls %f0, M0, %f3 ! FGM Group
417 st %f3, [%g3 + 0x00] ! LSU
418 fmuls %f1, M5, %f5 ! FGM Group
419 st %f5, [%g3 + 0x04] ! LSU
420 fmuls %f2, M10, %f7 ! FGM Group
421 st %f7, [%g3 + 0x08] ! LSU
423 cmp %o4, %g1 ! continue if (i < count)
425 add %g3, 0x0c, %g3 ! advance out vector pointer
430 .globl _mesa_sparc_transform_normals
431 _mesa_sparc_transform_normals:
432 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * Transform each input normal by nine coefficients of mat->inv and store
 * the result, with no normalization or rescaling:
 *   f3 = ux*M0 + uy*M1 + uz*M2
 *   f5 = ux*M4 + uy*M5 + uz*M6
 *   f7 = ux*M8 + uy*M9 + uz*M10
 * The scale (%o1) and lengths (%o3) arguments are not referenced by the
 * visible code.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o4  element index i
 *
 * NOTE(review): the loop branch after the final compare and the return
 * are not visible in this view. Verify against the full file.
 */
433 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
434 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
435 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
436 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
437 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* Macro defined outside this file; presumably loads the coefficients
 * referred to as M0, M1, M2, M4, M5, M6, M8, M9, M10 from the matrix at
 * %o0 -- confirm in sparc_matrix.h. */
439 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
441 /* dest->count = in->count */
442 st %g1, [%o4 + V4F_COUNT]
/* dest is not referenced again, so %o4 is reused as the element index. */
446 clr %o4 ! 'i' for STRIDE_LOOP
448 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
449 ld [%o5 + 0x04], %f1 ! uy = from[1]
450 ld [%o5 + 0x08], %f2 ! uz = from[2]
451 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
452 add %o4, 1, %o4 ! i++
/* Products go to %f3..%f8, %f10, %f0 and %f4; %f0 and %f4 are recycled as
 * temporaries once their previous values have been consumed. Each of the
 * three sums is accumulated in place in %f3, %f5 and %f7 and stored as
 * soon as it is complete. */
454 fmuls %f0, M0, %f3 ! FGM Group
455 fmuls %f1, M1, %f4 ! FGM Group
456 fmuls %f0, M4, %f5 ! FGM Group
457 fmuls %f1, M5, %f6 ! FGM Group
458 fmuls %f0, M8, %f7 ! FGM Group f3 available
459 fmuls %f1, M9, %f8 ! FGM Group f4 available
460 fadds %f3, %f4, %f3 ! FGA
461 fmuls %f2, M2, %f10 ! FGM Group f5 available
462 fmuls %f2, M6, %f0 ! FGM Group f6 available
463 fadds %f5, %f6, %f5 ! FGA
464 fmuls %f2, M10, %f4 ! FGM Group f7 available
465 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
466 fadds %f3, %f10, %f3 ! FGA Group f10 available
467 st %f3, [%g3 + 0x00] ! LSU
468 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
469 st %f5, [%g3 + 0x04] ! LSU
470 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
471 st %f7, [%g3 + 0x08] ! LSU
/* NOTE(review): no branch is visible after this compare; presumably the
 * add below is in the delay slot of a loop branch -- confirm. */
473 cmp %o4, %g1 ! continue if (i < count)
475 add %g3, 0x0c, %g3 ! advance out vector pointer
480 .globl _mesa_sparc_normalize_normals
481 _mesa_sparc_normalize_normals:
482 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * Normalize input normals without applying any matrix: each element is
 * loaded straight into %f3/%f5/%f7, and the "LENGTHS == NULL" loop
 * computes 1.0 / sqrt(x*x + y*y + z*z) in %f6 using fsqrts and fdivs.
 * The mat (%o0) and scale (%o1) arguments are not referenced by the
 * visible code.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o3  lengths pointer, advanced by 4 per element in the second loop
 *   %o4  element index i
 *   %f12 1.0f
 *
 * NOTE(review): in this view there are no loop branches, no return, and
 * no multiply by %f6 or by lengths[i] before the stores, although the
 * store comments describe scaled values. Verify against the full file.
 */
/* sethi alone yields the full constant because the low 10 bits of
 * ONE_DOT_ZERO are zero; the value reaches %f12 via a store to the stack
 * and a reload. */
484 sethi %hi(ONE_DOT_ZERO), %g2
486 st %g2, [%sp + STACK_VAR_OFF+0x0]
487 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
/* %g2 is reused for the stride below; its previous contents were already
 * written to the stack. */
490 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
491 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
492 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
493 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
495 /* dest->count = in->count */
496 st %g1, [%o4 + V4F_COUNT]
/* dest is not referenced again, so %o4 is reused as the element index. */
502 clr %o4 ! 'i' for STRIDE_LOOP
504 1: /* LENGTHS == NULL */
505 ld [%o5 + 0x00], %f3 ! ux = from[0]
506 ld [%o5 + 0x04], %f5 ! uy = from[1]
507 ld [%o5 + 0x08], %f7 ! uz = from[2]
508 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
509 add %o4, 1, %o4 ! i++
511 /* f3=tx, f5=ty, f7=tz */
513 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
514 fmuls %f3, %f3, %f6 ! FGM Group f3 available
515 fmuls %f5, %f5, %f8 ! FGM Group f5 available
516 fmuls %f7, %f7, %f10 ! FGM Group f7 available
517 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
518 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
520 /* scale (f6) = 1.0 / sqrt(len) */
521 fsqrts %f6, %f6 ! FDIV 20 cycles
522 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
/* NOTE(review): the multiplies by %f6 implied by the store comments are
 * not visible in this view; as listed, the input components are stored
 * unchanged. */
525 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
527 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
529 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): no branch is visible after this compare; presumably the
 * add below is in the delay slot of a loop branch -- confirm. */
531 cmp %o4, %g1 ! continue if (i < count)
533 add %g3, 0x0c, %g3 ! advance out vector pointer
538 4: /* LENGTHS != NULL */
/* Second loop: no square root or divide; the comments indicate lengths[i]
 * supplies the scale factor instead. */
541 ld [%o5 + 0x00], %f3 ! ux = from[0]
542 ld [%o5 + 0x04], %f5 ! uy = from[1]
543 ld [%o5 + 0x08], %f7 ! uz = from[2]
544 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
545 add %o4, 1, %o4 ! i++
548 add %o3, 4, %o3 ! IEU0
550 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
/* NOTE(review): neither a load of lengths[i] into %f13 nor a multiply by
 * it is visible in this view. */
553 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
555 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
557 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
559 cmp %o4, %g1 ! continue if (i < count)
561 add %g3, 0x0c, %g3 ! advance out vector pointer
566 .globl _mesa_sparc_rescale_normals
567 _mesa_sparc_rescale_normals:
568 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/*
 * Rescale input normals without applying any matrix: each element is
 * loaded into %f3/%f5/%f7 and written to the output vector. The mat (%o0)
 * and lengths (%o3) arguments are not referenced by the visible code.
 *
 * Register roles once setup is done:
 *   %o5  read pointer, starts at in->start, advanced by in->stride
 *   %g1  in->count (also written to dest->count)
 *   %g2  in->stride
 *   %g3  write pointer, starts at dest->start, advanced by 0x0c
 *   %o4  element index i
 *   %f15 scale
 *
 * NOTE(review): in this view there is no multiply by %f15 before the
 * stores (their comments describe scaled values), no loop label, no loop
 * branch and no return. Verify against the full file.
 */
/* NOTE(review): the value this sethi places in %g2 is never stored or
 * read in the visible code; %g2 is overwritten with in->stride below. It
 * looks like a leftover from the normalize routines -- confirm before
 * removing. */
570 sethi %hi(ONE_DOT_ZERO), %g2
/* scale arrives in integer register %o1; it is stored to the stack and
 * reloaded to get it into a floating-point register. */
572 st %o1, [%sp + STACK_VAR_OFF+0x0]
573 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
576 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
577 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
578 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
579 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
581 /* dest->count = in->count */
582 st %g1, [%o4 + V4F_COUNT]
/* dest is not referenced again, so %o4 is reused as the element index. */
586 clr %o4 ! 'i' for STRIDE_LOOP
589 ld [%o5 + 0x00], %f3 ! ux = from[0]
590 ld [%o5 + 0x04], %f5 ! uy = from[1]
591 ld [%o5 + 0x08], %f7 ! uz = from[2]
592 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
593 add %o4, 1, %o4 ! i++
595 /* f3=tx, f5=ty, f7=tz */
598 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
600 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
602 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
/* NOTE(review): no branch is visible after this compare; presumably the
 * add below is in the delay slot of a loop branch -- confirm. */
604 cmp %o4, %g1 ! continue if (i < count)
606 add %g3, 0x0c, %g3 ! advance out vector pointer