/* $Id: norm.S,v 1.1 2001/06/06 11:46:04 davem69 Exp $ */

#include "sparc_matrix.h"

	.text

/* STACK_VAR_OFF: offset from %sp at which the routines below park two
 * scratch words while moving values from integer to FP registers.
 *   64-bit (V9) ABI: 2047-byte stack bias + 16 saved registers * 8 bytes.
 *   32-bit ABI:      16 saved registers * 4 bytes.
 * Exactly one definition must be active, hence the guard.
 */
#ifdef __sparc_v9__
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

	/* Newton-Raphson approximation turns out to be slower
	 * (and less accurate) than direct fsqrts/fdivs.
	 */

/* IEEE-754 single-precision 1.0f.  The low 10 bits are zero, so a lone
 * "sethi %hi(ONE_DOT_ZERO)" materialises the whole constant.
 */
#define ONE_DOT_ZERO	0x3f800000
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * For each input normal u: t = u * (3x3 part of mat->inv), then
 *   lengths == NULL: out = t * (1 / sqrt(t . t))
 *   lengths != NULL: out = (t * scale) * lengths[i]
 *                    (scale is folded into the matrix registers once)
 *
 * In:    %o0 = mat, %o1 = scale (raw 32-bit float bits), %o2 = in,
 *        %o3 = lengths (array of floats, or NULL), %o4 = dest
 * Out:   dest->count = in->count; dest->start receives 3 packed floats
 *        per normal (output pointer advances by 0x0c).
 * Clobb: %o0, %o3, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10, %f12, %f13,
 *        %f15, the M* matrix registers from sparc_matrix.h, icc.
 * Note:  a zero-length transformed normal is not special-cased on the
 *        lengths == NULL path (the divide produces Inf/NaN).
 */
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Integer -> FP register moves are bounced through two scratch
	 * words on the stack; %sp is dropped for the duration so the
	 * words do not land in the caller's frame.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available (ux is dead, f0 reused)
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV	20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold the uniform scale into the register copy of the matrix once,
	 * so the loop only needs the per-normal lengths[i] multiply.
	 */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	add	%o3, 4, %o3			! IEU0	lengths++
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same contract as the general transform+normalize routine, but only the
 * diagonal of mat->inv (m0, m5, m10) is used:
 *   t = (ux*m0, uy*m5, uz*m10)
 *   lengths == NULL: out = t * (1 / sqrt(t . t))
 *   lengths != NULL: out = (t * scale) * lengths[i]
 *
 * In:    %o0 = mat, %o1 = scale (raw 32-bit float bits), %o2 = in,
 *        %o3 = lengths (array of floats, or NULL), %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o0, %o3, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5-%f8, %f10, %f12,
 *        %f13, %f15, M0, M5, M10, icc.
 */
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Bounce 1.0f and scale into FP registers via stack scratch words;
	 * %sp is lowered meanwhile so they stay out of the caller's frame.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* Only the diagonal is needed: floats 0, 5 and 10 of mat->inv. */
	ld	[%o0 + ( 0 * 0x4)], M0
	ld	[%o0 + ( 5 * 0x4)], M5
	ld	[%o0 + (10 * 0x4)], M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M5, %f5			! FGM	Group
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	stall, f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV	20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */
	/* Fold scale into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	fmuls	%f1, M5, %f5			! FGM	Group
	add	%o3, 4, %o3			! IEU0	lengths++
	fmuls	%f2, M10, %f7			! FGM	Group

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * out[i] = (ux * m0 * scale, uy * m5 * scale, uz * m10 * scale)
 * using only the diagonal of mat->inv; scale is folded into the matrix
 * registers once before the loop.  lengths (%o3) is not used.
 *
 * In:    %o0 = mat, %o1 = scale (raw 32-bit float bits), %o2 = in,
 *        %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5, %f7, %f15,
 *        M0, M5, M10, icc.
 */
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Move scale from %o1 to %f15 through a stack scratch word. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* Only the diagonal is needed: floats 0, 5 and 10 of mat->inv. */
	ld	[%o0 + ( 0 * 0x4)], M0
	ld	[%o0 + ( 5 * 0x4)], M5
	ld	[%o0 + (10 * 0x4)], M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Pre-scale the diagonal so the loop is a plain multiply. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_rescale_normals
 *
 * out[i] = (u * 3x3 part of mat->inv) * scale.  The nine matrix registers
 * are multiplied by scale once, before the loop.  lengths (%o3) is not used.
 *
 * In:    %o0 = mat, %o1 = scale (raw 32-bit float bits), %o2 = in,
 *        %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10, %f15,
 *        the M* matrix registers from sparc_matrix.h, icc.
 */
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Move scale from %o1 to %f15 through a stack scratch word. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 clr	%o4				! 'i' for STRIDE_LOOP

	/* Pre-scale the 3x3 so the loop is a plain matrix multiply. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available (ux is dead, f0 reused)
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * out[i] = (ux * m0, uy * m5, uz * m10), using only the diagonal of
 * mat->inv.  scale (%o1) and lengths (%o3) are not used.
 *
 * In:    %o0 = mat, %o2 = in, %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5, %f7, M0, M5, M10, icc.
 */
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* Only the diagonal is needed: floats 0, 5 and 10 of mat->inv. */
	ld	[%o0 + ( 0 * 0x4)], M0
	ld	[%o0 + ( 5 * 0x4)], M5
	ld	[%o0 + (10 * 0x4)], M10

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0)
	 * ty (f5) = (uy * m5)
	 * tz (f7) = (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	st	%f3, [%g3 + 0x00]		! LSU
	fmuls	%f1, M5, %f5			! FGM	Group
	st	%f5, [%g3 + 0x04]		! LSU
	fmuls	%f2, M10, %f7			! FGM	Group
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_transform_normals
 *
 * out[i] = u * (3x3 part of mat->inv); no scaling or normalization.
 * scale (%o1) and lengths (%o3) are not used.
 *
 * In:    %o0 = mat, %o2 = in, %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10,
 *        the M* matrix registers from sparc_matrix.h, icc.
 */
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	ld	[%o5 + 0x00], %f0		! ux = from[0]
	ld	[%o5 + 0x04], %f1		! uy = from[1]
	ld	[%o5 + 0x08], %f2		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
	 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
	 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
	 */
	fmuls	%f0, M0, %f3			! FGM	Group
	fmuls	%f1, M1, %f4			! FGM	Group
	fmuls	%f0, M4, %f5			! FGM	Group
	fmuls	%f1, M5, %f6			! FGM	Group
	fmuls	%f0, M8, %f7			! FGM	Group	f3 available
	fmuls	%f1, M9, %f8			! FGM	Group	f4 available
	fadds	%f3, %f4, %f3			! FGA
	fmuls	%f2, M2, %f10			! FGM	Group	f5 available
	fmuls	%f2, M6, %f0			! FGM	Group	f6 available (ux is dead, f0 reused)
	fadds	%f5, %f6, %f5			! FGA
	fmuls	%f2, M10, %f4			! FGM	Group	f7 available
	fadds	%f7, %f8, %f7			! FGA	Group	f8,f3 available
	fadds	%f3, %f10, %f3			! FGA	Group	f10 available
	st	%f3, [%g3 + 0x00]		! LSU
	fadds	%f5, %f0, %f5			! FGA	Group	stall f0,f5 available
	st	%f5, [%g3 + 0x04]		! LSU
	fadds	%f7, %f4, %f7			! FGA	Group	stall f4,f7 available
	st	%f7, [%g3 + 0x08]		! LSU

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_normalize_normals
 *
 * Normalize without transforming:
 *   lengths == NULL: out = u * (1 / sqrt(u . u))
 *   lengths != NULL: out = u * lengths[i]
 * mat (%o0) and scale (%o1) are not used.
 *
 * In:    %o2 = in, %o3 = lengths (array of floats, or NULL), %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o3, %o4, %o5, %g1, %g2, %g3, %f3, %f5-%f8, %f10, %f12, %f13, icc.
 * Note:  a zero-length input is not special-cased on the lengths == NULL
 *        path (the divide produces Inf/NaN).
 */
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Materialise 1.0f in %f12 through a stack scratch word. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! 'i' for STRIDE_LOOP

1:	/* LENGTHS == NULL */
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
	fmuls	%f3, %f3, %f6			! FGM	Group	f3 available
	fmuls	%f5, %f5, %f8			! FGM	Group	f5 available
	fmuls	%f7, %f7, %f10			! FGM	Group	f7 available
	fadds	%f6, %f8, %f6			! FGA	Group	2cyc stall f6,f8 available
	fadds	%f6, %f10, %f6			! FGA	Group	4cyc stall f6,f10 available

	/* scale (f6) = 1.0 / sqrt(len) */
	fsqrts	%f6, %f6			! FDIV	20 cycles
	fdivs	%f12, %f6, %f6			! FDIV	14 cycles

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

	ba	7f
	 nop

4:	/* LENGTHS != NULL */

5:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! LSU	f13 = lengths[i]
	add	%o3, 4, %o3			! IEU0	lengths++

	/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * len
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * len
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * len

	cmp	%o4, %g1			! continue if (i < count)
	bl	5b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop

/*
 * _mesa_sparc_rescale_normals
 *
 * out[i] = in[i] * scale, with no matrix transform.
 * mat (%o0) and lengths (%o3) are not used.
 *
 * In:    %o1 = scale (raw 32-bit float bits), %o2 = in, %o4 = dest
 * Out:   dest->count = in->count; 3 packed floats per normal at dest->start
 * Clobb: %o4, %o5, %g1, %g2, %g3, %f3, %f5, %f7, %f15, icc.
 */
	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* o0=mat o1=scale o2=in o3=lengths o4=dest */

	/* Move scale from %o1 to %f15 through a stack scratch word.
	 * (No 1.0f constant is needed here, so none is loaded.)
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = 'from' in->start
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = 'out' dest->start

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1				! nothing to do if count < 1
	bl	7f
	 clr	%o4				! 'i' for STRIDE_LOOP

4:
	ld	[%o5 + 0x00], %f3		! ux = from[0]
	ld	[%o5 + 0x04], %f5		! uy = from[1]
	ld	[%o5 + 0x08], %f7		! uz = from[2]
	add	%o5, %g2, %o5			! STRIDE_F(from, stride)
	add	%o4, 1, %o4			! i++

	/* f3=tx, f5=ty, f7=tz */

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	4b
	 add	%g3, 0x0c, %g3			! advance out vector pointer

7:	retl
	 nop