/* Remove CVS keywords.
 * [mesa.git] / src / mesa / sparc / norm.S
 */
1
2 #include "sparc_matrix.h"
3
#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
/* The Solaris assembler wants %g2/%g3 declared as scratch before
 * 64-bit code may use them.
 */
	.register	%g2, #scratch
	.register	%g3, #scratch
#endif

	.text

/* STACK_VAR_OFF: offset from %sp of the scratch words the routines in
 * this file use to move values from integer to FP registers.
 * Presumably this is the first byte past the 16-register window save
 * area (16 x 8 bytes plus the 2047-byte stack bias on 64-bit,
 * 16 x 4 bytes on 32-bit) -- TODO confirm against the ABI document.
 */
#if defined(__arch64__)
#define STACK_VAR_OFF	(2047 + (8 * 16))
#else
#define STACK_VAR_OFF	(4 * 16)
#endif

/* Bit pattern of the single-precision constant 1.0f.
 *
 * The normalizing routines compute 1.0 / sqrt(len) with fsqrts and
 * fdivs directly: a Newton-Raphson approximation turned out to be
 * slower (and less accurate).
 */
#define ONE_DOT_ZERO	0x3f800000
22
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * For each of in->count input normals u = (ux, uy, uz):
 *
 *     tx = ux*m0 + uy*m1 + uz*m2
 *     ty = ux*m4 + uy*m5 + uz*m6
 *     tz = ux*m8 + uy*m9 + uz*m10
 *
 * where m0..m10 are the values held in the M0..M10 register aliases
 * (presumably loaded from mat->inv by the LDMATRIX macro in
 * sparc_matrix.h).  Then:
 *
 *     lengths == NULL:  out = t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz))
 *     lengths != NULL:  every m is first multiplied by scale, and
 *                       out = t * lengths[i]
 *
 * In:   %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *       %o3 = lengths, %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o3, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10, %f12, %f13,
 *       %f15, the matrix registers and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 * The instruction order inside the loops interleaves multiplies, adds
 * and memory operations (the original author scheduled it for FP
 * pipeline grouping); keep that order when editing.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero
 * length vector, so 1.0 / sqrt(0) is used as the factor there.
 * NOTE(review): scale is read from %o1; confirm this matches how the
 * target ABI passes a float in the second argument slot.
 */
	.globl	_mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
	/* Move 1.0f and scale from integer registers into FP registers
	 * by bouncing them through a temporary 16-byte stack slot.
	 * sethi alone builds the constant: its low 10 bits are zero.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12		! f12 <- 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15		! f15 <- scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	/* Nothing to do when count < 1 (signed compare).  The second cmp
	 * sits in the delay slot of the first branch and is harmless when
	 * that branch is taken.
	 */
	cmp	%g1, 1
	bl	.Lxf_norm_done
	 cmp	%o3, 0
	bne	.Lxf_norm_prescale			! lengths supplied
	 clr	%o4					! i <- 0 (both paths)

.Lxf_norm_nolen_loop:		/* lengths == NULL: normalize explicitly */
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* Three dot products; f0 and f4 are reused as temporaries once
	 * ux and the uy*m1 term are no longer needed.
	 */
	fmuls	%f0, M0, %f3				! ux*m0
	fmuls	%f1, M1, %f4				! uy*m1
	fmuls	%f0, M4, %f5				! ux*m4
	fmuls	%f1, M5, %f6				! uy*m5
	fmuls	%f0, M8, %f7				! ux*m8
	fmuls	%f1, M9, %f8				! uy*m9
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10				! uz*m2
	fmuls	%f2, M6, %f0				! uz*m6
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4				! uz*m10
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3				! f3 = tx
	fadds	%f5, %f0, %f5				! f5 = ty
	fadds	%f7, %f4, %f7				! f7 = tz

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_norm_nolen_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

	ba	.Lxf_norm_done
	 nop

.Lxf_norm_prescale:		/* lengths != NULL: fold scale into the matrix once */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

.Lxf_norm_len_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* Same three dot products as above, with the load of lengths[i]
	 * and the advance of the lengths pointer interleaved.
	 */
	fmuls	%f0, M0, %f3				! ux*m0
	fmuls	%f1, M1, %f4				! uy*m1
	fmuls	%f0, M4, %f5				! ux*m4
	fmuls	%f1, M5, %f6				! uy*m5
	fmuls	%f0, M8, %f7				! ux*m8
	fmuls	%f1, M9, %f8				! uy*m9
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10				! uz*m2
	fmuls	%f2, M6, %f0				! uz*m6
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4				! uz*m10
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3				! f3 = tx
	ld	[%o3], %f13				! f13 = lengths[i]
	fadds	%f5, %f0, %f5				! f5 = ty
	add	%o3, 4, %o3				! lengths++
	fadds	%f7, %f4, %f7				! f7 = tz

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_norm_len_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_norm_done:
	retl
	 nop
161
/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Diagonal-only variant: for each of in->count input normals
 * u = (ux, uy, uz):
 *
 *     tx = ux*m0,  ty = uy*m5,  tz = uz*m10
 *
 * where m0, m5, m10 are the values held in the M0, M5, M10 register
 * aliases (presumably loaded from mat->inv by the LDMATRIX macro in
 * sparc_matrix.h).  Then:
 *
 *     lengths == NULL:  out = t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz))
 *     lengths != NULL:  m0, m5, m10 are first multiplied by scale, and
 *                       out = t * lengths[i]
 *
 * In:   %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *       %o3 = lengths, %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o3, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5-%f8, %f10,
 *       %f12, %f13, %f15, the matrix registers and the integer
 *       condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero
 * length vector, so 1.0 / sqrt(0) is used as the factor there.
 * NOTE(review): scale is read from %o1; confirm this matches how the
 * target ABI passes a float in the second argument slot.
 */
	.globl	_mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
	/* Move 1.0f and scale from integer registers into FP registers
	 * by bouncing them through a temporary 16-byte stack slot.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12		! f12 <- 1.0f
	ld	[%sp + STACK_VAR_OFF+0x4], %f15		! f15 <- scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	/* Nothing to do when count < 1 (signed compare).  The second cmp
	 * sits in the delay slot of the first branch.
	 */
	cmp	%g1, 1
	bl	.Lxf_norm_norot_done
	 cmp	%o3, 0
	bne	.Lxf_norm_norot_prescale		! lengths supplied
	 clr	%o4					! i <- 0 (both paths)

.Lxf_norm_norot_nolen_loop:	/* lengths == NULL: normalize explicitly */
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	fmuls	%f0, M0, %f3				! f3 = tx = ux*m0
	fmuls	%f1, M5, %f5				! f5 = ty = uy*m5
	fmuls	%f2, M10, %f7				! f7 = tz = uz*m10

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_norm_norot_nolen_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

	ba	.Lxf_norm_norot_done
	 nop

.Lxf_norm_norot_prescale:	/* lengths != NULL: fold scale into the diagonal once */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

.Lxf_norm_norot_len_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* The lengths[i] load and pointer bump are interleaved with the
	 * three multiplies.
	 */
	fmuls	%f0, M0, %f3				! f3 = tx = ux*m0
	ld	[%o3], %f13				! f13 = lengths[i]
	fmuls	%f1, M5, %f5				! f5 = ty = uy*m5
	add	%o3, 4, %o3				! lengths++
	fmuls	%f2, M10, %f7				! f7 = tz = uz*m10

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_norm_norot_len_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_norm_norot_done:
	retl
	 nop
270
/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Diagonal-only transform with a uniform rescale.  m0, m5 and m10
 * (the values in the M0, M5, M10 register aliases, presumably loaded
 * from mat->inv by the LDMATRIX macro in sparc_matrix.h) are
 * multiplied by scale once, then for each of in->count normals:
 *
 *     out = (ux*m0, uy*m5, uz*m10)
 *
 * In:   %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *       %o3 = lengths (not referenced), %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5, %f7, %f15, the
 *       matrix registers and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 *
 * NOTE(review): scale is read from %o1; confirm this matches how the
 * target ABI passes a float in the second argument slot.
 */
	.globl	_mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
	/* Move scale from %o1 into %f15 through a temporary stack slot. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15		! f15 <- scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	cmp	%g1, 1					! count < 1 (signed): return
	bl	.Lxf_resc_norot_done
	 clr	%o4					! i <- 0 (delay slot)

	/* Fold scale into the diagonal once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

.Lxf_resc_norot_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	fmuls	%f0, M0, %f3				! tx = ux*m0
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f1, M5, %f5				! ty = uy*m5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f2, M10, %f7				! tz = uz*m10
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_resc_norot_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_resc_norot_done:
	retl
	 nop
321
/*
 * _mesa_sparc_transform_rescale_normals
 *
 * Transform with a uniform rescale.  The nine values m0..m10 (held in
 * the M0..M10 register aliases, presumably loaded from mat->inv by the
 * LDMATRIX macro in sparc_matrix.h) are multiplied by scale once, then
 * for each of in->count normals u = (ux, uy, uz):
 *
 *     out x = ux*m0 + uy*m1 + uz*m2
 *     out y = ux*m4 + uy*m5 + uz*m6
 *     out z = ux*m8 + uy*m9 + uz*m10
 *
 * In:   %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *       %o3 = lengths (not referenced), %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10, %f15, the matrix
 *       registers and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 * The loop body interleaves multiplies, adds and stores; keep that
 * order when editing.
 *
 * NOTE(review): scale is read from %o1; confirm this matches how the
 * target ABI passes a float in the second argument slot.
 */
	.globl	_mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
	/* Move scale from %o1 into %f15 through a temporary stack slot. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15		! f15 <- scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	cmp	%g1, 1					! count < 1 (signed): return
	bl	.Lxf_resc_done
	 clr	%o4					! i <- 0 (delay slot)

	/* Fold scale into the matrix once, outside the loop. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

.Lxf_resc_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* f0 and f4 are reused as temporaries once ux and the uy*m1
	 * term are no longer needed.
	 */
	fmuls	%f0, M0, %f3				! ux*m0
	fmuls	%f1, M1, %f4				! uy*m1
	fmuls	%f0, M4, %f5				! ux*m4
	fmuls	%f1, M5, %f6				! uy*m5
	fmuls	%f0, M8, %f7				! ux*m8
	fmuls	%f1, M9, %f8				! uy*m9
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10				! uz*m2
	fmuls	%f2, M6, %f0				! uz*m6
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4				! uz*m10
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3				! f3 = tx
	st	%f3, [%g3 + 0x00]			! out x
	fadds	%f5, %f0, %f5				! f5 = ty
	st	%f5, [%g3 + 0x04]			! out y
	fadds	%f7, %f4, %f7				! f7 = tz
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_resc_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_resc_done:
	retl
	 nop
386
/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * Diagonal-only transform, no rescale and no normalization.  For each
 * of in->count normals u = (ux, uy, uz):
 *
 *     out = (ux*m0, uy*m5, uz*m10)
 *
 * where m0, m5, m10 are the values held in the M0, M5, M10 register
 * aliases (presumably loaded from mat->inv by the LDMATRIX macro in
 * sparc_matrix.h).
 *
 * In:   %o0 = mat, %o1 = scale (not referenced), %o2 = in,
 *       %o3 = lengths (not referenced), %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f3, %f5, %f7, the matrix
 *       registers and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 */
	.globl	_mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	cmp	%g1, 1					! count < 1 (signed): return
	bl	.Lxf_norot_done
	 clr	%o4					! i <- 0 (delay slot)

.Lxf_norot_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	fmuls	%f0, M0, %f3				! tx = ux*m0
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f1, M5, %f5				! ty = uy*m5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f2, M10, %f7				! tz = uz*m10
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_norot_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_norot_done:
	retl
	 nop
428
/*
 * _mesa_sparc_transform_normals
 *
 * Plain transform, no rescale and no normalization.  For each of
 * in->count normals u = (ux, uy, uz):
 *
 *     out x = ux*m0 + uy*m1 + uz*m2
 *     out y = ux*m4 + uy*m5 + uz*m6
 *     out z = ux*m8 + uy*m9 + uz*m10
 *
 * where m0..m10 are the values held in the M0..M10 register aliases
 * (presumably loaded from mat->inv by the LDMATRIX macro in
 * sparc_matrix.h).
 *
 * In:   %o0 = mat, %o1 = scale (not referenced), %o2 = in,
 *       %o3 = lengths (not referenced), %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o0, %o4, %o5, %g1, %g2, %g3, %f0-%f8, %f10, the matrix
 *       registers and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 * The loop body interleaves multiplies, adds and stores; keep that
 * order when editing.
 */
	.globl	_mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
	LDPTR	[%o0 + MAT_INV], %o0			! o0 <- mat->inv
	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	cmp	%g1, 1					! count < 1 (signed): return
	bl	.Lxf_plain_done
	 clr	%o4					! i <- 0 (delay slot)

.Lxf_plain_loop:
	ld	[%o5 + 0x00], %f0			! ux
	ld	[%o5 + 0x04], %f1			! uy
	ld	[%o5 + 0x08], %f2			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* f0 and f4 are reused as temporaries once ux and the uy*m1
	 * term are no longer needed.
	 */
	fmuls	%f0, M0, %f3				! ux*m0
	fmuls	%f1, M1, %f4				! uy*m1
	fmuls	%f0, M4, %f5				! ux*m4
	fmuls	%f1, M5, %f6				! uy*m5
	fmuls	%f0, M8, %f7				! ux*m8
	fmuls	%f1, M9, %f8				! uy*m9
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10				! uz*m2
	fmuls	%f2, M6, %f0				! uz*m6
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4				! uz*m10
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3				! f3 = tx
	st	%f3, [%g3 + 0x00]			! out x
	fadds	%f5, %f0, %f5				! f5 = ty
	st	%f5, [%g3 + 0x04]			! out y
	fadds	%f7, %f4, %f7				! f7 = tz
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lxf_plain_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lxf_plain_done:
	retl
	 nop
478
/*
 * _mesa_sparc_normalize_normals
 *
 * Normalization without any matrix transform.  For each of in->count
 * normals u = (ux, uy, uz):
 *
 *     lengths == NULL:  out = u * (1.0 / sqrt(ux*ux + uy*uy + uz*uz))
 *     lengths != NULL:  out = u * lengths[i]
 *
 * In:   %o0 = mat (not referenced), %o1 = scale (not referenced),
 *       %o2 = in, %o3 = lengths, %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o3, %o4, %o5, %g1, %g2, %g3, %f3, %f5-%f8, %f10, %f12, %f13
 *       and the integer condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 *
 * NOTE(review): the lengths == NULL path has no guard for a zero
 * length vector, so 1.0 / sqrt(0) is used as the factor there.
 */
	.globl	_mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
	/* Move the constant 1.0f into %f12 through a temporary stack
	 * slot.  sethi alone builds it: its low 10 bits are zero.
	 */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12		! f12 <- 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	/* Nothing to do when count < 1 (signed compare).  The second cmp
	 * sits in the delay slot of the first branch.
	 */
	cmp	%g1, 1
	bl	.Lnorm_done
	 cmp	%o3, 0
	bne	.Lnorm_len_loop				! lengths supplied
	 clr	%o4					! i <- 0 (both paths)

.Lnorm_nolen_loop:		/* lengths == NULL: normalize explicitly */
	ld	[%o5 + 0x00], %f3			! ux
	ld	[%o5 + 0x04], %f5			! uy
	ld	[%o5 + 0x08], %f7			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	/* f6 = ux*ux + uy*uy + uz*uz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lnorm_nolen_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

	ba	.Lnorm_done
	 nop

.Lnorm_len_loop:		/* lengths != NULL: multiply by lengths[i] */
	ld	[%o5 + 0x00], %f3			! ux
	ld	[%o5 + 0x04], %f5			! uy
	ld	[%o5 + 0x08], %f7			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	ld	[%o3], %f13				! f13 = lengths[i]
	add	%o3, 4, %o3				! lengths++

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]			! out x
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]			! out y
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]			! out z

	cmp	%o4, %g1				! loop while i < count
	bl	.Lnorm_len_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lnorm_done:
	retl
	 nop
564
/*
 * _mesa_sparc_rescale_normals
 *
 * Uniform rescale without any matrix transform.  For each of
 * in->count normals u = (ux, uy, uz):
 *
 *     out = (ux*scale, uy*scale, uz*scale)
 *
 * In:   %o0 = mat (not referenced), %o1 = scale (raw float bits),
 *       %o2 = in, %o3 = lengths (not referenced), %o4 = dest
 * Out:  dest->count = in->count (stored even when count < 1);
 *       three floats per normal are written from dest->start on,
 *       packed 12 bytes apart.  The input pointer advances by
 *       in->stride bytes per normal.
 * Uses: %o4, %o5, %g1, %g2, %g3, %f3, %f5, %f7, %f15 and the integer
 *       condition codes.
 *
 * Leaf routine: no register window is opened, return is via retl.
 *
 * NOTE(review): scale is read from %o1; confirm this matches how the
 * target ABI passes a float in the second argument slot.
 */
	.globl	_mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
	/* Move scale from %o1 into %f15 through a temporary stack slot.
	 * No 1.0f constant is needed here: nothing is normalized.
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15		! f15 <- scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5			! o5 <- in->start (read cursor)
	ld	[%o2 + V4F_COUNT], %g1			! g1 <- in->count
	ld	[%o2 + V4F_STRIDE], %g2			! g2 <- in->stride (bytes)
	LDPTR	[%o4 + V4F_START], %g3			! g3 <- dest->start (write cursor)

	st	%g1, [%o4 + V4F_COUNT]			! dest->count <- in->count

	cmp	%g1, 1					! count < 1 (signed): return
	bl	.Lresc_done
	 clr	%o4					! i <- 0 (delay slot)

.Lresc_loop:
	ld	[%o5 + 0x00], %f3			! ux
	ld	[%o5 + 0x04], %f5			! uy
	ld	[%o5 + 0x08], %f7			! uz
	add	%o5, %g2, %o5				! step input by stride
	add	%o4, 1, %o4				! i++

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]			! out x = ux * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]			! out y = uy * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]			! out z = uz * scale

	cmp	%o4, %g1				! loop while i < count
	bl	.Lresc_loop
	 add	%g3, 0x0c, %g3				! next output slot (delay slot)

.Lresc_done:
	retl
	 nop