Merge remote branch 'origin/mesa_7_6_branch'
[mesa.git] / src / mesa / sparc / norm.S
1
2 #include "sparc_matrix.h"
3
4 .register %g2, #scratch
5 .register %g3, #scratch
/* The two directives above declare g2 and g3 as scratch registers.
 * The routines below use g2 for the input stride (and briefly for the
 * 1.0f bit pattern) and g3 for the output pointer.
 */
6
7 .text
8
/* STACK_VAR_OFF is the offset from the stack pointer of the temporary
 * slots that the routines below use to move a value from an integer
 * register into a floating-point register (store as integer, reload
 * as float).
 * NOTE(review): 2047 looks like the SPARC V9 stack bias, and the
 * 16-slot term looks like the register-window save area (8-byte slots
 * on 64-bit, 4-byte slots on 32-bit) - confirm against the ABI.
 */
9 #ifdef __arch64__
10 #define STACK_VAR_OFF (2047 + (8 * 16))
11 #else
12 #define STACK_VAR_OFF (4 * 16)
13 #endif
14
15 /* Newton-Raphson approximation turns out to be slower
16 * (and less accurate) than direct fsqrts/fdivs.
17 */
/* Bit pattern that the normalize routines load into f12 as the
 * constant 1.0f. Its low 10 bits are zero, so a single sethi is enough
 * to materialize it in an integer register.
 */
18 #define ONE_DOT_ZERO 0x3f800000
19
20 .globl _mesa_sparc_transform_normalize_normals
21 _mesa_sparc_transform_normalize_normals:
22 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Transform every input normal by nine elements of mat->inv
 * (M0 M1 M2 / M4 M5 M6 / M8 M9 M10) and store a scaled result in dest.
 *
 *   lengths == NULL: each transformed vector is multiplied by
 *                    1.0 / sqrt(tx*tx + ty*ty + tz*tz).
 *   lengths != NULL: the matrix is multiplied by scale once before the
 *                    loop, then each transformed vector is multiplied
 *                    by lengths[i].
 *
 * Register roles inside the loops:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i
 *   f12 = 1.0f, f15 = scale, f13 = lengths[i]
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 *
 * LDPTR, MAT_INV, the V4F_ offsets, the LDMATRIX macro and the M0..M10
 * names are defined outside this view (presumably in sparc_matrix.h).
 *
 * NOTE(review): the lengths == NULL path has no guard against a zero
 * squared length before fsqrts/fdivs.
 * NOTE(review): on __arch64__ builds the plain bne after cmp %o3, 0
 * presumably tests only the 32-bit condition codes - confirm that a
 * 64-bit lengths pointer is handled as intended.
 */
23
/* Move 1.0f and scale from integer registers into FP registers by
 * bouncing them through two temporary stack slots.
 */
24 sethi %hi(ONE_DOT_ZERO), %g2
25 sub %sp, 16, %sp
26 st %g2, [%sp + STACK_VAR_OFF+0x0]
27 st %o1, [%sp + STACK_VAR_OFF+0x4]
28 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
29 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
30 add %sp, 16, %sp
31
32 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
33 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
34 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
35 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
36 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
37
38 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
39
40 /* dest->count = in->count */
41 st %g1, [%o4 + V4F_COUNT]
42
/* The instruction after each branch sits in its delay slot and runs
 * whether or not the branch is taken: the lengths test is issued in
 * the delay slot of the count check, and o4 (no longer needed as the
 * dest pointer) is cleared for use as the loop index.
 */
43 cmp %g1, 1
44 bl 7f
45 cmp %o3, 0
46 bne 4f
47 clr %o4 ! 'i' for STRIDE_LOOP
48
49 1: /* LENGTHS == NULL: normalize each transformed vector */
50 ld [%o5 + 0x00], %f0 ! ux = from[0]
51 ld [%o5 + 0x04], %f1 ! uy = from[1]
52 ld [%o5 + 0x08], %f2 ! uz = from[2]
53 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
54 add %o4, 1, %o4 ! i++
55
56 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
57 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
58 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
59 */
60 fmuls %f0, M0, %f3 ! FGM Group
61 fmuls %f1, M1, %f4 ! FGM Group
62 fmuls %f0, M4, %f5 ! FGM Group
63 fmuls %f1, M5, %f6 ! FGM Group
64 fmuls %f0, M8, %f7 ! FGM Group f3 available
65 fmuls %f1, M9, %f8 ! FGM Group f4 available
66 fadds %f3, %f4, %f3 ! FGA
67 fmuls %f2, M2, %f10 ! FGM Group f5 available
68 fmuls %f2, M6, %f0 ! FGM Group f6 available
69 fadds %f5, %f6, %f5 ! FGA
70 fmuls %f2, M10, %f4 ! FGM Group f7 available
71 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
72 fadds %f3, %f10, %f3 ! FGA Group f10 available
73 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
74 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
75
76 /* f3=tx, f5=ty, f7=tz */
77
78 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
79 fmuls %f3, %f3, %f6 ! FGM Group f3 available
80 fmuls %f5, %f5, %f8 ! FGM Group f5 available
81 fmuls %f7, %f7, %f10 ! FGM Group f7 available
82 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
83 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
84
85 /* scale (f6) = 1.0 / sqrt(len) */
86 fsqrts %f6, %f6 ! FDIV 20 cycles
87 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
88
89 fmuls %f3, %f6, %f3
90 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
91 fmuls %f5, %f6, %f5
92 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
93 fmuls %f7, %f6, %f7
94 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
95
96 cmp %o4, %g1 ! continue if (i < count)
97 bl 1b
98 add %g3, 0x10, %g3 ! advance out vector pointer
99
100 ba 7f
101 nop
102
103 4: /* LENGTHS != NULL: fold scale into the matrix once, before the loop */
104 fmuls M0, %f15, M0
105 fmuls M1, %f15, M1
106 fmuls M2, %f15, M2
107 fmuls M4, %f15, M4
108 fmuls M5, %f15, M5
109 fmuls M6, %f15, M6
110 fmuls M8, %f15, M8
111 fmuls M9, %f15, M9
112 fmuls M10, %f15, M10
113
114 5:
115 ld [%o5 + 0x00], %f0 ! ux = from[0]
116 ld [%o5 + 0x04], %f1 ! uy = from[1]
117 ld [%o5 + 0x08], %f2 ! uz = from[2]
118 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
119 add %o4, 1, %o4 ! i++
120
121 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
122 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
123 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
124 */
125 fmuls %f0, M0, %f3 ! FGM Group
126 fmuls %f1, M1, %f4 ! FGM Group
127 fmuls %f0, M4, %f5 ! FGM Group
128 fmuls %f1, M5, %f6 ! FGM Group
129 fmuls %f0, M8, %f7 ! FGM Group f3 available
130 fmuls %f1, M9, %f8 ! FGM Group f4 available
131 fadds %f3, %f4, %f3 ! FGA
132 fmuls %f2, M2, %f10 ! FGM Group f5 available
133 fmuls %f2, M6, %f0 ! FGM Group f6 available
134 fadds %f5, %f6, %f5 ! FGA
135 fmuls %f2, M10, %f4 ! FGM Group f7 available
136 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
137 fadds %f3, %f10, %f3 ! FGA Group f10 available
138 ld [%o3], %f13 ! LSU
139 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
140 add %o3, 4, %o3 ! IEU0
141 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
142
143 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
144
145 fmuls %f3, %f13, %f3
146 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
147 fmuls %f5, %f13, %f5
148 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
149 fmuls %f7, %f13, %f7
150 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
151
152 cmp %o4, %g1 ! continue if (i < count)
153 bl 5b
154 add %g3, 0x10, %g3 ! advance out vector pointer
155
156 7: retl
157 nop
158
159 .globl _mesa_sparc_transform_normalize_normals_no_rot
160 _mesa_sparc_transform_normalize_normals_no_rot:
161 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Variant of the transform-and-normalize routine that uses only three
 * elements of mat->inv (M0, M5, M10), so each component is a single
 * multiply: tx = ux * m0, ty = uy * m5, tz = uz * m10.
 *
 *   lengths == NULL: each transformed vector is multiplied by
 *                    1.0 / sqrt(tx*tx + ty*ty + tz*tz).
 *   lengths != NULL: M0, M5 and M10 are multiplied by scale once before
 *                    the loop, then each transformed vector is
 *                    multiplied by lengths[i].
 *
 * Register roles inside the loops:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i
 *   f12 = 1.0f, f15 = scale, f13 = lengths[i]
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 *
 * NOTE(review): the lengths == NULL path has no guard against a zero
 * squared length before fsqrts/fdivs.
 */
162
/* Move 1.0f and scale from integer registers into FP registers by
 * bouncing them through two temporary stack slots.
 */
163 sethi %hi(ONE_DOT_ZERO), %g2
164 sub %sp, 16, %sp
165 st %g2, [%sp + STACK_VAR_OFF+0x0]
166 st %o1, [%sp + STACK_VAR_OFF+0x4]
167 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
168 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
169 add %sp, 16, %sp
170
171 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
172 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
173 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
174 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
175 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
176
177 LDMATRIX_0_5_10(%o0)
178
179 /* dest->count = in->count */
180 st %g1, [%o4 + V4F_COUNT]
181
/* Delay slots: the lengths test rides in the slot of the count check,
 * and o4 is cleared for use as the loop index in the slot of bne.
 */
182 cmp %g1, 1
183 bl 7f
184 cmp %o3, 0
185 bne 4f
186 clr %o4 ! 'i' for STRIDE_LOOP
187
188 1: /* LENGTHS == NULL: normalize each transformed vector */
189 ld [%o5 + 0x00], %f0 ! ux = from[0]
190 ld [%o5 + 0x04], %f1 ! uy = from[1]
191 ld [%o5 + 0x08], %f2 ! uz = from[2]
192 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
193 add %o4, 1, %o4 ! i++
194
195 /* tx (f3) = (ux * m0)
196 * ty (f5) = (uy * m5)
197 * tz (f7) = (uz * m10)
198 */
199 fmuls %f0, M0, %f3 ! FGM Group
200 fmuls %f1, M5, %f5 ! FGM Group
201 fmuls %f2, M10, %f7 ! FGM Group
202
203 /* f3=tx, f5=ty, f7=tz */
204
205 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
206 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
207 fmuls %f5, %f5, %f8 ! FGM Group f5 available
208 fmuls %f7, %f7, %f10 ! FGM Group f7 available
209 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
210 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
211
212 /* scale (f6) = 1.0 / sqrt(len) */
213 fsqrts %f6, %f6 ! FDIV 20 cycles
214 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
215
216 fmuls %f3, %f6, %f3
217 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
218 fmuls %f5, %f6, %f5
219 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
220 fmuls %f7, %f6, %f7
221 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
222
223 cmp %o4, %g1 ! continue if (i < count)
224 bl 1b
225 add %g3, 0x10, %g3 ! advance out vector pointer
226
227 ba 7f
228 nop
229
230 4: /* LENGTHS != NULL: fold scale into the matrix once, before the loop */
231 fmuls M0, %f15, M0
232 fmuls M5, %f15, M5
233 fmuls M10, %f15, M10
234
235 5:
236 ld [%o5 + 0x00], %f0 ! ux = from[0]
237 ld [%o5 + 0x04], %f1 ! uy = from[1]
238 ld [%o5 + 0x08], %f2 ! uz = from[2]
239 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
240 add %o4, 1, %o4 ! i++
241
242 /* tx (f3) = (ux * m0)
243 * ty (f5) = (uy * m5)
244 * tz (f7) = (uz * m10)
245 */
246 fmuls %f0, M0, %f3 ! FGM Group
247 ld [%o3], %f13 ! LSU
248 fmuls %f1, M5, %f5 ! FGM Group
249 add %o3, 4, %o3 ! IEU0
250 fmuls %f2, M10, %f7 ! FGM Group
251
252 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
253
254 fmuls %f3, %f13, %f3
255 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
256 fmuls %f5, %f13, %f5
257 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
258 fmuls %f7, %f13, %f7
259 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
260
261 cmp %o4, %g1 ! continue if (i < count)
262 bl 5b
263 add %g3, 0x10, %g3 ! advance out vector pointer
264
265 7: retl
266 nop
267
268 .globl _mesa_sparc_transform_rescale_normals_no_rot
269 _mesa_sparc_transform_rescale_normals_no_rot:
270 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Multiply each input normal component-wise by three elements of
 * mat->inv (M0, M5, M10) that have been pre-multiplied by scale:
 *   out[i] = (ux * m0 * scale, uy * m5 * scale, uz * m10 * scale)
 *
 * lengths (o3) is not read by this routine.
 *
 * Register roles inside the loop:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i, f15 = scale
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 */
/* Move scale from an integer register into f15 through a stack slot. */
271 sub %sp, 16, %sp
272 st %o1, [%sp + STACK_VAR_OFF+0x0]
273 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
274 add %sp, 16, %sp
275
276 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
277 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
278 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
279 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
280 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
281
282 LDMATRIX_0_5_10(%o0)
283
284 /* dest->count = in->count */
285 st %g1, [%o4 + V4F_COUNT]
286
/* The clr sits in the branch delay slot; o4 is free to become the loop
 * index because dest->count has already been stored through it.
 */
287 cmp %g1, 1
288 bl 7f
289 clr %o4 ! 'i' for STRIDE_LOOP
290
/* Fold scale into the matrix once so the loop needs one multiply per
 * component.
 */
291 fmuls M0, %f15, M0
292 fmuls M5, %f15, M5
293 fmuls M10, %f15, M10
294
295 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
296 ld [%o5 + 0x04], %f1 ! uy = from[1]
297 ld [%o5 + 0x08], %f2 ! uz = from[2]
298 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
299 add %o4, 1, %o4 ! i++
300
301 /* tx (f3) = (ux * m0)
302 * ty (f5) = (uy * m5)
303 * tz (f7) = (uz * m10)
304 */
305 fmuls %f0, M0, %f3 ! FGM Group
306 st %f3, [%g3 + 0x00] ! LSU
307 fmuls %f1, M5, %f5 ! FGM Group
308 st %f5, [%g3 + 0x04] ! LSU
309 fmuls %f2, M10, %f7 ! FGM Group
310 st %f7, [%g3 + 0x08] ! LSU
311
312 cmp %o4, %g1 ! continue if (i < count)
313 bl 1b
314 add %g3, 0x10, %g3 ! advance out vector pointer
315
316 7: retl
317 nop
318
319 .globl _mesa_sparc_transform_rescale_normals
320 _mesa_sparc_transform_rescale_normals:
321 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Transform each input normal by nine elements of mat->inv
 * (M0 M1 M2 / M4 M5 M6 / M8 M9 M10) that have been pre-multiplied by
 * scale, and store the result without normalizing it:
 *   tx = (ux * m0) + (uy * m1) + (uz * m2)
 *   ty = (ux * m4) + (uy * m5) + (uz * m6)
 *   tz = (ux * m8) + (uy * m9) + (uz * m10)
 *
 * lengths (o3) is not read by this routine.
 *
 * Register roles inside the loop:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i, f15 = scale
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 */
/* Move scale from an integer register into f15 through a stack slot. */
322 sub %sp, 16, %sp
323 st %o1, [%sp + STACK_VAR_OFF+0x0]
324 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
325 add %sp, 16, %sp
326
327 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
328 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
329 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
330 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
331 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
332
333 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
334
335 /* dest->count = in->count */
336 st %g1, [%o4 + V4F_COUNT]
337
/* The clr sits in the branch delay slot; o4 is free to become the loop
 * index because dest->count has already been stored through it.
 */
338 cmp %g1, 1
339 bl 7f
340 clr %o4 ! 'i' for STRIDE_LOOP
341
/* Fold scale into the matrix once, before the loop. */
342 fmuls M0, %f15, M0
343 fmuls M1, %f15, M1
344 fmuls M2, %f15, M2
345 fmuls M4, %f15, M4
346 fmuls M5, %f15, M5
347 fmuls M6, %f15, M6
348 fmuls M8, %f15, M8
349 fmuls M9, %f15, M9
350 fmuls M10, %f15, M10
351
352 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
353 ld [%o5 + 0x04], %f1 ! uy = from[1]
354 ld [%o5 + 0x08], %f2 ! uz = from[2]
355 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
356 add %o4, 1, %o4 ! i++
357
/* Results accumulate in f3 (tx), f5 (ty) and f7 (tz); each store is
 * issued right after the final add that produces its value.
 */
358 fmuls %f0, M0, %f3 ! FGM Group
359 fmuls %f1, M1, %f4 ! FGM Group
360 fmuls %f0, M4, %f5 ! FGM Group
361 fmuls %f1, M5, %f6 ! FGM Group
362 fmuls %f0, M8, %f7 ! FGM Group f3 available
363 fmuls %f1, M9, %f8 ! FGM Group f4 available
364 fadds %f3, %f4, %f3 ! FGA
365 fmuls %f2, M2, %f10 ! FGM Group f5 available
366 fmuls %f2, M6, %f0 ! FGM Group f6 available
367 fadds %f5, %f6, %f5 ! FGA
368 fmuls %f2, M10, %f4 ! FGM Group f7 available
369 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
370 fadds %f3, %f10, %f3 ! FGA Group f10 available
371 st %f3, [%g3 + 0x00] ! LSU
372 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
373 st %f5, [%g3 + 0x04] ! LSU
374 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
375 st %f7, [%g3 + 0x08] ! LSU
376
377 cmp %o4, %g1 ! continue if (i < count)
378 bl 1b
379 add %g3, 0x10, %g3 ! advance out vector pointer
380
381 7: retl
382 nop
383
384 .globl _mesa_sparc_transform_normals_no_rot
385 _mesa_sparc_transform_normals_no_rot:
386 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Multiply each input normal component-wise by three elements of
 * mat->inv (M0, M5, M10), with no scaling and no normalization:
 *   out[i] = (ux * m0, uy * m5, uz * m10)
 *
 * scale (o1) and lengths (o3) are not read by this routine.
 *
 * Register roles inside the loop:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 */
387 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
388 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
389 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
390 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
391 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
392
393 LDMATRIX_0_5_10(%o0)
394
395 /* dest->count = in->count */
396 st %g1, [%o4 + V4F_COUNT]
397
/* The clr sits in the branch delay slot; o4 is free to become the loop
 * index because dest->count has already been stored through it.
 */
398 cmp %g1, 1
399 bl 7f
400 clr %o4 ! 'i' for STRIDE_LOOP
401
402 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
403 ld [%o5 + 0x04], %f1 ! uy = from[1]
404 ld [%o5 + 0x08], %f2 ! uz = from[2]
405 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
406 add %o4, 1, %o4 ! i++
407
408 /* tx (f3) = (ux * m0)
409 * ty (f5) = (uy * m5)
410 * tz (f7) = (uz * m10)
411 */
412 fmuls %f0, M0, %f3 ! FGM Group
413 st %f3, [%g3 + 0x00] ! LSU
414 fmuls %f1, M5, %f5 ! FGM Group
415 st %f5, [%g3 + 0x04] ! LSU
416 fmuls %f2, M10, %f7 ! FGM Group
417 st %f7, [%g3 + 0x08] ! LSU
418
419 cmp %o4, %g1 ! continue if (i < count)
420 bl 1b
421 add %g3, 0x10, %g3 ! advance out vector pointer
422
423 7: retl
424 nop
425
426 .globl _mesa_sparc_transform_normals
427 _mesa_sparc_transform_normals:
428 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Transform each input normal by nine elements of mat->inv
 * (M0 M1 M2 / M4 M5 M6 / M8 M9 M10), with no scaling and no
 * normalization:
 *   tx = (ux * m0) + (uy * m1) + (uz * m2)
 *   ty = (ux * m4) + (uy * m5) + (uz * m6)
 *   tz = (ux * m8) + (uy * m9) + (uz * m10)
 *
 * scale (o1) and lengths (o3) are not read by this routine.
 *
 * Register roles inside the loop:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 */
429 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
430 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
431 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
432 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
433 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
434
435 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
436
437 /* dest->count = in->count */
438 st %g1, [%o4 + V4F_COUNT]
439
/* The clr sits in the branch delay slot; o4 is free to become the loop
 * index because dest->count has already been stored through it.
 */
440 cmp %g1, 1
441 bl 7f
442 clr %o4 ! 'i' for STRIDE_LOOP
443
444 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
445 ld [%o5 + 0x04], %f1 ! uy = from[1]
446 ld [%o5 + 0x08], %f2 ! uz = from[2]
447 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
448 add %o4, 1, %o4 ! i++
449
/* Results accumulate in f3 (tx), f5 (ty) and f7 (tz); each store is
 * issued right after the final add that produces its value.
 */
450 fmuls %f0, M0, %f3 ! FGM Group
451 fmuls %f1, M1, %f4 ! FGM Group
452 fmuls %f0, M4, %f5 ! FGM Group
453 fmuls %f1, M5, %f6 ! FGM Group
454 fmuls %f0, M8, %f7 ! FGM Group f3 available
455 fmuls %f1, M9, %f8 ! FGM Group f4 available
456 fadds %f3, %f4, %f3 ! FGA
457 fmuls %f2, M2, %f10 ! FGM Group f5 available
458 fmuls %f2, M6, %f0 ! FGM Group f6 available
459 fadds %f5, %f6, %f5 ! FGA
460 fmuls %f2, M10, %f4 ! FGM Group f7 available
461 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
462 fadds %f3, %f10, %f3 ! FGA Group f10 available
463 st %f3, [%g3 + 0x00] ! LSU
464 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
465 st %f5, [%g3 + 0x04] ! LSU
466 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
467 st %f7, [%g3 + 0x08] ! LSU
468
469 cmp %o4, %g1 ! continue if (i < count)
470 bl 1b
471 add %g3, 0x10, %g3 ! advance out vector pointer
472
473 7: retl
474 nop
475
476 .globl _mesa_sparc_normalize_normals
477 _mesa_sparc_normalize_normals:
478 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Scale each input normal without applying any matrix.
 *
 *   lengths == NULL: each vector is multiplied by
 *                    1.0 / sqrt(x*x + y*y + z*z).
 *   lengths != NULL: each vector is multiplied by lengths[i].
 *
 * mat (o0) and scale (o1) are not read by this routine.
 *
 * Register roles inside the loops:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i
 *   f12 = 1.0f, f13 = lengths[i]
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 *
 * NOTE(review): the lengths == NULL path has no guard against a zero
 * squared length before fsqrts/fdivs.
 */
479
/* Move the 1.0f bit pattern from an integer register into f12 through
 * a temporary stack slot.
 */
480 sethi %hi(ONE_DOT_ZERO), %g2
481 sub %sp, 16, %sp
482 st %g2, [%sp + STACK_VAR_OFF+0x0]
483 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
484 add %sp, 16, %sp
485
486 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
487 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
488 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
489 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
490
491 /* dest->count = in->count */
492 st %g1, [%o4 + V4F_COUNT]
493
/* Delay slots: the lengths test rides in the slot of the count check,
 * and o4 is cleared for use as the loop index in the slot of bne.
 */
494 cmp %g1, 1
495 bl 7f
496 cmp %o3, 0
497 bne 4f
498 clr %o4 ! 'i' for STRIDE_LOOP
499
500 1: /* LENGTHS == NULL: compute the reciprocal length per vector */
501 ld [%o5 + 0x00], %f3 ! ux = from[0]
502 ld [%o5 + 0x04], %f5 ! uy = from[1]
503 ld [%o5 + 0x08], %f7 ! uz = from[2]
504 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
505 add %o4, 1, %o4 ! i++
506
507 /* f3=tx, f5=ty, f7=tz */
508
509 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
510 fmuls %f3, %f3, %f6 ! FGM Group f3 available
511 fmuls %f5, %f5, %f8 ! FGM Group f5 available
512 fmuls %f7, %f7, %f10 ! FGM Group f7 available
513 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
514 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
515
516 /* scale (f6) = 1.0 / sqrt(len) */
517 fsqrts %f6, %f6 ! FDIV 20 cycles
518 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
519
520 fmuls %f3, %f6, %f3
521 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
522 fmuls %f5, %f6, %f5
523 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
524 fmuls %f7, %f6, %f7
525 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
526
527 cmp %o4, %g1 ! continue if (i < count)
528 bl 1b
529 add %g3, 0x10, %g3 ! advance out vector pointer
530
531 ba 7f
532 nop
533
534 4: /* LENGTHS != NULL: multiply each vector by the supplied lengths[i] */
535
536 5:
537 ld [%o5 + 0x00], %f3 ! ux = from[0]
538 ld [%o5 + 0x04], %f5 ! uy = from[1]
539 ld [%o5 + 0x08], %f7 ! uz = from[2]
540 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
541 add %o4, 1, %o4 ! i++
542
543 ld [%o3], %f13 ! LSU
544 add %o3, 4, %o3 ! IEU0
545
546 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
547
548 fmuls %f3, %f13, %f3
549 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
550 fmuls %f5, %f13, %f5
551 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
552 fmuls %f7, %f13, %f7
553 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
554
555 cmp %o4, %g1 ! continue if (i < count)
556 bl 5b
557 add %g3, 0x10, %g3 ! advance out vector pointer
558
559 7: retl
560 nop
561
562 .globl _mesa_sparc_rescale_normals
563 _mesa_sparc_rescale_normals:
564 /* o0=mat o1=scale o2=in o3=lengths o4=dest
 *
 * Multiply each input normal by scale, with no matrix and no
 * normalization:
 *   out[i] = (x * scale, y * scale, z * scale)
 *
 * mat (o0) and lengths (o3) are not read by this routine.
 *
 * Register roles inside the loop:
 *   o5 = input pointer, advanced by in->stride bytes per vector
 *   g3 = output pointer, advanced by 16 bytes per vector
 *   g1 = in->count, g2 = in->stride, o4 = loop index i, f15 = scale
 *
 * Only dest->count and the first three floats of each output vector
 * are written. When count < 1 (signed compare) no vectors are
 * processed, but dest->count has already been stored.
 */
565
/* Move scale from an integer register into f15 through a temporary
 * stack slot. This routine never divides, so it needs no 1.0f
 * constant.
 */
567 sub %sp, 16, %sp
568 st %o1, [%sp + STACK_VAR_OFF+0x0]
569 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
570 add %sp, 16, %sp
571
572 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
573 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
574 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
575 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
576
577 /* dest->count = in->count */
578 st %g1, [%o4 + V4F_COUNT]
579
/* The clr sits in the branch delay slot; o4 is free to become the loop
 * index because dest->count has already been stored through it.
 */
580 cmp %g1, 1
581 bl 7f
582 clr %o4 ! 'i' for STRIDE_LOOP
583
584 1:
585 ld [%o5 + 0x00], %f3 ! ux = from[0]
586 ld [%o5 + 0x04], %f5 ! uy = from[1]
587 ld [%o5 + 0x08], %f7 ! uz = from[2]
588 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
589 add %o4, 1, %o4 ! i++
590
591 /* f3=tx, f5=ty, f7=tz */
592
593 fmuls %f3, %f15, %f3
594 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
595 fmuls %f5, %f15, %f5
596 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
597 fmuls %f7, %f15, %f7
598 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
599
600 cmp %o4, %g1 ! continue if (i < count)
601 bl 1b
602 add %g3, 0x10, %g3 ! advance out vector pointer
603
604 7: retl
605 nop