SPARC normal tnl optimizations.
[mesa.git] / src / mesa / sparc / norm.S
1 /* $Id: norm.S,v 1.1 2001/06/06 11:46:04 davem69 Exp $ */
2
3 #include "sparc_matrix.h"
4
5 .text
6
/*
 * STACK_VAR_OFF: byte offset from %sp of the scratch words that the
 * routines below use to pass values from integer registers to FP
 * registers (store with st, reload with ld).  The routines lower %sp by
 * 16 bytes around each use.
 * NOTE(review): presumably the offset skips the register-window save
 * area (and, on V9, the 2047-byte stack bias) -- confirm against the
 * SPARC ABI.
 */
7 #ifdef __sparc_v9__
8 #define STACK_VAR_OFF (2047 + (8 * 16))
9 #else
10 #define STACK_VAR_OFF (4 * 16)
11 #endif
12
13 /* Newton-Raphson approximation turns out to be slower
14 * (and less accurate) than direct fsqrts/fdivs.
15 */
/* IEEE-754 single-precision bit pattern of 1.0f; loaded with a single
 * sethi because its low 10 bits are zero. */
16 #define ONE_DOT_ZERO 0x3f800000
17
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * Multiplies every input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv and stores a rescaled copy in dest.
 *
 * In:   %o0 = mat, %o1 = scale (raw 32-bit float pattern),
 *       %o2 = in, %o3 = lengths (NULL selects the sqrt path), %o4 = dest
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *       The input pointer advances by in->stride bytes per normal.
 *
 * lengths == NULL: out = t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz)).
 *                  A zero-length t is not special-cased.
 * lengths != NULL: the nine matrix elements are multiplied by scale
 *                  once, then out = t * lengths[i].
 *
 * Leaf routine: returns with retl and never opens a register window.
 * Writes %g1-%g3, %o0, %o3-%o5, %f0-%f8, %f10, %f12, %f13, %f15 and the
 * M0..M10 operands (macros defined outside this file).
 * NOTE(review): assumes the float argument 'scale' arrives in integer
 * register %o1 -- confirm for the 64-bit calling convention.
 */
18 .globl _mesa_sparc_transform_normalize_normals
19 _mesa_sparc_transform_normalize_normals:
20 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
21
/* Move the 1.0f bit pattern and scale from integer registers to FP
 * registers by storing them to a temporary 16-byte stack area and
 * reloading them. */
22 sethi %hi(ONE_DOT_ZERO), %g2
23 sub %sp, 16, %sp
24 st %g2, [%sp + STACK_VAR_OFF+0x0]
25 st %o1, [%sp + STACK_VAR_OFF+0x4]
26 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
27 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
28 add %sp, 16, %sp
29
/* %g2 is reused below for the stride once the constant has been stored. */
30 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
31 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
32 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
33 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
34 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
35
36 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
37
38 /* dest->count = in->count */
39 st %g1, [%o4 + V4F_COUNT]
40
/* Return immediately when count < 1 (signed compare). */
41 cmp %g1, 1
42 bl 7f
/* Delay slot of the bl above: the lengths-vs-NULL test for the bne
 * below is issued here. */
43 cmp %o3, 0
44 bne 4f
/* Delay slot of the bne: i is cleared on both paths.  %o4 (dest) is no
 * longer needed after the count store above. */
45 clr %o4 ! 'i' for STRIDE_LOOP
46
47 1: /* LENGTHS == NULL */
48 ld [%o5 + 0x00], %f0 ! ux = from[0]
49 ld [%o5 + 0x04], %f1 ! uy = from[1]
50 ld [%o5 + 0x08], %f2 ! uz = from[2]
51 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
52 add %o4, 1, %o4 ! i++
53
54 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
55 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
56 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
57 */
/* %f0 and %f4 are reused as temporaries for the uz products once their
 * earlier values have been consumed. */
58 fmuls %f0, M0, %f3 ! FGM Group
59 fmuls %f1, M1, %f4 ! FGM Group
60 fmuls %f0, M4, %f5 ! FGM Group
61 fmuls %f1, M5, %f6 ! FGM Group
62 fmuls %f0, M8, %f7 ! FGM Group f3 available
63 fmuls %f1, M9, %f8 ! FGM Group f4 available
64 fadds %f3, %f4, %f3 ! FGA
65 fmuls %f2, M2, %f10 ! FGM Group f5 available
66 fmuls %f2, M6, %f0 ! FGM Group f6 available
67 fadds %f5, %f6, %f5 ! FGA
68 fmuls %f2, M10, %f4 ! FGM Group f7 available
69 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
70 fadds %f3, %f10, %f3 ! FGA Group f10 available
71 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
72 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
73
74 /* f3=tx, f5=ty, f7=tz */
75
76 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
77 fmuls %f3, %f3, %f6 ! FGM Group f3 available
78 fmuls %f5, %f5, %f8 ! FGM Group f5 available
79 fmuls %f7, %f7, %f10 ! FGM Group f7 available
80 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
81 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
82
83 /* scale (f6) = 1.0 / sqrt(len) */
/* No check for len == 0 is made before the divide. */
84 fsqrts %f6, %f6 ! FDIV 20 cycles
85 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
86
87 fmuls %f3, %f6, %f3
88 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
89 fmuls %f5, %f6, %f5
90 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
91 fmuls %f7, %f6, %f7
92 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
93
94 cmp %o4, %g1 ! continue if (i < count)
95 bl 1b
/* Delay slot: the output pointer is advanced on every iteration,
 * including the last. */
96 add %g3, 0x0c, %g3 ! advance out vector pointer
97
98 ba 7f
99 nop
100
101 4: /* LENGTHS != NULL */
/* Fold scale into the matrix once so the loop below only has to
 * multiply by lengths[i]. */
102 fmuls M0, %f15, M0
103 fmuls M1, %f15, M1
104 fmuls M2, %f15, M2
105 fmuls M4, %f15, M4
106 fmuls M5, %f15, M5
107 fmuls M6, %f15, M6
108 fmuls M8, %f15, M8
109 fmuls M9, %f15, M9
110 fmuls M10, %f15, M10
111
112 5:
113 ld [%o5 + 0x00], %f0 ! ux = from[0]
114 ld [%o5 + 0x04], %f1 ! uy = from[1]
115 ld [%o5 + 0x08], %f2 ! uz = from[2]
116 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
117 add %o4, 1, %o4 ! i++
118
119 /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
120 * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
121 * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
122 */
123 fmuls %f0, M0, %f3 ! FGM Group
124 fmuls %f1, M1, %f4 ! FGM Group
125 fmuls %f0, M4, %f5 ! FGM Group
126 fmuls %f1, M5, %f6 ! FGM Group
127 fmuls %f0, M8, %f7 ! FGM Group f3 available
128 fmuls %f1, M9, %f8 ! FGM Group f4 available
129 fadds %f3, %f4, %f3 ! FGA
130 fmuls %f2, M2, %f10 ! FGM Group f5 available
131 fmuls %f2, M6, %f0 ! FGM Group f6 available
132 fadds %f5, %f6, %f5 ! FGA
133 fmuls %f2, M10, %f4 ! FGM Group f7 available
134 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
135 fadds %f3, %f10, %f3 ! FGA Group f10 available
/* The lengths[i] load and pointer bump are interleaved with the final
 * additions. */
136 ld [%o3], %f13 ! LSU
137 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
138 add %o3, 4, %o3 ! IEU0
139 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
140
141 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
142
143 fmuls %f3, %f13, %f3
144 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
145 fmuls %f5, %f13, %f5
146 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
147 fmuls %f7, %f13, %f7
148 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
149
150 cmp %o4, %g1 ! continue if (i < count)
151 bl 5b
152 add %g3, 0x0c, %g3 ! advance out vector pointer
153
/* Common exit for the early-out and both loops. */
154 7: retl
155 nop
156
/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * Same job as the full-matrix normalize routine, but uses only elements
 * 0, 5 and 10 of mat->inv: t = (ux*m0, uy*m5, uz*m10).
 *
 * In:   %o0 = mat, %o1 = scale (raw 32-bit float pattern),
 *       %o2 = in, %o3 = lengths (NULL selects the sqrt path), %o4 = dest
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * lengths == NULL: out = t * (1.0 / sqrt(tx*tx + ty*ty + tz*tz)).
 *                  A zero-length t is not special-cased.
 * lengths != NULL: m0, m5 and m10 are multiplied by scale once, then
 *                  out = t * lengths[i].
 *
 * Leaf routine (retl, no register window opened).
 */
157 .globl _mesa_sparc_transform_normalize_normals_no_rot
158 _mesa_sparc_transform_normalize_normals_no_rot:
159 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
160
/* Integer -> FP register transfer of 1.0f and scale through a temporary
 * 16-byte stack area. */
161 sethi %hi(ONE_DOT_ZERO), %g2
162 sub %sp, 16, %sp
163 st %g2, [%sp + STACK_VAR_OFF+0x0]
164 st %o1, [%sp + STACK_VAR_OFF+0x4]
165 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
166 ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
167 add %sp, 16, %sp
168
169 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
170 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
171 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
172 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
173 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
174
175 LDMATRIX_0_5_10(%o0)
176
177 /* dest->count = in->count */
178 st %g1, [%o4 + V4F_COUNT]
179
/* Return when count < 1.  The cmp in the bl delay slot feeds the bne,
 * and the clr in the bne delay slot zeroes i on both paths. */
180 cmp %g1, 1
181 bl 7f
182 cmp %o3, 0
183 bne 4f
184 clr %o4 ! 'i' for STRIDE_LOOP
185
186 1: /* LENGTHS == NULL */
187 ld [%o5 + 0x00], %f0 ! ux = from[0]
188 ld [%o5 + 0x04], %f1 ! uy = from[1]
189 ld [%o5 + 0x08], %f2 ! uz = from[2]
190 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
191 add %o4, 1, %o4 ! i++
192
193 /* tx (f3) = (ux * m0)
194 * ty (f5) = (uy * m5)
195 * tz (f7) = (uz * m10)
196 */
197 fmuls %f0, M0, %f3 ! FGM Group
198 fmuls %f1, M5, %f5 ! FGM Group
199 fmuls %f2, M10, %f7 ! FGM Group
200
201 /* f3=tx, f5=ty, f7=tz */
202
203 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
204 fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
205 fmuls %f5, %f5, %f8 ! FGM Group f5 available
206 fmuls %f7, %f7, %f10 ! FGM Group f7 available
207 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
208 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
209
210 /* scale (f6) = 1.0 / sqrt(len) */
/* No check for len == 0 is made before the divide. */
211 fsqrts %f6, %f6 ! FDIV 20 cycles
212 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
213
214 fmuls %f3, %f6, %f3
215 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
216 fmuls %f5, %f6, %f5
217 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
218 fmuls %f7, %f6, %f7
219 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
220
221 cmp %o4, %g1 ! continue if (i < count)
222 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
223 add %g3, 0x0c, %g3 ! advance out vector pointer
224
225 ba 7f
226 nop
227
228 4: /* LENGTHS != NULL */
/* Fold scale into the three matrix elements once, outside the loop. */
229 fmuls M0, %f15, M0
230 fmuls M5, %f15, M5
231 fmuls M10, %f15, M10
232
233 5:
234 ld [%o5 + 0x00], %f0 ! ux = from[0]
235 ld [%o5 + 0x04], %f1 ! uy = from[1]
236 ld [%o5 + 0x08], %f2 ! uz = from[2]
237 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
238 add %o4, 1, %o4 ! i++
239
240 /* tx (f3) = (ux * m0)
241 * ty (f5) = (uy * m5)
242 * tz (f7) = (uz * m10)
243 */
/* The lengths[i] load and pointer bump are interleaved with the
 * multiplies. */
244 fmuls %f0, M0, %f3 ! FGM Group
245 ld [%o3], %f13 ! LSU
246 fmuls %f1, M5, %f5 ! FGM Group
247 add %o3, 4, %o3 ! IEU0
248 fmuls %f2, M10, %f7 ! FGM Group
249
250 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
251
252 fmuls %f3, %f13, %f3
253 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
254 fmuls %f5, %f13, %f5
255 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
256 fmuls %f7, %f13, %f7
257 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
258
259 cmp %o4, %g1 ! continue if (i < count)
260 bl 5b
261 add %g3, 0x0c, %g3 ! advance out vector pointer
262
/* Common exit for the early-out and both loops. */
263 7: retl
264 nop
265
/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * Writes out = (ux*m0*scale, uy*m5*scale, uz*m10*scale) for every input
 * normal, where m0, m5 and m10 come from mat->inv.  scale is folded
 * into the three matrix elements before the loop.
 *
 * In:   %o0 = mat, %o1 = scale (raw 32-bit float pattern),
 *       %o2 = in, %o4 = dest.  %o3 (lengths) is not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * Leaf routine (retl, no register window opened).
 */
266 .globl _mesa_sparc_transform_rescale_normals_no_rot
267 _mesa_sparc_transform_rescale_normals_no_rot:
268 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Integer -> FP register transfer of scale through a temporary 16-byte
 * stack area. */
269 sub %sp, 16, %sp
270 st %o1, [%sp + STACK_VAR_OFF+0x0]
271 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
272 add %sp, 16, %sp
273
274 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
275 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
276 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
277 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
278 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
279
280 LDMATRIX_0_5_10(%o0)
281
282 /* dest->count = in->count */
283 st %g1, [%o4 + V4F_COUNT]
284
/* Return when count < 1; the clr sits in the branch delay slot and so
 * runs on both paths. */
285 cmp %g1, 1
286 bl 7f
287 clr %o4 ! 'i' for STRIDE_LOOP
288
/* Pre-scale the matrix elements once, outside the loop. */
289 fmuls M0, %f15, M0
290 fmuls M5, %f15, M5
291 fmuls M10, %f15, M10
292
293 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
294 ld [%o5 + 0x04], %f1 ! uy = from[1]
295 ld [%o5 + 0x08], %f2 ! uz = from[2]
296 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
297 add %o4, 1, %o4 ! i++
298
299 /* tx (f3) = (ux * m0)
300 * ty (f5) = (uy * m5)
301 * tz (f7) = (uz * m10)
302 */
303 fmuls %f0, M0, %f3 ! FGM Group
304 st %f3, [%g3 + 0x00] ! LSU
305 fmuls %f1, M5, %f5 ! FGM Group
306 st %f5, [%g3 + 0x04] ! LSU
307 fmuls %f2, M10, %f7 ! FGM Group
308 st %f7, [%g3 + 0x08] ! LSU
309
310 cmp %o4, %g1 ! continue if (i < count)
311 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
312 add %g3, 0x0c, %g3 ! advance out vector pointer
313
314 7: retl
315 nop
316
/*
 * _mesa_sparc_transform_rescale_normals
 *
 * Multiplies every input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv, each pre-multiplied by scale, and stores the result:
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 *
 * In:   %o0 = mat, %o1 = scale (raw 32-bit float pattern),
 *       %o2 = in, %o4 = dest.  %o3 (lengths) is not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * Leaf routine (retl, no register window opened).
 */
317 .globl _mesa_sparc_transform_rescale_normals
318 _mesa_sparc_transform_rescale_normals:
319 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Integer -> FP register transfer of scale through a temporary 16-byte
 * stack area. */
320 sub %sp, 16, %sp
321 st %o1, [%sp + STACK_VAR_OFF+0x0]
322 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
323 add %sp, 16, %sp
324
325 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
326 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
327 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
328 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
329 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
330
331 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
332
333 /* dest->count = in->count */
334 st %g1, [%o4 + V4F_COUNT]
335
/* Return when count < 1; the clr sits in the branch delay slot and so
 * runs on both paths. */
336 cmp %g1, 1
337 bl 7f
338 clr %o4 ! 'i' for STRIDE_LOOP
339
/* Pre-scale all nine matrix elements once, outside the loop. */
340 fmuls M0, %f15, M0
341 fmuls M1, %f15, M1
342 fmuls M2, %f15, M2
343 fmuls M4, %f15, M4
344 fmuls M5, %f15, M5
345 fmuls M6, %f15, M6
346 fmuls M8, %f15, M8
347 fmuls M9, %f15, M9
348 fmuls M10, %f15, M10
349
350 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
351 ld [%o5 + 0x04], %f1 ! uy = from[1]
352 ld [%o5 + 0x08], %f2 ! uz = from[2]
353 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
354 add %o4, 1, %o4 ! i++
355
/* %f0 and %f4 are reused as temporaries for the uz products once their
 * earlier values have been consumed. */
356 fmuls %f0, M0, %f3 ! FGM Group
357 fmuls %f1, M1, %f4 ! FGM Group
358 fmuls %f0, M4, %f5 ! FGM Group
359 fmuls %f1, M5, %f6 ! FGM Group
360 fmuls %f0, M8, %f7 ! FGM Group f3 available
361 fmuls %f1, M9, %f8 ! FGM Group f4 available
362 fadds %f3, %f4, %f3 ! FGA
363 fmuls %f2, M2, %f10 ! FGM Group f5 available
364 fmuls %f2, M6, %f0 ! FGM Group f6 available
365 fadds %f5, %f6, %f5 ! FGA
366 fmuls %f2, M10, %f4 ! FGM Group f7 available
367 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
368 fadds %f3, %f10, %f3 ! FGA Group f10 available
369 st %f3, [%g3 + 0x00] ! LSU
370 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
371 st %f5, [%g3 + 0x04] ! LSU
372 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
373 st %f7, [%g3 + 0x08] ! LSU
374
375 cmp %o4, %g1 ! continue if (i < count)
376 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
377 add %g3, 0x0c, %g3 ! advance out vector pointer
378
379 7: retl
380 nop
381
/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * Writes out = (ux*m0, uy*m5, uz*m10) for every input normal, where
 * m0, m5 and m10 come from mat->inv.  No scaling or normalization is
 * applied.
 *
 * In:   %o0 = mat, %o2 = in, %o4 = dest.
 *       %o1 (scale) and %o3 (lengths) are not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * Leaf routine (retl, no register window opened).
 */
382 .globl _mesa_sparc_transform_normals_no_rot
383 _mesa_sparc_transform_normals_no_rot:
384 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
385 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
386 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
387 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
388 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
389 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
390
391 LDMATRIX_0_5_10(%o0)
392
393 /* dest->count = in->count */
394 st %g1, [%o4 + V4F_COUNT]
395
/* Return when count < 1; the clr sits in the branch delay slot and so
 * runs on both paths. */
396 cmp %g1, 1
397 bl 7f
398 clr %o4 ! 'i' for STRIDE_LOOP
399
400 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
401 ld [%o5 + 0x04], %f1 ! uy = from[1]
402 ld [%o5 + 0x08], %f2 ! uz = from[2]
403 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
404 add %o4, 1, %o4 ! i++
405
406 /* tx (f3) = (ux * m0)
407 * ty (f5) = (uy * m5)
408 * tz (f7) = (uz * m10)
409 */
410 fmuls %f0, M0, %f3 ! FGM Group
411 st %f3, [%g3 + 0x00] ! LSU
412 fmuls %f1, M5, %f5 ! FGM Group
413 st %f5, [%g3 + 0x04] ! LSU
414 fmuls %f2, M10, %f7 ! FGM Group
415 st %f7, [%g3 + 0x08] ! LSU
416
417 cmp %o4, %g1 ! continue if (i < count)
418 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
419 add %g3, 0x0c, %g3 ! advance out vector pointer
420
421 7: retl
422 nop
423
/*
 * _mesa_sparc_transform_normals
 *
 * Multiplies every input normal by elements 0,1,2,4,5,6,8,9,10 of
 * mat->inv and stores the result unscaled:
 *   tx = ux*m0 + uy*m1 + uz*m2
 *   ty = ux*m4 + uy*m5 + uz*m6
 *   tz = ux*m8 + uy*m9 + uz*m10
 *
 * In:   %o0 = mat, %o2 = in, %o4 = dest.
 *       %o1 (scale) and %o3 (lengths) are not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * Leaf routine (retl, no register window opened).
 */
424 .globl _mesa_sparc_transform_normals
425 _mesa_sparc_transform_normals:
426 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
427 LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
428 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
429 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
430 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
431 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
432
433 LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
434
435 /* dest->count = in->count */
436 st %g1, [%o4 + V4F_COUNT]
437
/* Return when count < 1; the clr sits in the branch delay slot and so
 * runs on both paths. */
438 cmp %g1, 1
439 bl 7f
440 clr %o4 ! 'i' for STRIDE_LOOP
441
442 1: ld [%o5 + 0x00], %f0 ! ux = from[0]
443 ld [%o5 + 0x04], %f1 ! uy = from[1]
444 ld [%o5 + 0x08], %f2 ! uz = from[2]
445 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
446 add %o4, 1, %o4 ! i++
447
/* %f0 and %f4 are reused as temporaries for the uz products once their
 * earlier values have been consumed. */
448 fmuls %f0, M0, %f3 ! FGM Group
449 fmuls %f1, M1, %f4 ! FGM Group
450 fmuls %f0, M4, %f5 ! FGM Group
451 fmuls %f1, M5, %f6 ! FGM Group
452 fmuls %f0, M8, %f7 ! FGM Group f3 available
453 fmuls %f1, M9, %f8 ! FGM Group f4 available
454 fadds %f3, %f4, %f3 ! FGA
455 fmuls %f2, M2, %f10 ! FGM Group f5 available
456 fmuls %f2, M6, %f0 ! FGM Group f6 available
457 fadds %f5, %f6, %f5 ! FGA
458 fmuls %f2, M10, %f4 ! FGM Group f7 available
459 fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
460 fadds %f3, %f10, %f3 ! FGA Group f10 available
461 st %f3, [%g3 + 0x00] ! LSU
462 fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
463 st %f5, [%g3 + 0x04] ! LSU
464 fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
465 st %f7, [%g3 + 0x08] ! LSU
466
467 cmp %o4, %g1 ! continue if (i < count)
468 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
469 add %g3, 0x0c, %g3 ! advance out vector pointer
470
471 7: retl
472 nop
473
/*
 * _mesa_sparc_normalize_normals
 *
 * Rescales every input normal without applying a matrix.
 *
 * In:   %o2 = in, %o3 = lengths (NULL selects the sqrt path),
 *       %o4 = dest.  %o0 (mat) and %o1 (scale) are not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *
 * lengths == NULL: out = u * (1.0 / sqrt(ux*ux + uy*uy + uz*uz)).
 *                  A zero-length u is not special-cased.
 * lengths != NULL: out = u * lengths[i].
 *
 * Leaf routine (retl, no register window opened).
 */
474 .globl _mesa_sparc_normalize_normals
475 _mesa_sparc_normalize_normals:
476 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
477
/* Integer -> FP register transfer of the 1.0f bit pattern through a
 * temporary 16-byte stack area. */
478 sethi %hi(ONE_DOT_ZERO), %g2
479 sub %sp, 16, %sp
480 st %g2, [%sp + STACK_VAR_OFF+0x0]
481 ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
482 add %sp, 16, %sp
483
484 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
485 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
486 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
487 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
488
489 /* dest->count = in->count */
490 st %g1, [%o4 + V4F_COUNT]
491
/* Return when count < 1.  The cmp in the bl delay slot feeds the bne,
 * and the clr in the bne delay slot zeroes i on both paths. */
492 cmp %g1, 1
493 bl 7f
494 cmp %o3, 0
495 bne 4f
496 clr %o4 ! 'i' for STRIDE_LOOP
497
498 1: /* LENGTHS == NULL */
/* The input is loaded straight into the registers the other routines
 * use for the transformed vector. */
499 ld [%o5 + 0x00], %f3 ! ux = from[0]
500 ld [%o5 + 0x04], %f5 ! uy = from[1]
501 ld [%o5 + 0x08], %f7 ! uz = from[2]
502 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
503 add %o4, 1, %o4 ! i++
504
505 /* f3=tx, f5=ty, f7=tz */
506
507 /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
508 fmuls %f3, %f3, %f6 ! FGM Group f3 available
509 fmuls %f5, %f5, %f8 ! FGM Group f5 available
510 fmuls %f7, %f7, %f10 ! FGM Group f7 available
511 fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
512 fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
513
514 /* scale (f6) = 1.0 / sqrt(len) */
/* No check for len == 0 is made before the divide. */
515 fsqrts %f6, %f6 ! FDIV 20 cycles
516 fdivs %f12, %f6, %f6 ! FDIV 14 cycles
517
518 fmuls %f3, %f6, %f3
519 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
520 fmuls %f5, %f6, %f5
521 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
522 fmuls %f7, %f6, %f7
523 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
524
525 cmp %o4, %g1 ! continue if (i < count)
526 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
527 add %g3, 0x0c, %g3 ! advance out vector pointer
528
529 ba 7f
530 nop
531
/* Labels 4 and 5 mark the same address: there is no setup step between
 * the branch target and the loop head in this routine. */
532 4: /* LENGTHS != NULL */
533
534 5:
535 ld [%o5 + 0x00], %f3 ! ux = from[0]
536 ld [%o5 + 0x04], %f5 ! uy = from[1]
537 ld [%o5 + 0x08], %f7 ! uz = from[2]
538 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
539 add %o4, 1, %o4 ! i++
540
541 ld [%o3], %f13 ! LSU
542 add %o3, 4, %o3 ! IEU0
543
544 /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
545
546 fmuls %f3, %f13, %f3
547 st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
548 fmuls %f5, %f13, %f5
549 st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
550 fmuls %f7, %f13, %f7
551 st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
552
553 cmp %o4, %g1 ! continue if (i < count)
554 bl 5b
555 add %g3, 0x0c, %g3 ! advance out vector pointer
556
/* Common exit for the early-out and both loops. */
557 7: retl
558 nop
559
/*
 * _mesa_sparc_rescale_normals
 *
 * Writes out = (ux*scale, uy*scale, uz*scale) for every input normal.
 * No matrix is applied.
 *
 * In:   %o1 = scale (raw 32-bit float pattern), %o2 = in, %o4 = dest.
 *       %o0 (mat) and %o3 (lengths) are not read.
 * Out:  dest->count = in->count; three floats are written per normal,
 *       starting at dest->start and advancing 12 bytes each time.
 *       The input pointer advances by in->stride bytes per normal.
 *
 * Leaf routine (retl, no register window opened).
 * Writes %g1-%g3, %o4, %o5, %f3, %f5, %f7 and %f15.
 */
560 .globl _mesa_sparc_rescale_normals
561 _mesa_sparc_rescale_normals:
562 /* o0=mat o1=scale o2=in o3=lengths o4=dest */
563
/* Integer -> FP register transfer of scale through a temporary 16-byte
 * stack area. */
565 sub %sp, 16, %sp
566 st %o1, [%sp + STACK_VAR_OFF+0x0]
567 ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
568 add %sp, 16, %sp
569
570 LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
571 ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
572 ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
573 LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
574
575 /* dest->count = in->count */
576 st %g1, [%o4 + V4F_COUNT]
577
/* Return when count < 1; the clr sits in the branch delay slot and so
 * runs on both paths. */
578 cmp %g1, 1
579 bl 7f
580 clr %o4 ! 'i' for STRIDE_LOOP
581
582 1:
583 ld [%o5 + 0x00], %f3 ! ux = from[0]
584 ld [%o5 + 0x04], %f5 ! uy = from[1]
585 ld [%o5 + 0x08], %f7 ! uz = from[2]
586 add %o5, %g2, %o5 ! STRIDE_F(from, stride)
587 add %o4, 1, %o4 ! i++
588
589 /* f3=tx, f5=ty, f7=tz */
590
591 fmuls %f3, %f15, %f3
592 st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
593 fmuls %f5, %f15, %f5
594 st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
595 fmuls %f7, %f15, %f7
596 st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
597
598 cmp %o4, %g1 ! continue if (i < count)
599 bl 1b
/* Delay slot: runs on every iteration, taken or not. */
600 add %g3, 0x0c, %g3 ! advance out vector pointer
601
602 7: retl
603 nop