initial commit
[glibc.git] / sysdeps / ia64 / fpu / e_atan2f.S
1 .file "atan2f.s"
2
3
4 // Copyright (c) 2000 - 2003, Intel Corporation
5 // All rights reserved.
6 //
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 //
15 // * Redistributions in binary form must reproduce the above copyright
16 // notice, this list of conditions and the following disclaimer in the
17 // documentation and/or other materials provided with the distribution.
18 //
19 // * The name of Intel Corporation may not be used to endorse or promote
20 // products derived from this software without specific prior written
21 // permission.
22
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 //
35 // Intel Corporation is the author of this code, and requests that all
36 // problem reports or change requests be submitted to it directly at
37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
38
39 // History
40 //==============================================================
41 // 06/01/00 Initial version
42 // 08/15/00 Bundle added after call to __libm_error_support to properly
43 // set [the previously overwritten] GR_Parameter_RESULT.
44 // 08/17/00 Changed predicate register macro-usage to direct predicate
45 // names due to an assembler bug.
46 // 01/05/01 Fixed flag settings for denormal input.
47 // 01/19/01 Added documentation
48 // 01/30/01 Improved speed
49 // 02/06/02 Corrected .section statement
50 // 05/20/02 Cleaned up namespace and sf0 syntax
51 // 02/06/03 Reordered header: .section, .global, .proc, .align
52
53 // Description
54 //=========================================
55 // The atan2 function computes the principal value of the arc tangent of y/x using
56 // the signs of both arguments to determine the quadrant of the return value.
57 // A domain error may occur if both arguments are zero.
58
59 // The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
60
61 //..
62 //..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
63 //..v and u can be negative. We state the relationship between atan2(y,x) and
64 //..atan(v/u).
65 //..
66 //..Let swap = false if v = y, and swap = true if v = x.
67 //..Define C according to the matrix
68 //..
69 //.. TABLE FOR C
70 //.. x +ve x -ve
71 //.. no swap (swap = false) sgn(y)*0 sgn(y)*pi
72 //.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2
73 //..
74 //.. atan2(y,x) = C + atan(v/u) if no swap
75 //.. atan2(y,x) = C - atan(v/u) if swap
76 //..
77 //..This relationship is more efficient to compute as we accommodate signs in v and u
78 //..saving the need to obtain the absolute value before computation can proceed.
79 //..
80 //..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
81 //..A = y * frcpa(x) (so A = (y/x)(1 - beta))
82 //..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
83 //..a correction.
84 //..atan(A) is approximated by a polynomial
85 //..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
86 //..atan(G) is approximated as follows:
87 //..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + g * p1
88 //..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
89 //..
90 //..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
91 //..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
92 //..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
93 //..a correction.
94 //..atan(Z) is approximated by a polynomial
95 //..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
96 //..atan(T) is approximated as follows:
97 //..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + t * p1
98 //..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
99 //..
100 //..
101 //..A = y * frcpa(x)
102 //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
103 //..
104 //..This polynomial is computed as follows:
105 //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
106 //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
107 //..
108 //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
109 //..poly_A1 = poly_A2 + A4 * poly_A1
110 //..poly_A1 = poly_A3 + A4 * poly_A1
111 //..
112 //..poly_A4 = p1 * A
113 //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
114 //..poly_A5 = p2 + Asq * poly_A5
115 //..poly_A4 = poly_A4 + A5 * poly_A5
116 //..
117 //..atan_A = poly_A4 + A11 * poly_A1
118 //..
119 //..atan(G) is approximated as follows:
120 //..G_numer = y - A*x, G_denom = x + A*y
121 //..H1 = frcpa(G_denom)
122 //..H_beta = 1 - H1 * G_denom
123 //..H2 = H1 + H1 * H_beta
124 //..H_beta2 = H_beta*H_beta
125 //..H3 = H2 + H2*H_beta2
126 //..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
127 //..atan_G = G_numer*H3 + atan_G
128 //..
129 //..
130 //..A = y * frcpa(x)
131 //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
132 //..
133 //..This polynomial is computed as follows:
134 //..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
135 //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
136 //..
137 //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
138 //..poly_A1 = poly_A2 + A4 * poly_A1
139 //..poly_A1 = poly_A3 + A4 * poly_A1
140 //..
141 //..poly_A4 = p1 * A
142 //..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
143 //..poly_A5 = p2 + Asq * poly_A5
144 //..poly_A4 = poly_A4 + A5 * poly_A5
145 //..
146 //..atan_A = poly_A4 + A11 * poly_A1
147 //..
148 //..
149 //..====================================================================
150 //.. COEFFICIENTS USED IN THE COMPUTATION
151 //..====================================================================
152
153 //coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
154 //
155 // coef_p1 = -.3333332707155439167401311806315789E+00
156 // coef_p1 in dbl = BFD5 5555 1219 1621
157 //
158 // coef_p2 = .1999967670926658391827857030875748E+00
159 // coef_p2 in dbl = 3FC9 997E 7AFB FF4E
160 //
161 // coef_p3 = -.1427989384500152360161563301087296E+00
162 // coef_p3 in dbl = BFC2 473C 5145 EE38
163 //
164 // coef_p4 = .1105852823460720770079031213661163E+00
165 // coef_p4 in dbl = 3FBC 4F51 2B18 65F5
166 //
167 // coef_p5 = -.8811839915595312348625710228448363E-01
168 // coef_p5 in dbl = BFB6 8EED 6A8C FA32
169 //
170 // coef_p6 = .6742329836955067042153645159059714E-01
171 // coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
172 //
173 // coef_p7 = -.4468571068774672908561591262231909E-01
174 // coef_p7 in dbl = BFA6 E10B A401 393F
175 //
176 // coef_p8 = .2252333246746511135532726960586493E-01
177 // coef_p8 in dbl = 3F97 105B 4160 F86B
178 //
179 // coef_p9 = -.7303884867007574742501716845542314E-02
180 // coef_p9 in dbl = BF7D EAAD AA33 6451
181 //
182 // coef_p10 = .1109686868355312093949039454619058E-02
183 // coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
184 //
185
186 // Special values
187 //==============================================================
188 // Y x Result
189 // +number +inf +0
190 // -number +inf -0
191 // +number -inf +pi
192 // -number -inf -pi
193 //
194 // +inf +number +pi/2
195 // -inf +number -pi/2
196 // +inf -number +pi/2
197 // -inf -number -pi/2
198 //
199 // +inf +inf +pi/4
200 // -inf +inf -pi/4
201 // +inf -inf +3pi/4
202 // -inf -inf -3pi/4
203 //
204 // +1 +1 +pi/4
205 // -1 +1 -pi/4
206 // +1 -1 +3pi/4
207 // -1 -1 -3pi/4
208 //
209 // +number +0 +pi/2 // does not raise DBZ
210 // -number +0 -pi/2 // does not raise DBZ
211 // +number -0 +pi/2 // does not raise DBZ
212 // -number -0 -pi/2 // does not raise DBZ
213 //
214 // +0 +number +0
215 // -0 +number -0
216 // +0 -number +pi
217 // -0 -number -pi
218 //
219 // +0 +0 +0 // does not raise invalid
220 // -0 +0 -0 // does not raise invalid
221 // +0 -0 +pi // does not raise invalid
222 // -0 -0 -pi // does not raise invalid
223 //
224 // Nan anything quiet Y
225 // anything NaN quiet X
226
227 // atan2(+-0/+-0) sets double error tag to 37
228 // atan2f(+-0/+-0) sets single error tag to 38
229 // These are domain errors.
230
231
232 //
233 // Assembly macros
234 //=========================================
235
236
237 // integer registers
238 atan2f_GR_Addr_1 = r33 // Address of atan2f_coef_table1; post-incremented by the ldfpd loads
239 atan2f_GR_Addr_2 = r34 // atan2f_GR_Addr_1 + 0x30; second load stream for the constants
240 GR_SAVE_B0 = r35 // Holds b0 across the __libm_error_support call
241
242 GR_SAVE_PFS = r36 // Holds ar.pfs across the __libm_error_support call
243 GR_SAVE_GP = r37 // Holds gp across the __libm_error_support call
244
245 GR_Parameter_X = r38 // Stack address where f8 is stored for __libm_error_support
246 GR_Parameter_Y = r39 // Stack address where f9 is stored for __libm_error_support
247 GR_Parameter_RESULT = r40 // Stack address of the result slot
248 GR_Parameter_TAG = r41 // Error tag for __libm_error_support
249
250 // floating point registers
251 atan2f_coef_p1 = f32
252 atan2f_coef_p10 = f33
253 atan2f_coef_p7 = f34
254 atan2f_coef_p6 = f35
255
256 atan2f_coef_p3 = f36
257 atan2f_coef_p2 = f37
258 atan2f_coef_p9 = f38
259 atan2f_coef_p8 = f39
260 atan2f_coef_p5 = f40
261
262 atan2f_coef_p4 = f41
263 atan2f_const_piby2 = f42
264 atan2f_const_pi = f43
265 atan2f_const_piby4 = f44
266 atan2f_const_3piby4 = f45
267
268 atan2f_xsq = f46 // x*x
269 atan2f_ysq = f47 // y*y
270 atan2f_xy = f48 // x*y
271 atan2f_const_1 = f49 // sgn(y)*0 when x >= 0, sgn(y)*1 when x < 0
272 atan2f_sgn_Y = f50 // 1.0 carrying the sign of y
273
274 atan2f_Z0 = f51 // frcpa approximation to 1/y
275 atan2f_A0 = f52 // frcpa approximation to 1/x
276 atan2f_Z = f53 // Z0 * x
277 atan2f_A = f54 // A0 * y
278 atan2f_C = f55 // Constant C from the "TABLE FOR C" in the header
279
280 atan2f_U = f56 // Polynomial argument: -Z when |y| > |x|, A otherwise
281 atan2f_Usq = f57 // U^2
282 atan2f_U4 = f58 // U^4
283 atan2f_U6 = f59 // U^6
284 atan2f_U8 = f60 // U^8
285
286 atan2f_poly_u109 = f61 // p9 + Usq*p10
287 atan2f_poly_u87 = f62 // p7 + Usq*p8
288 atan2f_poly_u65 = f63 // p5 + Usq*p6
289 atan2f_poly_u43 = f64 // p3 + Usq*p4
290 atan2f_poly_u21 = f65 // p1 + Usq*p2
291
292 atan2f_poly_u10to7 = f66 // poly_u87 + U4*poly_u109
293 atan2f_poly_u6to3 = f67 // poly_u43 + U4*poly_u65
294 atan2f_poly_u10to3 = f68 // poly_u6to3 + U8*poly_u10to7
295 atan2f_poly_u10to0 = f69 // poly_u210 + U6*poly_u10to3
296 atan2f_poly_u210 = f70 // 1 + Usq*poly_u21
297
298 atan2f_T_numer = f71 // x - Z0*xy
299 atan2f_T_denom = f72 // y + Z0*xsq
300 atan2f_G_numer = f73 // y - A0*xy
301 atan2f_G_denom = f74 // x + A0*ysq
302 atan2f_p1rnum = f75 // p1 * R_numer
303
304 atan2f_R_denom = f76 // T_denom when |y| > |x|, G_denom otherwise
305 atan2f_R_numer = f77 // -T_numer when |y| > |x|, G_numer otherwise
306 atan2f_pR = f78 // p1rnum * Q1
307 atan2f_pRC = f79 // C + rsq*pR
308 atan2f_pQRC = f80 // pRC + R_numer*Q3
309
310 atan2f_Q1 = f81 // frcpa approximation to 1/R_denom
311 atan2f_Q_beta = f82 // 1 - Q1*R_denom
312 atan2f_Q2 = f83 // Q1 + Q1*Q_beta
313 atan2f_Q_beta2 = f84 // Q_beta^2
314 atan2f_Q3 = f85 // Q2 + Q2*Q_beta2
315
316 atan2f_r = f86 // Q1 * R_numer
317 atan2f_rsq = f87 // r^2
318 atan2f_poly_atan_U = f88 // NOTE(review): appears unused by the instructions in this file
319
320
321 // predicate registers
322 //atan2f_Pred_Swap = p6 // |y| > |x|
323 //atan2f_Pred_noSwap = p7 // |y| <= |x|
324 //atan2f_Pred_Xpos = p8 // x >= 0
325 //atan2f_Pred_Xneg = p9 // x < 0
326
327
328 RODATA
329
330 .align 16
331
332 LOCAL_OBJECT_START(atan2f_coef_table1)
333 data8 0xBFD5555512191621 // p1 (table1 is 6 doubles = 0x30 bytes, loaded in pairs: p1/p10, p7/p6, p3/p2)
334 data8 0x3F522E5D33BC9BAA // p10
335 data8 0xBFA6E10BA401393F // p7
336 data8 0x3FB142A73D7C54E3 // p6
337 data8 0xBFC2473C5145EE38 // p3
338 data8 0x3FC9997E7AFBFF4E // p2
339 LOCAL_OBJECT_END(atan2f_coef_table1)
340
341 LOCAL_OBJECT_START(atan2f_coef_table2)
342 data8 0xBF7DEAADAA336451 // p9 (code reaches this table as table1 address + 0x30, so it must directly follow table1)
343 data8 0x3F97105B4160F86B // p8
344 data8 0xBFB68EED6A8CFA32 // p5
345 data8 0x3FBC4F512B1865F5 // p4
346 data8 0x3ff921fb54442d18 // pi/2 (loaded together with pi)
347 data8 0x400921fb54442d18 // pi
348 data8 0x3fe921fb54442d18 // pi/4 (loaded together with 3pi/4)
349 data8 0x4002d97c7f3321d2 // 3pi/4
350 LOCAL_OBJECT_END(atan2f_coef_table2)
351
352
353
354 .section .text // atan2f main path: y arrives in f8, x in f9, result is returned in f8
355 GLOBAL_IEEE754_ENTRY(atan2f)
356
357 { .mfi
358 alloc r32 = ar.pfs,1,5,4,0 // 1 input, 5 locals (r33-r37), 4 outputs (r38-r41)
359 frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
360 nop.i 999
361 }
362 { .mfi
363 addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp // Linkage-table slot for table1
364 fma.s1 atan2f_xsq = f9,f9,f0 // xsq = x*x
365 nop.i 999 ;;
366 }
367
368
369 { .mfi
370 ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] // Fetch the address of table1 from that slot
371 frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
372 nop.i 999
373 }
374 { .mfi
375 nop.m 999
376 fma.s1 atan2f_ysq = f8,f8,f0 // ysq = y*y
377 nop.i 999 ;;
378 }
379
380 { .mfi
381 nop.m 999
382 fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
383 nop.i 999
384 }
385 { .mfi
386 nop.m 999
387 fma.s1 atan2f_xy = f9,f8,f0 // xy = x*y
388 nop.i 999 ;;
389 }
390
391
392 { .mfi
393 add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 // Skip the 6 doubles of table1
394 fmerge.s atan2f_sgn_Y = f8,f1 // sgn_Y = 1.0 with the sign of y
395 nop.i 999 ;;
396 }
397
398 { .mmf
399 ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
400 ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
401 fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
402 }
403 ;;
404
405 { .mfi
406 ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
407 fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8 // T_denom = y + Z0*xsq
408 nop.i 999
409 }
410 { .mfi
411 ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
412 fma.s1 atan2f_Z = atan2f_Z0,f9,f0 // Z = Z0*x
413 nop.i 999 ;;
414 }
415
416
417 { .mfi
418 ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
419 fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9 // G_denom = x + A0*ysq
420 nop.i 999
421 }
422 { .mfi
423 ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
424 fma.s1 atan2f_A = atan2f_A0,f8,f0 // A = A0*y
425 nop.i 999 ;;
426 }
427
428 { .mfi
429 ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
430 fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
431 nop.i 999
432 }
433 { .mfb
434 nop.m 999
435 fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9 // T_numer = x - Z0*xy
436 (p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
437 }
438
439
440 // p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
441 { .mfi
442 nop.m 999
443 fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
444 nop.i 999
445 }
446 { .mfb
447 nop.m 999
448 fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8 // G_numer = y - A0*xy
449 (p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
450 }
451
452
453 { .mfi
454 nop.m 999
455 (p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0 // x >= 0: const_1 = sgn(y)*0
456 nop.i 999
457 }
458 { .mfi
459 nop.m 999
460 (p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0 // x < 0: const_1 = sgn(y)*1
461 nop.i 999 ;;
462 }
463
464
465 { .mfi
466 nop.m 999
467 (p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0 // Swap: U = -Z
468 nop.i 999
469 }
470 { .mfi
471 nop.m 999
472 (p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0 // Swap: Usq = Z*Z
473 nop.i 999 ;;
474 }
475
476
477 { .mfi
478 nop.m 999
479 (p7) fma.s1 atan2f_U = atan2f_A,f1,f0 // No swap: U = A
480 nop.i 999
481 }
482 { .mfi
483 nop.m 999
484 (p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0 // No swap: Usq = A*A
485 nop.i 999 ;;
486 }
487
488
489 { .mfi
490 nop.m 999
491 (p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom // Swap: Q1 ~ 1/T_denom
492 nop.i 999
493 }
494 { .mfi
495 nop.m 999
496 (p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0 // Swap: R_denom = T_denom
497 nop.i 999 ;;
498 }
499
500
501 { .mfi
502 nop.m 999
503 (p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom // No swap: Q1 ~ 1/G_denom
504 nop.i 999
505 }
506 { .mfi
507 nop.m 999
508 (p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0 // No swap: R_denom = G_denom
509 nop.i 999 ;;
510 }
511
512
513 { .mfi
514 nop.m 999
515 (p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0 // Swap: R_numer = -T_numer
516 nop.i 999
517 }
518 { .mfi
519 nop.m 999
520 (p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0 // No swap: R_numer = G_numer
521 nop.i 999 ;;
522 }
523
524
525 { .mfi
526 nop.m 999
527 (p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0 // Swap: p1rnum = -T_numer*p1
528 nop.i 999 ;;
529 }
530 { .mfi
531 nop.m 999
532 (p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0 // No swap: p1rnum = G_numer*p1
533 nop.i 999 ;;
534 }
535
536
537 { .mfi
538 nop.m 999
539 fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0 // U4 = Usq*Usq
540 nop.i 999
541 }
542 { .mfi
543 nop.m 999
544 fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9 // p9 + Usq*p10
545 nop.i 999 ;;
546 }
547
548 { .mfi
549 nop.m 999
550 fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7 // p7 + Usq*p8
551 nop.i 999
552 }
553 { .mfi
554 nop.m 999
555 fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5 // p5 + Usq*p6
556 nop.i 999 ;;
557 }
558
559
560 { .mfi
561 nop.m 999
562 fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3 // p3 + Usq*p4
563 nop.i 999
564 }
565 { .mfi
566 nop.m 999
567 fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1 // Q_beta = 1 - Q1*R_denom
568 nop.i 999 ;;
569 }
570
571
572 { .mfi
573 nop.m 999
574 fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1 // p1 + Usq*p2
575 nop.i 999
576 }
577 { .mfi
578 nop.m 999
579 fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0 // r = Q1*R_numer
580 nop.i 999 ;;
581 }
582
583 { .mfi
584 nop.m 999
585 (p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0 // Swap: C = sgn(y)*pi/2
586 nop.i 999
587 }
588 { .mfi
589 nop.m 999
590 (p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0 // No swap: C = const_1*pi
591 nop.i 999 ;;
592 }
593
594 { .mfi
595 nop.m 999
596 fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0 // U6 = U4*Usq
597 nop.i 999
598 }
599 { .mfi
600 nop.m 999
601 fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0 // U8 = U4*U4
602 nop.i 999 ;;
603 }
604
605 { .mfi
606 nop.m 999
607 fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87 // poly_u87 + U4*poly_u109
608 nop.i 999
609 }
610 { .mfi
611 nop.m 999
612 fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 // pR = p1rnum*Q1
613 nop.i 999 ;;
614 }
615
616 { .mfi
617 nop.m 999
618 fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43 // poly_u43 + U4*poly_u65
619 nop.i 999
620 }
621 { .mfi
622 nop.m 999
623 fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 // Q2 = Q1 + Q1*Q_beta
624 nop.i 999 ;;
625 }
626
627 { .mfi
628 nop.m 999
629 fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0 // Q_beta2 = Q_beta*Q_beta
630 nop.i 999
631 }
632 { .mfi
633 nop.m 999
634 fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0 // rsq = r*r
635 nop.i 999 ;;
636 }
637
638 { .mfi
639 nop.m 999
640 fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 // 1 + Usq*poly_u21
641 nop.i 999 ;;
642 }
643
644 { .mfi
645 nop.m 999
646 fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
647 nop.i 999
648 }
649 { .mfi
650 nop.m 999
651 fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3 // poly_u6to3 + U8*poly_u10to7
652 nop.i 999 ;;
653 }
654
655 { .mfi
656 nop.m 999
657 fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 // Q3 = Q2 + Q2*Q_beta2
658 nop.i 999
659 }
660 { .mfi
661 nop.m 999
662 fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C // pRC = C + rsq*pR
663 nop.i 999 ;;
664 }
665
666 { .mfi
667 nop.m 999
668 fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210 // poly_u210 + U6*poly_u10to3
669 nop.i 999 ;;
670 }
671
672 { .mfi
673 nop.m 999
674 fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC // pQRC = pRC + R_numer*Q3
675 nop.i 999 ;;
676 }
677
678 { .mfb
679 nop.m 999
680 fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC // Result = pQRC + U*poly_u10to0, single precision
681 br.ret.sptk b0 ;;
682 }
683
684
685
686 ATAN2F_XY_INF_NAN_ZERO: // Special-case path, entered when x or y is nan, inf or zero
687
688 { .mfi
689 nop.m 999
690 fclass.m p10,p0 = f8,0xc3 // Is y nan
691 nop.i 999
692 }
693 ;;
694
695 { .mfi
696 nop.m 999
697 fclass.m p12,p0 = f9,0xc3 // Is x nan
698 nop.i 999
699 }
700 ;;
701
702 { .mfi
703 nop.m 999
704 fclass.m p6,p0 = f9,0x21 // Is x +inf
705 nop.i 999
706 }
707 { .mfb
708 nop.m 999
709 (p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan
710 (p10) br.ret.spnt b0 // Exit if y is nan
711 }
712 ;;
713
714
715 { .mfi
716 nop.m 999
717 (p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf
718 nop.i 999
719 }
720 { .mfb
721 nop.m 999
722 (p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan
723 (p12) br.ret.spnt b0 // Exit if x is nan, y not nan
724 }
725 ;;
726
727 // Here if x or y inf, or x or y zero
728 { .mfi
729 nop.m 999
730 fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs
731 nop.i 999
732 }
733 ;;
734
735 { .mfi
736 nop.m 999
737 fclass.m p11,p12 = f9,0x22 // Is x -inf
738 nop.i 999
739 }
740 { .mfb
741 nop.m 999
742 (p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
743 (p7) br.ret.spnt b0 // Exit if x +inf and y inf
744 }
745 ;;
746
747 { .mfb
748 nop.m 999
749 (p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0
750 (p8) br.ret.spnt b0 // Exit if x +inf and y not inf
751 }
752 ;;
753
754 { .mfi
755 nop.m 999
756 (p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf
757 nop.i 999
758 }
759 ;;
760
761 { .mfi
762 nop.m 999
763 (p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf
764 nop.i 999
765 }
766 ;;
767
768 { .mfi
769 nop.m 999
770 fclass.m p6,p7 = f9,0x7 // Is x zero
771 nop.i 999
772 }
773 { .mfb
774 nop.m 999
775 (p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
776 (p13) br.ret.spnt b0 // Exit if x not -inf and y inf
777 }
778 ;;
779
780 { .mfi
781 nop.m 999
782 (p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // x -inf and y inf, result +-3pi/4
783 nop.i 999
784 }
785 { .mfb
786 nop.m 999
787 (p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x -inf and y not inf, result +-pi
788 (p11) br.ret.spnt b0 // Exit if x -inf
789 }
790 ;;
791
792 // Here if x or y zero
793 { .mfi
794 nop.m 999
795 (p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero
796 nop.i 999
797 }
798 ;;
799
800 { .mfi
801 nop.m 999
802 (p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero
803 nop.i 999
804 }
805 ;;
806
807 { .mfi
808 nop.m 999
809 (p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero
810 nop.i 999
811 }
812 { .mfb
813 nop.m 999
814 (p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
815 (p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
816 }
817 ;;
818
819 { .mfb
820 nop.m 999
821 (p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero, result +-pi/2
822 br.ret.sptk b0 // Final special case exit
823 }
824 ;;
825
826
827 GLOBAL_IEEE754_END(atan2f)
828 libm_alias_float_other (__atan2, atan2)
829
830
831 LOCAL_LIBM_ENTRY(__libm_error_region)
832 .prologue
833 mov GR_Parameter_TAG = 38 // Error tag 38: atan2f(+-0,+-0), see header comment
834 fclass.m p10,p11 = f9,0x5 // @zero | @pos
835 ;;
836 (p10) fmerge.s f10 = f8, f0 // x is +0: proposed result is zero with the sign of f8
837 (p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0 // otherwise: proposed result is sgn(y)*pi
838 ;;
839
840 { .mfi
841 add GR_Parameter_Y=-32,sp // Parameter 2 value
842 nop.f 999
843 .save ar.pfs,GR_SAVE_PFS
844 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
845 }
846
847 { .mfi
848 .fframe 64
849 add sp=-64,sp // Create new stack
850 nop.f 0
851 mov GR_SAVE_GP=gp // Save gp
852 }
853 ;;
854
855 { .mmi
856 stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack
857 add GR_Parameter_X = 16,sp // Parameter 1 address
858 .save b0, GR_SAVE_B0
859 mov GR_SAVE_B0=b0 // Save b0
860 }
861 ;;
862
863
864 .body
865 { .mib
866 stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
867 add GR_Parameter_RESULT = 0,GR_Parameter_Y
868 nop.b 0 // Parameter 3 address
869 }
870 { .mib
871 stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
872 add GR_Parameter_Y = -16,GR_Parameter_Y
873 br.call.sptk b0=__libm_error_support# // Call error handling function
874 }
875 ;;
876 { .mmi
877 nop.m 0
878 nop.m 0
879 add GR_Parameter_RESULT = 48,sp // Recompute the result slot address after the call
880 };;
881
882 { .mmi
883 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
884 .restore sp
885 add sp = 64,sp // Restore stack pointer
886 mov b0 = GR_SAVE_B0 // Restore return address
887 }
888 ;;
889
890 { .mib
891 mov gp = GR_SAVE_GP // Restore gp
892 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
893 br.ret.sptk b0 // Return
894 }
895 ;;
896
897 LOCAL_LIBM_END(__libm_error_region)
898
899 .type __libm_error_support#,@function // Handler is not defined in this file
900 .global __libm_error_support#