libgcc/config/avr/lib1funcs-fixed.S

   1 /*  -*- Mode: Asm -*-  */
   2 ;;    Copyright (C) 2012-2013
   3 ;;    Free Software Foundation, Inc.
   4 ;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
   5 ;;                   Georg-Johann Lay (avr@gjlay.de)
   6
   7 ;; This file is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by the
   9 ;; Free Software Foundation; either version 3, or (at your option) any
  10 ;; later version.
  11
  12 ;; In addition to the permissions in the GNU General Public License, the
  13 ;; Free Software Foundation gives you unlimited permission to link the
  14 ;; compiled version of this file into combinations with other programs,
  15 ;; and to distribute those combinations without any restriction coming
  16 ;; from the use of this file.  (The General Public License restrictions
  17 ;; do apply in other respects; for example, they cover modification of
  18 ;; the file, and distribution when not linked into a combine
  19 ;; executable.)
  20
  21 ;; This file is distributed in the hope that it will be useful, but
  22 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  24 ;; General Public License for more details.
  25
  26 ;; You should have received a copy of the GNU General Public License
  27 ;; along with this program; see the file COPYING.  If not, write to
  28 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  29 ;; Boston, MA 02110-1301, USA.
  30
  31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  32 ;; Fixed point library routines for AVR
  33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  34
  35 .section .text.libgcc.fixed, "ax", @progbits
  36
  37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  38 ;; Conversions to float
  39 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  40
  41 #if defined (L_fractqqsf)
     ;;; __fractqqsf: widen the signed 8-bit input in R24 to the 32-bit
     ;;; register group R25:R22 and continue in __fractsasf.
     ;;; In:  R24
     ;;; Out: whatever __fractsasf produces; this stub leaves via XJMP
     ;;;      and has no return of its own.
  42 DEFUN __fractqqsf
  43     ;; Move in place for SA -> SF conversion
  44     clr     r22
  45     mov     r23, r24
  46     ;; Sign-extend
         ;; lsl moves the sign bit into Carry,  sbc Rd,Rd  then yields
         ;; 0x00 for Carry = 0 and 0xff for Carry = 1.
  47     lsl     r24
  48     sbc     r24, r24
  49     mov     r25, r24
  50     XJMP    __fractsasf
  51 ENDF __fractqqsf
  52 #endif  /* L_fractqqsf */
  53
  54 #if defined (L_fractuqqsf)
     ;;; __fractuqqsf: unsigned counterpart of __fractqqsf.
     ;;; The input byte R24 is moved to R23; R22 and R25:R24 are cleared.
     ;;; Execution continues in __fractusasf (no return here).
  55 DEFUN __fractuqqsf
  56     ;; Move in place for USA -> SF conversion
  57     clr     r22
  58     mov     r23, r24
  59     ;; Zero-extend
  60     clr     r24
  61     clr     r25
  62     XJMP    __fractusasf
  63 ENDF __fractuqqsf
  64 #endif  /* L_fractuqqsf */
  65
  66 #if defined (L_fracthqsf)
     ;;; __fracthqsf: widen the signed 16-bit input in R25:R24 to the
     ;;; 32-bit register group R25:R22 and continue in __fractsasf.
     ;;; wmov is a macro defined outside this section; it is used here
     ;;; to copy the pair R25:R24 to R23:R22.
  67 DEFUN __fracthqsf
  68     ;; Move in place for SA -> SF conversion
  69     wmov    22, 24
  70     ;; Sign-extend
         ;; Carry = sign bit of the high byte, then spread over R25:R24.
  71     lsl     r25
  72     sbc     r24, r24
  73     mov     r25, r24
  74     XJMP    __fractsasf
  75 ENDF __fracthqsf
  76 #endif  /* L_fracthqsf */
  77
  78 #if defined (L_fractuhqsf)
     ;;; __fractuhqsf: unsigned counterpart of __fracthqsf.
     ;;; The 16-bit input R25:R24 is copied to R23:R22 and R25:R24 is
     ;;; cleared; execution continues in __fractusasf (no return here).
  79 DEFUN __fractuhqsf
  80     ;; Move in place for USA -> SF conversion
  81     wmov    22, 24
  82     ;; Zero-extend
  83     clr     r24
  84     clr     r25
  85     XJMP    __fractusasf
  86 ENDF __fractuhqsf
  87 #endif  /* L_fractuhqsf */
  88
  89 #if defined (L_fracthasf)
     ;;; __fracthasf: widen the signed 16-bit input in R25:R24 to the
     ;;; 32-bit register group R25:R22.  The input is moved up by one
     ;;; byte (R24 -> R23, R25 -> R24), R22 is cleared and R25 receives
     ;;; the sign.  Execution continues in __fractsasf (no return here).
  90 DEFUN __fracthasf
  91     ;; Move in place for SA -> SF conversion
  92     clr     r22
  93     mov     r23, r24
  94     mov     r24, r25
  95     ;; Sign-extend
         ;; R25 still holds the original high byte; its sign bit goes to
         ;; Carry and is then spread over R25.
  96     lsl     r25
  97     sbc     r25, r25
  98     XJMP    __fractsasf
  99 ENDF __fracthasf
 100 #endif  /* L_fracthasf */
 101
 102 #if defined (L_fractuhasf)
     ;;; __fractuhasf: unsigned counterpart of __fracthasf.
     ;;; The 16-bit input R25:R24 is moved up by one byte into R24:R23,
     ;;; R22 and R25 are cleared; execution continues in __fractusasf.
 103 DEFUN __fractuhasf
 104     ;; Move in place for USA -> SF conversion
 105     clr     r22
 106     mov     r23, r24
 107     mov     r24, r25
 108     ;; Zero-extend
 109     clr     r25
 110     XJMP    __fractusasf
 111 ENDF __fractuhasf
 112 #endif  /* L_fractuhasf */
 113
 114
 115 #if defined (L_fractsqsf)
     ;;; __fractsqsf: convert via __floatsisf, then scale the float result
     ;;; by adjusting its two top bytes R25:R24.
     ;;; exp_lo / exp_hi are macros defined outside this section;
     ;;; presumably they give the low / high byte of the exponent delta
     ;;; as it sits in R24 / R25 -- confirm at their definition.
 116 DEFUN __fractsqsf
 117     XCALL   __floatsisf
 118     ;; Divide non-zero results by 2^31 to move the
 119     ;; decimal point into place
         ;; The result is taken to be zero when its top byte R25 is zero;
         ;; in that case it is returned unchanged.
 120     tst     r25
 121     breq    0f
 122     subi    r24, exp_lo (31)
 123     sbci    r25, exp_hi (31)
 124 0:  ret
 125 ENDF __fractsqsf
 126 #endif  /* L_fractsqsf */
 127
 128 #if defined (L_fractusqsf)
     ;;; __fractusqsf: convert via __floatunsisf, then scale the float
     ;;; result by adjusting its top byte R25.
     ;;; Only R25 is changed here, unlike the signed variants which also
     ;;; adjust R24.  NOTE(review): this presumably works because the
     ;;; low-byte part of an even exponent delta is zero -- confirm
     ;;; against the definition of exp_lo / exp_hi.
 129 DEFUN __fractusqsf
 130     XCALL   __floatunsisf
 131     ;; Divide non-zero results by 2^32 to move the
 132     ;; decimal point into place
         ;; cpse skips the subtraction when R25 equals __zero_reg__.
 133     cpse    r25, __zero_reg__
 134     subi    r25, exp_hi (32)
 135     ret
 136 ENDF __fractusqsf
 137 #endif  /* L_fractusqsf */
 138
 139 #if defined (L_fractsasf)
     ;;; __fractsasf: convert via __floatsisf, then scale the float result
     ;;; by adjusting its two top bytes R25:R24.
     ;;; This is also the common tail of the signed widening stubs
     ;;; __fractqqsf, __fracthqsf and __fracthasf, which jump here.
 140 DEFUN __fractsasf
 141     XCALL   __floatsisf
 142     ;; Divide non-zero results by 2^15 to move the
 143     ;; decimal point into place
         ;; A result whose top byte R25 is zero is returned unchanged.
 144     tst     r25
 145     breq    0f
 146     subi    r24, exp_lo (15)
 147     sbci    r25, exp_hi (15)
 148 0:  ret
 149 ENDF __fractsasf
 150 #endif  /* L_fractsasf */
 151
 152 #if defined (L_fractusasf)
     ;;; __fractusasf: convert via __floatunsisf, then scale the float
     ;;; result by adjusting its top byte R25.
     ;;; This is also the common tail of the unsigned widening stubs
     ;;; __fractuqqsf, __fractuhqsf and __fractuhasf, which jump here.
 153 DEFUN __fractusasf
 154     XCALL   __floatunsisf
 155     ;; Divide non-zero results by 2^16 to move the
 156     ;; decimal point into place
         ;; cpse skips the subtraction when R25 equals __zero_reg__.
 157     cpse    r25, __zero_reg__
 158     subi    r25, exp_hi (16)
 159     ret
 160 ENDF __fractusasf
 161 #endif  /* L_fractusasf */
 162
 163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 164 ;; Conversions from float
 165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 166
 167 #if defined (L_fractsfqq)
     ;;; __fractsfqq: scale the float argument by adjusting its two top
     ;;; bytes R25:R24, convert it with __fixsfsi and return the top
     ;;; byte of the 32-bit integer result in R24.
 168 DEFUN __fractsfqq
 169     ;; Multiply with 2^{24+7} to get a QQ result in r25
 170     subi    r24, exp_lo (-31)
 171     sbci    r25, exp_hi (-31)
 172     XCALL   __fixsfsi
         ;; The wanted byte is the most significant one; move it to the
         ;; return register.
 173     mov     r24, r25
 174     ret
 175 ENDF __fractsfqq
 176 #endif  /* L_fractsfqq */
 177
 178 #if defined (L_fractsfuqq)
     ;;; __fractsfuqq: scale the float argument by adjusting its top byte
     ;;; R25, convert it with __fixunssfsi and return the top byte of
     ;;; the 32-bit integer result in R24.
 179 DEFUN __fractsfuqq
 180     ;; Multiply with 2^{24+8} to get a UQQ result in r25
 181     subi    r25, exp_hi (-32)
 182     XCALL   __fixunssfsi
 183     mov     r24, r25
 184     ret
 185 ENDF __fractsfuqq
 186 #endif  /* L_fractsfuqq */
 187
 188 #if defined (L_fractsfha)
     ;;; __fractsfha: scale the float argument by adjusting its two top
     ;;; bytes R25:R24, then continue in __fixsfsi, whose result is
     ;;; returned directly (no return here).
 189 DEFUN __fractsfha
 190     ;; Multiply with 2^{16+7} to get a HA result in r25:r24
 191     subi    r24, exp_lo (-23)
 192     sbci    r25, exp_hi (-23)
 193     XJMP    __fixsfsi
 194 ENDF __fractsfha
 195 #endif  /* L_fractsfha */
 196
 197 #if defined (L_fractsfuha)
     ;;; __fractsfuha: scale the float argument by adjusting its top byte
     ;;; R25, then continue in __fixunssfsi, whose result is returned
     ;;; directly (no return here).
 198 DEFUN __fractsfuha
 199     ;; Multiply with 2^24 to get a UHA result in r25:r24
 200     subi    r25, exp_hi (-24)
 201     XJMP    __fixunssfsi
 202 ENDF __fractsfuha
 203 #endif  /* L_fractsfuha */
 204
 205 #if defined (L_fractsfhq)
     ;;; __fractsfhq, also entered under the name __fractsfsq (FALIAS is
     ;;; a macro defined outside this section).  One body serves both
     ;;; result widths: the same 32-bit result of __fixsfsi is read as
     ;;; R25:R24 for HQ and as R25:R22 for SQ.
 206 FALIAS __fractsfsq
 207
 208 DEFUN __fractsfhq
 209     ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
 210     ;; resp. with 2^31 to get a SQ result in r25:r22
 211     subi    r24, exp_lo (-31)
 212     sbci    r25, exp_hi (-31)
 213     XJMP    __fixsfsi
 214 ENDF __fractsfhq
 215 #endif  /* L_fractsfhq */
 216
 217 #if defined (L_fractsfuhq)
     ;;; __fractsfuhq, also entered under the name __fractsfusq.  One body
     ;;; serves both result widths: the same 32-bit result of
     ;;; __fixunssfsi is read as R25:R24 for UHQ and as R25:R22 for USQ.
 218 FALIAS __fractsfusq
 219
 220 DEFUN __fractsfuhq
 221     ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
 222     ;; resp. with 2^32 to get a USQ result in r25:r22
 223     subi    r25, exp_hi (-32)
 224     XJMP    __fixunssfsi
 225 ENDF __fractsfuhq
 226 #endif  /* L_fractsfuhq */
 227
 228 #if defined (L_fractsfsa)
     ;;; __fractsfsa: scale the float argument by adjusting its two top
     ;;; bytes R25:R24, then continue in __fixsfsi, whose result is
     ;;; returned directly (no return here).
 229 DEFUN __fractsfsa
 230     ;; Multiply with 2^15 to get a SA result in r25:r22
 231     subi    r24, exp_lo (-15)
 232     sbci    r25, exp_hi (-15)
 233     XJMP    __fixsfsi
 234 ENDF __fractsfsa
 235 #endif  /* L_fractsfsa */
 236
 237 #if defined (L_fractsfusa)
     ;;; __fractsfusa: scale the float argument by adjusting its top byte
     ;;; R25, then continue in __fixunssfsi, whose result is returned
     ;;; directly (no return here).
 238 DEFUN __fractsfusa
 239     ;; Multiply with 2^16 to get a USA result in r25:r22
 240     subi    r25, exp_hi (-16)
 241     XJMP    __fixunssfsi
 242 ENDF __fractsfusa
 243 #endif  /* L_fractsfusa */
 244
 245
 246 ;; For multiplication the functions here are called directly from
 247 ;; avr-fixed.md instead of using the standard libcall mechanisms.
 248 ;; This can make better code because GCC knows exactly which
 249 ;; of the call-used registers (not all of them) are clobbered.
 250
 251 /*******************************************************
 252     Fractional  Multiplication  8 x 8  without MUL
 253 *******************************************************/
 254
 255 #if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
 256 ;;; R23 = R24 * R25
 257 ;;; Clobbers: __tmp_reg__, R22, R24, R25
 258 ;;; Rounding: ???
     ;;; The product itself is computed by __fmuls; this wrapper only
     ;;; patches the single result value 0x80 to 0x7f.
 259 DEFUN __mulqq3
 260     XCALL   __fmuls
 261     ;; TR 18037 requires that  (-1) * (-1)  does not overflow
 262     ;; The only input that can produce  -1  is  (-1)^2.
         ;; dec sets V only when R23 was 0x80; in that case the decremented
         ;; value 0x7f is kept.  For every other value inc undoes the dec.
 263     dec     r23
 264     brvs    0f
 265     inc     r23
 266 0:  ret
 267 ENDF  __mulqq3
 268 #endif /* L_mulqq3 && ! HAVE_MUL */
 269
 270 /*******************************************************
 271     Fractional Multiply  .16 x .16  with and without MUL
 272 *******************************************************/
 273
 274 #if defined (L_mulhq3)
 275 ;;; Same code with and without MUL, but the interfaces differ:
 276 ;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
 277 ;;;         Clobbers: ABI, called by optabs
 278 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
 279 ;;;         Clobbers: __tmp_reg__, R22, R23
 280 ;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
     ;;; The code below treats R25:R22 as the 32-bit product left behind
     ;;; by __mulhisi3 and returns its upper half after a 1-bit left
     ;;; shift.
 281 DEFUN   __mulhq3
 282     XCALL   __mulhisi3
 283     ;; Shift result into place
         ;; Only R25:R23 take part in the shift: R22 cannot reach the
         ;; result or the rounding bit.
 284     lsl     r23
 285     rol     r24
 286     rol     r25
         ;; V is set when the bit shifted out of R25 differs from the
         ;; new sign bit, i.e. the shifted value does not fit.
 287     brvs    1f
 288     ;; Round
         ;; Bit 7 of R23 is the first bit below the result.
 289     sbrc    r23, 7
 290     adiw    r24, 1
 291     ret
 292 1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
 293     ldi     r24, lo8 (0x7fff)
 294     ldi     r25, hi8 (0x7fff)
 295     ret
 296 ENDF __mulhq3
 297 #endif  /* defined (L_mulhq3) */
 298
 299 #if defined (L_muluhq3)
 300 ;;; Same code with and without MUL, but the interfaces differ:
 301 ;;; no MUL: (R25:R24) *= (R23:R22)
 302 ;;;         Clobbers: ABI, called by optabs
 303 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
 304 ;;;         Clobbers: __tmp_reg__, R22, R23
 305 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
     ;;; The code below treats R25:R22 as the 32-bit product left behind
     ;;; by __umulhisi3 and returns its upper half R25:R24.
 306 DEFUN   __muluhq3
 307     XCALL   __umulhisi3
 308     ;; Round
         ;; Bit 7 of R23 is the first bit below the result.
 309     sbrc    r23, 7
 310     adiw    r24, 1
 311     ret
 312 ENDF __muluhq3
 313 #endif  /* L_muluhq3 */
 314
 315
 316 /*******************************************************
 317     Fixed  Multiply  8.8 x 8.8  with and without MUL
 318 *******************************************************/
 319
 320 #if defined (L_mulha3)
 321 ;;; Same code with and without MUL, but the interfaces differ:
 322 ;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
 323 ;;;         Clobbers: ABI, called by optabs
 324 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
 325 ;;;         Clobbers: __tmp_reg__, R22, R23
 326 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
     ;;; The signed product from __mulhisi3 is shifted left by one bit in
     ;;; R24:R22; __muluha3_round then takes R24:R23 as the result and
     ;;; rounds with bit 7 of R22.
 327 DEFUN   __mulha3
 328     XCALL   __mulhisi3
 329     lsl     r22
 330     rol     r23
 331     rol     r24
 332     XJMP    __muluha3_round
 333 ENDF __mulha3
 334 #endif  /* L_mulha3 */
 335
 336 #if defined (L_muluha3)
 337 ;;; Same code with and without MUL, but the interfaces differ:
 338 ;;; no MUL: (R25:R24) *= (R23:R22)
 339 ;;;         Clobbers: ABI, called by optabs
 340 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
 341 ;;;         Clobbers: __tmp_reg__, R22, R23
 342 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
     ;;; The unsigned product from __umulhisi3 is handed unchanged to
     ;;; __muluha3_round, which extracts and rounds the result.
 343 DEFUN   __muluha3
 344     XCALL   __umulhisi3
 345     XJMP    __muluha3_round
 346 ENDF __muluha3
 347 #endif  /* L_muluha3 */
 348
 349 #if defined (L_muluha3_round)
     ;;; __muluha3_round: common tail of __mulha3 and __muluha3.
     ;;; In:  R24:R22
     ;;; Out: R25:R24 = input bytes R24:R23, incremented by 1 if bit 7
     ;;;      of R22 is set.
 350 DEFUN   __muluha3_round
 351     ;; Shift result into place
 352     mov     r25, r24
 353     mov     r24, r23
 354     ;; Round
 355     sbrc    r22, 7
 356     adiw    r24, 1
 357     ret
 358 ENDF __muluha3_round
 359 #endif  /* L_muluha3_round */
 360
 361
 362 /*******************************************************
 363     Fixed  Multiplication  16.16 x 16.16
 364 *******************************************************/
 365
 366 ;; Bits outside the result (below LSB), used in the signed version
 367 #define GUARD __tmp_reg__
 368
 369 #if defined (__AVR_HAVE_MUL__)
 370
     ;; Register numbers used by the MUL variants below:
     ;; A = R19:R16,  B = R23:R20,  C = R27:R24
 371 ;; Multiplier
 372 #define A0  16
 373 #define A1  A0+1
 374 #define A2  A1+1
 375 #define A3  A2+1
 376
 377 ;; Multiplicand
 378 #define B0  20
 379 #define B1  B0+1
 380 #define B2  B1+1
 381 #define B3  B2+1
 382
 383 ;; Result
 384 #define C0  24
 385 #define C1  C0+1
 386 #define C2  C1+1
 387 #define C3  C2+1
 388
 389 #if defined (L_mulusa3)
 390 ;;; (C3:C0) = (A3:A0) * (B3:B0)
     ;;; Entry point for the rounding variant: sets T and falls through
     ;;; into __mulusa3_round.
 391 DEFUN __mulusa3
 392     set
 393     ;; Fallthru
 394 ENDF  __mulusa3
 395
 396 ;;; Round for last digit iff T = 1
 397 ;;; Return guard bits in GUARD (__tmp_reg__).
 398 ;;; Rounding, T = 0:  -1.0 LSB  <  error  <=  0   LSB
 399 ;;; Rounding, T = 1:  -0.5 LSB  <  error  <=  0.5 LSB
     ;;; The product is built from partial 8 x 8 products, grouped by
     ;;; byte weight.  A3 * B3 is not computed.  Each mul writes R1:R0,
     ;;; which is why __zero_reg__ is cleared again before returning.
     ;;; $ separates several instructions written on one line.
 400 DEFUN __mulusa3_round
 401     ;; Some of the MUL instructions have LSBs outside the result.
 402     ;; Don't ignore these LSBs in order to tame rounding error.
 403     ;; Use C2/C3 for these LSBs.
 404
 405     clr C0
 406     clr C1
 407     mul A0, B0  $  movw C2, r0
 408
         ;; rol moves the carry of the second addition into the cleared C1.
 409     mul A1, B0  $  add  C3, r0  $  adc C0, r1
 410     mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1
 411
 412     ;; Round if T = 1.  Store guarding bits outside the result for rounding
 413     ;; and left-shift by the signed version (function below).
 414     brtc 0f
 415     sbrc C3, 7
 416     adiw C0, 1
 417 0:  push C3
 418
 419     ;; The following MULs don't have LSBs outside the result.
 420     ;; C2/C3 is the high part.
 421
         ;; Carries into the next byte are counted negatively:
         ;; sbc Rd,Rd  starts at  -Carry,  each  sbci Rd,0  subtracts one
         ;; more carry, and the final neg turns the count positive.
 422     mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
 423     mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
 424     mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
 425     neg  C2
 426
 427     mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
 428     mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
 429     mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
 430     mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
 431     neg  C3
 432
         ;; Carries out of C3 are dropped from here on.
 433     mul  A1, B3  $  add C2, r0  $  adc C3, r1
 434     mul  A2, B2  $  add C2, r0  $  adc C3, r1
 435     mul  A3, B1  $  add C2, r0  $  adc C3, r1
 436
         ;; Only the low byte of these products lies inside the result.
 437     mul  A2, B3  $  add C3, r0
 438     mul  A3, B2  $  add C3, r0
 439
 440     ;; Guard bits used in the signed version below.
 441     pop  GUARD
 442     clr  __zero_reg__
 443     ret
 444 ENDF __mulusa3_round
 445 #endif /* L_mulusa3 */
 446
 447 #if defined (L_mulsa3)
 448 ;;; (C3:C0) = (A3:A0) * (B3:B0)
 449 ;;; Clobbers: __tmp_reg__, T
 450 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
     ;;; The unsigned routine is called with T = 0, so it does not round
     ;;; but still returns its guard byte in GUARD.  The unsigned product
     ;;; is then corrected for negative operands, shifted left by one bit
     ;;; and rounded.
 451 DEFUN __mulsa3
 452     clt
 453     XCALL   __mulusa3_round
 454     ;; A posteriori sign extension of the operands
         ;; If B is negative, the low half of A is subtracted from the
         ;; high half of C ...
 455     tst     B3
 456     brpl 1f
 457     sub     C2, A0
 458     sbc     C3, A1
         ;; ... and likewise the low half of B if A is negative.
 459 1:  sbrs    A3, 7
 460     rjmp 2f
 461     sub     C2, B0
 462     sbc     C3, B1
 463 2:
 464     ;;  Shift 1 bit left to adjust for 15 fractional bits
         ;; The top bit of GUARD becomes the new lowest result bit.
 465     lsl     GUARD
 466     rol     C0
 467     rol     C1
 468     rol     C2
 469     rol     C3
 470     ;; Round last digit
         ;; The next guard bit goes to Carry and is added to the result.
 471     lsl     GUARD
 472     adc     C0, __zero_reg__
 473     adc     C1, __zero_reg__
 474     adc     C2, __zero_reg__
 475     adc     C3, __zero_reg__
 476     ret
 477 ENDF __mulsa3
 478 #endif /* L_mulsa3 */
 479
 480 #undef A0
 481 #undef A1
 482 #undef A2
 483 #undef A3
 484 #undef B0
 485 #undef B1
 486 #undef B2
 487 #undef B3
 488 #undef C0
 489 #undef C1
 490 #undef C2
 491 #undef C3
 492
 493 #else /* __AVR_HAVE_MUL__ */
 494
     ;; Register numbers used by the variants without MUL:
     ;; A  = R21:R18
     ;; B  = R25:R22,  C = R25:R22  (B and C share the same registers)
     ;; CC = R17:R16:R1:R0
     ;; AA = R31:R30:R27:R26
 495 #define A0 18
 496 #define A1 A0+1
 497 #define A2 A0+2
 498 #define A3 A0+3
 499
 500 #define B0 22
 501 #define B1 B0+1
 502 #define B2 B0+2
 503 #define B3 B0+3
 504
 505 #define C0  22
 506 #define C1  C0+1
 507 #define C2  C0+2
 508 #define C3  C0+3
 509
 510 ;; __tmp_reg__
 511 #define CC0  0
 512 ;; __zero_reg__
 513 #define CC1  1
 514 #define CC2  16
 515 #define CC3  17
 516
 517 #define AA0  26
 518 #define AA1  AA0+1
 519 #define AA2  30
 520 #define AA3  AA2+1
 521
 522 #if defined (L_mulsa3)
 523 ;;; (R25:R22)  *=  (R21:R18)
 524 ;;; Clobbers: ABI, called by optabs
 525 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
     ;;; B0, B1 and B3 are saved on the stack because B shares its
     ;;; registers with the result.  The unsigned routine is called with
     ;;; T = 0 (no rounding there); it leaves a copy of A in
     ;;; R31:R30:R27:R26 and the guard byte in GUARD.
 526 DEFUN   __mulsa3
 527     push    B0
 528     push    B1
 529     push    B3
 530     clt
 531     XCALL   __mulusa3_round
         ;; R30 receives the saved B3.  The copy of A2 held in R30 is
         ;; not needed below.
 532     pop     r30
 533     ;; sign-extend B
 534     bst     r30, 7
 535     brtc 1f
 536     ;; A1, A0 survived in  R27:R26
 537     sub     C2, AA0
 538     sbc     C3, AA1
 539 1:
         ;; From here on R27:R26 hold the saved B1:B0 instead of A1:A0.
 540     pop     AA1  ;; B1
 541     pop     AA0  ;; B0
 542
 543     ;; sign-extend A.  A3 survived in  R31
 544     bst     AA3, 7
 545     brtc 2f
 546     sub     C2, AA0
 547     sbc     C3, AA1
 548 2:
 549     ;;  Shift 1 bit left to adjust for 15 fractional bits
         ;; The top bit of GUARD becomes the new lowest result bit.
 550     lsl     GUARD
 551     rol     C0
 552     rol     C1
 553     rol     C2
 554     rol     C3
 555     ;; Round last digit
         ;; The next guard bit goes to Carry and is added to the result.
 556     lsl     GUARD
 557     adc     C0, __zero_reg__
 558     adc     C1, __zero_reg__
 559     adc     C2, __zero_reg__
 560     adc     C3, __zero_reg__
 561     ret
 562 ENDF __mulsa3
 563 #endif  /* L_mulsa3 */
 564
 565 #if defined (L_mulusa3)
 566 ;;; (R25:R22)  *=  (R21:R18)
 567 ;;; Clobbers: ABI, called by optabs
 568 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
     ;;; Entry point for the rounding variant: sets T and falls through
     ;;; into __mulusa3_round.
 569 DEFUN __mulusa3
 570     set
 571     ;; Fallthru
 572 ENDF  __mulusa3
 573
 574 ;;; A[] survives in 26, 27, 30, 31
 575 ;;; Also used by __mulsa3 with T = 0
 576 ;;; Round if T = 1
 577 ;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
     ;;; Shift-and-add multiplication in two passes.  The first pass walks
     ;;; the upper half of B (B3:B2) from its lowest bit upwards while A
     ;;; is shifted left; the second pass walks the lower half (B1:B0)
     ;;; from its highest bit downwards while A is shifted right.
     ;;; The sum is collected in CC = R17:R16:R1:R0, so __zero_reg__ is
     ;;; not zero while the loops run.
 578 DEFUN __mulusa3_round
 579     push    CC2
 580     push    CC3
 581     ; clear result
         ;; __zero_reg__ is still 0 here, so copying the pair CC1:CC0
         ;; clears CC3:CC2 as well.
 582     clr     __tmp_reg__
 583     wmov    CC2, CC0
 584     ; save multiplicand
 585     wmov    AA0, A0
 586     wmov    AA2, A2
 587     rjmp 3f
 588
 589     ;; Loop the integral part
 590
 591 1:  ;; CC += A * 2^n;  n >= 0
 592     add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
 593
 594 2:  ;; A <<= 1
 595     lsl  A0      $  rol A1      $  rol  A2      $  rol  A3
 596
 597 3:  ;; IBIT(B) >>= 1
 598     ;; Carry = n-th bit of B;  n >= 0
 599     lsr     B3
 600     ror     B2
 601     brcs 1b
         ;; Carry is 0 here, so sbci leaves B3 unchanged.  It can only
         ;; keep Z set, never set it, so Z = 1 iff B2 and B3 are both 0.
 602     sbci    B3, 0
 603     brne 2b
 604
 605     ;; Loop the fractional part
 606     ;; B2/B3 is 0 now, use as guard bits for rounding
 607     ;; Restore multiplicand
 608     wmov    A0, AA0
 609     wmov    A2, AA2
 610     rjmp 5f
 611
 612 4:  ;; CC += A:Guard * 2^n;  n < 0
         ;; B2 holds the bits shifted out of A, B3 accumulates them.
 613     add  B3,B2 $  adc  CC0,A0  $  adc  CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
 614 5:
 615     ;; A:Guard >>= 1
 616     lsr  A3   $  ror  A2  $  ror  A1  $  ror   A0  $   ror  B2
 617
 618     ;; FBIT(B) <<= 1
 619     ;; Carry = n-th bit of B;  n < 0
 620     lsl     B0
 621     rol     B1
 622     brcs 4b
         ;; Same Z trick as above: loop until B1:B0 is 0.
 623     sbci    B0, 0
 624     brne 5b
 625
 626     ;; Save guard bits and set carry for rounding
 627     push    B3
 628     lsl     B3
 629     ;; Move result into place
 630     wmov    C2, CC2
 631     wmov    C0, CC0
         ;; R1 held a result byte until now.  clr leaves Carry untouched,
         ;; so the rounding bit from the lsl above is still valid.
 632     clr     __zero_reg__
 633     brtc 6f
 634     ;; Round iff T = 1
 635     adc     C0, __zero_reg__
 636     adc     C1, __zero_reg__
 637     adc     C2, __zero_reg__
 638     adc     C3, __zero_reg__
 639 6:
 640     pop     GUARD
 641     ;; Epilogue
 642     pop     CC3
 643     pop     CC2
 644     ret
 645 ENDF __mulusa3_round
 646 #endif  /* L_mulusa3 */
 647
 648 #undef A0
 649 #undef A1
 650 #undef A2
 651 #undef A3
 652 #undef B0
 653 #undef B1
 654 #undef B2
 655 #undef B3
 656 #undef C0
 657 #undef C1
 658 #undef C2
 659 #undef C3
 660 #undef AA0
 661 #undef AA1
 662 #undef AA2
 663 #undef AA3
 664 #undef CC0
 665 #undef CC1
 666 #undef CC2
 667 #undef CC3
 668
 669 #endif /* __AVR_HAVE_MUL__ */
 670
 671 #undef GUARD
 672
 673 /*******************************************************
 674       Fractional Division 8 / 8
 675 *******************************************************/
 676
     ;; Register roles shared by __divqq3, __udivuqq3 and __divqq_helper.
 677 #define r_divd  r25     /* dividend */
 678 #define r_quo   r24     /* quotient */
 679 #define r_div   r22     /* divisor */
 680 #define r_sign  __tmp_reg__
 681
 682 #if defined (L_divqq3)
     ;;; __divqq3: signed 8-bit fractional division.
     ;;; In:  r_divd = dividend,  r_div = divisor
     ;;; Out: r_quo  = quotient
     ;;; Both operands are negated if negative, divided by
     ;;; __divqq_helper, and the quotient is negated if the operand
     ;;; signs differed.  r_sign (__tmp_reg__) is not touched by the
     ;;; helper, so the sign survives the call.
 683 DEFUN   __divqq3
         ;; Bit 7 of r_sign = sign of the quotient.
 684     mov     r_sign, r_divd
 685     eor     r_sign, r_div
 686     sbrc    r_div, 7
 687     neg     r_div
 688     sbrc    r_divd, 7
 689     neg     r_divd
 690     XCALL   __divqq_helper
         ;; Make room for the sign bit.
 691     lsr     r_quo
 692     sbrc    r_sign, 7   ; negate result if needed
 693     neg     r_quo
 694     ret
 695 ENDF __divqq3
 696 #endif  /* L_divqq3 */
 697
 698 #if defined (L_udivuqq3)
     ;;; __udivuqq3: unsigned 8-bit fractional division.
     ;;; In:  r_divd = dividend,  r_div = divisor
     ;;; Out: r_quo  = quotient, or 0xff if dividend >= divisor
     ;;; The division itself is done by __divqq_helper.
 699 DEFUN   __udivuqq3
 700     cp      r_divd, r_div
 701     brsh    0f
 702     XJMP __divqq_helper
 703     ;; Result is out of [0, 1)  ==>  Return 1 - eps.
 704 0:  ldi     r_quo, 0xff
 705     ret
 706 ENDF __udivuqq3
 707 #endif  /* L_udivuqq3 */
 708
 709
 708 #if defined (L_divqq_helper)
     ;;; __divqq_helper: 8-step shift-and-subtract division.
     ;;; In:  r_divd = dividend,  r_div = divisor
     ;;; Out: r_quo  = quotient;  r_divd is overwritten
     ;;; __zero_reg__ serves as loop counter: a single 1 bit is shifted
     ;;; left once per step and drops out after 8 steps, which leaves
     ;;; __zero_reg__ at 0 again.
     ;;; The quotient bits are collected inverted (1 = no subtraction)
     ;;; and complemented at the end.
 709 DEFUN   __divqq_helper
 710     clr     r_quo           ; clear quotient
 711     inc     __zero_reg__    ; init loop counter, used per shift
 712 __udivuqq3_loop:
 713     lsl     r_divd          ; shift dividend
 714     brcs    0f              ; dividend overflow
 715     cp      r_divd,r_div    ; compare dividend & divisor
 716     brcc    0f              ; dividend >= divisor
 717     rol     r_quo           ; shift quotient (with CARRY)
 718     rjmp    __udivuqq3_cont
 719 0:
 720     sub     r_divd,r_div    ; restore dividend
 721     lsl     r_quo           ; shift quotient (without CARRY)
 722 __udivuqq3_cont:
 723     lsl     __zero_reg__    ; shift loop-counter bit
 724     brne    __udivuqq3_loop
 725     com     r_quo           ; complement result
 726                             ; because C flag was complemented in loop
 727     ret
 728 ENDF __divqq_helper
 729 #endif  /* L_divqq_helper */
 732
 733 #undef  r_divd
 734 #undef  r_quo
 735 #undef  r_div
 736 #undef  r_sign
 737
 738
 739 /*******************************************************
 740     Fractional Division 16 / 16
 741 *******************************************************/
     ;; Register roles shared by the 16-bit division routines below.
 742 #define r_divdL 26     /* dividend Low */
 743 #define r_divdH 27     /* dividend High */
 744 #define r_quoL  24     /* quotient Low */
 745 #define r_quoH  25     /* quotient High */
 746 #define r_divL  22     /* divisor Low */
 747 #define r_divH  23     /* divisor High */
 748 #define r_cnt   21     /* loop counter */
 749
 750 #if defined (L_divhq3)
     ;;; __divhq3: signed 16-bit fractional division.
     ;;; In:  dividend in r_divdH:r_divdL (R27:R26),
     ;;;      divisor  in r_divH:r_divL   (R23:R22)
     ;;; Out: quotient in r_quoH:r_quoL   (R25:R24)
     ;;; R0 holds the XOR of both sign bytes, so bit 7 of R0 is the sign
     ;;; of the quotient once the operands have been made non-negative.
     ;;; The magnitude comes from __udivuhq3, which uses R21 as loop
     ;;; counter and does not touch R0.
     ;;; Operands of equal magnitude are not divided; 0x8000 is returned
     ;;; for them regardless of the signs.
 751 DEFUN   __divhq3
 752     mov     r0, r_divdH
 753     eor     r0, r_divH
 754     sbrs    r_divH, 7
 755     rjmp    1f
 756     NEG2    r_divL
 757 1:
 758     sbrs    r_divdH, 7
 759     rjmp    2f
 760     NEG2    r_divdL
 761 2:
 762     cp      r_divdL, r_divL
 763     cpc     r_divdH, r_divH
 764     breq    __divhq3_minus1  ; if equal return -1
 765     XCALL   __udivuhq3
         ;; Make room for the sign bit.
 766     lsr     r_quoH
 767     ror     r_quoL
         ;; The flags left by ror describe the quotient, not its sign.
         ;; Load N from the sign saved in R0 before branching on it.
         tst     r0
 768     brpl    9f
 769     ;; negate result if needed
 770     NEG2    r_quoL
 771 9:
 772     ret
 773 __divhq3_minus1:
 774     ldi     r_quoH, 0x80
 775     clr     r_quoL
 776     ret
 777 ENDF __divhq3
 778 #endif  /* defined (L_divhq3) */
 779
 780 #if defined (L_udivuhq3)
     ;;; __udivuhq3: unsigned 16-bit fractional division.
     ;;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
     ;;; Out: quotient in r_quoH:r_quoL;  dividend and r_cnt overwritten
     ;;; Clears r_quoH and Carry, then falls through into the common
     ;;; loop.
 781 DEFUN   __udivuhq3
 782     sub     r_quoH,r_quoH   ; clear quotient and carry
 783     ;; FALLTHRU
 784 ENDF __udivuhq3
 785
     ;;; __udivuha3_common: 16-step shift-and-subtract loop, also entered
     ;;; from __udivuha3.  On entry Carry and r_quoH supply the bits that
     ;;; are shifted into the dividend from below: each  rol r_quoH
     ;;; pushes a quotient bit in and the next of those bits out into
     ;;; Carry, and  dec  leaves Carry untouched until the next  rol
     ;;; r_divdL  consumes it.
     ;;; The quotient bits are collected inverted and complemented at
     ;;; the end.
 786 DEFUN   __udivuha3_common
 787     clr     r_quoL          ; clear quotient
 788     ldi     r_cnt,16        ; init loop counter
 789 __udivuhq3_loop:
 790     rol     r_divdL         ; shift dividend (with CARRY)
 791     rol     r_divdH
 792     brcs    __udivuhq3_ep   ; dividend overflow
 793     cp      r_divdL,r_divL  ; compare dividend & divisor
 794     cpc     r_divdH,r_divH
 795     brcc    __udivuhq3_ep   ; dividend >= divisor
 796     rol     r_quoL          ; shift quotient (with CARRY)
 797     rjmp    __udivuhq3_cont
 798 __udivuhq3_ep:
 799     sub     r_divdL,r_divL  ; restore dividend
 800     sbc     r_divdH,r_divH
 801     lsl     r_quoL          ; shift quotient (without CARRY)
 802 __udivuhq3_cont:
 803     rol     r_quoH          ; shift quotient
 804     dec     r_cnt           ; decrement loop counter
 805     brne    __udivuhq3_loop
 806     com     r_quoL          ; complement result
 807     com     r_quoH          ; because C flag was complemented in loop
 808     ret
 809 ENDF __udivuha3_common
 810 #endif  /* defined (L_udivuhq3) */
 811
 812 /*******************************************************
 813     Fixed Division 8.8 / 8.8
 814 *******************************************************/
 815 #if defined (L_divha3)
     ;;; __divha3: signed 16-bit fixed division.
     ;;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
     ;;; Out: quotient in r_quoH:r_quoL
     ;;; R0 holds the XOR of both sign bytes; bit 7 of R0 is the sign of
     ;;; the quotient.  Both operands are negated if negative and then
     ;;; divided by __udivuha3.
 816 DEFUN   __divha3
 817     mov     r0, r_divdH
 818     eor     r0, r_divH
 819     sbrs    r_divH, 7
 820     rjmp    1f
 821     NEG2    r_divL
 822 1:
 823     sbrs    r_divdH, 7
 824     rjmp    2f
 825     NEG2    r_divdL
 826 2:
 827     XCALL   __udivuha3
 828     lsr     r_quoH  ; adjust to 7 fractional bits
 829     ror     r_quoL
 830     sbrs    r0, 7   ; negate result if needed
 831     ret
 832     NEG2    r_quoL
 833     ret
 834 ENDF __divha3
 835 #endif  /* defined (L_divha3) */
 836
 837 #if defined (L_udivuha3)
     ;;; __udivuha3: unsigned 16-bit fixed division.
     ;;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL
     ;;; Out: quotient in r_quoH:r_quoL (computed by __udivuha3_common)
     ;;; The dividend is moved down by one byte: its high byte becomes
     ;;; the low byte of the working dividend, and its low byte is parked
     ;;; in r_quoH, from where the common loop shifts it in bit by bit.
 838 DEFUN   __udivuha3
 839     mov     r_quoH, r_divdL
 840     mov     r_divdL, r_divdH
 841     clr     r_divdH
 842     lsl     r_quoH     ; shift quotient into carry
 843     XJMP    __udivuha3_common ; same as fractional after rearrange
 844 ENDF __udivuha3
 845 #endif  /* defined (L_udivuha3) */
 846
 847 #undef  r_divdL
 848 #undef  r_divdH
 849 #undef  r_quoL
 850 #undef  r_quoH
 851 #undef  r_divL
 852 #undef  r_divH
 853 #undef  r_cnt
 854
 855 /*******************************************************
 856     Fixed Division 16.16 / 16.16
 857 *******************************************************/
 858
     ;; Register roles of the 32-bit division routines below.
     ;; The upper half of arg1 (R27:R26) doubles as the lower half of the
     ;; working dividend, and the lower half of arg1 (R25:R24) as the
     ;; upper half of the quotient.
 859 #define r_arg1L  24    /* arg1 gets passed already in place */
 860 #define r_arg1H  25
 861 #define r_arg1HL 26
 862 #define r_arg1HH 27
 863 #define r_divdL  26    /* dividend Low */
 864 #define r_divdH  27
 865 #define r_divdHL 30
 866 #define r_divdHH 31    /* dividend High */
 867 #define r_quoL   22    /* quotient Low */
 868 #define r_quoH   23
 869 #define r_quoHL  24
 870 #define r_quoHH  25    /* quotient High */
 871 #define r_divL   18    /* divisor Low */
 872 #define r_divH   19
 873 #define r_divHL  20
 874 #define r_divHH  21    /* divisor High */
 875 #define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */
 876
 877 #if defined (L_divsa3)
     ;;; __divsa3: signed 32-bit fixed division.
     ;;; In:  dividend in R27:R24 (r_arg1*), divisor in R21:R18 (r_div*)
     ;;; Out: quotient in R25:R22 (r_quo*)
     ;;; R0 holds the XOR of both sign bytes; bit 7 of R0 is the sign of
     ;;; the quotient.  Both operands are negated if negative and then
     ;;; divided by __udivusa3.  A negative result is produced by
     ;;; leaving through __negsi2.
 878 DEFUN   __divsa3
 879     mov     r0, r_arg1HH
 880     eor     r0, r_divHH
 881     sbrs    r_divHH, 7
 882     rjmp    1f
 883     NEG4    r_divL
 884 1:
 885     sbrs    r_arg1HH, 7
 886     rjmp    2f
 887     NEG4    r_arg1L
 888 2:
 889     XCALL   __udivusa3
 890     lsr     r_quoHH ; adjust to 15 fractional bits
 891     ror     r_quoHL
 892     ror     r_quoH
 893     ror     r_quoL
 894     sbrs    r0, 7   ; negate result if needed
 895     ret
 896     ;; negate r_quoL
 897     XJMP    __negsi2
 898 ENDF __divsa3
 899 #endif  /* defined (L_divsa3) */
 900
 901 #if defined (L_udivusa3)
     ;;; __udivusa3: unsigned 32-bit fixed division.
     ;;; In:  dividend in R27:R24 (r_arg1*), divisor in R21:R18 (r_div*)
     ;;; Out: quotient in R25:R22 (r_quo*);  R27:R26 and R31:R30 are
     ;;;      overwritten
     ;;; 32-step shift-and-subtract loop.  The working dividend is
     ;;; R31:R30:R27:R26: its upper half starts at 0, its lower half is
     ;;; the upper half of the argument.  The lower half of the argument
     ;;; stays in R25:R24 and is shifted into the dividend bit by bit
     ;;; while quotient bits are shifted in behind it.
     ;;; The quotient bits are collected inverted and complemented at
     ;;; the end.
 902 DEFUN   __udivusa3
         ;; ldi only works on R16..R31, so the count takes a detour via
         ;; R30 before that register is cleared.
 903     ldi     r_divdHL, 32    ; init loop counter
 904     mov     r_cnt, r_divdHL
 905     clr     r_divdHL
 906     clr     r_divdHH
 907     wmov    r_quoL, r_divdHL
 908     lsl     r_quoHL         ; shift quotient into carry
 909     rol     r_quoHH
 910 __udivusa3_loop:
 911     rol     r_divdL         ; shift dividend (with CARRY)
 912     rol     r_divdH
 913     rol     r_divdHL
 914     rol     r_divdHH
 915     brcs    __udivusa3_ep   ; dividend overflow
 916     cp      r_divdL,r_divL  ; compare dividend & divisor
 917     cpc     r_divdH,r_divH
 918     cpc     r_divdHL,r_divHL
 919     cpc     r_divdHH,r_divHH
 920     brcc    __udivusa3_ep   ; dividend >= divisor
 921     rol     r_quoL          ; shift quotient (with CARRY)
 922     rjmp    __udivusa3_cont
 923 __udivusa3_ep:
 924     sub     r_divdL,r_divL  ; restore dividend
 925     sbc     r_divdH,r_divH
 926     sbc     r_divdHL,r_divHL
 927     sbc     r_divdHH,r_divHH
 928     lsl     r_quoL          ; shift quotient (without CARRY)
 929 __udivusa3_cont:
         ;; The bit leaving r_quoHH is the next dividend bit.  dec does
         ;; not change Carry, so it reaches the rol at the loop head.
 930     rol     r_quoH          ; shift quotient
 931     rol     r_quoHL
 932     rol     r_quoHH
 933     dec     r_cnt           ; decrement loop counter
 934     brne    __udivusa3_loop
 935     com     r_quoL          ; complement result
 936     com     r_quoH          ; because C flag was complemented in loop
 937     com     r_quoHL
 938     com     r_quoHH
 939     ret
 940 ENDF __udivusa3
 941 #endif  /* defined (L_udivusa3) */
 942
 943 #undef  r_arg1L
 944 #undef  r_arg1H
 945 #undef  r_arg1HL
 946 #undef  r_arg1HH
 947 #undef  r_divdL
 948 #undef  r_divdH
 949 #undef  r_divdHL
 950 #undef  r_divdHH
 951 #undef  r_quoL
 952 #undef  r_quoH
 953 #undef  r_quoHL
 954 #undef  r_quoHH
 955 #undef  r_divL
 956 #undef  r_divH
 957 #undef  r_divHL
 958 #undef  r_divHH
 959 #undef  r_cnt
 960
 961 \f
 962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 963 ;; Saturation, 2 Bytes
 964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 965
 966 ;; First Argument and Return Register
     ;; A = R25:R24
 967 #define A0  24
 968 #define A1  A0+1
 969
 970 #if defined (L_ssneg_2)
     ;;; __ssneg_2: saturating negation of the 16-bit value in R25:R24.
     ;;; NEG2 is a macro defined outside this section; the branch relies
     ;;; on the V flag it leaves behind.
 971 DEFUN __ssneg_2
 972     NEG2    A0
 973     brvc 0f
         ;; On overflow 1 is subtracted from the result; for a negation
         ;; that wrapped around to 0x8000 this yields 0x7fff.
 974     sbiw    A0, 1
 975 0:  ret
 976 ENDF __ssneg_2
 977 #endif /* L_ssneg_2 */
 978
 979 #if defined (L_ssabs_2)
     ;;; __ssabs_2: saturating absolute value of the 16-bit value in
     ;;; R25:R24.  A value with clear sign bit is returned unchanged,
     ;;; any other value is handed to __ssneg_2.
 980 DEFUN __ssabs_2
 981     sbrs    A1, 7
 982     ret
 983     XJMP    __ssneg_2
 984 ENDF __ssabs_2
 985 #endif /* L_ssabs_2 */
 986
 987 #undef A0
 988 #undef A1
 989
 990
 991 \f
 992 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 993 ;; Saturation, 4 Bytes
 994 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 995
 996 ;; First Argument and Return Register
     ;; A = R25:R22
 997 #define A0  22
 998 #define A1  A0+1
 999 #define A2  A0+2
1000 #define A3  A0+3
1001
1002 #if defined (L_ssneg_4)
     ;;; __ssneg_4: saturating negation of the 32-bit value in R25:R22.
     ;;; The negation is done by __negsi2; the branch relies on the V
     ;;; flag that routine leaves behind.  On overflow the result is
     ;;; replaced by 0x7fffffff.
1003 DEFUN __ssneg_4
1004     XCALL   __negsi2
1005     brvc 0f
1006     ldi     A3, 0x7f
1007     ldi     A2, 0xff
1008     ldi     A1, 0xff
1009     ldi     A0, 0xff
1010 0:  ret
1011 ENDF __ssneg_4
1012 #endif /* L_ssneg_4 */
1013
1014 #if defined (L_ssabs_4)
     ;;; __ssabs_4: saturating absolute value of the 32-bit value in
     ;;; R25:R22.  A value with clear sign bit is returned unchanged,
     ;;; any other value is handed to __ssneg_4.
1015 DEFUN __ssabs_4
1016     sbrs    A3, 7
1017     ret
1018     XJMP    __ssneg_4
1019 ENDF __ssabs_4
1020 #endif /* L_ssabs_4 */
1021
1022 #undef A0
1023 #undef A1
1024 #undef A2
1025 #undef A3
1026
1027
1028 \f
1029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1030 ;; Saturation, 8 Bytes
1031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1032
1033 ;; First Argument and Return Register
     ;; A = R25:R18
1034 #define A0  18
1035 #define A1  A0+1
1036 #define A2  A0+2
1037 #define A3  A0+3
1038 #define A4  A0+4
1039 #define A5  A0+5
1040 #define A6  A0+6
1041 #define A7  A0+7
1042
1043 #if defined (L_clr_8)
     ;;; The three unsigned saturating negations are aliases of __clr_8,
     ;;; i.e. they always return 0.
1044 FALIAS __usneguta2
1045 FALIAS __usneguda2
1046 FALIAS __usnegudq2
1047
1048 ;; Clear Carry and all Bytes
1049 DEFUN __clr_8
1050     ;; Clear Carry and set Z
1051     sub     A7, A7
1052     ;; FALLTHRU
1053 ENDF  __clr_8
1054 ;; Propagate Carry to all Bytes, Carry unaltered
     ;; With Carry = 0 all 8 bytes become 0x00, with Carry = 1 all 8
     ;; bytes become 0xff:  sbc Rd,Rd  computes  0 - Carry  and produces
     ;; the same Carry again.  The remaining bytes are copies of A7:A6.
1055 DEFUN __sbc_8
1056     sbc     A7, A7
1057     sbc     A6, A6
1058     wmov    A4, A6
1059     wmov    A2, A6
1060     wmov    A0, A6
1061     ret
1062 ENDF __sbc_8
1063 #endif /* L_clr_8 */
1064
1065 #if defined (L_ssneg_8)
     ;;; __ssneg_8: saturating negation of the 64-bit value in R25:R18.
     ;;; The negation is done by __negdi2; the branch relies on the V
     ;;; flag that routine leaves behind.
1066 FALIAS __ssnegta2
1067 FALIAS __ssnegda2
1068 FALIAS __ssnegdq2
1069
1070 DEFUN __ssneg_8
1071     XCALL   __negdi2
1072     brvc 0f
1073     ;; A[] = 0x7fffffffffffffff
         ;; With Carry set, __sbc_8 fills all bytes with 0xff; only the
         ;; top byte needs to be patched afterwards.
1074     sec
1075     XCALL   __sbc_8
1076     ldi     A7, 0x7f
1077 0:  ret
1078 ENDF __ssneg_8
1079 #endif /* L_ssneg_8 */
1080
1081 #if defined (L_ssabs_8)
     ;;; __ssabs_8: saturating absolute value of the 64-bit value in
     ;;; R25:R18.  A value with clear sign bit is returned unchanged,
     ;;; any other value is handed to __ssneg_8.
1082 FALIAS __ssabsta2
1083 FALIAS __ssabsda2
1084 FALIAS __ssabsdq2
1085
1086 DEFUN __ssabs_8
1087     sbrs    A7, 7
1088     ret
1089     XJMP    __ssneg_8
1090 ENDF __ssabs_8
1091 #endif /* L_ssabs_8 */
1092
1093 ;; Second Argument
     ;; B = R17:R10
1094 #define B0  10
1095 #define B1  B0+1
1096 #define B2  B0+2
1097 #define B3  B0+3
1098 #define B4  B0+4
1099 #define B5  B0+5
1100 #define B6  B0+6
1101 #define B7  B0+7
1102
1103 #if defined (L_usadd_8)
     ;;; __usadd_8: unsigned saturating 64-bit addition.
     ;;; The sum is computed by __adddi3; the branch relies on the Carry
     ;;; flag that routine leaves behind.
1104 FALIAS __usadduta3
1105 FALIAS __usadduda3
1106 FALIAS __usaddudq3
1107
1108 DEFUN __usadd_8
1109     XCALL   __adddi3
1110     brcs 0f
1111     ret
1112 0:  ;; A[] = 0xffffffffffffffff
         ;; Carry is set here, so __sbc_8 fills all bytes with 0xff.
1113     XJMP    __sbc_8
1114 ENDF __usadd_8
1115 #endif /* L_usadd_8 */
1116
1117 #if defined (L_ussub_8)
     ;;; __ussub_8: unsigned saturating 64-bit subtraction.
     ;;; The difference is computed by __subdi3; the branch relies on the
     ;;; Carry flag that routine leaves behind.  If Carry is set the
     ;;; result is replaced by 0.
1118 FALIAS __ussubuta3
1119 FALIAS __ussubuda3
1120 FALIAS __ussubudq3
1121
1122 DEFUN __ussub_8
1123     XCALL   __subdi3
1124     brcs 0f
1125     ret
1126 0:  ;; A[] = 0
1127     XJMP    __clr_8
1128 ENDF __ussub_8
1129 #endif /* L_ussub_8 */
1130
1131 #if defined (L_ssadd_8)
     ;;; __ssadd_8: signed saturating 64-bit addition.
     ;;; The sum is computed by __adddi3; the branch relies on the V flag
     ;;; that routine leaves behind.  On overflow the sign of B selects
     ;;; the saturation value.
1132 FALIAS __ssaddta3
1133 FALIAS __ssaddda3
1134 FALIAS __ssadddq3
1135
1136 DEFUN __ssadd_8
1137     XCALL   __adddi3
1138     brvc 0f
1139     ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
         ;; cpi sets Carry iff B7 < 0x80, i.e. iff B >= 0.  __sbc_8 then
         ;; yields all-ones resp. all-zeros, and subtracting 0x80 from
         ;; the top byte turns 0xff into 0x7f resp. 0x00 into 0x80.
1140     cpi     B7, 0x80
1141     XCALL   __sbc_8
1142     subi    A7, 0x80
1143 0:  ret
1144 ENDF __ssadd_8
1145 #endif /* L_ssadd_8 */
1146
1147 #if defined (L_sssub_8)
     ;;; __sssub_8: signed saturating 64-bit subtraction.
     ;;; The difference is computed by __subdi3; the branch relies on the
     ;;; V flag that routine leaves behind.  On overflow the sign of B
     ;;; selects the saturation value.
1148 FALIAS __sssubta3
1149 FALIAS __sssubda3
1150 FALIAS __sssubdq3
1151
1152 DEFUN __sssub_8
1153     XCALL   __subdi3
1154     brvc 0f
1155     ;; A = (B < 0) ? INT64_MAX : INT64_MIN
         ;; cp sets Carry iff 0x7f < B7, i.e. iff B < 0.  __sbc_8 then
         ;; yields all-ones resp. all-zeros, and subtracting 0x80 from
         ;; the top byte turns 0xff into 0x7f resp. 0x00 into 0x80.
1156     ldi     A7, 0x7f
1157     cp      A7, B7
1158     XCALL   __sbc_8
1159     subi    A7, 0x80
1160 0:  ret
1161 ENDF __sssub_8
1162 #endif /* L_sssub_8 */
1163
1164 #undef A0
1165 #undef A1
1166 #undef A2
1167 #undef A3
1168 #undef A4
1169 #undef A5
1170 #undef A6
1171 #undef A7
1172 #undef B0
1173 #undef B1
1174 #undef B2
1175 #undef B3
1176 #undef B4
1177 #undef B5
1178 #undef B6
1179 #undef B7