1 /* -*- Mode: Asm -*- */
2 ;; Copyright (C) 2012-2013
3 ;; Free Software Foundation, Inc.
4 ;; Contributed by Sean D'Epagnier (sean@depagnier.com)
5 ;; Georg-Johann Lay (avr@gjlay.de)
7 ;; This file is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by the
9 ;; Free Software Foundation; either version 3, or (at your option) any later version.
12 ;; In addition to the permissions in the GNU General Public License, the
13 ;; Free Software Foundation gives you unlimited permission to link the
14 ;; compiled version of this file into combinations with other programs,
15 ;; and to distribute those combinations without any restriction coming
16 ;; from the use of this file. (The General Public License restrictions
17 ;; do apply in other respects; for example, they cover modification of
18 ;; the file, and distribution when not linked into a combined executable.)
21 ;; This file is distributed in the hope that it will be useful, but
22 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 ;; General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with this program; see the file COPYING. If not, write to
28 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
29 ;; Boston, MA 02110-1301, USA.
31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;; Fixed point library routines for AVR
33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 .section .text.libgcc.fixed, "ax", @progbits
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38 ;; Conversions to float
39 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 #if defined (L_fractqqsf)
43 ;; Move in place for SA -> SF conversion
52 #endif /* L_fractqqsf */
54 #if defined (L_fractuqqsf)
56 ;; Move in place for USA -> SF conversion
64 #endif /* L_fractuqqsf */
66 #if defined (L_fracthqsf)
68 ;; Move in place for SA -> SF conversion
76 #endif /* L_fracthqsf */
78 #if defined (L_fractuhqsf)
80 ;; Move in place for USA -> SF conversion
87 #endif /* L_fractuhqsf */
89 #if defined (L_fracthasf)
91 ;; Move in place for SA -> SF conversion
100 #endif /* L_fracthasf */
102 #if defined (L_fractuhasf)
104 ;; Move in place for USA -> SF conversion
112 #endif /* L_fractuhasf */
115 #if defined (L_fractsqsf)
118 ;; Divide non-zero results by 2^31 to move the
119 ;; decimal point into place
;; The scaling is applied as a 16-bit subtraction on r25:r24.
;; exp_lo/exp_hi are macros defined outside this excerpt (presumably the
;; low/high byte of the exponent offset for the given power of two -- confirm).
122 subi r24, exp_lo (31) ; low byte of the subtraction
123 sbci r25, exp_hi (31) ; high byte, including the borrow from r24
126 #endif /* L_fractsqsf */
128 #if defined (L_fractusqsf)
131 ;; Divide non-zero results by 2^32 to move the
132 ;; decimal point into place
;; exp_hi is a macro defined outside this excerpt.
133 cpse r25, __zero_reg__ ; skip the adjustment below when r25 == __zero_reg__
134 subi r25, exp_hi (32) ; only r25 is adjusted here (no exp_lo step for 32)
137 #endif /* L_fractusqsf */
139 #if defined (L_fractsasf)
142 ;; Divide non-zero results by 2^15 to move the
143 ;; decimal point into place
;; The scaling is applied as a 16-bit subtraction on r25:r24.
;; exp_lo/exp_hi are macros defined outside this excerpt (presumably the
;; low/high byte of the exponent offset for the given power of two -- confirm).
146 subi r24, exp_lo (15) ; low byte of the subtraction
147 sbci r25, exp_hi (15) ; high byte, including the borrow from r24
150 #endif /* L_fractsasf */
152 #if defined (L_fractusasf)
155 ;; Divide non-zero results by 2^16 to move the
156 ;; decimal point into place
;; exp_hi is a macro defined outside this excerpt.
157 cpse r25, __zero_reg__ ; skip the adjustment below when r25 == __zero_reg__
158 subi r25, exp_hi (16) ; only r25 is adjusted here (no exp_lo step for 16)
161 #endif /* L_fractusasf */
163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
164 ;; Conversions from float
165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
167 #if defined (L_fractsfqq)
169 ;; Multiply with 2^{24+7} to get a QQ result in r25
;; Done as a 16-bit subtraction of the constant for -31 from r25:r24;
;; exp_lo/exp_hi are macros defined outside this excerpt.
170 subi r24, exp_lo (-31) ; low byte of the subtraction
171 sbci r25, exp_hi (-31) ; high byte, including the borrow from r24
176 #endif /* L_fractsfqq */
178 #if defined (L_fractsfuqq)
180 ;; Multiply with 2^{24+8} to get a UQQ result in r25
181 subi r25, exp_hi (-32) ; only r25 is adjusted; exp_hi is defined outside this excerpt
186 #endif /* L_fractsfuqq */
188 #if defined (L_fractsfha)
190 ;; Multiply with 2^{16+7} to get a HA result in r25:r24
;; Done as a 16-bit subtraction of the constant for -23 from r25:r24;
;; exp_lo/exp_hi are macros defined outside this excerpt.
191 subi r24, exp_lo (-23) ; low byte of the subtraction
192 sbci r25, exp_hi (-23) ; high byte, including the borrow from r24
195 #endif /* L_fractsfha */
197 #if defined (L_fractsfuha)
199 ;; Multiply with 2^24 to get a UHA result in r25:r24
200 subi r25, exp_hi (-24) ; only r25 is adjusted; exp_hi is defined outside this excerpt
203 #endif /* L_fractsfuha */
205 #if defined (L_fractsfhq)
209 ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
210 ;; resp. with 2^31 to get a SQ result in r25:r22
;; Done as a 16-bit subtraction of the constant for -31 from r25:r24;
;; exp_lo/exp_hi are macros defined outside this excerpt.
211 subi r24, exp_lo (-31) ; low byte of the subtraction
212 sbci r25, exp_hi (-31) ; high byte, including the borrow from r24
215 #endif /* L_fractsfhq */
217 #if defined (L_fractsfuhq)
221 ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
222 ;; resp. with 2^32 to get a USQ result in r25:r22
223 subi r25, exp_hi (-32) ; only r25 is adjusted; exp_hi is defined outside this excerpt
226 #endif /* L_fractsfuhq */
228 #if defined (L_fractsfsa)
230 ;; Multiply with 2^15 to get a SA result in r25:r22
;; Done as a 16-bit subtraction of the constant for -15 from r25:r24;
;; exp_lo/exp_hi are macros defined outside this excerpt.
231 subi r24, exp_lo (-15) ; low byte of the subtraction
232 sbci r25, exp_hi (-15) ; high byte, including the borrow from r24
235 #endif /* L_fractsfsa */
237 #if defined (L_fractsfusa)
239 ;; Multiply with 2^16 to get a USA result in r25:r22
240 subi r25, exp_hi (-16) ; only r25 is adjusted; exp_hi is defined outside this excerpt
243 #endif /* L_fractsfusa */
246 ;; For multiplication the functions here are called directly from
247 ;; avr-fixed.md instead of using the standard libcall mechanisms.
248 ;; This can make better code because GCC knows exactly which
249 ;; of the call-used registers (not all of them) are clobbered.
251 /*******************************************************
252 Fractional Multiplication 8 x 8 without MUL
253 *******************************************************/
255 #if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
257 ;;; Clobbers: __tmp_reg__, R22, R24, R25
261 ;; TR 18037 requires that (-1) * (-1) does not overflow
262 ;; The only input that can produce -1 is (-1)^2.
268 #endif /* L_mulqq3 && ! HAVE_MUL */
270 /*******************************************************
271 Fractional Multiply .16 x .16 with and without MUL
272 *******************************************************/
274 #if defined (L_mulhq3)
275 ;;; Same code with and without MUL, but the interfaces differ:
276 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
277 ;;; Clobbers: ABI, called by optabs
278 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
279 ;;; Clobbers: __tmp_reg__, R22, R23
280 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
283 ;; Shift result into place
292 1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
293 ldi r24, lo8 (0x7fff) ; saturate: result R25:R24 = 0x7fff,
294 ldi r25, hi8 (0x7fff) ; the largest positive 16-bit signed value
297 #endif /* defined (L_mulhq3) */
299 #if defined (L_muluhq3)
300 ;;; Same code with and without MUL, but the interfaces differ:
301 ;;; no MUL: (R25:R24) *= (R23:R22)
302 ;;; Clobbers: ABI, called by optabs
303 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
304 ;;; Clobbers: __tmp_reg__, R22, R23
305 ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
313 #endif /* L_muluhq3 */
316 /*******************************************************
317 Fixed Multiply 8.8 x 8.8 with and without MUL
318 *******************************************************/
320 #if defined (L_mulha3)
321 ;;; Same code with and without MUL, but the interfaces differ:
322 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
323 ;;; Clobbers: ABI, called by optabs
324 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
325 ;;; Clobbers: __tmp_reg__, R22, R23
326 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
334 #endif /* L_mulha3 */
336 #if defined (L_muluha3)
337 ;;; Same code with and without MUL, but the interfaces differ:
338 ;;; no MUL: (R25:R24) *= (R23:R22)
339 ;;; Clobbers: ABI, called by optabs
340 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
341 ;;; Clobbers: __tmp_reg__, R22, R23
342 ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
347 #endif /* L_muluha3 */
349 #if defined (L_muluha3_round)
350 DEFUN __muluha3_round
351 ;; Shift result into place
359 #endif /* L_muluha3_round */
362 /*******************************************************
363 Fixed Multiplication 16.16 x 16.16
364 *******************************************************/
366 ;; Bits outside the result (below LSB), used in the signed version
367 #define GUARD __tmp_reg__
369 #if defined (__AVR_HAVE_MUL__)
389 #if defined (L_mulusa3)
390 ;;; (C3:C0) = (A3:A0) * (B3:B0)
396 ;;; Round for last digit iff T = 1
397 ;;; Return guard bits in GUARD (__tmp_reg__).
398 ;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
399 ;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
;;; The A*, B* and C* register names are defined outside this excerpt.
;;; Each MUL leaves its 16-bit product in r1:r0.
400 DEFUN __mulusa3_round
401 ;; Some of the MUL instructions have LSBs outside the result.
402 ;; Don't ignore these LSBs in order to tame rounding error.
403 ;; Use C2/C3 for these LSBs.
407 mul A0, B0 $ movw C2, r0 ; copy the product r1:r0 into the register pair starting at C2
409 mul A1, B0 $ add C3, r0 $ adc C0, r1 ; low product byte into C3, high byte plus carry into C0
410 mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1 ; same again; rol moves the final carry into bit 0 of C1
412 ;; Round if T = 1. Store guarding bits outside the result for rounding
413 ;; and left-shift by the signed version (function below).
419 ;; The following MULs don't have LSBs outside the result.
420 ;; C2/C3 is the high part.
;; In each group below, "sbc Cx, Cx" yields 0 minus the carry and every
;; following "sbci Cx, 0" subtracts one more carry, so Cx holds the negated
;; number of carries (presumably negated back in a line not shown here).
422 mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2 ; C1:C0 += product, C2 = -carry
423 mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0 ; C1:C0 += product, C2 -= carry
424 mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0 ; C1:C0 += product, C2 -= carry
427 mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3 ; C2:C1 += product, C3 = -carry
428 mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 ; C2:C1 += product, C3 -= carry
429 mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 ; C2:C1 += product, C3 -= carry
430 mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 ; C2:C1 += product, C3 -= carry
433 mul A1, B3 $ add C2, r0 $ adc C3, r1 ; C3:C2 += product; a carry out of C3 is not recorded
434 mul A2, B2 $ add C2, r0 $ adc C3, r1 ; C3:C2 += product; a carry out of C3 is not recorded
435 mul A3, B1 $ add C2, r0 $ adc C3, r1 ; C3:C2 += product; a carry out of C3 is not recorded
437 mul A2, B3 $ add C3, r0 ; only the low product byte is added to C3
438 mul A3, B2 $ add C3, r0 ; only the low product byte is added to C3
440 ;; Guard bits used in the signed version below.
445 #endif /* L_mulusa3 */
447 #if defined (L_mulsa3)
448 ;;; (C3:C0) = (A3:A0) * (B3:B0)
449 ;;; Clobbers: __tmp_reg__, T
450 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
453 XCALL __mulusa3_round ; unsigned product first; guard bits come back in GUARD (__tmp_reg__)
454 ;; A posteriori sign extension of the operands
464 ;; Shift 1 bit left to adjust for 15 fractional bits
478 #endif /* L_mulsa3 */
493 #else /* __AVR_HAVE_MUL__ */
522 #if defined (L_mulsa3)
523 ;;; (R25:R22) *= (R21:R18)
524 ;;; Clobbers: ABI, called by optabs
525 ;;; Rounding: -1 LSB <= error <= 1 LSB
531 XCALL __mulusa3_round ; unsigned product first; guard bits come back in GUARD (__tmp_reg__)
536 ;; A1, A0 survived in R27:R26
543 ;; sign-extend A. A3 survived in R31
549 ;; Shift 1 bit left to adjust for 15 fractional bits
563 #endif /* L_mulsa3 */
565 #if defined (L_mulusa3)
566 ;;; (R25:R22) *= (R21:R18)
567 ;;; Clobbers: ABI, called by optabs
568 ;;; Rounding: -1 LSB <= error <= 1 LSB
574 ;;; A[] survives in 26, 27, 30, 31
575 ;;; Also used by __mulsa3 with T = 0
577 ;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
;;; The A*, B* and CC* register names are defined outside this excerpt.
578 DEFUN __mulusa3_round
589 ;; Loop the integral part
591 1: ;; CC += A * 2^n; n >= 0
592 add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3 ; 32-bit accumulate: CC3:CC0 += A3:A0
595 lsl A0 $ rol A1 $ rol A2 $ rol A3 ; A3:A0 <<= 1
598 ;; Carry = n-th bit of B; n >= 0
605 ;; Loop the fractional part
606 ;; B2/B3 is 0 now, use as guard bits for rounding
607 ;; Restore multiplicand
612 4: ;; CC += A:Guard * 2^n; n < 0
613 add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3 ; 40-bit accumulate: B3 += B2, carry ripples into CC3:CC0 += A3:A0
616 lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2 ; A3:A0:B2 >>= 1, the bit leaving A0 enters guard byte B2
619 ;; Carry = n-th bit of B; n < 0
626 ;; Save guard bits and set carry for rounding
629 ;; Move result into place
646 #endif /* L_mulusa3 */
669 #endif /* __AVR_HAVE_MUL__ */
673 /*******************************************************
674 Fractional Division 8 / 8
675 *******************************************************/
677 #define r_divd r25 /* dividend */
678 #define r_quo r24 /* quotient */
679 #define r_div r22 /* divisor */
680 #define r_sign __tmp_reg__ /* bit 7 is tested under L_divqq3 to decide whether to negate the result */
682 #if defined (L_divqq3)
692 sbrc r_sign, 7 ; negate result if needed: skips the next instruction (not shown) when bit 7 of r_sign is clear
696 #endif /* L_divqq3 */
698 #if defined (L_udivuqq3)
703 ;; Result is out of [0, 1) ==> Return 1 - eps.
707 #endif /* L_udivuqq3 */
710 #if defined (L_divqq_helper)
;; Shift-and-subtract division of r_divd by r_div, one quotient bit per pass.
;; The quotient bits are collected inverted and complemented at the end.
;; Labels and the loop branch are not visible in this excerpt.
712 clr r_quo ; clear quotient
713 inc __zero_reg__ ; init loop counter: one marker bit, shifted once per pass
715 lsl r_divd ; shift dividend left, its MSB goes into carry
716 brcs 0f ; dividend overflow: a 1 was shifted out
717 cp r_divd,r_div ; compare dividend & divisor (carry set iff dividend < divisor)
718 brcc 0f ; dividend >= divisor
719 rol r_quo ; dividend < divisor: carry is 1 here, shift it in as the inverted quotient bit
722 sub r_divd,r_div ; dividend -= divisor
723 lsl r_quo ; shift in 0 as the inverted quotient bit
725 lsl __zero_reg__ ; shift loop-counter bit; the register is 0 again once the bit is shifted out
727 com r_quo ; complement result
728 ; because C flag was complemented in loop
731 #endif /* L_divqq_helper */
739 /*******************************************************
740 Fractional Division 16 / 16
741 *******************************************************/
742 #define r_divdL 26 /* dividend Low */
743 #define r_divdH 27 /* dividend High */
744 #define r_quoL 24 /* quotient Low */
745 #define r_quoH 25 /* quotient High */
746 #define r_divL 22 /* divisor Low */
747 #define r_divH 23 /* divisor High */
750 #if defined (L_divhq3)
764 breq __divhq3_minus1 ; if equal return -1 (the comparison that sets Z is not visible here)
769 ;; negate result if needed
778 #endif /* defined (L_divhq3) */
780 #if defined (L_udivuhq3)
;; Shift-and-subtract division, 16 passes.  Quotient bits are collected
;; inverted and complemented at the end.  Only the low-byte halves of the
;; compare and subtract are visible in this excerpt.
782 sub r_quoH,r_quoH ; clear quotient and carry
786 DEFUN __udivuha3_common
787 clr r_quoL ; clear quotient (clr leaves the carry flag untouched)
788 ldi r_cnt,16 ; init loop counter; NOTE(review): r_cnt's definition for this section is not visible, ldi needs r16..r31
790 rol r_divdL ; shift dividend (with CARRY)
792 brcs __udivuhq3_ep ; dividend overflow
793 cp r_divdL,r_divL ; compare dividend & divisor
795 brcc __udivuhq3_ep ; dividend >= divisor
796 rol r_quoL ; shift quotient (with CARRY): inverted quotient bit 1
799 sub r_divdL,r_divL ; dividend -= divisor
801 lsl r_quoL ; shift quotient (without CARRY): inverted quotient bit 0
803 rol r_quoH ; shift quotient
804 dec r_cnt ; decrement loop counter
806 com r_quoL ; complement result
807 com r_quoH ; because C flag was complemented in loop
809 ENDF __udivuha3_common
810 #endif /* defined (L_udivuhq3) */
812 /*******************************************************
813 Fixed Division 8.8 / 8.8
814 *******************************************************/
815 #if defined (L_divha3)
828 lsr r_quoH ; adjust to 7 fractional bits
830 sbrs r0, 7 ; negate result if needed: skips the next instruction (not shown) when bit 7 of r0 is set
835 #endif /* defined (L_divha3) */
837 #if defined (L_udivuha3)
842 lsl r_quoH ; shift quotient into carry (MSB of r_quoH -> C)
843 XJMP __udivuha3_common ; same as fractional after rearrange; continues in the shared division code
845 #endif /* defined (L_udivuha3) */
855 /*******************************************************
856 Fixed Division 16.16 / 16.16
857 *******************************************************/
859 #define r_arg1L 24 /* arg1 gets passed already in place */
863 #define r_divdL 26 /* dividend Low */
866 #define r_divdHH 31 /* dividend High */
867 #define r_quoL 22 /* quotient Low */
870 #define r_quoHH 25 /* quotient High */
871 #define r_divL 18 /* divisor Low */
874 #define r_divHH 21 /* divisor High */
875 #define r_cnt __zero_reg__ /* loop count, decremented once per pass (0 after the loop!) */
877 #if defined (L_divsa3)
890 lsr r_quoHH ; adjust to 15 fractional bits
894 sbrs r0, 7 ; negate result if needed: skips the next instruction (not shown) when bit 7 of r0 is set
899 #endif /* defined (L_divsa3) */
901 #if defined (L_udivusa3)
;; Shift-and-subtract division, 32 passes.  Quotient bits are collected
;; inverted and complemented at the end.  Only part of the multi-byte
;; shift/compare/subtract chains is visible in this excerpt.
903 ldi r_divdHL, 32 ; init loop counter: 32 is loaded into r_divdHL; the copy into r_cnt is not visible here
907 wmov r_quoL, r_divdHL ; wmov is a macro defined elsewhere (presumably a register-pair move -- confirm)
908 lsl r_quoHL ; shift quotient into carry
911 rol r_divdL ; shift dividend (with CARRY)
915 brcs __udivusa3_ep ; dividend overflow
916 cp r_divdL,r_divL ; compare dividend & divisor
920 brcc __udivusa3_ep ; dividend >= divisor
921 rol r_quoL ; shift quotient (with CARRY): inverted quotient bit 1
924 sub r_divdL,r_divL ; dividend -= divisor
928 lsl r_quoL ; shift quotient (without CARRY): inverted quotient bit 0
930 rol r_quoH ; shift quotient
933 dec r_cnt ; decrement loop counter
935 com r_quoL ; complement result
936 com r_quoH ; because C flag was complemented in loop
941 #endif /* defined (L_udivusa3) */
962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
963 ;; Saturation, 2 Bytes
964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
966 ;; First Argument and Return Register
970 #if defined (L_ssneg_2)
977 #endif /* L_ssneg_2 */
979 #if defined (L_ssabs_2)
985 #endif /* L_ssabs_2 */
992 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
993 ;; Saturation, 4 Bytes
994 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
996 ;; First Argument and Return Register
1002 #if defined (L_ssneg_4)
1012 #endif /* L_ssneg_4 */
1014 #if defined (L_ssabs_4)
1020 #endif /* L_ssabs_4 */
1029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1030 ;; Saturation, 8 Bytes
1031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1033 ;; First Argument and Return Register
1043 #if defined (L_clr_8)
1048 ;; Clear Carry and all Bytes
1050 ;; Clear Carry and set Z
1054 ;; Propagate Carry to all Bytes, Carry unaltered
1063 #endif /* L_clr_8 */
1065 #if defined (L_ssneg_8)
1079 #endif /* L_ssneg_8 */
1081 #if defined (L_ssabs_8)
1091 #endif /* L_ssabs_8 */
1103 #if defined (L_usadd_8)
1112 0: ;; A[] = 0xffffffffffffffff (all 8 bytes)
1115 #endif /* L_usadd_8 */
1117 #if defined (L_ussub_8)
1129 #endif /* L_ussub_8 */
1131 #if defined (L_ssadd_8)
1139 ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
1145 #endif /* L_ssadd_8 */
1147 #if defined (L_sssub_8)
1155 ;; A = (B < 0) ? INT64_MAX : INT64_MIN
1162 #endif /* L_sssub_8 */