From 8410904a77b9238a1b65798a0fcd92f340ffe0ca Mon Sep 17 00:00:00 2001
From: Nick Clifton
Date: Tue, 27 Jan 2015 11:36:01 +0000
Subject: [PATCH] cmpsi2.S: Use function start and end macros.

	* config/rl78/cmpsi2.S: Use function start and end macros.
	(__gcc_bcmp): New function.
	* config/rl78/lshrsi3.S: Use function start and end macros.
	* config/rl78/mulsi3.S: Add support for G10.
	(__mulqi3): New function for G10.
	* config/rl78/signbit.S: Use function start and end macros.
	* config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
	fpmath-sf.S.
	(LIB2FUNCS_EXCLUDE): Define.
	(LIB2FUNCS_ST): Define.
	* config/rl78/trampoline.S: Use function start and end macros.
	* config/rl78/vregs.h (START_FUNC): New macro.
	(START_ANOTHER_FUNC): New macro.
	(END_FUNC): New macro.
	(END_ANOTHER_FUNC): New macro.
	* config/rl78/bit-count.S: New file.  Contains assembler
	implementations of the bit counting functions: __clzhi2,
	__clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
	__paritysi2, __popcounthi2 and __popcountsi2.
	* config/rl78/fpbit-sf.S: New file.  Contains assembler
	implementations of the math functions: __negsf2, __cmpsf2,
	__eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
	__fixsfsi, __fixunssfsi, __floatsisf and __floatunssisf.
	* config/rl78/fpmath-sf.S: New file.  Contains assembler
	implementations of the math functions: __subsf3, __addsf3,
	__mulsf3 and __divsf3.

From-SVN: r220162
---
 libgcc/ChangeLog                |   29 +
 libgcc/config/rl78/bit-count.S  |  213 +++++++
 libgcc/config/rl78/cmpsi2.S     |   73 ++-
 libgcc/config/rl78/fpbit-sf.S   |  608 ++++++++++++++++++
 libgcc/config/rl78/fpmath-sf.S  | 1030 +++++++++++++++++++++++++++++++
 libgcc/config/rl78/lshrsi3.S    |    9 +-
 libgcc/config/rl78/mulsi3.S     |   56 +-
 libgcc/config/rl78/signbit.S    |   20 +-
 libgcc/config/rl78/t-rl78       |   18 +-
 libgcc/config/rl78/trampoline.S |   27 +-
 libgcc/config/rl78/vregs.h      |   16 +-
 11 files changed, 2046 insertions(+), 53 deletions(-)
 create mode 100644 libgcc/config/rl78/bit-count.S
 create mode 100644 libgcc/config/rl78/fpbit-sf.S
 create mode 100644 libgcc/config/rl78/fpmath-sf.S

diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 08054548bc5..dbe2b82905a 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,32 @@
+2015-01-27  Nick Clifton
+
+	* config/rl78/cmpsi2.S: Use function start and end macros.
+	(__gcc_bcmp): New function.
+	* config/rl78/lshrsi3.S: Use function start and end macros.
+	* config/rl78/mulsi3.S: Add support for G10.
+	(__mulqi3): New function for G10.
+	* config/rl78/signbit.S: Use function start and end macros.
+	* config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
+	fpmath-sf.S.
+	(LIB2FUNCS_EXCLUDE): Define.
+	(LIB2FUNCS_ST): Define.
+	* config/rl78/trampoline.S: Use function start and end macros.
+	* config/rl78/vregs.h (START_FUNC): New macro.
+	(START_ANOTHER_FUNC): New macro.
+	(END_FUNC): New macro.
+	(END_ANOTHER_FUNC): New macro.
+	* config/rl78/bit-count.S: New file.  Contains assembler
+	implementations of the bit counting functions: __clzhi2,
+	__clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
+	__paritysi2, __popcounthi2 and __popcountsi2.
+	* config/rl78/fpbit-sf.S: New file.  Contains assembler
+	implementations of the math functions: __negsf2, __cmpsf2,
+	__eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
+	__fixsfsi, __fixunssfsi, __floatsisf and __floatunssisf.
+	* config/rl78/fpmath-sf.S: New file.
Contains assembler + implementations of the math functions: __subsf3, __addsf3, + __mulsf3 and __divsf3 + 2015-01-27 Rainer Orth * config.host (i[34567]86-*-solaris2*, x86_64-*-solaris2.1[0-9]*): diff --git a/libgcc/config/rl78/bit-count.S b/libgcc/config/rl78/bit-count.S new file mode 100644 index 00000000000..2685c84ca3f --- /dev/null +++ b/libgcc/config/rl78/bit-count.S @@ -0,0 +1,213 @@ +; Copyright (C) 2012-2014 Free Software Foundation, Inc. +; Contributed by Red Hat. +; +; This file is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published by the +; Free Software Foundation; either version 3, or (at your option) any +; later version. +; +; This file is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; Under Section 7 of GPL version 3, you are granted additional +; permissions described in the GCC Runtime Library Exception, version +; 3.1, as published by the Free Software Foundation. +; +; You should have received a copy of the GNU General Public License and +; a copy of the GCC Runtime Library Exception along with this program; +; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +; . + +#include "vregs.h" + +START_FUNC ___clzhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __clzhi2_internal +__clzhi2_internal: + movw r8, #16 + cmpw ax, #0 + bz $clzhi2_is_zero + mov e, #0xff +1: + inc e + shlw ax, 1 + bnc $1b + mov a, e + mov r8, a +clzhi2_is_zero: + ret +END_FUNC ___clzhi2 + + +START_FUNC ___clzsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+6] + cmpw ax, #0 + bnz $__clzhi2_internal + movw ax, [SP+4] + call !__clzhi2_internal + movw ax, r8 + addw ax, #16 + movw r8, ax + ret +END_FUNC ___clzsi2 + + +START_FUNC ___ctzhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __ctzhi2_internal +__ctzhi2_internal: + movw r8, #16 + cmpw ax, #0 + bz $ctzhi2_is_zero + mov e, #0xff +1: + inc e + shrw ax, 1 + bnc $1b + mov a, e + mov r8, a +ctzhi2_is_zero: + ret +END_FUNC ___ctzhi2 + + +START_FUNC ___ctzsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+4] + cmpw ax, #0 + bnz $__ctzhi2_internal + movw ax, [SP+6] + call !__ctzhi2_internal + movw ax, r8 + addw ax, #16 + movw r8, ax + ret +END_FUNC ___ctzsi2 + + +START_FUNC ___ffshi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __ffshi2_internal +__ffshi2_internal: + movw r8, #0 + cmpw ax, #0 + bz $ffshi2_is_zero + mov e, #0 +1: + inc e + shrw ax, 1 + bnc $1b + mov a, e + mov r8, a +ffshi2_is_zero: + ret +END_FUNC ___ffshi2 + + +START_FUNC ___ffssi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+4] + cmpw ax, #0 + bnz $__ffshi2_internal + movw ax, [SP+6] + cmpw ax, #0 + bz $1f + call !__ffshi2_internal + movw ax, r8 + addw ax, #16 +1: + movw r8, ax + ret +END_FUNC ___ffssi2 + + +START_FUNC ___parityqi_internal + mov1 cy, a.0 + xor1 cy, a.1 + xor1 cy, a.2 + xor1 cy, a.3 + xor1 cy, a.4 + xor1 cy, a.5 + xor1 cy, a.6 + xor1 cy, a.7 + movw ax, #0 + bnc $1f + incw ax +1: + movw r8, ax + ret +END_FUNC ___parityqi_internal + + +START_FUNC ___parityhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + xor a, x + br $___parityqi_internal +END_FUNC ___parityhi2 + + +START_FUNC ___paritysi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. 
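+	;; XOR-fold all four argument bytes into A below: the parity of
+	;; the folded byte equals the parity of the whole 32-bit value.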
+ movw ax, [SP+4] + xor a, x + mov b, a + movw ax, [SP+6] + xor a, x + xor a, b + br $___parityqi_internal +END_FUNC ___paritysi2 + + + +START_FUNC ___popcounthi2 + ;; Argument is in [SP+4], return in R8. + mov d, #2 + br $___popcountqi_internal +END_FUNC ___popcounthi2 + + +START_FUNC ___popcountsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + mov d, #4 + br $___popcountqi_internal +END_FUNC ___popcountsi2 + + +START_FUNC ___popcountqi_internal + ;; There are D bytes starting at [HL] + ;; store count in R8. + + movw ax, sp + addw ax, #4 + movw hl, ax + mov a, #0 +1: + xch a, b + mov a, [hl] + xch a, b + mov e, #8 +2: + shl b,1 + addc a, #0 + dec e + bnz $2b + + incw hl + dec d + bnz $1b + + mov x, a + mov a, #0 + movw r8, ax + ret +END_FUNC ___popcountqi_internal diff --git a/libgcc/config/rl78/cmpsi2.S b/libgcc/config/rl78/cmpsi2.S index 557b4597132..f0d8292625d 100644 --- a/libgcc/config/rl78/cmpsi2.S +++ b/libgcc/config/rl78/cmpsi2.S @@ -31,9 +31,8 @@ ;; If A is less than B it returns 0. If A is greater ;; than B it returns 2. If they are equal it returns 1. - .global ___cmpsi2 - .type ___cmpsi2, @function -___cmpsi2: +START_FUNC ___cmpsi2 + ;; A is at [sp+4] ;; B is at [sp+8] ;; Result put in R8 @@ -88,18 +87,18 @@ ___cmpsi2: movw r8, ax ret - .size ___cmpsi2, . - ___cmpsi2 - - +END_FUNC ___cmpsi2 + +;; ------------------------------------------------------ + ;; int __ucmpsi2 (unsigned long A, unsigned long B) ;; ;; Performs an unsigned comparison of A and B. ;; If A is less than B it returns 0. If A is greater ;; than B it returns 2. If they are equal it returns 1. - .global ___ucmpsi2 - .type ___ucmpsi2, @function -___ucmpsi2: +START_FUNC ___ucmpsi2 + ;; A is at [sp+4] ;; B is at [sp+8] ;; Result put in R8..R9 @@ -117,5 +116,57 @@ ___ucmpsi2: br !!.Lless_than_or_greater_than br !!.Lcompare_bottom_words - .size ___ucmpsi2, . - ___ucmpsi2 - \ No newline at end of file +END_FUNC ___ucmpsi2 + +;; ------------------------------------------------------ + + ;; signed int __gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size) + ;; Result is negative if S1 is less than S2, + ;; positive if S1 is greater, 0 if S1 and S2 are equal. + +START_FUNC __gcc_bcmp + + ;; S1 is at [sp+4] + ;; S2 is at [sp+6] + ;; SIZE is at [sp+8] + ;; Result in r8/r9 + + movw r10, #0 +1: + ;; Compare R10 against the SIZE parameter + movw ax, [sp+8] + subw ax, r10 + sknz + br !!1f + + ;; Load S2[r10] into R8 + movw ax, [sp+6] + addw ax, r10 + movw hl, ax + mov a, [hl] + mov r8, a + + ;; Load S1[r10] into A + movw ax, [sp+4] + addw ax, r10 + movw hl, ax + mov a, [hl] + + ;; Increment offset + incw r10 + + ;; Compare loaded bytes + cmp a, r8 + sknz + br !!1b + + ;; They differ. Subtract *S2 from *S1 and return as the result. 
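+	;; (Both bytes are zero-extended to 16 bits first, so the
+	;; widened subtraction below cannot wrap.)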
+	mov	x, a
+	mov	a, #0
+	mov	r9, #0
+	subw	ax, r8
+1:
+	movw	r8, ax
+	ret
+
+END_FUNC __gcc_bcmp
diff --git a/libgcc/config/rl78/fpbit-sf.S b/libgcc/config/rl78/fpbit-sf.S
new file mode 100644
index 00000000000..042facee14e
--- /dev/null
+++ b/libgcc/config/rl78/fpbit-sf.S
@@ -0,0 +1,608 @@
+; SF format is:
+;
+;   [sign] 1.[23bits] E[8bits(n-127)]
+;
+;   SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm
+;
+;   [A+0] mmmmmmmm
+;   [A+1] mmmmmmmm
+;   [A+2] Emmmmmmm
+;   [A+3] SEEEEEEE
+;
+; Special values (xxx != 0):
+;
+;   s1111111 10000000 00000000 00000000	infinity
+;   s1111111 1xxxxxxx xxxxxxxx xxxxxxxx	NaN
+;   s0000000 00000000 00000000 00000000	zero
+;   s0000000 0xxxxxxx xxxxxxxx xxxxxxxx	denormals
+;
+; Note that CMPtype is "signed char" for rl78
+;
+
+#include "vregs.h"
+
+#define Z	PSW.6
+
+START_FUNC ___negsf2
+
+	;; Negate the floating point value.
+	;; Input at [SP+4]..[SP+7].
+	;; Output to R8..R11.
+
+	movw	ax, [SP+4]
+	movw	r8, ax
+	movw	ax, [SP+6]
+	xor	a, #0x80
+	movw	r10, ax
+	ret
+
+END_FUNC ___negsf2
+
+;; ------------------internal functions used by later code --------------
+
+START_FUNC __int_isnan
+
+	;; [HL] points to value, returns Z if it's a NaN
+
+	mov	a, [hl+2]
+	and	a, #0x80
+	mov	x, a
+	mov	a, [hl+3]
+	and	a, #0x7f
+	cmpw	ax, #0x7f80
+	skz
+	ret			; return NZ if not NaN
+	mov	a, [hl+2]
+	and	a, #0x7f
+	or	a, [hl+1]
+	or	a, [hl]
+	bnz	$1f
+	clr1	Z		; Z, normal
+	ret
+1:
+	set1	Z		; nan
+	ret
+
+END_FUNC __int_isnan
+
+START_FUNC __int_eithernan
+
+	;; Called from the top-level functions; returns Z if either number
+	;; is a NaN, or NZ if both are OK.
+
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	$!__int_isnan
+	bz	$1f
+
+	movw	ax, sp
+	addw	ax, #12
+	movw	hl, ax
+	call	$!__int_isnan
+1:
+	ret
+
+END_FUNC __int_eithernan
+
+START_FUNC __int_iszero
+
+	;; [HL] points to value, returns Z if it's zero
+
+	mov	a, [hl+3]
+	and	a, #0x7f
+	or	a, [hl+2]
+	or	a, [hl+1]
+	or	a, [hl]
+	ret
+
+END_FUNC __int_iszero
+
+START_FUNC __int_cmpsf
+
+	;; This is always called from some other function here,
+	;; so the stack offsets are adjusted accordingly.
+
+	;; X [SP+8] <=> Y [SP+12] : <=> 0
+
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	$!__int_iszero
+	bnz	$1f
+
+	movw	ax, sp
+	addw	ax, #12
+	movw	hl, ax
+	call	$!__int_iszero
+	bnz	$2f
+	;; At this point, both args are zero.
+	mov	a, #0
+	ret
+
+2:
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+1:
+	;; At least one arg is non-zero so we can just compare magnitudes.
+	;; Args are [HL] and [HL+4].
+
+	mov	a, [HL+3]
+	xor	a, [HL+7]
+	mov1	cy, a.7
+	bnc	$1f
+
+	mov	a, [HL+3]
+	sar	a, 7
+	or	a, #1
+	ret
+
+1:	;; Signs the same, compare magnitude.  It's safe to lump
+	;; the sign bits, exponent, and mantissa together here, since they're
+	;; stored in the right sequence.
+	movw	ax, [HL+2]
+	cmpw	ax, [HL+6]
+	bc	$ybig_cmpsf	; branch if X < Y
+	bnz	$xbig_cmpsf	; branch if X > Y
+
+	movw	ax, [HL]
+	cmpw	ax, [HL+4]
+	bc	$ybig_cmpsf	; branch if X < Y
+	bnz	$xbig_cmpsf	; branch if X > Y
+
+	mov	a, #0
+	ret
+
+xbig_cmpsf:	; |X| > |Y| so return A = 1 if pos, 0xff if neg
+	mov	a, [HL+3]
+	sar	a, 7
+	or	a, #1
+	ret
+ybig_cmpsf:	; |X| < |Y| so return A = 0xff if pos, 1 if neg
+	mov	a, [HL+3]
+	xor	a, #0x80
+	sar	a, 7
+	or	a, #1
+	ret
+
+END_FUNC __int_cmpsf
+
+;; ----------------------------------------------------------
+
+START_FUNC ___cmpsf2
+	;; This function calculates "A <=> B".  That is, if A is less than B
+	;; it returns -1, if A is greater than B it returns 1, and if A
+	;; and B are equal it returns 0.  If either argument is NaN the
+	;; behaviour is undefined.
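+	;; (As written, the code below returns 1 when either argument
+	;; is a NaN.)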
+
+	;; Input at [SP+4]..[SP+7].
+	;; Output to R8..R9.
+
+	call	$!__int_eithernan
+	bnz	$1f
+	movw	r8, #1
+	ret
+1:
+	call	$!__int_cmpsf
+	mov	r8, a
+	sar	a, 7
+	mov	r9, a
+	ret
+
+END_FUNC ___cmpsf2
+
+;; ----------------------------------------------------------
+
+	;; These functions are all basically the same as ___cmpsf2
+	;; except that they define how they handle NaNs.
+
+START_FUNC ___eqsf2
+	;; Returns zero iff neither argument is NaN
+	;; and both arguments are equal.
+START_ANOTHER_FUNC ___nesf2
+	;; Returns non-zero iff either argument is NaN or the arguments are
+	;; unequal.  Effectively __nesf2 is the same as __eqsf2.
+START_ANOTHER_FUNC ___lesf2
+	;; Returns a value less than or equal to zero if neither
+	;; argument is NaN, and the first is less than or equal to the second.
+START_ANOTHER_FUNC ___ltsf2
+	;; Returns a value less than zero if neither argument is
+	;; NaN, and the first is strictly less than the second.
+
+	;; Input at [SP+4]..[SP+7].
+	;; Output to R8.
+
+	mov	r8, #1
+
+;;; Fall through
+
+START_ANOTHER_FUNC __int_cmp_common
+
+	call	$!__int_eithernan
+	sknz
+	;; return value (pre-filled-in below) for "either is nan"
+	ret
+
+	call	$!__int_cmpsf
+	mov	r8, a
+	ret
+
+END_ANOTHER_FUNC __int_cmp_common
+END_ANOTHER_FUNC ___ltsf2
+END_ANOTHER_FUNC ___lesf2
+END_ANOTHER_FUNC ___nesf2
+END_FUNC ___eqsf2
+
+START_FUNC ___gesf2
+	;; Returns a value greater than or equal to zero if neither argument
+	;; is a NaN and the first is greater than or equal to the second.
+START_ANOTHER_FUNC ___gtsf2
+	;; Returns a value greater than zero if neither argument
+	;; is NaN, and the first is strictly greater than the second.
+
+	movw	r8, #0xffff
+	br	$__int_cmp_common
+
+END_ANOTHER_FUNC ___gtsf2
+END_FUNC ___gesf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___unordsf2
+	;; Returns a nonzero value if either argument is NaN, otherwise 0.
+
+	call	$!__int_eithernan
+	movw	r8, #0
+	sknz			; this is from the call, not the movw
+	movw	r8, #1
+	ret
+
+END_FUNC ___unordsf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___fixsfsi
+	;; Converts its floating point argument into a signed long,
+	;; rounding toward zero.
+	;; The behaviour with NaNs and Infinities is not well defined.
+	;; We choose to return 0 for NaNs, INT_MIN for -inf and INT_MAX
+	;; for +inf.
+	;; This matches the behaviour of the C function in libgcc2.c.
+
+	;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb).
+
+	;; Special case handling for infinities as __fixunssfsi
+	;; will not give us the values that we want.
+	movw	ax, sp
+	addw	ax, #4
+	movw	hl, ax
+	call	!!__int_isinf
+	bnz	$1f
+	mov	a, [SP+7]
+	bt	a.7, $2f
+	;; +inf
+	movw	r8, #-1
+	movw	r10, #0x7fff
+	ret
+	;; -inf
+2:	movw	r8, #0
+	movw	r10, #0x8000
+	ret
+
+	;; Load the value into r10:r11:X:A
+1:	movw	ax, [SP+4]
+	movw	r10, ax
+	movw	ax, [SP+6]
+
+	;; If the value is positive we can just use __fixunssfsi
+	bf	a.7, $__int_fixunssfsi
+
+	;; Otherwise we negate the value, call __fixunssfsi and
+	;; then negate its result.
+	clr1	a.7
+	call	$!__int_fixunssfsi
+
+	movw	ax, #0
+	subw	ax, r8
+	movw	r8, ax
+	movw	ax, #0
+	sknc
+	decw	ax
+	subw	ax, r10
+	movw	r10, ax
+
+	;; Check for a positive result (which should only happen when
+	;; __fixunssfsi returns UINTMAX or 0).  In such cases just return 0.
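+	;; (A clear top bit in R11 here means the negation did not yield
+	;; a negative number, which only happens for those two results.)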
+ mov a, r11 + bt a.7, $1f + movw r10,#0x0 + movw r8, #0x0 + +1: ret + +END_FUNC ___fixsfsi + +START_FUNC ___fixunssfsi + ;; Converts its floating point argument into an unsigned long + ;; rounding towards zero. Negative arguments all become zero. + ;; We choose to return 0 for NaNs and -inf, but UINTMAX for +inf. + ;; This matches the behaviour of the C function in libgcc2.c. + + ;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb) + + ;; Get the input value. + movw ax, [SP+4] + movw r10, ax + movw ax, [SP+6] + + ;; Fall through into the internal function. + + .global __int_fixunssfsi +__int_fixunssfsi: + ;; Input in (lsb) r10.r11.x.a (msb). + + ;; Test for a negative input. We shift the other bits at the + ;; same time so that A ends up holding the whole exponent: + ;; + ;; before: + ;; SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM + ;; A X R11 R10 + ;; + ;; after: + ;; EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM + ;; A X R11 R10 + shlw ax, 1 + bnc $1f + + ;; Return zero. +2: movw r8, #0 + movw r10, #0 + ret + + ;; An exponent of -1 is either a NaN or infinity. +1: cmp a, #-1 + bnz $3f + ;; For NaN we return 0. For infinity we return UINTMAX. + mov a, x + or a, r10 + or a, r11 + cmp0 a + bnz $2b + +6: movw r8, #-1 ; -1 => UINT_MAX + movw r10, #-1 + ret + + ;; If the exponent is negative the value is < 1 and so the + ;; converted value is 0. Note we must allow for the bias + ;; applied to the exponent. Thus a value of 127 in the + ;; EEEEEEEE bits actually represents an exponent of 0, whilst + ;; a value less than 127 actually represents a negative exponent. + ;; Also if the EEEEEEEE bits are all zero then this represents + ;; either a denormal value or 0.0. Either way for these values + ;; we return 0. +3: sub a, #127 + bc $2b + + ;; A now holds the bias adjusted exponent, which is known to be >= 0. + ;; If the exponent is > 31 then the conversion will overflow. + cmp a, #32 + bnc $6b +4: + ;; Save the exponent in H. We increment it by one because we want + ;; to be sure that the loop below will always execute at least once. + inc a + mov h, a + + ;; Get the top 24 bits of the mantissa into A:X:R10 + ;; Include the implicit 1-bit that is inherent in the IEEE fp format. + ;; + ;; before: + ;; EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM + ;; H X R11 R10 + ;; after: + ;; EEEEEEEE 1MMMMMMM MMMMMMMM MMMMMMMM + ;; H A X R10 + + mov a, r11 + xch a, x + shr a, 1 + set1 a.7 + + ;; Clear B:C:R12:R13 + movw bc, #0 + movw r12, #0 + + ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13), + ;; decrementing the exponent as we go. + + ;; before: + ;; MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + ;; A X R10 B C R12 R13 + ;; first iter: + ;; MMMMMMMM MMMMMMMM MMMMMMM0 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxM + ;; A X R10 B C R12 R13 + ;; second iter: + ;; MMMMMMMM MMMMMMMM MMMMMM00 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxMM + ;; A X R10 B C R12 R13 + ;; etc. +5: + xch a, r10 + shl a, 1 + xch a, r10 + + rolwc ax, 1 + + xch a, r13 + rolc a, 1 + xch a, r13 + + xch a, r12 + rolc a, 1 + xch a, r12 + + rolwc bc, 1 + + dec h + bnz $5b + + ;; Result is currently in (lsb) r13.r12. c. b. (msb), + ;; Move it into (lsb) r8. r9. r10. r11 (msb). + + mov a, r13 + mov r8, a + + mov a, r12 + mov r9, a + + mov a, c + mov r10, a + + mov a, b + mov r11, a + + ret + +END_FUNC ___fixunssfsi + +;; ------------------------------------------------------------------------ + +START_FUNC ___floatsisf + ;; Converts its signed long argument into a floating point. + ;; Argument in [SP+4]..[SP+7]. Result in R8..R11. 
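+	;; Strategy: negate a negative argument, convert the magnitude
+	;; via __int_floatunsisf, then set the sign bit of the result.
+	;; The input 0x80000000 survives the negation unchanged and is
+	;; special-cased below.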
+
+	;; Get the argument.
+	movw	ax, [SP+4]
+	movw	bc, ax
+	movw	ax, [SP+6]
+
+	;; Test the sign bit.  If the value is positive then drop into
+	;; the unsigned conversion routine.
+	bf	a.7, $2f
+
+	;; If negative convert to positive ...
+	movw	hl, ax
+	movw	ax, #0
+	subw	ax, bc
+	movw	bc, ax
+	movw	ax, #0
+	sknc
+	decw	ax
+	subw	ax, hl
+
+	;; If the result is still negative then the input was 0x80000000
+	;; and we want to return -2.0^31 (0xcf000000); handle that
+	;; directly here rather than via __int_floatunsisf.
+	bt	a.7, $1f
+
+	;; Call the unsigned conversion routine.
+	call	$!__int_floatunsisf
+
+	;; Negate the result.
+	set1	r11.7
+
+	;; Done.
+	ret
+
+1:	;; Return -2.0^31, aka 0xcf000000
+
+	clrb	a
+	mov	r8, a
+	mov	r9, a
+	mov	r10, a
+	mov	a, #0xcf
+	mov	r11, a
+	ret
+
+START_ANOTHER_FUNC ___floatunsisf
+	;; Converts its unsigned long argument into a floating point.
+	;; Argument in [SP+4]..[SP+7].  Result in R8..R11.
+
+	;; Get the argument.
+	movw	ax, [SP+4]
+	movw	bc, ax
+	movw	ax, [SP+6]
+
+2:	;; Internal entry point from __floatsisf
+	;; Input in AX (high) and BC (low)
+	.global __int_floatunsisf
+__int_floatunsisf:
+
+	;; Special case handling for zero.
+	cmpw	ax, #0
+	bnz	$1f
+	movw	ax, bc
+	cmpw	ax, #0
+	movw	ax, #0
+	bnz	$1f
+
+	;; Return 0.0
+	movw	r8, ax
+	movw	r10, ax
+	ret
+
+1:	;; Pre-load the loop count/exponent.
+	;; Exponents are biased by 0x80 and we start the loop knowing that
+	;; we are going to skip the highest set bit.  Hence the highest value
+	;; that we can get for the exponent is 0x1e (bits from input) + 0x80 = 0x9e.
+	mov	h, #0x9e
+
+	;; Move bits off the top of AX:BC until we hit a 1 bit.
+	;; Decrement the count of remaining bits as we go.
+
+2:	shlw	bc, 1
+	rolwc	ax, 1
+	bc	$3f
+	dec	h
+	br	$2b
+
+	;; Ignore the first one bit - it is implicit in the IEEE format.
+	;; The count of remaining bits is the exponent.
+
+	;; Assemble the final floating point value.  We have...
+	;; before:
+	;;   EEEEEEEE MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx
+	;;   H        A        X        B        C
+	;; after:
+	;;   0EEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+	;;   R11      R10      R9       R8
+
+3:	shrw	ax, 1
+	mov	r10, a
+	mov	a, x
+	mov	r9, a
+
+	mov	a, b
+	rorc	a, 1
+
+	;; If the bottom bit of B was set before we shifted it out then we
+	;; need to round the result up.  Unless none of the bits in C are set.
+	;; In this case we are exactly half-way between two values, and we
+	;; round towards an even value.  We round up by increasing the
+	;; mantissa by 1.  If this results in a zero mantissa we have to
+	;; increment the exponent.  We round down by ignoring the dropped bits.
+
+	bnc	$4f
+	cmp0	c
+	sknz
+	bf	a.0, $4f
+
+5:	;; Round the mantissa up by 1.
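+	;; (Ripple the +1 through the three mantissa bytes; if it carries
+	;; into bit 7 of R10 the mantissa overflowed, so bump the exponent
+	;; in H and clear that bit.)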
+ add a, #1 + addc r9, #0 + addc r10, #0 + bf r10.7, $4f + inc h + clr1 r10.7 + +4: mov r8, a + mov a, h + shr a, 1 + mov r11, a + sknc + set1 r10.7 + ret + +END_ANOTHER_FUNC ___floatunsisf +END_FUNC ___floatsisf diff --git a/libgcc/config/rl78/fpmath-sf.S b/libgcc/config/rl78/fpmath-sf.S new file mode 100644 index 00000000000..f232f6b5f83 --- /dev/null +++ b/libgcc/config/rl78/fpmath-sf.S @@ -0,0 +1,1030 @@ +; SF format is: +; +; [sign] 1.[23bits] E[8bits(n-127)] +; +; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm +; +; [A+0] mmmmmmmm +; [A+1] mmmmmmmm +; [A+2] Emmmmmmm +; [A+3] SEEEEEEE +; +; Special values (xxx != 0): +; +; r11 r10 r9 r8 +; [HL+3] [HL+2] [HL+1] [HL+0] +; s1111111 10000000 00000000 00000000 infinity +; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN +; s0000000 00000000 00000000 00000000 zero +; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals +; +; Note that CMPtype is "signed char" for rl78 +; + +#include "vregs.h" + +#define Z PSW.6 + +; External Functions: +; +; __int_isnan [HL] -> Z if NaN +; __int_iszero [HL] -> Z if zero + +START_FUNC __int_isinf + ;; [HL] points to value, returns Z if it's #Inf + + mov a, [hl+2] + and a, #0x80 + mov x, a + mov a, [hl+3] + and a, #0x7f + cmpw ax, #0x7f80 + skz + ret ; return NZ if not NaN + mov a, [hl+2] + and a, #0x7f + or a, [hl+1] + or a, [hl] + ret + +END_FUNC __int_isinf + +START_FUNC _int_unpack_sf + ;; convert 32-bit SFmode [DE] to 6-byte struct [HL] ("A") + +#define A_SIGN [hl+0] /* byte */ +#define A_EXP [hl+2] /* word */ +#define A_FRAC_L [hl+4] /* word */ +#define A_FRAC_LH [hl+5] /* byte */ +#define A_FRAC_H [hl+6] /* word or byte */ +#define A_FRAC_HH [hl+7] /* byte */ + +#define B_SIGN [hl+8] +#define B_EXP [hl+10] +#define B_FRAC_L [hl+12] +#define B_FRAC_LH [hl+13] +#define B_FRAC_H [hl+14] +#define B_FRAC_HH [hl+15] + + mov a, [de+3] + sar a, 7 + mov A_SIGN, a + + movw ax, [de+2] + and a, #0x7f + shrw ax, 7 + movw bc, ax ; remember if the exponent is all zeros + subw ax, #127 ; exponent is now non-biased + movw A_EXP, ax + + movw ax, [de] + movw A_FRAC_L, ax + + mov a, [de+2] + and a, #0x7f + cmp0 c ; if the exp is all zeros, it's denormal + skz + or a, #0x80 + mov A_FRAC_H, a + + mov a, #0 + mov A_FRAC_HH, a + + ;; rounding-bit-shift + movw ax, A_FRAC_L + shlw ax, 1 + movw A_FRAC_L, ax + mov a, A_FRAC_H + rolc a, 1 + mov A_FRAC_H, a + mov a, A_FRAC_HH + rolc a, 1 + mov A_FRAC_HH, a + + ret + +END_FUNC _int_unpack_sf + +; func(SF a,SF b) +; [SP+4..7] a +; [SP+8..11] b + +START_FUNC ___subsf3 + + ;; a - b => a + (-b) + + ;; Note - we cannot just change the sign of B on the stack and + ;; then fall through into __addsf3. The stack'ed value may be + ;; used again (it was created by our caller after all). Instead + ;; we have to allocate some stack space of our own, copy A and B, + ;; change the sign of B, call __addsf3, release the allocated stack + ;; and then return. 
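+	;; In C terms the idea is simply (illustrative sketch, using a
+	;; hypothetical sign_flip() helper):
+	;;
+	;;   float __subsf3 (float a, float b)
+	;;   {
+	;;     return __addsf3 (a, sign_flip (b));  /* a + (-b) */
+	;;   }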
+ + subw sp, #8 + movw ax, [sp+4+8] + movw [sp], ax + movw ax, [sp+4+2+8] + movw [sp+2], ax + movw ax, [sp+4+4+8] + movw [sp+4], ax + mov a, [sp+4+6+8] + mov [sp+6], a + mov a, [sp+4+7+8] + xor a, #0x80 + mov [sp+7], a + call $!___addsf3 + addw sp, #8 + ret +END_FUNC ___subsf3 + +START_FUNC ___addsf3 + + ;; if (isnan(a)) return a + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_isnan + bnz $1f +ret_a: + movw ax, [sp+4] + movw r8, ax + movw ax, [sp+6] + movw r10, ax + ret + +1: ;; if (isnan (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_isnan + bnz $2f +ret_b: + movw ax, [sp+8] + movw r8, ax + movw ax, [sp+10] + movw r10, ax + ret + +2: ;; if (isinf (a)) + movw ax, sp + addw ax, #4 + movw hl, ax + call $!__int_isinf + bnz $3f + + ;; if (isinf (b) && a->sign != b->sign) return NaN + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $ret_a + + mov a, [sp+7] + mov h, a + mov a, [sp+11] + xor a, h + bf a.7, $ret_a + + movw r8, #0x0001 + movw r10, #0x7f80 + ret + +3: ;; if (isinf (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bz $ret_b + + ;; if (iszero (b)) + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $4f + + ;; if (iszero (a)) + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bnz $ret_a + + movw ax, [sp+4] + movw r8, ax + mov a, [sp+7] + mov h, a + movw ax, [sp+10] + and a, h + movw r10, ax + ret + +4: ;; if (iszero (a)) return b; + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bz $ret_b + +; Normalize the two numbers relative to each other. At this point, +; we need the numbers converted to their "unpacked" format. + + subw sp, #16 ; Save room for two unpacked values. + + movw ax, sp + movw hl, ax + addw ax, #16+4 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + addw ax, #8 + movw hl, ax + addw ax, #16+8-8 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + movw hl, ax + + ;; diff = a.exponent - b.exponent + movw ax, B_EXP ; sign/exponent word + movw bc, ax + movw ax, A_EXP ; sign/exponent word + + subw ax, bc ; a = a.exp - b.exp + movw de, ax ; d = sdiff + + ;; if (diff < 0) diff = -diff + bf a.7, $1f + xor a, #0xff + xor r_0, #0xff ; x + incw ax ; a = diff +1: + ;; if (diff >= 23) zero the smaller one + cmpw ax, #24 + bc $.L661 ; if a < 23 goto 661 + + ;; zero out the smaller one + + movw ax, de + bt a.7, $1f ; if sdiff < 0 (a_exp < b_exp) goto 1f + ;; "zero out" b + movw ax, A_EXP + movw B_EXP, ax + movw ax, #0 + movw B_FRAC_L, ax + movw B_FRAC_H, ax + br $5f +1: + ;; "zero out" a + movw ax, B_EXP + movw A_EXP, ax + movw ax, #0 + movw A_FRAC_L, ax + movw A_FRAC_H, ax + + br $5f +.L661: + ;; shift the smaller one so they have the same exponents +1: + movw ax, de + bt a.7, $1f + cmpw ax, #0 ; sdiff > 0 + bnh $1f ; if (sdiff <= 0) goto 1f + + decw de + incw B_EXP ; because it's [HL+byte] + + movw ax, B_FRAC_H + shrw ax, 1 + movw B_FRAC_H, ax + mov a, B_FRAC_LH + rorc a, 1 + mov B_FRAC_LH, a + mov a, B_FRAC_L + rorc a, 1 + mov B_FRAC_L, a + + br $1b +1: + movw ax, de + bf a.7, $1f + + incw de + incw A_EXP ; because it's [HL+byte] + + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + + br $1b +1: + +5: ;; At this point, A and B have the same exponent. + + mov a, A_SIGN + cmp a, B_SIGN + bnz $1f + + ;; Same sign, just add. 
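+	;; (This is a 32-bit add of the unpacked mantissas; a carry into
+	;; the top byte is shifted back out when the result is
+	;; renormalized at .L728.)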
+ movw ax, A_FRAC_L + addw ax, B_FRAC_L + movw A_FRAC_L, ax + mov a, A_FRAC_H + addc a, B_FRAC_H + mov A_FRAC_H, a + mov a, A_FRAC_HH + addc a, B_FRAC_HH + mov A_FRAC_HH, a + + br $.L728 + +1: ;; Signs differ - A has A_SIGN still. + bf a.7, $.L696 + + ;; A is negative, do B-A + movw ax, B_FRAC_L + subw ax, A_FRAC_L + movw A_FRAC_L, ax + mov a, B_FRAC_H + subc a, A_FRAC_H + mov A_FRAC_H, a + mov a, B_FRAC_HH + subc a, A_FRAC_HH + mov A_FRAC_HH, a + + br $.L698 +.L696: + ;; B is negative, do A-B + movw ax, A_FRAC_L + subw ax, B_FRAC_L + movw A_FRAC_L, ax + mov a, A_FRAC_H + subc a, B_FRAC_H + mov A_FRAC_H, a + mov a, A_FRAC_HH + subc a, B_FRAC_HH + mov A_FRAC_HH, a + +.L698: + ;; A is still A_FRAC_HH + bt a.7, $.L706 + + ;; subtraction was positive + mov a, #0 + mov A_SIGN, a + br $.L712 + +.L706: + ;; subtraction was negative + mov a, #0xff + mov A_SIGN, a + + ;; This negates A_FRAC + mov a, A_FRAC_L + xor a, #0xff ; XOR doesn't mess with carry + add a, #1 ; INC doesn't set the carry + mov A_FRAC_L, a + mov a, A_FRAC_LH + xor a, #0xff + addc a, #0 + mov A_FRAC_LH, a + mov a, A_FRAC_H + xor a, #0xff + addc a, #0 + mov A_FRAC_H, a + mov a, A_FRAC_HH + xor a, #0xff + addc a, #0 + mov A_FRAC_HH, a + +.L712: + ;; Renormalize the subtraction + + mov a, A_FRAC_L + or a, A_FRAC_LH + or a, A_FRAC_H + or a, A_FRAC_HH + bz $.L728 + + ;; Mantissa is not zero, left shift until the MSB is in the + ;; right place +1: + movw ax, A_FRAC_H + cmpw ax, #0x0200 + bnc $.L728 + + decw A_EXP + + movw ax, A_FRAC_L + shlw ax, 1 + movw A_FRAC_L, ax + movw ax, A_FRAC_H + rolwc ax, 1 + movw A_FRAC_H, ax + br $1b + +.L728: + ;; normalize A and pack it + + movw ax, A_FRAC_H + cmpw ax, #0x01ff + bnh $1f + ;; overflow in the mantissa; adjust + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + incw A_EXP +1: + + call $!__rl78_int_pack_a_r8 + addw sp, #16 + ret + +END_FUNC ___addsf3 + +START_FUNC __rl78_int_pack_a_r8 + ;; pack A to R8 + movw ax, A_EXP + addw ax, #126 ; not 127, we want the "bt/bf" test to check for denormals + + bf a.7, $1f + ;; make a denormal +2: + movw bc, ax + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + movw ax, bc + incw ax + bt a.7, $2b + decw ax +1: + incw ax ; now it's as if we added 127 + movw A_EXP, ax + + cmpw ax, #0xfe + bnh $1f + ;; store #Inf instead + mov a, A_SIGN + or a, #0x7f + mov x, #0x80 + movw r10, ax + movw r8, #0 + ret + +1: + bf a.7, $1f ; note AX has EXP at top of loop + ;; underflow, denormal? 
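+	;; (Shift the mantissa right one place per iteration, raising the
+	;; exponent until it is no longer negative; this produces the IEEE
+	;; denormal encoding.)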
+	movw	ax, A_FRAC_H
+	shrw	ax, 1
+	movw	A_FRAC_H, ax
+	mov	a, A_FRAC_LH
+	rorc	a, 1
+	mov	A_FRAC_LH, a
+	mov	a, A_FRAC_L
+	rorc	a, 1
+	mov	A_FRAC_L, a
+	incw	A_EXP
+	movw	ax, A_EXP
+	br	$1b
+
+1:
+	;; undo the rounding-bit-shift
+	mov	a, A_FRAC_L
+	bf	a.0, $1f
+	;; round up
+	movw	ax, A_FRAC_L
+	addw	ax, #1
+	movw	A_FRAC_L, ax
+	sknc
+	incw	A_FRAC_H
+1:
+	movw	ax, A_FRAC_H
+	shrw	ax, 1
+	movw	A_FRAC_H, ax
+	mov	a, A_FRAC_LH
+	rorc	a, 1
+	mov	A_FRAC_LH, a
+	mov	a, A_FRAC_L
+	rorc	a, 1
+	mov	A_FRAC_L, a
+
+	movw	ax, A_FRAC_L
+	movw	r8, ax
+
+	or	a, x
+	or	a, A_FRAC_H
+	or	a, A_FRAC_HH
+	bnz	$1f
+	movw	ax, #0
+	movw	A_EXP, ax
+1:
+	mov	a, A_FRAC_H
+	and	a, #0x7f
+	mov	b, a
+	mov	a, A_EXP
+	shl	a, 7
+	or	a, b
+	mov	r10, a
+
+	mov	a, A_SIGN
+	and	a, #0x80
+	mov	b, a
+	mov	a, A_EXP
+	shr	a, 1
+	or	a, b
+	mov	r11, a
+
+	ret
+END_FUNC __rl78_int_pack_a_r8
+
+START_FUNC ___mulsf3
+
+	;; if (isnan(a)) return a
+	movw	ax, sp
+	addw	ax, #4
+	movw	hl, ax
+	call	!!__int_isnan
+	bnz	$1f
+mret_a:
+	movw	ax, [sp+4]
+	movw	r8, ax
+	mov	a, [sp+11]
+	and	a, #0x80
+	mov	b, a
+	movw	ax, [sp+6]
+	xor	a, b		; sign is always a ^ b
+	movw	r10, ax
+	ret
+1:
+	;; if (isnan (b)) return b;
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	!!__int_isnan
+	bnz	$1f
+mret_b:
+	movw	ax, [sp+8]
+	movw	r8, ax
+	mov	a, [sp+7]
+	and	a, #0x80
+	mov	b, a
+	movw	ax, [sp+10]
+	xor	a, b		; sign is always a ^ b
+	movw	r10, ax
+	ret
+1:
+	;; if (isinf (a)) return (b==0) ? nan : a
+	movw	ax, sp
+	addw	ax, #4
+	movw	hl, ax
+	call	$!__int_isinf
+	bnz	$.L805
+
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	!!__int_iszero
+	bnz	$mret_a
+
+	movw	r8, #0x0001	; return NaN
+	movw	r10, #0x7f80
+	ret
+
+.L805:
+	;; if (isinf (b)) return (a==0) ? nan : b
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	$!__int_isinf
+	bnz	$.L814
+
+	movw	ax, sp
+	addw	ax, #4
+	movw	hl, ax
+	call	!!__int_iszero
+	bnz	$mret_b
+
+	movw	r8, #0x0001	; return NaN
+	movw	r10, #0x7f80
+	ret
+
+.L814:
+	movw	ax, sp
+	addw	ax, #4
+	movw	hl, ax
+	call	!!__int_iszero
+	bz	$mret_a
+
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	call	!!__int_iszero
+	bz	$mret_b
+
+	;; at this point, we're doing the multiplication.
+
+	subw	sp, #16		; save room for two unpacked values
+
+	movw	ax, sp
+	movw	hl, ax
+	addw	ax, #16+4
+	movw	de, ax
+	call	$!_int_unpack_sf
+
+	movw	ax, sp
+	addw	ax, #8
+	movw	hl, ax
+	addw	ax, #16+8-8
+	movw	de, ax
+	call	$!_int_unpack_sf
+
+	movw	ax, sp
+	movw	hl, ax
+
+	;; multiply SI a.FRAC * SI b.FRAC to DI r8
+
+	subw	sp, #16
+	movw	ax, A_FRAC_L
+	movw	[sp+0], ax
+	movw	ax, A_FRAC_H
+	movw	[sp+2], ax
+
+	movw	ax, B_FRAC_L
+	movw	[sp+8], ax
+	movw	ax, B_FRAC_H
+	movw	[sp+10], ax
+
+	movw	ax, #0
+	movw	[sp+4], ax
+	movw	[sp+6], ax
+	movw	[sp+12], ax
+	movw	[sp+14], ax
+
+	call	!!___muldi3	; MTMPa * MTMPb -> R8..R15
+	addw	sp, #16
+
+	movw	ax, sp
+	movw	hl, ax
+
+	;; add the exponents together
+	movw	ax, A_EXP
+	addw	ax, B_EXP
+	movw	bc, ax		; exponent in BC
+
+	;; now, re-normalize the DI value in R8..R15 to have the
+	;; MSB in the "right" place, adjusting BC as we shift it.
+ + ;; The value will normally be in this range: + ;; R15 R8 + ;; 0001_0000_0000_0000 + ;; 0003_ffff_fc00_0001 + + ;; so to speed it up, we normalize to: + ;; 0001_xxxx_xxxx_xxxx + ;; then extract the bytes we want (r11-r14) + +1: + mov a, r15 + cmp0 a + bnz $2f + mov a, r14 + and a, #0xfe + bz $1f +2: + ;; shift right, inc exponent + movw ax, r14 + shrw ax, 1 + movw r14, ax + mov a, r13 + rorc a, 1 + mov r13, a + mov a, r12 + rorc a, 1 + mov r12, a + mov a, r11 + rorc a, 1 + mov r11, a + ;; we don't care about r8/r9/r10 if we're shifting this way + incw bc + br $1b +1: + mov a, r15 + or a, r14 + bnz $1f + ;; shift left, dec exponent + movw ax, r8 + shlw ax, 1 + movw r8, ax + movw ax, r10 + rolwc ax, 1 + movw r10, ax + movw ax, r12 + rolwc ax, 1 + movw r12, ax + movw ax, r14 + rolwc ax, 1 + movw r14, ax + decw bc + br $1b +1: + ;; at this point, FRAC is in R11..R14 and EXP is in BC + movw ax, bc + movw A_EXP, ax + + mov a, r11 + mov A_FRAC_L, a + mov a, r12 + mov A_FRAC_LH, a + mov a, r13 + mov A_FRAC_H, a + mov a, r14 + mov A_FRAC_HH, a + + mov a, A_SIGN + xor a, B_SIGN + mov A_SIGN, a + + call $!__rl78_int_pack_a_r8 + + addw sp, #16 + ret + +END_FUNC ___mulsf3 + +START_FUNC ___divsf3 + + ;; if (isnan(a)) return a + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_isnan + bnz $1f +dret_a: + movw ax, [sp+4] + movw r8, ax + mov a, [sp+11] + and a, #0x80 + mov b, a + movw ax, [sp+6] + xor a, b ; sign is always a ^ b + movw r10, ax + ret +1: + ;; if (isnan (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_isnan + bnz $1f +dret_b: + movw ax, [sp+8] + movw r8, ax + mov a, [sp+7] + and a, #0x80 + mov b, a + movw ax, [sp+10] + xor a, b ; sign is always a ^ b + movw r10, ax + ret +1: + + ;; if (isinf (a)) return isinf(b) ? nan : a + + movw ax, sp + addw ax, #4 + movw hl, ax + call $!__int_isinf + bnz $1f + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $dret_a +dret_nan: + movw r8, #0x0001 ; return NaN + movw r10, #0x7f80 + ret + +1: + + ;; if (iszero (a)) return iszero(b) ? nan : a + + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bnz $1f + + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $dret_a + br $dret_nan + +1: + ;; if (isinf (b)) return 0 + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $1f + + mov a, [sp+7] + mov b, a + mov a, [sp+11] + xor a, b + and a, #0x80 + mov r11, a + movw r8, #0 + mov r10, #0 + ret + +1: + ;; if (iszero (b)) return Inf + + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $1f + + mov a, [sp+7] + mov b, a + mov a, [sp+11] + xor a, b + or a, #0x7f + mov r11, a + movw r8, #0 + mov r10, #0x80 + ret +1: + + ;; at this point, we're doing the division. 
Normalized + ;; mantissas look like: + ;; 01.xx.xx.xx + ;; so we divide: + ;; 01.xx.xx.xx.00.00.00.00 + ;; by 01.xx.xx.xx + ;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00 + + + subw sp, #16 ; save room for two unpacked values + + movw ax, sp + movw hl, ax + addw ax, #16+4 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + addw ax, #8 + movw hl, ax + addw ax, #16+8-8 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + movw hl, ax + + ;; divide DI a.FRAC / SI b.FRAC to DI r8 + + subw sp, #16 + movw ax, A_FRAC_L + movw [sp+4], ax + movw ax, A_FRAC_H + movw [sp+6], ax + + movw ax, B_FRAC_L + movw [sp+8], ax + movw ax, B_FRAC_H + movw [sp+10], ax + + movw ax, #0 + movw [sp+0], ax + movw [sp+2], ax + movw [sp+12], ax + movw [sp+14], ax + + call !!___divdi3 ; MTMPa / MTMPb -> R8..R15 + addw sp, #16 + + movw ax, sp + movw hl, ax + + ;; subtract the exponents A - B + movw ax, A_EXP + subw ax, B_EXP + movw bc, ax ; exponent in BC + + ;; now, re-normalize the DI value in R8..R15 to have the + ;; MSB in the "right" place, adjusting BC as we shift it. + + ;; The value will normally be in this range: + ;; R15 R8 + ;; 0000_0000_8000_0000 + ;; 0000_0001_ffff_ff00 + + ;; so to speed it up, we normalize to: + ;; 0000_0001_xxxx_xxxx + ;; then extract the bytes we want (r9-r12) + +1: + movw ax, r14 + cmpw ax, #0 + bnz $2f + movw ax, r12 + cmpw ax, #1 + bnh $1f +2: + ;; shift right, inc exponent + movw ax, r14 + shrw ax, 1 + movw r14, ax + mov a, r13 + rorc a, 1 + mov r13, a + mov a, r12 + rorc a, 1 + mov r12, a + mov a, r11 + rorc a, 1 + mov r11, a + mov a, r10 + rorc a, 1 + mov r10, a + mov a, r9 + rorc a, 1 + mov r9, a + mov a, r8 + rorc a, 1 + mov r8, a + + incw bc + br $1b +1: + ;; the previous loop leaves r15.r13 zero + mov a, r12 + cmp0 a + bnz $1f + ;; shift left, dec exponent + movw ax, r8 + shlw ax, 1 + movw r8, ax + movw ax, r10 + rolwc ax, 1 + movw r10, ax + movw ax, r12 + rolwc ax, 1 + movw r12, ax + ;; don't need to do r14 + decw bc + br $1b +1: + ;; at this point, FRAC is in R8..R11 and EXP is in BC + movw ax, bc + movw A_EXP, ax + + mov a, r9 + mov A_FRAC_L, a + mov a, r10 + mov A_FRAC_LH, a + mov a, r11 + mov A_FRAC_H, a + mov a, r12 + mov A_FRAC_HH, a + + mov a, A_SIGN + xor a, B_SIGN + mov A_SIGN, a + + call $!__rl78_int_pack_a_r8 + + addw sp, #16 + ret + +END_FUNC ___divsf3 diff --git a/libgcc/config/rl78/lshrsi3.S b/libgcc/config/rl78/lshrsi3.S index 176e6deff45..164917932fe 100644 --- a/libgcc/config/rl78/lshrsi3.S +++ b/libgcc/config/rl78/lshrsi3.S @@ -22,11 +22,7 @@ #include "vregs.h" - .text - .global ___lshrsi3 - .type ___lshrsi3, @function -___lshrsi3: - +START_FUNC ___lshrsi3 ;; input: ;; ;; [zero] @@ -46,7 +42,6 @@ ___lshrsi3: ;; B - count mov a, [sp+8] ; A now contains the count - cmp a, #0x20 bc $.Lcount_is_normal @@ -113,4 +108,4 @@ ___lshrsi3: br $.Lloop_top - .size ___lshrsi3, .-___lshrsi3 +END_FUNC ___lshrsi3 diff --git a/libgcc/config/rl78/mulsi3.S b/libgcc/config/rl78/mulsi3.S index 5d04ac23afb..c19865f78e5 100644 --- a/libgcc/config/rl78/mulsi3.S +++ b/libgcc/config/rl78/mulsi3.S @@ -33,6 +33,18 @@ ; DE count (resL-tmp) ; HL [sp+4] +; Register use (G10): +; +; AX op2L +; BC op2H +; DE count +; HL [sp+4] +; r8/r9 res32L +; r10/r11 (resH) +; r12/r13 (resL-tmp) +; r16/r17 res32H +; r18/r19 op1 + START_FUNC ___mulsi3 ;; A is at [sp+4] ;; B is at [sp+8] @@ -159,7 +171,7 @@ START_FUNC ___mulsi3 sknc incw ax addw ax, r_2 -.Lmul_hisi_no_add: +.Lmul_hisi_no_add: sel rb1 shlw bc, 1 sel rb0 @@ -267,3 +279,45 @@ START_FUNC ___mulhi3 .Lmul_hi_done: ret END_FUNC ___mulhi3 + 
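+
+;; The G10-only ___mulqi3 added below is a classic shift-and-add
+;; multiply.  As a rough C model of the algorithm (illustrative only,
+;; not part of the patch):
+;;
+;;   unsigned char mulqi3 (unsigned char x, unsigned char y)
+;;   {
+;;     unsigned char r = 0;
+;;     while (y != 0)            /* capped at 8 iterations below */
+;;       {
+;;         if (y & 1)
+;;           r += x;             /* add the shifted multiplicand */
+;;         x <<= 1;
+;;         y >>= 1;
+;;       }
+;;     return r;
+;;   }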
+;;; -------------------------------------- +#ifdef __RL78_G10__ + START_FUNC ___mulqi3 + + mov a, [sp+4] + mov r9, a + mov a, [sp+6] + mov r10, a + mov a, #9 + mov r11, a + clrb a + mov r8, a +.L2: + cmp0 r10 + skz + dec r11 + sknz + ret + mov a, r10 + and a, #1 + mov r12, a + cmp0 r12 + sknz + br !!.L3 + mov a, r9 + mov l, a + mov a, r8 + add a, l + mov r8, a +.L3: + mov a, r9 + add a, a + mov r9, a + mov a, r10 + shr a, 1 + mov r10, a + br !!.L2 + + END_FUNC ___mulqi3 +#endif + diff --git a/libgcc/config/rl78/signbit.S b/libgcc/config/rl78/signbit.S index cb1105fec5b..d315e1c83ca 100644 --- a/libgcc/config/rl78/signbit.S +++ b/libgcc/config/rl78/signbit.S @@ -37,11 +37,9 @@ .text - .global _signbit -_signbit: - .global _signbitf -_signbitf: - ;; X is at [sp+4] +START_FUNC _signbit +START_ANOTHER_FUNC _signbitf + ;; X is at [sp+4]..[SP+7] ;; result is in R8..R9 movw r8, #0 @@ -50,12 +48,12 @@ _signbitf: sknc movw r8, #1 ret - .size _signbit, . - _signbit - .size _signbitf, . - _signbitf +END_ANOTHER_FUNC _signbitf +END_FUNC _signbit - .global _signbitl -_signbitl: - ;; X is at [sp+4] + +START_FUNC _signbitl + ;; X is at [sp+4]..[SP+7] ;; result is in R8..R9 movw r8, #0 @@ -64,4 +62,4 @@ _signbitl: sknc movw r8, #1 ret - .size _signbitl, . - _signbitl +END_FUNC _signbitl diff --git a/libgcc/config/rl78/t-rl78 b/libgcc/config/rl78/t-rl78 index 59b1f75920e..e030c99c1bd 100644 --- a/libgcc/config/rl78/t-rl78 +++ b/libgcc/config/rl78/t-rl78 @@ -20,8 +20,6 @@ LIB2ADD = \ $(srcdir)/config/rl78/trampoline.S \ - $(srcdir)/config/rl78/lib2div.c \ - $(srcdir)/config/rl78/lib2mul.c \ $(srcdir)/config/rl78/lib2shift.c \ $(srcdir)/config/rl78/lshrsi3.S \ $(srcdir)/config/rl78/mulsi3.S \ @@ -29,6 +27,22 @@ LIB2ADD = \ $(srcdir)/config/rl78/divmodhi.S \ $(srcdir)/config/rl78/divmodqi.S \ $(srcdir)/config/rl78/signbit.S \ + $(srcdir)/config/rl78/bit-count.S \ + $(srcdir)/config/rl78/fpbit-sf.S \ + $(srcdir)/config/rl78/fpmath-sf.S \ $(srcdir)/config/rl78/cmpsi2.S +LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \ + _popcounthi2 _popcountsi2 \ + _parityhi2 _paritysi2 _ffssi2 _ffshi2 \ + _negate_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \ + _lt_sf _le_sf _unord_sf \ + _si_to_sf _usi_to_sf \ + _sf_to_si _sf_to_usi \ + _fixunssfsi _fixsfsi \ + _addsub_sf _mul_sf _div_sf + +# Remove __gcc_bcmp from LIB2FUNCS_ST +LIB2FUNCS_ST = _eprintf + HOST_LIBGCC2_CFLAGS += -Os -ffunction-sections -fdata-sections diff --git a/libgcc/config/rl78/trampoline.S b/libgcc/config/rl78/trampoline.S index 357e88976d4..9ea8fc4a82b 100644 --- a/libgcc/config/rl78/trampoline.S +++ b/libgcc/config/rl78/trampoline.S @@ -80,14 +80,10 @@ trampoline_array_end: pointer in R10, allocate a trampoline and return its address in R8. */ - .text - .global ___trampoline_init - .type ___trampoline_init, @function -___trampoline_init: - +START_FUNC ___trampoline_init movw hl, #trampoline_array -1: - movw ax, [hl + TO_ADDR] + +1: movw ax, [hl + TO_ADDR] cmpw ax, #0 bz $2f @@ -107,30 +103,27 @@ ___trampoline_init: movw ax, [hl + TO_STUB] movw r8, ax - ret - .size ___trampoline_init, . 
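+
+/* Usage sketch for the macros below (illustrative only): a primary
+   entry point opened with START_FUNC may declare extra entry points
+   with START_ANOTHER_FUNC; each extra entry is closed with
+   END_ANOTHER_FUNC and the primary one with END_FUNC, e.g.
+
+	START_FUNC ___gesf2
+	START_ANOTHER_FUNC ___gtsf2
+	...shared body...
+	END_ANOTHER_FUNC ___gtsf2
+	END_FUNC ___gesf2
+
+   Both symbols then live in the one .text.___gesf2 section and each
+   gets its own .size directive.  */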
- ___trampoline_init +END_FUNC ___trampoline_init - .global ___trampoline_uninit - .type ___trampoline_uninit, @function -___trampoline_uninit: + +START_FUNC ___trampoline_uninit movw hl, #trampoline_array movw ax, sp movw bc, ax -1: - movw ax, [hl + TO_FRAME] + +1: movw ax, [hl + TO_FRAME] cmpw ax, bc bc $2f clrw ax movw [hl + TO_ADDR], ax -2: - movw ax, hl +2: movw ax, hl addw ax, #TO_SIZE movw hl, ax cmpw ax, #trampoline_array_end bnz $1b ret - .size ___trampoline_uninit, . - ___trampoline_uninit +END_FUNC ___trampoline_uninit diff --git a/libgcc/config/rl78/vregs.h b/libgcc/config/rl78/vregs.h index d5209e20fa5..f1bc5c24761 100644 --- a/libgcc/config/rl78/vregs.h +++ b/libgcc/config/rl78/vregs.h @@ -55,17 +55,25 @@ r23 = 0xffeef #endif +.macro START_ANOTHER_FUNC name + .global \name + .type \name , @function +\name: +.endm + /* Start a function in its own section, so that it can be subject to linker garbage collection. */ .macro START_FUNC name .pushsection .text.\name,"ax",@progbits - .global \name - .type \name , @function -\name: + START_ANOTHER_FUNC \name +.endm + +.macro END_ANOTHER_FUNC name + .size \name , . - \name .endm /* End the function. Set the size. */ .macro END_FUNC name - .size \name , . - \name + END_ANOTHER_FUNC \name .popsection .endm -- 2.30.2