* config/rl78/cmpsi2.S: Use function start and end macros.
(__gcc_bcmp): New function.
* config/rl78/lshrsi3.S: Use function start and end macros.
* config/rl78/mulsi3.S: Add support for G10.
(__mulqi3): New function for G10.
* config/rl78/signbit.S: Use function start and end macros.
* config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
fpmath-sf.S.
(LIB2FUNCS_EXCLUDE): Define.
(LIB2FUNCS_ST): Define.
* config/rl78/trampoline.S: Use function start and end macros.
* config/rl78/vregs.h (START_FUNC): New macro.
(START_ANOTHER_FUNC): New macro.
(END_FUNC): New macro.
(END_ANOTHER_FUNC): New macro.
* config/rl78/bit-count.S: New file. Contains assembler
implementations of the bit counting functions: __clzhi2,
__clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
__paritysi2, __popcounthi2 and __popcountsi2.
* config/rl78/fpbit-sf.S: New file. Contains assembler
implementations of the math functions: __negsf2, __cmpsf2,
__eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
__fixsfsi, __fixunssfsi, __floatsisf and __floatunssisf.
* config/rl78/fpmath-sf.S: New file. Contains assembler
implementations of the math functions: __subsf3, __addsf3,
__mulsf3 and __divsf3.
From-SVN: r220162
+2015-01-27 Nick Clifton <nickc@redhat.com>
+
+ * config/rl78/cmpsi2.S: Use function start and end macros.
+ (__gcc_bcmp): New function.
+ * config/rl78/lshrsi3.S: Use function start and end macros.
+ * config/rl78/mulsi3.S: Add support for G10.
+ (__mulqi3): New function for G10.
+ * config/rl78/signbit.S: Use function start and end macros.
+ * config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
+ fpmath-sf.S.
+ (LIB2FUNCS_EXCLUDE): Define.
+ (LIB2FUNCS_ST): Define.
+ * config/rl78/trampoline.S: Use function start and end macros.
+ * config/rl78/vregs.h (START_FUNC): New macro.
+ (START_ANOTHER_FUNC): New macro.
+ (END_FUNC): New macro.
+ (END_ANOTHER_FUNC): New macro.
+ * config/rl78/bit-count.S: New file. Contains assembler
+ implementations of the bit counting functions: __clzhi2,
+ __clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
+ __paritysi2, __popcounthi2 and __popcountsi2.
+ * config/rl78/fpbit-sf.S: New file. Contains assembler
+ implementations of the math functions: __negsf2, __cmpsf2,
+ __eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
+ __fixsfsi, __fixunssfsi, __floatsisf and __floatunssisf.
+ * config/rl78/fpmath-sf.S: New file. Contains assembler
+ implementations of the math functions: __subsf3, __addsf3,
+ __mulsf3 and __divsf3.
+
2015-01-27 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config.host (i[34567]86-*-solaris2*, x86_64-*-solaris2.1[0-9]*):
--- /dev/null
+; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+; Contributed by Red Hat.
+;
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+;
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; General Public License for more details.
+;
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+; <http://www.gnu.org/licenses/>.
+
+#include "vregs.h"
+
+START_FUNC ___clzhi2
+ ;; Argument is in [SP+4], return in R8.
+ movw ax, [SP+4]
+
+ .global __clzhi2_internal
+__clzhi2_internal:
+ movw r8, #16
+ cmpw ax, #0
+ bz $clzhi2_is_zero
+ mov e, #0xff
+1:
+ inc e
+ shlw ax, 1
+ bnc $1b
+ mov a, e
+ mov r8, a
+clzhi2_is_zero:
+ ret
+END_FUNC ___clzhi2
+
+
+START_FUNC ___clzsi2
+ ;; Argument is in [SP+6]:[SP+4], return in R8.
+ movw ax, [SP+6]
+ cmpw ax, #0
+ bnz $__clzhi2_internal
+ movw ax, [SP+4]
+ call !__clzhi2_internal
+ movw ax, r8
+ addw ax, #16
+ movw r8, ax
+ ret
+END_FUNC ___clzsi2
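
The 32-bit count above simply defers to the 16-bit helper: if the high word is non-zero its leading-zero count is the answer, otherwise count the low word and add 16. A minimal C sketch of the same composition (my_clzhi and my_clzsi are hypothetical names, not the libgcc entry points):

/* Illustration only: mirrors the 16-bit helper plus the 32-bit wrapper. */
static int my_clzhi (unsigned short x)
{
  int n = 0;
  if (x == 0)
    return 16;                      /* all bits clear */
  while ((x & 0x8000) == 0)         /* shift left until the top bit is reached */
    {
      x <<= 1;
      n++;
    }
  return n;
}

int my_clzsi (unsigned long x)
{
  unsigned short hi = (unsigned short)(x >> 16);
  if (hi != 0)
    return my_clzhi (hi);           /* leading zeros all come from the high word */
  return 16 + my_clzhi ((unsigned short)x);
}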
+
+
+START_FUNC ___ctzhi2
+ ;; Argument is in [SP+4], return in R8.
+ movw ax, [SP+4]
+
+ .global __ctzhi2_internal
+__ctzhi2_internal:
+ movw r8, #16
+ cmpw ax, #0
+ bz $ctzhi2_is_zero
+ mov e, #0xff
+1:
+ inc e
+ shrw ax, 1
+ bnc $1b
+ mov a, e
+ mov r8, a
+ctzhi2_is_zero:
+ ret
+END_FUNC ___ctzhi2
+
+
+START_FUNC ___ctzsi2
+ ;; Argument is in [SP+6]:[SP+4], return in R8.
+ movw ax, [SP+4]
+ cmpw ax, #0
+ bnz $__ctzhi2_internal
+ movw ax, [SP+6]
+ call !__ctzhi2_internal
+ movw ax, r8
+ addw ax, #16
+ movw r8, ax
+ ret
+END_FUNC ___ctzsi2
+
+
+START_FUNC ___ffshi2
+ ;; Argument is in [SP+4], return in R8.
+ movw ax, [SP+4]
+
+ .global __ffshi2_internal
+__ffshi2_internal:
+ movw r8, #0
+ cmpw ax, #0
+ bz $ffshi2_is_zero
+ mov e, #0
+1:
+ inc e
+ shrw ax, 1
+ bnc $1b
+ mov a, e
+ mov r8, a
+ffshi2_is_zero:
+ ret
+END_FUNC ___ffshi2
+
+
+START_FUNC ___ffssi2
+ ;; Argument is in [SP+6]:[SP+4], return in R8.
+ movw ax, [SP+4]
+ cmpw ax, #0
+ bnz $__ffshi2_internal
+ movw ax, [SP+6]
+ cmpw ax, #0
+ bz $1f
+ call !__ffshi2_internal
+ movw ax, r8
+ addw ax, #16
+1:
+ movw r8, ax
+ ret
+END_FUNC ___ffssi2
+
+
+START_FUNC ___parityqi_internal
+ mov1 cy, a.0
+ xor1 cy, a.1
+ xor1 cy, a.2
+ xor1 cy, a.3
+ xor1 cy, a.4
+ xor1 cy, a.5
+ xor1 cy, a.6
+ xor1 cy, a.7
+ movw ax, #0
+ bnc $1f
+ incw ax
+1:
+ movw r8, ax
+ ret
+END_FUNC ___parityqi_internal
+
+
+START_FUNC ___parityhi2
+ ;; Argument is in [SP+4], return in R8.
+ movw ax, [SP+4]
+ xor a, x
+ br $___parityqi_internal
+END_FUNC ___parityhi2
+
+
+START_FUNC ___paritysi2
+ ;; Argument is in [SP+6]:[SP+4], return in R8.
+ movw ax, [SP+4]
+ xor a, x
+ mov b, a
+ movw ax, [SP+6]
+ xor a, x
+ xor a, b
+ br $___parityqi_internal
+END_FUNC ___paritysi2
+
+
+
+START_FUNC ___popcounthi2
+ ;; Argument is in [SP+4], return in R8.
+ mov d, #2
+ br $___popcountqi_internal
+END_FUNC ___popcounthi2
+
+
+START_FUNC ___popcountsi2
+ ;; Argument is in [SP+6]:[SP+4], return in R8.
+ mov d, #4
+ br $___popcountqi_internal
+END_FUNC ___popcountsi2
+
+
+START_FUNC ___popcountqi_internal
+ ;; There are D bytes starting at [HL]
+ ;; store count in R8.
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ mov a, #0
+1:
+ xch a, b
+ mov a, [hl]
+ xch a, b
+ mov e, #8
+2:
+ shl b,1
+ addc a, #0
+ dec e
+ bnz $2b
+
+ incw hl
+ dec d
+ bnz $1b
+
+ mov x, a
+ mov a, #0
+ movw r8, ax
+ ret
+END_FUNC ___popcountqi_internal
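
The popcount helper walks D argument bytes, shifting each byte left eight times and accumulating the bit that falls out of the top. A rough C equivalent (my_popcount is a hypothetical name used only for illustration):

/* Illustration only: byte-at-a-time population count, as in
   ___popcountqi_internal. */
int my_popcount (const unsigned char *p, int nbytes)
{
  int count = 0;
  while (nbytes--)
    {
      unsigned char b = *p++;
      for (int i = 0; i < 8; i++)
        {
          count += b >> 7;          /* add the bit about to be shifted out */
          b = (unsigned char)(b << 1);
        }
    }
  return count;
}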
;; If A is less than B it returns 0. If A is greater
;; than B it returns 2. If they are equal it returns 1.
- .global ___cmpsi2
- .type ___cmpsi2, @function
-___cmpsi2:
+START_FUNC ___cmpsi2
+
;; A is at [sp+4]
;; B is at [sp+8]
;; Result put in R8
movw r8, ax
ret
- .size ___cmpsi2, . - ___cmpsi2
-
-
+END_FUNC ___cmpsi2
+
+;; ------------------------------------------------------
+
;; int __ucmpsi2 (unsigned long A, unsigned long B)
;;
;; Performs an unsigned comparison of A and B.
;; If A is less than B it returns 0. If A is greater
;; than B it returns 2. If they are equal it returns 1.
- .global ___ucmpsi2
- .type ___ucmpsi2, @function
-___ucmpsi2:
+START_FUNC ___ucmpsi2
+
;; A is at [sp+4]
;; B is at [sp+8]
;; Result put in R8..R9
br !!.Lless_than_or_greater_than
br !!.Lcompare_bottom_words
- .size ___ucmpsi2, . - ___ucmpsi2
-
\ No newline at end of file
+END_FUNC ___ucmpsi2
+
+;; ------------------------------------------------------
+
+ ;; signed int __gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size)
+ ;; Result is negative if S1 is less than S2,
+ ;; positive if S1 is greater, 0 if S1 and S2 are equal.
+
+START_FUNC __gcc_bcmp
+
+ ;; S1 is at [sp+4]
+ ;; S2 is at [sp+6]
+ ;; SIZE is at [sp+8]
+ ;; Result in r8/r9
+
+ movw r10, #0
+1:
+ ;; Compare R10 against the SIZE parameter
+ movw ax, [sp+8]
+ subw ax, r10
+ sknz
+ br !!1f
+
+ ;; Load S2[r10] into R8
+ movw ax, [sp+6]
+ addw ax, r10
+ movw hl, ax
+ mov a, [hl]
+ mov r8, a
+
+ ;; Load S1[r10] into A
+ movw ax, [sp+4]
+ addw ax, r10
+ movw hl, ax
+ mov a, [hl]
+
+ ;; Increment offset
+ incw r10
+
+ ;; Compare loaded bytes
+ cmp a, r8
+ sknz
+ br !!1b
+
+ ;; They differ. Subtract *S2 from *S1 and return as the result.
+ mov x, a
+ mov a, #0
+ mov r9, #0
+ subw ax, r8
+1:
+ movw r8, ax
+ ret
+
+END_FUNC __gcc_bcmp
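
For reference, a hedged C model of the semantics implemented above (my_bcmp is a hypothetical name; the generic libgcc version of __gcc_bcmp lives in libgcc2.c):

int my_bcmp (const unsigned char *s1, const unsigned char *s2, unsigned int size)
{
  while (size-- != 0)
    {
      int d = *s1++ - *s2++;
      if (d != 0)
        return d;                   /* negative if *s1 < *s2, positive if greater */
    }
  return 0;                         /* all SIZE bytes matched */
}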
--- /dev/null
+; SF format is:
+;
+; [sign] 1.[23bits] E[8bits(n-127)]
+;
+; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm
+;
+; [A+0] mmmmmmmm
+; [A+1] mmmmmmmm
+; [A+2] Emmmmmmm
+; [A+3] SEEEEEEE
+;
+; Special values (xxx != 0):
+;
+; s1111111 10000000 00000000 00000000 infinity
+; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN
+; s0000000 00000000 00000000 00000000 zero
+; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals
+;
+; Note that CMPtype is "signed char" for rl78
+;
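
A worked example of the byte layout described above, assuming a little-endian host (as the RL78 is): 1.0f has the IEEE bit pattern 0x3f800000 (sign 0, biased exponent 0x7f, mantissa 0), so the bytes at [A+0]..[A+3] are 00 00 80 3f. A small C check, for illustration only:

#include <stdio.h>
#include <string.h>
int main (void)
{
  float f = 1.0f;
  unsigned char b[4];
  memcpy (b, &f, sizeof b);
  printf ("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);   /* 00 00 80 3f */
  return 0;
}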
+
+#include "vregs.h"
+
+#define Z PSW.6
+
+START_FUNC ___negsf2
+
+ ;; Negate the floating point value.
+ ;; Input at [SP+4]..[SP+7].
+ ;; Output to R8..R11.
+
+ movw ax, [SP+4]
+ movw r8, ax
+ movw ax, [SP+6]
+ xor a, #0x80
+ movw r10, ax
+ ret
+
+END_FUNC ___negsf2
+
+;; ------------------internal functions used by later code --------------
+
+START_FUNC __int_isnan
+
+ ;; [HL] points to value, returns Z if it's a NaN
+
+ mov a, [hl+2]
+ and a, #0x80
+ mov x, a
+ mov a, [hl+3]
+ and a, #0x7f
+ cmpw ax, #0x7f80
+ skz
+ ret ; return NZ if not NaN
+ mov a, [hl+2]
+ and a, #0x7f
+ or a, [hl+1]
+ or a, [hl]
+ bnz $1f
+ clr1 Z ; Z, normal
+ ret
+1:
+ set1 Z ; nan
+ ret
+
+END_FUNC __int_isnan
+
+START_FUNC __int_eithernan
+
+ ;; call from toplevel functions, returns Z if either number is a NaN,
+ ;; or NZ if both are OK.
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isnan
+ bz $1f
+
+ movw ax, sp
+ addw ax, #12
+ movw hl, ax
+ call $!__int_isnan
+1:
+ ret
+
+END_FUNC __int_eithernan
+
+START_FUNC __int_iszero
+
+ ;; [HL] points to value, returns Z if it's zero
+
+ mov a, [hl+3]
+ and a, #0x7f
+ or a, [hl+2]
+ or a, [hl+1]
+ or a, [hl]
+ ret
+
+END_FUNC __int_iszero
+
+START_FUNC __int_cmpsf
+
+ ;; This is always called from some other function here,
+ ;; so the stack offsets are adjusted accordingly.
+
+ ;; X [SP+8] <=> Y [SP+12] : <a> <=> 0
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_iszero
+ bnz $1f
+
+ movw ax, sp
+ addw ax, #12
+ movw hl, ax
+ call $!__int_iszero
+ bnz $2f
+ ;; At this point, both args are zero.
+ mov a, #0
+ ret
+
+2:
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+1:
+ ;; At least one arg is non-zero so we can just compare magnitudes.
+ ;; Args are [HL] and [HL+4].
+
+ mov a, [HL+3]
+ xor a, [HL+7]
+ mov1 cy, a.7
+ bnc $1f
+
+ mov a, [HL+3]
+ sar a, 7
+ or a, #1
+ ret
+
+1: ;; Signs the same, compare magnitude. It's safe to lump
+ ;; the sign bits, exponent, and mantissa together here, since they're
+ ;; stored in the right sequence.
+ movw ax, [HL+2]
+ cmpw ax, [HL+6]
+ bc $ybig_cmpsf ; branch if X < Y
+ bnz $xbig_cmpsf ; branch if X > Y
+
+ movw ax, [HL]
+ cmpw ax, [HL+4]
+ bc $ybig_cmpsf ; branch if X < Y
+ bnz $xbig_cmpsf ; branch if X > Y
+
+ mov a, #0
+ ret
+
+xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
+ mov a, [HL+3]
+ sar a, 7
+ or a, #1
+ ret
+ybig_cmpsf: ; |X| < |Y| so return A = 0xff if pos, 1 if neg
+ mov a, [HL+3]
+ xor a, #0x80
+ sar a, 7
+ or a, #1
+ ret
+
+END_FUNC __int_cmpsf
+
+;; ----------------------------------------------------------
+
+START_FUNC ___cmpsf2
+ ;; This function calculates "A <=> B".  That is, if A is less than B
+ ;; it returns -1, if A is greater than B it returns 1, and if A
+ ;; and B are equal it returns 0.  If either argument is NaN the
+ ;; behaviour is undefined.
+
+ ;; Input at [SP+4]..[SP+7].
+ ;; Output to R8..R9.
+
+ call $!__int_eithernan
+ bnz $1f
+ movw r8, #1
+ ret
+1:
+ call $!__int_cmpsf
+ mov r8, a
+ sar a, 7
+ mov r9, a
+ ret
+
+END_FUNC ___cmpsf2
+
+;; ----------------------------------------------------------
+
+ ;; These functions are all basically the same as ___cmpsf2
+ ;; except that they define how they handle NaNs.
+
+START_FUNC ___eqsf2
+ ;; Returns zero iff neither argument is NaN
+ ;; and both arguments are equal.
+START_ANOTHER_FUNC ___nesf2
+ ;; Returns non-zero iff either argument is NaN or the arguments are
+ ;; unequal.  In other words, __nesf2 behaves the same as __eqsf2.
+START_ANOTHER_FUNC ___lesf2
+ ;; Returns a value less than or equal to zero if neither
+ ;; argument is NaN, and the first is less than or equal to the second.
+START_ANOTHER_FUNC ___ltsf2
+ ;; Returns a value less than zero if neither argument is
+ ;; NaN, and the first is strictly less than the second.
+
+ ;; Input at [SP+4]..[SP+7].
+ ;; Output to R8.
+
+ mov r8, #1
+
+;;; Fall through
+
+START_ANOTHER_FUNC __int_cmp_common
+
+ call $!__int_eithernan
+ sknz
+ ;; return value for "either is nan" (pre-loaded into R8 by our callers)
+ ret
+
+ call $!__int_cmpsf
+ mov r8, a
+ ret
+
+END_ANOTHER_FUNC __int_cmp_common
+END_ANOTHER_FUNC ___ltsf2
+END_ANOTHER_FUNC ___lesf2
+END_ANOTHER_FUNC ___nesf2
+END_FUNC ___eqsf2
+
+START_FUNC ___gesf2
+ ;; Returns a value greater than or equal to zero if neither argument
+ ;; is a NaN and the first is greater than or equal to the second.
+START_ANOTHER_FUNC ___gtsf2
+ ;; Returns a value greater than zero if neither argument
+ ;; is NaN, and the first is strictly greater than the second.
+
+ mov r8, #0xffff
+ br $__int_cmp_common
+
+END_ANOTHER_FUNC ___gtsf2
+END_FUNC ___gesf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___unordsf2
+ ;; Returns a nonzero value if either argument is NaN, otherwise 0.
+
+ call $!__int_eithernan
+ movw r8, #0
+ sknz ; this is from the call, not the movw
+ movw r8, #1
+ ret
+
+END_FUNC ___unordsf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___fixsfsi
+ ;; Converts its floating point argument into a signed long,
+ ;; rounding toward zero.
+ ;; The behaviour with NaNs and Infinities is not well defined.
+ ;; We choose to return 0 for NaNs, INT_MIN for -inf and INT_MAX for +inf.
+ ;; This matches the behaviour of the C function in libgcc2.c.
+
+ ;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb).
+
+ ;; Special case handling for infinities as __fixunssfsi
+ ;; will not give us the values that we want.
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isinf
+ bnz $1f
+ mov a, [SP+7]
+ bt a.7, $2f
+ ;; +inf
+ movw r8, #-1
+ movw r10, #0x7fff
+ ret
+ ;; -inf
+2: movw r8, #0
+ movw r10, #0x8000
+ ret
+
+ ;; Load the value into r10:r11:X:A
+1: movw ax, [SP+4]
+ movw r10, ax
+ movw ax, [SP+6]
+
+ ;; If the value is positive we can just use __fixunssfsi
+ bf a.7, $__int_fixunssfsi
+
+ ;; Otherwise we negate the value, call __fixunssfsi and
+ ;; then negate its result.
+ clr1 a.7
+ call $!__int_fixunssfsi
+
+ movw ax, #0
+ subw ax, r8
+ movw r8, ax
+ movw ax, #0
+ sknc
+ decw ax
+ subw ax, r10
+ movw r10, ax
+
+ ;; Check for a positive result (which should only happen when
+ ;; __fixunssfsi returns UINTMAX or 0). In such cases just return 0.
+ mov a, r11
+ bt a.7, $1f
+ movw r10,#0x0
+ movw r8, #0x0
+
+1: ret
+
+END_FUNC ___fixsfsi
+
+START_FUNC ___fixunssfsi
+ ;; Converts its floating point argument into an unsigned long
+ ;; rounding towards zero. Negative arguments all become zero.
+ ;; We choose to return 0 for NaNs and -inf, but UINTMAX for +inf.
+ ;; This matches the behaviour of the C function in libgcc2.c.
+
+ ;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb)
+
+ ;; Get the input value.
+ movw ax, [SP+4]
+ movw r10, ax
+ movw ax, [SP+6]
+
+ ;; Fall through into the internal function.
+
+ .global __int_fixunssfsi
+__int_fixunssfsi:
+ ;; Input in (lsb) r10.r11.x.a (msb).
+
+ ;; Test for a negative input. We shift the other bits at the
+ ;; same time so that A ends up holding the whole exponent:
+ ;;
+ ;; before:
+ ;; SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+ ;; A X R11 R10
+ ;;
+ ;; after:
+ ;; EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM
+ ;; A X R11 R10
+ shlw ax, 1
+ bnc $1f
+
+ ;; Return zero.
+2: movw r8, #0
+ movw r10, #0
+ ret
+
+ ;; An exponent of -1 is either a NaN or infinity.
+1: cmp a, #-1
+ bnz $3f
+ ;; For NaN we return 0. For infinity we return UINTMAX.
+ mov a, x
+ or a, r10
+ or a, r11
+ cmp0 a
+ bnz $2b
+
+6: movw r8, #-1 ; -1 => UINT_MAX
+ movw r10, #-1
+ ret
+
+ ;; If the exponent is negative the value is < 1 and so the
+ ;; converted value is 0. Note we must allow for the bias
+ ;; applied to the exponent. Thus a value of 127 in the
+ ;; EEEEEEEE bits actually represents an exponent of 0, whilst
+ ;; a value less than 127 actually represents a negative exponent.
+ ;; Also if the EEEEEEEE bits are all zero then this represents
+ ;; either a denormal value or 0.0. Either way for these values
+ ;; we return 0.
+3: sub a, #127
+ bc $2b
+
+ ;; A now holds the bias adjusted exponent, which is known to be >= 0.
+ ;; If the exponent is > 31 then the conversion will overflow.
+ cmp a, #32
+ bnc $6b
+4:
+ ;; Save the exponent in H. We increment it by one because we want
+ ;; to be sure that the loop below will always execute at least once.
+ inc a
+ mov h, a
+
+ ;; Get the top 24 bits of the mantissa into A:X:R10
+ ;; Include the implicit 1-bit that is inherent in the IEEE fp format.
+ ;;
+ ;; before:
+ ;; EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM
+ ;; H X R11 R10
+ ;; after:
+ ;; EEEEEEEE 1MMMMMMM MMMMMMMM MMMMMMMM
+ ;; H A X R10
+
+ mov a, r11
+ xch a, x
+ shr a, 1
+ set1 a.7
+
+ ;; Clear B:C:R12:R13
+ movw bc, #0
+ movw r12, #0
+
+ ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
+ ;; decrementing the exponent as we go.
+
+ ;; before:
+ ;; MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ ;; A X R10 B C R12 R13
+ ;; first iter:
+ ;; MMMMMMMM MMMMMMMM MMMMMMM0 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxM
+ ;; A X R10 B C R12 R13
+ ;; second iter:
+ ;; MMMMMMMM MMMMMMMM MMMMMM00 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxMM
+ ;; A X R10 B C R12 R13
+ ;; etc.
+5:
+ xch a, r10
+ shl a, 1
+ xch a, r10
+
+ rolwc ax, 1
+
+ xch a, r13
+ rolc a, 1
+ xch a, r13
+
+ xch a, r12
+ rolc a, 1
+ xch a, r12
+
+ rolwc bc, 1
+
+ dec h
+ bnz $5b
+
+ ;; Result is currently in (lsb) r13.r12. c. b. (msb),
+ ;; Move it into (lsb) r8. r9. r10. r11 (msb).
+
+ mov a, r13
+ mov r8, a
+
+ mov a, r12
+ mov r9, a
+
+ mov a, c
+ mov r10, a
+
+ mov a, b
+ mov r11, a
+
+ ret
+
+END_FUNC ___fixunssfsi
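
A rough C model of the conversion performed above (truncation toward zero; my_fixunssfsi is a hypothetical name and takes the raw SF bit pattern for simplicity):

#include <stdint.h>
unsigned long my_fixunssfsi (uint32_t bits)
{
  if (bits & 0x80000000u)               /* negative (including -inf) -> 0 */
    return 0;
  int exp = (int)((bits >> 23) & 0xff);
  uint32_t frac = bits & 0x007fffffu;
  if (exp == 0xff)                      /* NaN -> 0, +inf -> UINT_MAX */
    return frac ? 0 : 0xffffffffu;
  exp -= 127;                           /* remove the bias */
  if (exp < 0)                          /* |value| < 1, incl. denormals -> 0 */
    return 0;
  if (exp > 31)                         /* too large -> UINT_MAX */
    return 0xffffffffu;
  uint64_t m = frac | 0x00800000u;      /* restore the implicit 1 bit */
  return (unsigned long)(exp >= 23 ? m << (exp - 23) : m >> (23 - exp));
}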
+
+;; ------------------------------------------------------------------------
+
+START_FUNC ___floatsisf
+ ;; Converts its signed long argument into a floating point.
+ ;; Argument in [SP+4]..[SP+7]. Result in R8..R11.
+
+ ;; Get the argument.
+ movw ax, [SP+4]
+ movw bc, ax
+ movw ax, [SP+6]
+
+ ;; Test the sign bit. If the value is positive then drop into
+ ;; the unsigned conversion routine.
+ bf a.7, $2f
+
+ ;; If negative convert to positive ...
+ movw hl, ax
+ movw ax, #0
+ subw ax, bc
+ movw bc, ax
+ movw ax, #0
+ sknc
+ decw ax
+ subw ax, hl
+
+ ;; If the result is still negative then the input was 0x80000000
+ ;; (INT_MIN), which we handle specially, returning -2^31 (0xcf000000)
+ ;; directly rather than going via __int_floatunsisf.
+ bt a.7, $1f
+
+ ;; Call the unsigned conversion routine.
+ call $!__int_floatunsisf
+
+ ;; Negate the result.
+ set1 r11.7
+
+ ;; Done.
+ ret
+
+1: ;; Return -2^31, i.e. 0xcf000000
+
+ clrb a
+ mov r8, a
+ mov r9, a
+ mov r10, a
+ mov a, #0xcf
+ mov r11, a
+ ret
+
+START_ANOTHER_FUNC ___floatunsisf
+ ;; Converts its unsigned long argument into a floating point.
+ ;; Argument in [SP+4]..[SP+7]. Result in R8..R11.
+
+ ;; Get the argument.
+ movw ax, [SP+4]
+ movw bc, ax
+ movw ax, [SP+6]
+
+2: ;; Internal entry point from __floatsisf
+ ;; Input in AX (high) and BC (low)
+ .global __int_floatunsisf
+__int_floatunsisf:
+
+ ;; Special case handling for zero.
+ cmpw ax, #0
+ bnz $1f
+ movw ax, bc
+ cmpw ax, #0
+ movw ax, #0
+ bnz $1f
+
+ ;; Return 0.0
+ movw r8, ax
+ movw r10, ax
+ ret
+
+1: ;; Pre-load the loop count/exponent.
+ ;; Exponents are biased by 0x80 and we start the loop knowing that
+ ;; we are going to skip the highest set bit. Hence the highest value
+ ;; that we can get for the exponent is 0x1e (bits from input) + 0x80 = 0x9e.
+ mov h, #0x9e
+
+ ;; Move bits off the top of AX:BC until we hit a 1 bit.
+ ;; Decrement the count of remaining bits as we go.
+
+2: shlw bc, 1
+ rolwc ax, 1
+ bc $3f
+ dec h
+ br $2b
+
+ ;; Ignore the first one bit - it is implicit in the IEEE format.
+ ;; The count of remaining bits is the exponent.
+
+ ;; Assemble the final floating point value. We have...
+ ;; before:
+ ;; EEEEEEEE MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx
+ ;; H A X B C
+ ;; after:
+ ;; 0EEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+ ;; R11 R10 R9 R8
+
+
+3: shrw ax, 1
+ mov r10, a
+ mov a, x
+ mov r9, a
+
+ mov a, b
+ rorc a, 1
+
+ ;; If the bottom bit of B was set before we shifted it out then we
+ ;; need to round the result up. Unless none of the bits in C are set.
+ ;; In this case we are exactly half-way between two values, and we
+ ;; round towards an even value. We round up by increasing the
+ ;; mantissa by 1. If this results in a zero mantissa we have to
+ ;; increment the exponent. We round down by ignoring the dropped bits.
+
+ bnc $4f
+ cmp0 c
+ sknz
+ bf a.0, $4f
+
+5: ;; Round the mantissa up by 1.
+ add a, #1
+ addc r9, #0
+ addc r10, #0
+ bf r10.7, $4f
+ inc h
+ clr1 r10.7
+
+4: mov r8, a
+ mov a, h
+ shr a, 1
+ mov r11, a
+ sknc
+ set1 r10.7
+ ret
+
+END_ANOTHER_FUNC ___floatunsisf
+END_FUNC ___floatsisf
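
The rounding rule described above is round-to-nearest, ties-to-even. A rough, self-contained C model of the normalize-and-round step (my_floatunsisf is a hypothetical name; it returns the raw SF bit pattern):

#include <stdint.h>
uint32_t my_floatunsisf (uint32_t u)
{
  if (u == 0)
    return 0;                               /* 0 -> +0.0 */
  int exp = 158;                            /* 31 + 127: exponent if bit 31 is set */
  while (!(u & 0x80000000u))
    {
      u <<= 1;
      exp--;
    }
  uint32_t mant = (u >> 8) & 0x007fffffu;   /* keep 23 bits below the implicit 1 */
  uint32_t rest = u & 0xffu;                /* dropped guard/sticky bits */
  if (rest > 0x80 || (rest == 0x80 && (mant & 1)))
    {
      mant++;                               /* round up ... */
      if (mant == 0x00800000u)
        {
          mant = 0;                         /* ... carrying into the exponent */
          exp++;
        }
    }
  return ((uint32_t)exp << 23) | mant;
}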
--- /dev/null
+; SF format is:
+;
+; [sign] 1.[23bits] E[8bits(n-127)]
+;
+; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm
+;
+; [A+0] mmmmmmmm
+; [A+1] mmmmmmmm
+; [A+2] Emmmmmmm
+; [A+3] SEEEEEEE
+;
+; Special values (xxx != 0):
+;
+; r11 r10 r9 r8
+; [HL+3] [HL+2] [HL+1] [HL+0]
+; s1111111 10000000 00000000 00000000 infinity
+; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN
+; s0000000 00000000 00000000 00000000 zero
+; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals
+;
+; Note that CMPtype is "signed char" for rl78
+;
+
+#include "vregs.h"
+
+#define Z PSW.6
+
+; External Functions:
+;
+; __int_isnan [HL] -> Z if NaN
+; __int_iszero [HL] -> Z if zero
+
+START_FUNC __int_isinf
+ ;; [HL] points to value, returns Z if it's #Inf
+
+ mov a, [hl+2]
+ and a, #0x80
+ mov x, a
+ mov a, [hl+3]
+ and a, #0x7f
+ cmpw ax, #0x7f80
+ skz
+ ret ; return NZ if not Inf
+ mov a, [hl+2]
+ and a, #0x7f
+ or a, [hl+1]
+ or a, [hl]
+ ret
+
+END_FUNC __int_isinf
+
+START_FUNC _int_unpack_sf
+ ;; convert 32-bit SFmode [DE] to the 8-byte unpacked struct at [HL] ("A")
+
+#define A_SIGN [hl+0] /* byte */
+#define A_EXP [hl+2] /* word */
+#define A_FRAC_L [hl+4] /* word */
+#define A_FRAC_LH [hl+5] /* byte */
+#define A_FRAC_H [hl+6] /* word or byte */
+#define A_FRAC_HH [hl+7] /* byte */
+
+#define B_SIGN [hl+8]
+#define B_EXP [hl+10]
+#define B_FRAC_L [hl+12]
+#define B_FRAC_LH [hl+13]
+#define B_FRAC_H [hl+14]
+#define B_FRAC_HH [hl+15]
+
+ mov a, [de+3]
+ sar a, 7
+ mov A_SIGN, a
+
+ movw ax, [de+2]
+ and a, #0x7f
+ shrw ax, 7
+ movw bc, ax ; remember if the exponent is all zeros
+ subw ax, #127 ; exponent is now non-biased
+ movw A_EXP, ax
+
+ movw ax, [de]
+ movw A_FRAC_L, ax
+
+ mov a, [de+2]
+ and a, #0x7f
+ cmp0 c ; if the exp is all zeros, it's denormal
+ skz
+ or a, #0x80
+ mov A_FRAC_H, a
+
+ mov a, #0
+ mov A_FRAC_HH, a
+
+ ;; rounding-bit-shift
+ movw ax, A_FRAC_L
+ shlw ax, 1
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ rolc a, 1
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ rolc a, 1
+ mov A_FRAC_HH, a
+
+ ret
+
+END_FUNC _int_unpack_sf
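
Viewed from C, the unpacked record built above looks roughly like the struct below (hypothetical sketch, assuming the usual layout with no surprising padding; the offsets follow the A_* defines):

#include <stdint.h>
struct unpacked_sf
{
  uint8_t  sign;    /* 0x00 for +, 0xff for -                     ([hl+0]) */
  uint8_t  pad;     /* unused                                     ([hl+1]) */
  int16_t  exp;     /* unbiased exponent (stored exponent - 127)  ([hl+2]) */
  uint32_t frac;    /* mantissa with the implicit 1 restored (unless the
                       stored exponent was 0, i.e. a denormal) and shifted
                       left once for the rounding bit             ([hl+4]) */
};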
+
+; func(SF a,SF b)
+; [SP+4..7] a
+; [SP+8..11] b
+
+START_FUNC ___subsf3
+
+ ;; a - b => a + (-b)
+
+ ;; Note - we cannot just change the sign of B on the stack and
+ ;; then fall through into __addsf3. The stack'ed value may be
+ ;; used again (it was created by our caller after all). Instead
+ ;; we have to allocate some stack space of our own, copy A and B,
+ ;; change the sign of B, call __addsf3, release the allocated stack
+ ;; and then return.
+
+ subw sp, #8
+ movw ax, [sp+4+8]
+ movw [sp], ax
+ movw ax, [sp+4+2+8]
+ movw [sp+2], ax
+ movw ax, [sp+4+4+8]
+ movw [sp+4], ax
+ mov a, [sp+4+6+8]
+ mov [sp+6], a
+ mov a, [sp+4+7+8]
+ xor a, #0x80
+ mov [sp+7], a
+ call $!___addsf3
+ addw sp, #8
+ ret
+END_FUNC ___subsf3
+
+START_FUNC ___addsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+ret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ movw ax, [sp+6]
+ movw r10, ax
+ ret
+
+1: ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $2f
+ret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ movw ax, [sp+10]
+ movw r10, ax
+ ret
+
+2: ;; if (isinf (a))
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $3f
+
+ ;; if (isinf (b) && a->sign != b->sign) return NaN
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $ret_a
+
+ mov a, [sp+7]
+ mov h, a
+ mov a, [sp+11]
+ xor a, h
+ bf a.7, $ret_a
+
+ movw r8, #0x0001
+ movw r10, #0x7f80
+ ret
+
+3: ;; if (isinf (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bz $ret_b
+
+ ;; if (iszero (b))
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $4f
+
+ ;; if (iszero (a))
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $ret_a
+
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+7]
+ mov h, a
+ movw ax, [sp+10]
+ and a, h
+ movw r10, ax
+ ret
+
+4: ;; if (iszero (a)) return b;
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bz $ret_b
+
+; Normalize the two numbers relative to each other. At this point,
+; we need the numbers converted to their "unpacked" format.
+
+ subw sp, #16 ; Save room for two unpacked values.
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; diff = a.exponent - b.exponent
+ movw ax, B_EXP ; sign/exponent word
+ movw bc, ax
+ movw ax, A_EXP ; sign/exponent word
+
+ subw ax, bc ; a = a.exp - b.exp
+ movw de, ax ; d = sdiff
+
+ ;; if (diff < 0) diff = -diff
+ bf a.7, $1f
+ xor a, #0xff
+ xor r_0, #0xff ; x
+ incw ax ; a = diff
+1:
+ ;; if (diff >= 24) zero the smaller one
+ cmpw ax, #24
+ bc $.L661 ; if diff < 24 goto .L661
+
+ ;; zero out the smaller one
+
+ movw ax, de
+ bt a.7, $1f ; if sdiff < 0 (a_exp < b_exp) goto 1f
+ ;; "zero out" b
+ movw ax, A_EXP
+ movw B_EXP, ax
+ movw ax, #0
+ movw B_FRAC_L, ax
+ movw B_FRAC_H, ax
+ br $5f
+1:
+ ;; "zero out" a
+ movw ax, B_EXP
+ movw A_EXP, ax
+ movw ax, #0
+ movw A_FRAC_L, ax
+ movw A_FRAC_H, ax
+
+ br $5f
+.L661:
+ ;; shift the smaller one so they have the same exponents
+1:
+ movw ax, de
+ bt a.7, $1f
+ cmpw ax, #0 ; sdiff > 0
+ bnh $1f ; if (sdiff <= 0) goto 1f
+
+ decw de
+ incw B_EXP ; because it's [HL+byte]
+
+ movw ax, B_FRAC_H
+ shrw ax, 1
+ movw B_FRAC_H, ax
+ mov a, B_FRAC_LH
+ rorc a, 1
+ mov B_FRAC_LH, a
+ mov a, B_FRAC_L
+ rorc a, 1
+ mov B_FRAC_L, a
+
+ br $1b
+1:
+ movw ax, de
+ bf a.7, $1f
+
+ incw de
+ incw A_EXP ; because it's [HL+byte]
+
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+
+ br $1b
+1:
+
+5: ;; At this point, A and B have the same exponent.
+
+ mov a, A_SIGN
+ cmp a, B_SIGN
+ bnz $1f
+
+ ;; Same sign, just add.
+ movw ax, A_FRAC_L
+ addw ax, B_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ addc a, B_FRAC_H
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ addc a, B_FRAC_HH
+ mov A_FRAC_HH, a
+
+ br $.L728
+
+1: ;; Signs differ - A has A_SIGN still.
+ bf a.7, $.L696
+
+ ;; A is negative, do B-A
+ movw ax, B_FRAC_L
+ subw ax, A_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, B_FRAC_H
+ subc a, A_FRAC_H
+ mov A_FRAC_H, a
+ mov a, B_FRAC_HH
+ subc a, A_FRAC_HH
+ mov A_FRAC_HH, a
+
+ br $.L698
+.L696:
+ ;; B is negative, do A-B
+ movw ax, A_FRAC_L
+ subw ax, B_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ subc a, B_FRAC_H
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ subc a, B_FRAC_HH
+ mov A_FRAC_HH, a
+
+.L698:
+ ;; A is still A_FRAC_HH
+ bt a.7, $.L706
+
+ ;; subtraction was positive
+ mov a, #0
+ mov A_SIGN, a
+ br $.L712
+
+.L706:
+ ;; subtraction was negative
+ mov a, #0xff
+ mov A_SIGN, a
+
+ ;; This negates A_FRAC
+ mov a, A_FRAC_L
+ xor a, #0xff ; XOR doesn't mess with carry
+ add a, #1 ; INC doesn't set the carry
+ mov A_FRAC_L, a
+ mov a, A_FRAC_LH
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_H
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_HH, a
+
+.L712:
+ ;; Renormalize the subtraction
+
+ mov a, A_FRAC_L
+ or a, A_FRAC_LH
+ or a, A_FRAC_H
+ or a, A_FRAC_HH
+ bz $.L728
+
+ ;; Mantissa is not zero, left shift until the MSB is in the
+ ;; right place
+1:
+ movw ax, A_FRAC_H
+ cmpw ax, #0x0200
+ bnc $.L728
+
+ decw A_EXP
+
+ movw ax, A_FRAC_L
+ shlw ax, 1
+ movw A_FRAC_L, ax
+ movw ax, A_FRAC_H
+ rolwc ax, 1
+ movw A_FRAC_H, ax
+ br $1b
+
+.L728:
+ ;; normalize A and pack it
+
+ movw ax, A_FRAC_H
+ cmpw ax, #0x01ff
+ bnh $1f
+ ;; overflow in the mantissa; adjust
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+ incw A_EXP
+1:
+
+ call $!__rl78_int_pack_a_r8
+ addw sp, #16
+ ret
+
+END_FUNC ___addsf3
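
Stripped of the NaN/Inf/zero special cases, the add path above aligns the exponents by shifting the smaller operand right, then adds or subtracts the fractions depending on the signs. A rough, self-contained C outline (align_and_add is a hypothetical helper, not something the file defines):

#include <stdint.h>
static void align_and_add (int *a_exp, uint32_t *a_frac, uint8_t *a_sign,
                           int b_exp, uint32_t b_frac, uint8_t b_sign)
{
  int diff = *a_exp - b_exp;
  if (diff >= 24)       { b_frac = 0;  b_exp  = *a_exp; }  /* b is negligible */
  else if (diff <= -24) { *a_frac = 0; *a_exp = b_exp;  }  /* a is negligible */
  while (*a_exp < b_exp) { *a_frac >>= 1; (*a_exp)++; }    /* align exponents */
  while (b_exp < *a_exp) {  b_frac >>= 1;   b_exp++;  }
  if (*a_sign == b_sign)
    *a_frac += b_frac;                       /* same sign: magnitudes add */
  else
    {
      /* different signs: subtract the negative one from the positive one */
      int32_t d = *a_sign ? (int32_t)(b_frac - *a_frac)
                          : (int32_t)(*a_frac - b_frac);
      *a_sign = (d < 0) ? 0xff : 0x00;
      *a_frac = (d < 0) ? (uint32_t)-d : (uint32_t)d;
    }
  /* the caller then renormalizes and repacks (__rl78_int_pack_a_r8) */
}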
+
+START_FUNC __rl78_int_pack_a_r8
+ ;; pack A to R8
+ movw ax, A_EXP
+ addw ax, #126 ; not 127, we want the "bt/bf" test to check for denormals
+
+ bf a.7, $1f
+ ;; make a denormal
+2:
+ movw bc, ax
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+ movw ax, bc
+ incw ax
+ bt a.7, $2b
+ decw ax
+1:
+ incw ax ; now it's as if we added 127
+ movw A_EXP, ax
+
+ cmpw ax, #0xfe
+ bnh $1f
+ ;; store #Inf instead
+ mov a, A_SIGN
+ or a, #0x7f
+ mov x, #0x80
+ movw r10, ax
+ movw r8, #0
+ ret
+
+1:
+ bf a.7, $1f ; note AX has EXP at top of loop
+ ;; underflow, denormal?
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+ incw A_EXP
+ movw ax, A_EXP
+ br $1b
+
+1:
+ ;; undo the rounding-bit-shift
+ mov a, A_FRAC_L
+ bf a.0, $1f
+ ;; round up
+ movw ax, A_FRAC_L
+ addw ax, #1
+ movw A_FRAC_L, ax
+ sknc
+ incw A_FRAC_H
+1:
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+
+ movw ax, A_FRAC_L
+ movw r8, ax
+
+ or a, x
+ or a, A_FRAC_H
+ or a, A_FRAC_HH
+ bnz $1f
+ movw ax, #0
+ movw A_EXP, ax
+1:
+ mov a, A_FRAC_H
+ and a, #0x7f
+ mov b, a
+ mov a, A_EXP
+ shl a, 7
+ or a, b
+ mov r10, a
+
+ mov a, A_SIGN
+ and a, #0x80
+ mov b, a
+ mov a, A_EXP
+ shr a, 1
+ or a, b
+ mov r11, a
+
+ ret
+END_FUNC __rl78_int_pack_a_r8
+
+START_FUNC ___mulsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+mret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+11]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+6]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+mret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ mov a, [sp+7]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+10]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isinf (a)) return (b==0) ? nan : a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $.L805
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $mret_a
+
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+.L805:
+ ;; if (isinf (b)) return (a==0) ? nan : b
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $.L814
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $mret_b
+
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+.L814:
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bz $mret_a
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bz $mret_b
+
+ ;; at this point, we're doing the multiplication.
+
+ subw sp, #16 ; save room for two unpacked values
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; multiply SI a.FRAC * SI b.FRAC to DI r8
+
+ subw sp, #16
+ movw ax, A_FRAC_L
+ movw [sp+0], ax
+ movw ax, A_FRAC_H
+ movw [sp+2], ax
+
+ movw ax, B_FRAC_L
+ movw [sp+8], ax
+ movw ax, B_FRAC_H
+ movw [sp+10], ax
+
+ movw ax, #0
+ movw [sp+4], ax
+ movw [sp+6], ax
+ movw [sp+12], ax
+ movw [sp+14], ax
+
+ call !!___muldi3 ; DI temp at [sp+0] * DI temp at [sp+8] -> R8..R15
+ addw sp, #16
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; add the exponents together
+ movw ax, A_EXP
+ addw ax, B_EXP
+ movw bc, ax ; exponent in BC
+
+ ;; now, re-normalize the DI value in R8..R15 to have the
+ ;; MSB in the "right" place, adjusting BC as we shift it.
+
+ ;; The value will normally be in this range:
+ ;; R15 R8
+ ;; 0001_0000_0000_0000
+ ;; 0003_ffff_fc00_0001
+
+ ;; so to speed it up, we normalize to:
+ ;; 0001_xxxx_xxxx_xxxx
+ ;; then extract the bytes we want (r11-r14)
+
+1:
+ mov a, r15
+ cmp0 a
+ bnz $2f
+ mov a, r14
+ and a, #0xfe
+ bz $1f
+2:
+ ;; shift right, inc exponent
+ movw ax, r14
+ shrw ax, 1
+ movw r14, ax
+ mov a, r13
+ rorc a, 1
+ mov r13, a
+ mov a, r12
+ rorc a, 1
+ mov r12, a
+ mov a, r11
+ rorc a, 1
+ mov r11, a
+ ;; we don't care about r8/r9/r10 if we're shifting this way
+ incw bc
+ br $1b
+1:
+ mov a, r15
+ or a, r14
+ bnz $1f
+ ;; shift left, dec exponent
+ movw ax, r8
+ shlw ax, 1
+ movw r8, ax
+ movw ax, r10
+ rolwc ax, 1
+ movw r10, ax
+ movw ax, r12
+ rolwc ax, 1
+ movw r12, ax
+ movw ax, r14
+ rolwc ax, 1
+ movw r14, ax
+ decw bc
+ br $1b
+1:
+ ;; at this point, FRAC is in R11..R14 and EXP is in BC
+ movw ax, bc
+ movw A_EXP, ax
+
+ mov a, r11
+ mov A_FRAC_L, a
+ mov a, r12
+ mov A_FRAC_LH, a
+ mov a, r13
+ mov A_FRAC_H, a
+ mov a, r14
+ mov A_FRAC_HH, a
+
+ mov a, A_SIGN
+ xor a, B_SIGN
+ mov A_SIGN, a
+
+ call $!__rl78_int_pack_a_r8
+
+ addw sp, #16
+ ret
+
+END_FUNC ___mulsf3
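
In essence the multiply path computes the roughly 50-bit product of the two 25-bit unpacked fractions, renormalizes it into the [2^48, 2^49) range and keeps the top 25 bits, adjusting the exponent for every shift. A rough C outline (mul_frac is a hypothetical helper; zero operands are assumed to have been handled already):

#include <stdint.h>
static void mul_frac (int *res_exp, uint32_t *res_frac,
                      uint32_t a_frac, int a_exp, uint32_t b_frac, int b_exp)
{
  uint64_t p = (uint64_t)a_frac * b_frac;             /* ~2^48 .. 2^50 */
  int e = a_exp + b_exp;
  while (p >= ((uint64_t)1 << 49)) { p >>= 1; e++; }  /* renormalize ... */
  while (p <  ((uint64_t)1 << 48)) { p <<= 1; e--; }  /* ... into [2^48,2^49) */
  *res_frac = (uint32_t)(p >> 24);                    /* keep the top 25 bits */
  *res_exp  = e;
}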
+
+START_FUNC ___divsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+dret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+11]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+6]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+dret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ mov a, [sp+7]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+10]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+
+ ;; if (isinf (a)) return isinf(b) ? nan : a
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $1f
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $dret_a
+dret_nan:
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+1:
+
+ ;; if (iszero (a)) return iszero(b) ? nan : a
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $1f
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $dret_a
+ br $dret_nan
+
+1:
+ ;; if (isinf (b)) return 0
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $1f
+
+ mov a, [sp+7]
+ mov b, a
+ mov a, [sp+11]
+ xor a, b
+ and a, #0x80
+ mov r11, a
+ movw r8, #0
+ mov r10, #0
+ ret
+
+1:
+ ;; if (iszero (b)) return Inf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $1f
+
+ mov a, [sp+7]
+ mov b, a
+ mov a, [sp+11]
+ xor a, b
+ or a, #0x7f
+ mov r11, a
+ movw r8, #0
+ mov r10, #0x80
+ ret
+1:
+
+ ;; at this point, we're doing the division. Normalized
+ ;; mantissas look like:
+ ;; 01.xx.xx.xx
+ ;; so we divide:
+ ;; 01.xx.xx.xx.00.00.00.00
+ ;; by 01.xx.xx.xx
+ ;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00
+
+
+ subw sp, #16 ; save room for two unpacked values
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; divide DI a.FRAC / SI b.FRAC to DI r8
+
+ subw sp, #16
+ movw ax, A_FRAC_L
+ movw [sp+4], ax
+ movw ax, A_FRAC_H
+ movw [sp+6], ax
+
+ movw ax, B_FRAC_L
+ movw [sp+8], ax
+ movw ax, B_FRAC_H
+ movw [sp+10], ax
+
+ movw ax, #0
+ movw [sp+0], ax
+ movw [sp+2], ax
+ movw [sp+12], ax
+ movw [sp+14], ax
+
+ call !!___divdi3 ; DI temp at [sp+0] / DI temp at [sp+8] -> R8..R15
+ addw sp, #16
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; subtract the exponents A - B
+ movw ax, A_EXP
+ subw ax, B_EXP
+ movw bc, ax ; exponent in BC
+
+ ;; now, re-normalize the DI value in R8..R15 to have the
+ ;; MSB in the "right" place, adjusting BC as we shift it.
+
+ ;; The value will normally be in this range:
+ ;; R15 R8
+ ;; 0000_0000_8000_0000
+ ;; 0000_0001_ffff_ff00
+
+ ;; so to speed it up, we normalize to:
+ ;; 0000_0001_xxxx_xxxx
+ ;; then extract the bytes we want (r9-r12)
+
+1:
+ movw ax, r14
+ cmpw ax, #0
+ bnz $2f
+ movw ax, r12
+ cmpw ax, #1
+ bnh $1f
+2:
+ ;; shift right, inc exponent
+ movw ax, r14
+ shrw ax, 1
+ movw r14, ax
+ mov a, r13
+ rorc a, 1
+ mov r13, a
+ mov a, r12
+ rorc a, 1
+ mov r12, a
+ mov a, r11
+ rorc a, 1
+ mov r11, a
+ mov a, r10
+ rorc a, 1
+ mov r10, a
+ mov a, r9
+ rorc a, 1
+ mov r9, a
+ mov a, r8
+ rorc a, 1
+ mov r8, a
+
+ incw bc
+ br $1b
+1:
+ ;; the previous loop leaves r15.r13 zero
+ mov a, r12
+ cmp0 a
+ bnz $1f
+ ;; shift left, dec exponent
+ movw ax, r8
+ shlw ax, 1
+ movw r8, ax
+ movw ax, r10
+ rolwc ax, 1
+ movw r10, ax
+ movw ax, r12
+ rolwc ax, 1
+ movw r12, ax
+ ;; don't need to do r14
+ decw bc
+ br $1b
+1:
+ ;; at this point, FRAC is in R8..R11 and EXP is in BC
+ movw ax, bc
+ movw A_EXP, ax
+
+ mov a, r9
+ mov A_FRAC_L, a
+ mov a, r10
+ mov A_FRAC_LH, a
+ mov a, r11
+ mov A_FRAC_H, a
+ mov a, r12
+ mov A_FRAC_HH, a
+
+ mov a, A_SIGN
+ xor a, B_SIGN
+ mov A_SIGN, a
+
+ call $!__rl78_int_pack_a_r8
+
+ addw sp, #16
+ ret
+
+END_FUNC ___divsf3
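
The divide path mirrors the multiply: the dividend fraction is shifted 32 bits left before the 64-bit division so the quotient lands near 2^32, then the result is renormalized and the top 25 bits kept. A rough C outline (div_frac is a hypothetical helper; zero and infinite operands are assumed to have been handled already):

#include <stdint.h>
static void div_frac (int *res_exp, uint32_t *res_frac,
                      uint32_t a_frac, int a_exp, uint32_t b_frac, int b_exp)
{
  uint64_t q = ((uint64_t)a_frac << 32) / b_frac;     /* ~2^31 .. 2^33 */
  int e = a_exp - b_exp;
  while (q >= ((uint64_t)1 << 33)) { q >>= 1; e++; }  /* renormalize ... */
  while (q <  ((uint64_t)1 << 32)) { q <<= 1; e--; }  /* ... into [2^32,2^33) */
  *res_frac = (uint32_t)(q >> 8);                     /* keep the top 25 bits */
  *res_exp  = e;
}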
#include "vregs.h"
- .text
- .global ___lshrsi3
- .type ___lshrsi3, @function
-___lshrsi3:
-
+START_FUNC ___lshrsi3
;; input:
;;
;; [zero]
;; B - count
mov a, [sp+8] ; A now contains the count
-
cmp a, #0x20
bc $.Lcount_is_normal
br $.Lloop_top
- .size ___lshrsi3, .-___lshrsi3
+END_FUNC ___lshrsi3
; DE count (resL-tmp)
; HL [sp+4]
+; Register use (G10):
+;
+; AX op2L
+; BC op2H
+; DE count
+; HL [sp+4]
+; r8/r9 res32L
+; r10/r11 (resH)
+; r12/r13 (resL-tmp)
+; r16/r17 res32H
+; r18/r19 op1
+
START_FUNC ___mulsi3
;; A is at [sp+4]
;; B is at [sp+8]
sknc
incw ax
addw ax, r_2
-.Lmul_hisi_no_add:
+.Lmul_hisi_no_add:
sel rb1
shlw bc, 1
sel rb0
.Lmul_hi_done:
ret
END_FUNC ___mulhi3
+
+;;; --------------------------------------
+#ifdef __RL78_G10__
+ START_FUNC ___mulqi3
+
+ mov a, [sp+4]
+ mov r9, a
+ mov a, [sp+6]
+ mov r10, a
+ mov a, #9
+ mov r11, a
+ clrb a
+ mov r8, a
+.L2:
+ cmp0 r10
+ skz
+ dec r11
+ sknz
+ ret
+ mov a, r10
+ and a, #1
+ mov r12, a
+ cmp0 r12
+ sknz
+ br !!.L3
+ mov a, r9
+ mov l, a
+ mov a, r8
+ add a, l
+ mov r8, a
+.L3:
+ mov a, r9
+ add a, a
+ mov r9, a
+ mov a, r10
+ shr a, 1
+ mov r10, a
+ br !!.L2
+
+ END_FUNC ___mulqi3
+#endif
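
The G10 variant of __mulqi3 above is a classic shift-and-add loop. A small C equivalent of what it computes (my_mulqi is a hypothetical name; the result is the low 8 bits of the product):

unsigned char my_mulqi (unsigned char a, unsigned char b)
{
  unsigned char res = 0;
  while (b != 0)
    {
      if (b & 1)
        res = (unsigned char)(res + a);   /* add the shifted multiplicand */
      a = (unsigned char)(a << 1);
      b >>= 1;
    }
  return res;
}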
+
.text
- .global _signbit
-_signbit:
- .global _signbitf
-_signbitf:
- ;; X is at [sp+4]
+START_FUNC _signbit
+START_ANOTHER_FUNC _signbitf
+ ;; X is at [sp+4]..[SP+7]
;; result is in R8..R9
movw r8, #0
sknc
movw r8, #1
ret
- .size _signbit, . - _signbit
- .size _signbitf, . - _signbitf
+END_ANOTHER_FUNC _signbitf
+END_FUNC _signbit
- .global _signbitl
-_signbitl:
- ;; X is at [sp+4]
+
+START_FUNC _signbitl
+ ;; X is at [sp+4]..[SP+7]
;; result is in R8..R9
movw r8, #0
sknc
movw r8, #1
ret
- .size _signbitl, . - _signbitl
+END_FUNC _signbitl
LIB2ADD = \
$(srcdir)/config/rl78/trampoline.S \
- $(srcdir)/config/rl78/lib2div.c \
- $(srcdir)/config/rl78/lib2mul.c \
$(srcdir)/config/rl78/lib2shift.c \
$(srcdir)/config/rl78/lshrsi3.S \
$(srcdir)/config/rl78/mulsi3.S \
$(srcdir)/config/rl78/divmodhi.S \
$(srcdir)/config/rl78/divmodqi.S \
$(srcdir)/config/rl78/signbit.S \
+ $(srcdir)/config/rl78/bit-count.S \
+ $(srcdir)/config/rl78/fpbit-sf.S \
+ $(srcdir)/config/rl78/fpmath-sf.S \
$(srcdir)/config/rl78/cmpsi2.S
+LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
+ _popcounthi2 _popcountsi2 \
+ _parityhi2 _paritysi2 _ffssi2 _ffshi2 \
+ _negate_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
+ _lt_sf _le_sf _unord_sf \
+ _si_to_sf _usi_to_sf \
+ _sf_to_si _sf_to_usi \
+ _fixunssfsi _fixsfsi \
+ _addsub_sf _mul_sf _div_sf
+
+# __gcc_bcmp is now provided by cmpsi2.S, so drop it from the default
+# LIB2FUNCS_ST (which lists _eprintf and __gcc_bcmp); keep only _eprintf.
+LIB2FUNCS_ST = _eprintf
+
HOST_LIBGCC2_CFLAGS += -Os -ffunction-sections -fdata-sections
pointer in R10, allocate a trampoline and return its address in
R8. */
- .text
- .global ___trampoline_init
- .type ___trampoline_init, @function
-___trampoline_init:
-
+START_FUNC ___trampoline_init
movw hl, #trampoline_array
-1:
- movw ax, [hl + TO_ADDR]
+
+1: movw ax, [hl + TO_ADDR]
cmpw ax, #0
bz $2f
movw ax, [hl + TO_STUB]
movw r8, ax
-
ret
- .size ___trampoline_init, . - ___trampoline_init
+END_FUNC ___trampoline_init
- .global ___trampoline_uninit
- .type ___trampoline_uninit, @function
-___trampoline_uninit:
+
+START_FUNC ___trampoline_uninit
movw hl, #trampoline_array
movw ax, sp
movw bc, ax
-1:
- movw ax, [hl + TO_FRAME]
+
+1: movw ax, [hl + TO_FRAME]
cmpw ax, bc
bc $2f
clrw ax
movw [hl + TO_ADDR], ax
-2:
- movw ax, hl
+2: movw ax, hl
addw ax, #TO_SIZE
movw hl, ax
cmpw ax, #trampoline_array_end
bnz $1b
ret
- .size ___trampoline_uninit, . - ___trampoline_uninit
+END_FUNC ___trampoline_uninit
#endif
+.macro START_ANOTHER_FUNC name
+ .global \name
+ .type \name , @function
+\name:
+.endm
+
/* Start a function in its own section, so that it
can be subject to linker garbage collection. */
.macro START_FUNC name
.pushsection .text.\name,"ax",@progbits
- .global \name
- .type \name , @function
-\name:
+ START_ANOTHER_FUNC \name
+.endm
+
+.macro END_ANOTHER_FUNC name
+ .size \name , . - \name
.endm
/* End the function. Set the size. */
.macro END_FUNC name
- .size \name , . - \name
+ END_ANOTHER_FUNC \name
.popsection
.endm