From: Nick Clifton
Date: Tue, 27 Jan 2015 11:36:01 +0000 (+0000)
Subject: cmpsi2.S: Use function start and end macros.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8410904a77b9238a1b65798a0fcd92f340ffe0ca;p=gcc.git

cmpsi2.S: Use function start and end macros.

        * config/rl78/cmpsi2.S: Use function start and end macros.
        (__gcc_bcmp): New function.
        * config/rl78/lshrsi3.S: Use function start and end macros.
        * config/rl78/mulsi3.S: Add support for G10.
        (__mulqi3): New function for G10.
        * config/rl78/signbit.S: Use function start and end macros.
        * config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
        fpmath-sf.S.
        (LIB2FUNCS_EXCLUDE): Define.
        (LIB2FUNCS_ST): Define.
        * config/rl78/trampoline.S: Use function start and end macros.
        * config/rl78/vregs.h (START_FUNC): New macro.
        (START_ANOTHER_FUNC): New macro.
        (END_FUNC): New macro.
        (END_ANOTHER_FUNC): New macro.
        * config/rl78/bit-count.S: New file.  Contains assembler
        implementations of the bit counting functions: __clzhi2,
        __clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
        __paritysi2, __popcounthi2 and __popcountsi2.
        * config/rl78/fpbit-sf.S: New file.  Contains assembler
        implementations of the math functions: __negsf2, __cmpsf2,
        __eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
        __fixsfsi, __fixunssfsi, __floatsisf and __floatunsisf.
        * config/rl78/fpmath-sf.S: New file.  Contains assembler
        implementations of the math functions: __subsf3, __addsf3,
        __mulsf3 and __divsf3.

From-SVN: r220162
---

diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 08054548bc5..dbe2b82905a 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,32 @@
+2015-01-27  Nick Clifton
+
+        * config/rl78/cmpsi2.S: Use function start and end macros.
+        (__gcc_bcmp): New function.
+        * config/rl78/lshrsi3.S: Use function start and end macros.
+        * config/rl78/mulsi3.S: Add support for G10.
+        (__mulqi3): New function for G10.
+        * config/rl78/signbit.S: Use function start and end macros.
+        * config/rl78/t-rl78 (LIB2ADD): Add bit-count.S, fpbit-sf.S and
+        fpmath-sf.S.
+        (LIB2FUNCS_EXCLUDE): Define.
+        (LIB2FUNCS_ST): Define.
+        * config/rl78/trampoline.S: Use function start and end macros.
+        * config/rl78/vregs.h (START_FUNC): New macro.
+        (START_ANOTHER_FUNC): New macro.
+        (END_FUNC): New macro.
+        (END_ANOTHER_FUNC): New macro.
+        * config/rl78/bit-count.S: New file.  Contains assembler
+        implementations of the bit counting functions: __clzhi2,
+        __clzsi2, __ctzhi2, __ctzsi2, __ffshi2, __ffssi2, __parityhi2,
+        __paritysi2, __popcounthi2 and __popcountsi2.
+        * config/rl78/fpbit-sf.S: New file.  Contains assembler
+        implementations of the math functions: __negsf2, __cmpsf2,
+        __eqsf2, __nesf2, __lesf2, __ltsf2, __gesf2, __gtsf2, __unordsf2,
+        __fixsfsi, __fixunssfsi, __floatsisf and __floatunsisf.
+        * config/rl78/fpmath-sf.S: New file.  Contains assembler
+        implementations of the math functions: __subsf3, __addsf3,
+        __mulsf3 and __divsf3.
+
 2015-01-27  Rainer Orth
 
         * config.host (i[34567]86-*-solaris2*, x86_64-*-solaris2.1[0-9]*):

diff --git a/libgcc/config/rl78/bit-count.S b/libgcc/config/rl78/bit-count.S
new file mode 100644
index 00000000000..2685c84ca3f
--- /dev/null
+++ b/libgcc/config/rl78/bit-count.S
@@ -0,0 +1,213 @@
+; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+; Contributed by Red Hat.
+;
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; +; This file is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; Under Section 7 of GPL version 3, you are granted additional +; permissions described in the GCC Runtime Library Exception, version +; 3.1, as published by the Free Software Foundation. +; +; You should have received a copy of the GNU General Public License and +; a copy of the GCC Runtime Library Exception along with this program; +; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +; . + +#include "vregs.h" + +START_FUNC ___clzhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __clzhi2_internal +__clzhi2_internal: + movw r8, #16 + cmpw ax, #0 + bz $clzhi2_is_zero + mov e, #0xff +1: + inc e + shlw ax, 1 + bnc $1b + mov a, e + mov r8, a +clzhi2_is_zero: + ret +END_FUNC ___clzhi2 + + +START_FUNC ___clzsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+6] + cmpw ax, #0 + bnz $__clzhi2_internal + movw ax, [SP+4] + call !__clzhi2_internal + movw ax, r8 + addw ax, #16 + movw r8, ax + ret +END_FUNC ___clzsi2 + + +START_FUNC ___ctzhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __ctzhi2_internal +__ctzhi2_internal: + movw r8, #16 + cmpw ax, #0 + bz $ctzhi2_is_zero + mov e, #0xff +1: + inc e + shrw ax, 1 + bnc $1b + mov a, e + mov r8, a +ctzhi2_is_zero: + ret +END_FUNC ___ctzhi2 + + +START_FUNC ___ctzsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+4] + cmpw ax, #0 + bnz $__ctzhi2_internal + movw ax, [SP+6] + call !__ctzhi2_internal + movw ax, r8 + addw ax, #16 + movw r8, ax + ret +END_FUNC ___ctzsi2 + + +START_FUNC ___ffshi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + + .global __ffshi2_internal +__ffshi2_internal: + movw r8, #0 + cmpw ax, #0 + bz $ffshi2_is_zero + mov e, #0 +1: + inc e + shrw ax, 1 + bnc $1b + mov a, e + mov r8, a +ffshi2_is_zero: + ret +END_FUNC ___ffshi2 + + +START_FUNC ___ffssi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+4] + cmpw ax, #0 + bnz $__ffshi2_internal + movw ax, [SP+6] + cmpw ax, #0 + bz $1f + call !__ffshi2_internal + movw ax, r8 + addw ax, #16 +1: + movw r8, ax + ret +END_FUNC ___ffssi2 + + +START_FUNC ___parityqi_internal + mov1 cy, a.0 + xor1 cy, a.1 + xor1 cy, a.2 + xor1 cy, a.3 + xor1 cy, a.4 + xor1 cy, a.5 + xor1 cy, a.6 + xor1 cy, a.7 + movw ax, #0 + bnc $1f + incw ax +1: + movw r8, ax + ret +END_FUNC ___parityqi_internal + + +START_FUNC ___parityhi2 + ;; Argument is in [SP+4], return in R8. + movw ax, [SP+4] + xor a, x + br $___parityqi_internal +END_FUNC ___parityhi2 + + +START_FUNC ___paritysi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + movw ax, [SP+4] + xor a, x + mov b, a + movw ax, [SP+6] + xor a, x + xor a, b + br $___parityqi_internal +END_FUNC ___paritysi2 + + + +START_FUNC ___popcounthi2 + ;; Argument is in [SP+4], return in R8. + mov d, #2 + br $___popcountqi_internal +END_FUNC ___popcounthi2 + + +START_FUNC ___popcountsi2 + ;; Argument is in [SP+6]:[SP+4], return in R8. + mov d, #4 + br $___popcountqi_internal +END_FUNC ___popcountsi2 + + +START_FUNC ___popcountqi_internal + ;; There are D bytes starting at [HL] + ;; store count in R8. 
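+        ;; The two functions above load D with the number of argument
+        ;; bytes (2 for HImode, 4 for SImode) and branch here.  HL is
+        ;; pointed at the argument bytes on the stack; each byte is
+        ;; shifted left eight times, and ADDC accumulates the bits that
+        ;; fall off the top (the population count) into A.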
+ + movw ax, sp + addw ax, #4 + movw hl, ax + mov a, #0 +1: + xch a, b + mov a, [hl] + xch a, b + mov e, #8 +2: + shl b,1 + addc a, #0 + dec e + bnz $2b + + incw hl + dec d + bnz $1b + + mov x, a + mov a, #0 + movw r8, ax + ret +END_FUNC ___popcountqi_internal diff --git a/libgcc/config/rl78/cmpsi2.S b/libgcc/config/rl78/cmpsi2.S index 557b4597132..f0d8292625d 100644 --- a/libgcc/config/rl78/cmpsi2.S +++ b/libgcc/config/rl78/cmpsi2.S @@ -31,9 +31,8 @@ ;; If A is less than B it returns 0. If A is greater ;; than B it returns 2. If they are equal it returns 1. - .global ___cmpsi2 - .type ___cmpsi2, @function -___cmpsi2: +START_FUNC ___cmpsi2 + ;; A is at [sp+4] ;; B is at [sp+8] ;; Result put in R8 @@ -88,18 +87,18 @@ ___cmpsi2: movw r8, ax ret - .size ___cmpsi2, . - ___cmpsi2 - - +END_FUNC ___cmpsi2 + +;; ------------------------------------------------------ + ;; int __ucmpsi2 (unsigned long A, unsigned long B) ;; ;; Performs an unsigned comparison of A and B. ;; If A is less than B it returns 0. If A is greater ;; than B it returns 2. If they are equal it returns 1. - .global ___ucmpsi2 - .type ___ucmpsi2, @function -___ucmpsi2: +START_FUNC ___ucmpsi2 + ;; A is at [sp+4] ;; B is at [sp+8] ;; Result put in R8..R9 @@ -117,5 +116,57 @@ ___ucmpsi2: br !!.Lless_than_or_greater_than br !!.Lcompare_bottom_words - .size ___ucmpsi2, . - ___ucmpsi2 - \ No newline at end of file +END_FUNC ___ucmpsi2 + +;; ------------------------------------------------------ + + ;; signed int __gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size) + ;; Result is negative if S1 is less than S2, + ;; positive if S1 is greater, 0 if S1 and S2 are equal. + +START_FUNC __gcc_bcmp + + ;; S1 is at [sp+4] + ;; S2 is at [sp+6] + ;; SIZE is at [sp+8] + ;; Result in r8/r9 + + movw r10, #0 +1: + ;; Compare R10 against the SIZE parameter + movw ax, [sp+8] + subw ax, r10 + sknz + br !!1f + + ;; Load S2[r10] into R8 + movw ax, [sp+6] + addw ax, r10 + movw hl, ax + mov a, [hl] + mov r8, a + + ;; Load S1[r10] into A + movw ax, [sp+4] + addw ax, r10 + movw hl, ax + mov a, [hl] + + ;; Increment offset + incw r10 + + ;; Compare loaded bytes + cmp a, r8 + sknz + br !!1b + + ;; They differ. Subtract *S2 from *S1 and return as the result. + mov x, a + mov a, #0 + mov r9, #0 + subw ax, r8 +1: + movw r8, ax + ret + +END_FUNC __gcc_bcmp diff --git a/libgcc/config/rl78/fpbit-sf.S b/libgcc/config/rl78/fpbit-sf.S new file mode 100644 index 00000000000..042facee14e --- /dev/null +++ b/libgcc/config/rl78/fpbit-sf.S @@ -0,0 +1,608 @@ +; SF format is: +; +; [sign] 1.[23bits] E[8bits(n-127)] +; +; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm +; +; [A+0] mmmmmmmm +; [A+1] mmmmmmmm +; [A+2] Emmmmmmm +; [A+3] SEEEEEEE +; +; Special values (xxx != 0): +; +; s1111111 10000000 00000000 00000000 infinity +; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN +; s0000000 00000000 00000000 00000000 zero +; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals +; +; Note that CMPtype is "signed char" for rl78 +; + +#include "vregs.h" + +#define Z PSW.6 + +START_FUNC ___negsf2 + + ;; Negate the floating point value. + ;; Input at [SP+4]..[SP+7]. + ;; Output to R8..R11. 
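+        ;; Only the sign bit (bit 7 of the byte at [SP+7]) is flipped;
+        ;; the exponent and fraction bits pass through unchanged.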
+
+        movw    ax, [SP+4]
+        movw    r8, ax
+        movw    ax, [SP+6]
+        xor     a, #0x80
+        movw    r10, ax
+        ret
+
+END_FUNC ___negsf2
+
+;; ------------------internal functions used by later code --------------
+
+START_FUNC __int_isnan
+
+        ;; [HL] points to value, returns Z if it's a NaN
+
+        mov     a, [hl+2]
+        and     a, #0x80
+        mov     x, a
+        mov     a, [hl+3]
+        and     a, #0x7f
+        cmpw    ax, #0x7f80
+        skz
+        ret             ; return NZ if not NaN
+        mov     a, [hl+2]
+        and     a, #0x7f
+        or      a, [hl+1]
+        or      a, [hl]
+        bnz     $1f
+        clr1    Z       ; Z, normal
+        ret
+1:
+        set1    Z       ; nan
+        ret
+
+END_FUNC __int_isnan
+
+START_FUNC __int_eithernan
+
+        ;; call from toplevel functions, returns Z if either number is a NaN,
+        ;; or NZ if both are OK.
+
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    $!__int_isnan
+        bz      $1f
+
+        movw    ax, sp
+        addw    ax, #12
+        movw    hl, ax
+        call    $!__int_isnan
+1:
+        ret
+
+END_FUNC __int_eithernan
+
+START_FUNC __int_iszero
+
+        ;; [HL] points to value, returns Z if it's zero
+
+        mov     a, [hl+3]
+        and     a, #0x7f
+        or      a, [hl+2]
+        or      a, [hl+1]
+        or      a, [hl]
+        ret
+
+END_FUNC __int_iszero
+
+START_FUNC __int_cmpsf
+
+        ;; This is always called from some other function here,
+        ;; so the stack offsets are adjusted accordingly.
+
+        ;; X [SP+8] <=> Y [SP+12] : <=> 0
+
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    $!__int_iszero
+        bnz     $1f
+
+        movw    ax, sp
+        addw    ax, #12
+        movw    hl, ax
+        call    $!__int_iszero
+        bnz     $2f
+        ;; At this point, both args are zero.
+        mov     a, #0
+        ret
+
+2:
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+1:
+        ;; At least one arg is non-zero so we can just compare magnitudes.
+        ;; Args are [HL] and [HL+4].
+
+        mov     a, [HL+3]
+        xor     a, [HL+7]
+        mov1    cy, a.7
+        bnc     $1f
+
+        mov     a, [HL+3]
+        sar     a, 7
+        or      a, #1
+        ret
+
+1:      ;; Signs the same, compare magnitude.  It's safe to lump
+        ;; the sign bits, exponent, and mantissa together here, since they're
+        ;; stored in the right sequence.
+        movw    ax, [HL+2]
+        cmpw    ax, [HL+6]
+        bc      $ybig_cmpsf     ; branch if X < Y
+        bnz     $xbig_cmpsf     ; branch if X > Y
+
+        movw    ax, [HL]
+        cmpw    ax, [HL+4]
+        bc      $ybig_cmpsf     ; branch if X < Y
+        bnz     $xbig_cmpsf     ; branch if X > Y
+
+        mov     a, #0
+        ret
+
+xbig_cmpsf:     ; |X| > |Y| so return A = 1 if pos, 0xff if neg
+        mov     a, [HL+3]
+        sar     a, 7
+        or      a, #1
+        ret
+ybig_cmpsf:     ; |X| < |Y| so return A = 0xff if pos, 1 if neg
+        mov     a, [HL+3]
+        xor     a, #0x80
+        sar     a, 7
+        or      a, #1
+        ret
+
+END_FUNC __int_cmpsf
+
+;; ----------------------------------------------------------
+
+START_FUNC ___cmpsf2
+        ;; This function calculates "A <=> B".  That is, if A is less than B
+        ;; it returns -1, if A is greater than B it returns 1, and if A
+        ;; and B are equal it returns 0.  If either argument is NaN the
+        ;; behaviour is undefined.
+
+        ;; Input at [SP+4]..[SP+7].
+        ;; Output to R8..R9.
+
+        call    $!__int_eithernan
+        bnz     $1f
+        movw    r8, #1
+        ret
+1:
+        call    $!__int_cmpsf
+        mov     r8, a
+        sar     a, 7
+        mov     r9, a
+        ret
+
+END_FUNC ___cmpsf2
+
+;; ----------------------------------------------------------
+
+        ;; These functions are all basically the same as ___cmpsf2
+        ;; except that they define how they handle NaNs.
+
+START_FUNC ___eqsf2
+        ;; Returns zero iff neither argument is NaN
+        ;; and both arguments are equal.
+START_ANOTHER_FUNC ___nesf2
+        ;; Returns non-zero iff either argument is NaN or the arguments are
+        ;; unequal.  Effectively __nesf2 is the same as __eqsf2.
+START_ANOTHER_FUNC ___lesf2
+        ;; Returns a value less than or equal to zero if neither
+        ;; argument is NaN, and the first is less than or equal to the second.
+START_ANOTHER_FUNC ___ltsf2
+        ;; Returns a value less than zero if neither argument is
+        ;; NaN, and the first is strictly less than the second.
+
+        ;; Input at [SP+4]..[SP+7].
+        ;; Output to R8.
+
+        mov     r8, #1
+
+;;; Fall through
+
+START_ANOTHER_FUNC __int_cmp_common
+
+        call    $!__int_eithernan
+        sknz
+        ;; return value (pre-filled-in below) for "either is nan"
+        ret
+
+        call    $!__int_cmpsf
+        mov     r8, a
+        ret
+
+END_ANOTHER_FUNC __int_cmp_common
+END_ANOTHER_FUNC ___ltsf2
+END_ANOTHER_FUNC ___lesf2
+END_ANOTHER_FUNC ___nesf2
+END_FUNC ___eqsf2
+
+START_FUNC ___gesf2
+        ;; Returns a value greater than or equal to zero if neither argument
+        ;; is a NaN and the first is greater than or equal to the second.
+START_ANOTHER_FUNC ___gtsf2
+        ;; Returns a value greater than zero if neither argument
+        ;; is NaN, and the first is strictly greater than the second.
+
+        movw    r8, #0xffff
+        br      $__int_cmp_common
+
+END_ANOTHER_FUNC ___gtsf2
+END_FUNC ___gesf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___unordsf2
+        ;; Returns a nonzero value if either argument is NaN, otherwise 0.
+
+        call    $!__int_eithernan
+        movw    r8, #0
+        sknz            ; this is from the call, not the movw
+        movw    r8, #1
+        ret
+
+END_FUNC ___unordsf2
+
+;; ----------------------------------------------------------
+
+START_FUNC ___fixsfsi
+        ;; Converts its floating point argument into a signed long,
+        ;; rounding toward zero.
+        ;; The behaviour with NaNs and Infinities is not well defined.
+        ;; We choose to return 0 for NaNs, INT_MIN for -inf and INT_MAX for +inf.
+        ;; This matches the behaviour of the C function in libgcc2.c.
+
+        ;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb).
+
+        ;; Special case handling for infinities as __fixunssfsi
+        ;; will not give us the values that we want.
+        movw    ax, sp
+        addw    ax, #4
+        movw    hl, ax
+        call    !!__int_isinf
+        bnz     $1f
+        mov     a, [SP+7]
+        bt      a.7, $2f
+        ;; +inf
+        movw    r8, #-1
+        movw    r10, #0x7fff
+        ret
+        ;; -inf
+2:      movw    r8, #0
+        movw    r10, #0x8000
+        ret
+
+        ;; Load the value into r10:r11:X:A
+1:      movw    ax, [SP+4]
+        movw    r10, ax
+        movw    ax, [SP+6]
+
+        ;; If the value is positive we can just use __fixunssfsi
+        bf      a.7, $__int_fixunssfsi
+
+        ;; Otherwise we negate the value, call __fixunssfsi and
+        ;; then negate its result.
+        clr1    a.7
+        call    $!__int_fixunssfsi
+
+        movw    ax, #0
+        subw    ax, r8
+        movw    r8, ax
+        movw    ax, #0
+        sknc
+        decw    ax
+        subw    ax, r10
+        movw    r10, ax
+
+        ;; Check for a positive result (which should only happen when
+        ;; __fixunssfsi returns UINTMAX or 0).  In such cases just return 0.
+        mov     a, r11
+        bt      a.7, $1f
+        movw    r10, #0x0
+        movw    r8, #0x0
+
+1:      ret
+
+END_FUNC ___fixsfsi
+
+START_FUNC ___fixunssfsi
+        ;; Converts its floating point argument into an unsigned long,
+        ;; rounding towards zero.  Negative arguments all become zero.
+        ;; We choose to return 0 for NaNs and -inf, but UINTMAX for +inf.
+        ;; This matches the behaviour of the C function in libgcc2.c.
+
+        ;; Input at [SP+4]..[SP+7], result is in (lsb) R8..R11 (msb)
+
+        ;; Get the input value.
+        movw    ax, [SP+4]
+        movw    r10, ax
+        movw    ax, [SP+6]
+
+        ;; Fall through into the internal function.
+
+        .global __int_fixunssfsi
+__int_fixunssfsi:
+        ;; Input in (lsb) r10.r11.x.a (msb).
+
+        ;; Test for a negative input.  We shift the other bits at the
+        ;; same time so that A ends up holding the whole exponent:
+        ;;
+        ;; before:
+        ;;   SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+        ;;      A        X       R11      R10
+        ;;
+        ;; after:
+        ;;   EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM
+        ;;      A        X       R11      R10
+        shlw    ax, 1
+        bnc     $1f
+
+        ;; Return zero.
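+        ;; (Label 2 is also reached for NaNs and for biased exponents
+        ;; below 127, i.e. magnitudes less than 1.0, which all convert
+        ;; to zero.)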
+2:      movw    r8, #0
+        movw    r10, #0
+        ret
+
+        ;; An exponent of -1 is either a NaN or infinity.
+1:      cmp     a, #-1
+        bnz     $3f
+        ;; For NaN we return 0.  For infinity we return UINTMAX.
+        mov     a, x
+        or      a, r10
+        or      a, r11
+        cmp0    a
+        bnz     $2b
+
+6:      movw    r8, #-1         ; -1 => UINT_MAX
+        movw    r10, #-1
+        ret
+
+        ;; If the exponent is negative the value is < 1 and so the
+        ;; converted value is 0.  Note we must allow for the bias
+        ;; applied to the exponent.  Thus a value of 127 in the
+        ;; EEEEEEEE bits actually represents an exponent of 0, whilst
+        ;; a value less than 127 actually represents a negative exponent.
+        ;; Also if the EEEEEEEE bits are all zero then this represents
+        ;; either a denormal value or 0.0.  Either way for these values
+        ;; we return 0.
+3:      sub     a, #127
+        bc      $2b
+
+        ;; A now holds the bias adjusted exponent, which is known to be >= 0.
+        ;; If the exponent is > 31 then the conversion will overflow.
+        cmp     a, #32
+        bnc     $6b
+4:
+        ;; Save the exponent in H.  We increment it by one because we want
+        ;; to be sure that the loop below will always execute at least once.
+        inc     a
+        mov     h, a
+
+        ;; Get the top 24 bits of the mantissa into A:X:R10
+        ;; Include the implicit 1-bit that is inherent in the IEEE fp format.
+        ;;
+        ;; before:
+        ;;   EEEEEEEE MMMMMMM0 MMMMMMMM MMMMMMMM
+        ;;      H        X       R11      R10
+        ;; after:
+        ;;   EEEEEEEE 1MMMMMMM MMMMMMMM MMMMMMMM
+        ;;      H        A        X       R10
+
+        mov     a, r11
+        xch     a, x
+        shr     a, 1
+        set1    a.7
+
+        ;; Clear B:C:R12:R13
+        movw    bc, #0
+        movw    r12, #0
+
+        ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
+        ;; decrementing the exponent as we go.
+
+        ;; before:
+        ;;   MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+        ;;      A        X       R10       B        C       R12      R13
+        ;; first iter:
+        ;;   MMMMMMMM MMMMMMMM MMMMMMM0 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxM
+        ;;      A        X       R10       B        C       R12      R13
+        ;; second iter:
+        ;;   MMMMMMMM MMMMMMMM MMMMMM00 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxMM
+        ;;      A        X       R10       B        C       R12      R13
+        ;; etc.
+5:
+        xch     a, r10
+        shl     a, 1
+        xch     a, r10
+
+        rolwc   ax, 1
+
+        xch     a, r13
+        rolc    a, 1
+        xch     a, r13
+
+        xch     a, r12
+        rolc    a, 1
+        xch     a, r12
+
+        rolwc   bc, 1
+
+        dec     h
+        bnz     $5b
+
+        ;; Result is currently in (lsb) r13. r12. c. b. (msb).
+        ;; Move it into (lsb) r8. r9. r10. r11 (msb).
+
+        mov     a, r13
+        mov     r8, a
+
+        mov     a, r12
+        mov     r9, a
+
+        mov     a, c
+        mov     r10, a
+
+        mov     a, b
+        mov     r11, a
+
+        ret
+
+END_FUNC ___fixunssfsi
+
+;; ------------------------------------------------------------------------
+
+START_FUNC ___floatsisf
+        ;; Converts its signed long argument into a floating point value.
+        ;; Argument in [SP+4]..[SP+7].  Result in R8..R11.
+
+        ;; Get the argument.
+        movw    ax, [SP+4]
+        movw    bc, ax
+        movw    ax, [SP+6]
+
+        ;; Test the sign bit.  If the value is positive then drop into
+        ;; the unsigned conversion routine.
+        bf      a.7, $2f
+
+        ;; If negative convert to positive ...
+        movw    hl, ax
+        movw    ax, #0
+        subw    ax, bc
+        movw    bc, ax
+        movw    ax, #0
+        sknc
+        decw    ax
+        subw    ax, hl
+
+        ;; If the result is negative then the input was 0x80000000 and
+        ;; we want to return -2^31 (0xcf000000), which will not happen
+        ;; if we call __int_floatunsisf.
+        bt      a.7, $1f
+
+        ;; Call the unsigned conversion routine.
+        call    $!__int_floatunsisf
+
+        ;; Negate the result.
+        set1    r11.7
+
+        ;; Done.
+        ret
+
+1:      ;; Return -2^31, aka 0xcf000000.
+
+        clrb    a
+        mov     r8, a
+        mov     r9, a
+        mov     r10, a
+        mov     a, #0xcf
+        mov     r11, a
+        ret
+
+START_ANOTHER_FUNC ___floatunsisf
+        ;; Converts its unsigned long argument into a floating point value.
+        ;; Argument in [SP+4]..[SP+7].  Result in R8..R11.
+
+        ;; Get the argument.
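+        ;; The low word of the argument is loaded into BC and the high
+        ;; word into AX, matching the entry convention of
+        ;; __int_floatunsisf below.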
+ movw ax, [SP+4] + movw bc, ax + movw ax, [SP+6] + +2: ;; Internal entry point from __floatsisf + ;; Input in AX (high) and BC (low) + .global __int_floatunsisf +__int_floatunsisf: + + ;; Special case handling for zero. + cmpw ax, #0 + bnz $1f + movw ax, bc + cmpw ax, #0 + movw ax, #0 + bnz $1f + + ;; Return 0.0 + movw r8, ax + movw r10, ax + ret + +1: ;; Pre-load the loop count/exponent. + ;; Exponents are biased by 0x80 and we start the loop knowing that + ;; we are going to skip the highest set bit. Hence the highest value + ;; that we can get for the exponent is 0x1e (bits from input) + 0x80 = 0x9e. + mov h, #0x9e + + ;; Move bits off the top of AX:BC until we hit a 1 bit. + ;; Decrement the count of remaining bits as we go. + +2: shlw bc, 1 + rolwc ax, 1 + bc $3f + dec h + br $2b + + ;; Ignore the first one bit - it is implicit in the IEEE format. + ;; The count of remaining bits is the exponent. + + ;; Assemble the final floating point value. We have... + ;; before: + ;; EEEEEEEE MMMMMMMM MMMMMMMM MMMMMMMM xxxxxxxx + ;; H A X B C + ;; after: + ;; 0EEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM + ;; R11 R10 R9 R8 + + +3: shrw ax, 1 + mov r10, a + mov a, x + mov r9, a + + mov a, b + rorc a, 1 + + ;; If the bottom bit of B was set before we shifted it out then we + ;; need to round the result up. Unless none of the bits in C are set. + ;; In this case we are exactly half-way between two values, and we + ;; round towards an even value. We round up by increasing the + ;; mantissa by 1. If this results in a zero mantissa we have to + ;; increment the exponent. We round down by ignoring the dropped bits. + + bnc $4f + cmp0 c + sknz + bf a.0, $4f + +5: ;; Round the mantissa up by 1. + add a, #1 + addc r9, #0 + addc r10, #0 + bf r10.7, $4f + inc h + clr1 r10.7 + +4: mov r8, a + mov a, h + shr a, 1 + mov r11, a + sknc + set1 r10.7 + ret + +END_ANOTHER_FUNC ___floatunsisf +END_FUNC ___floatsisf diff --git a/libgcc/config/rl78/fpmath-sf.S b/libgcc/config/rl78/fpmath-sf.S new file mode 100644 index 00000000000..f232f6b5f83 --- /dev/null +++ b/libgcc/config/rl78/fpmath-sf.S @@ -0,0 +1,1030 @@ +; SF format is: +; +; [sign] 1.[23bits] E[8bits(n-127)] +; +; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm +; +; [A+0] mmmmmmmm +; [A+1] mmmmmmmm +; [A+2] Emmmmmmm +; [A+3] SEEEEEEE +; +; Special values (xxx != 0): +; +; r11 r10 r9 r8 +; [HL+3] [HL+2] [HL+1] [HL+0] +; s1111111 10000000 00000000 00000000 infinity +; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN +; s0000000 00000000 00000000 00000000 zero +; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals +; +; Note that CMPtype is "signed char" for rl78 +; + +#include "vregs.h" + +#define Z PSW.6 + +; External Functions: +; +; __int_isnan [HL] -> Z if NaN +; __int_iszero [HL] -> Z if zero + +START_FUNC __int_isinf + ;; [HL] points to value, returns Z if it's #Inf + + mov a, [hl+2] + and a, #0x80 + mov x, a + mov a, [hl+3] + and a, #0x7f + cmpw ax, #0x7f80 + skz + ret ; return NZ if not NaN + mov a, [hl+2] + and a, #0x7f + or a, [hl+1] + or a, [hl] + ret + +END_FUNC __int_isinf + +START_FUNC _int_unpack_sf + ;; convert 32-bit SFmode [DE] to 6-byte struct [HL] ("A") + +#define A_SIGN [hl+0] /* byte */ +#define A_EXP [hl+2] /* word */ +#define A_FRAC_L [hl+4] /* word */ +#define A_FRAC_LH [hl+5] /* byte */ +#define A_FRAC_H [hl+6] /* word or byte */ +#define A_FRAC_HH [hl+7] /* byte */ + +#define B_SIGN [hl+8] +#define B_EXP [hl+10] +#define B_FRAC_L [hl+12] +#define B_FRAC_LH [hl+13] +#define B_FRAC_H [hl+14] +#define B_FRAC_HH [hl+15] + + mov a, [de+3] + sar a, 7 + mov A_SIGN, a + + movw 
ax, [de+2] + and a, #0x7f + shrw ax, 7 + movw bc, ax ; remember if the exponent is all zeros + subw ax, #127 ; exponent is now non-biased + movw A_EXP, ax + + movw ax, [de] + movw A_FRAC_L, ax + + mov a, [de+2] + and a, #0x7f + cmp0 c ; if the exp is all zeros, it's denormal + skz + or a, #0x80 + mov A_FRAC_H, a + + mov a, #0 + mov A_FRAC_HH, a + + ;; rounding-bit-shift + movw ax, A_FRAC_L + shlw ax, 1 + movw A_FRAC_L, ax + mov a, A_FRAC_H + rolc a, 1 + mov A_FRAC_H, a + mov a, A_FRAC_HH + rolc a, 1 + mov A_FRAC_HH, a + + ret + +END_FUNC _int_unpack_sf + +; func(SF a,SF b) +; [SP+4..7] a +; [SP+8..11] b + +START_FUNC ___subsf3 + + ;; a - b => a + (-b) + + ;; Note - we cannot just change the sign of B on the stack and + ;; then fall through into __addsf3. The stack'ed value may be + ;; used again (it was created by our caller after all). Instead + ;; we have to allocate some stack space of our own, copy A and B, + ;; change the sign of B, call __addsf3, release the allocated stack + ;; and then return. + + subw sp, #8 + movw ax, [sp+4+8] + movw [sp], ax + movw ax, [sp+4+2+8] + movw [sp+2], ax + movw ax, [sp+4+4+8] + movw [sp+4], ax + mov a, [sp+4+6+8] + mov [sp+6], a + mov a, [sp+4+7+8] + xor a, #0x80 + mov [sp+7], a + call $!___addsf3 + addw sp, #8 + ret +END_FUNC ___subsf3 + +START_FUNC ___addsf3 + + ;; if (isnan(a)) return a + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_isnan + bnz $1f +ret_a: + movw ax, [sp+4] + movw r8, ax + movw ax, [sp+6] + movw r10, ax + ret + +1: ;; if (isnan (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_isnan + bnz $2f +ret_b: + movw ax, [sp+8] + movw r8, ax + movw ax, [sp+10] + movw r10, ax + ret + +2: ;; if (isinf (a)) + movw ax, sp + addw ax, #4 + movw hl, ax + call $!__int_isinf + bnz $3f + + ;; if (isinf (b) && a->sign != b->sign) return NaN + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $ret_a + + mov a, [sp+7] + mov h, a + mov a, [sp+11] + xor a, h + bf a.7, $ret_a + + movw r8, #0x0001 + movw r10, #0x7f80 + ret + +3: ;; if (isinf (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bz $ret_b + + ;; if (iszero (b)) + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $4f + + ;; if (iszero (a)) + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bnz $ret_a + + movw ax, [sp+4] + movw r8, ax + mov a, [sp+7] + mov h, a + movw ax, [sp+10] + and a, h + movw r10, ax + ret + +4: ;; if (iszero (a)) return b; + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bz $ret_b + +; Normalize the two numbers relative to each other. At this point, +; we need the numbers converted to their "unpacked" format. + + subw sp, #16 ; Save room for two unpacked values. 
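+        ;; Each unpacked value occupies 8 bytes: a sign byte at +0, an
+        ;; unbiased exponent word at +2, and a 4-byte fraction at +4
+        ;; which _int_unpack_sf has already shifted left one bit to
+        ;; hold a rounding bit.  A is unpacked at [SP+0], B at [SP+8].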
+ + movw ax, sp + movw hl, ax + addw ax, #16+4 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + addw ax, #8 + movw hl, ax + addw ax, #16+8-8 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + movw hl, ax + + ;; diff = a.exponent - b.exponent + movw ax, B_EXP ; sign/exponent word + movw bc, ax + movw ax, A_EXP ; sign/exponent word + + subw ax, bc ; a = a.exp - b.exp + movw de, ax ; d = sdiff + + ;; if (diff < 0) diff = -diff + bf a.7, $1f + xor a, #0xff + xor r_0, #0xff ; x + incw ax ; a = diff +1: + ;; if (diff >= 23) zero the smaller one + cmpw ax, #24 + bc $.L661 ; if a < 23 goto 661 + + ;; zero out the smaller one + + movw ax, de + bt a.7, $1f ; if sdiff < 0 (a_exp < b_exp) goto 1f + ;; "zero out" b + movw ax, A_EXP + movw B_EXP, ax + movw ax, #0 + movw B_FRAC_L, ax + movw B_FRAC_H, ax + br $5f +1: + ;; "zero out" a + movw ax, B_EXP + movw A_EXP, ax + movw ax, #0 + movw A_FRAC_L, ax + movw A_FRAC_H, ax + + br $5f +.L661: + ;; shift the smaller one so they have the same exponents +1: + movw ax, de + bt a.7, $1f + cmpw ax, #0 ; sdiff > 0 + bnh $1f ; if (sdiff <= 0) goto 1f + + decw de + incw B_EXP ; because it's [HL+byte] + + movw ax, B_FRAC_H + shrw ax, 1 + movw B_FRAC_H, ax + mov a, B_FRAC_LH + rorc a, 1 + mov B_FRAC_LH, a + mov a, B_FRAC_L + rorc a, 1 + mov B_FRAC_L, a + + br $1b +1: + movw ax, de + bf a.7, $1f + + incw de + incw A_EXP ; because it's [HL+byte] + + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + + br $1b +1: + +5: ;; At this point, A and B have the same exponent. + + mov a, A_SIGN + cmp a, B_SIGN + bnz $1f + + ;; Same sign, just add. + movw ax, A_FRAC_L + addw ax, B_FRAC_L + movw A_FRAC_L, ax + mov a, A_FRAC_H + addc a, B_FRAC_H + mov A_FRAC_H, a + mov a, A_FRAC_HH + addc a, B_FRAC_HH + mov A_FRAC_HH, a + + br $.L728 + +1: ;; Signs differ - A has A_SIGN still. 
+ bf a.7, $.L696 + + ;; A is negative, do B-A + movw ax, B_FRAC_L + subw ax, A_FRAC_L + movw A_FRAC_L, ax + mov a, B_FRAC_H + subc a, A_FRAC_H + mov A_FRAC_H, a + mov a, B_FRAC_HH + subc a, A_FRAC_HH + mov A_FRAC_HH, a + + br $.L698 +.L696: + ;; B is negative, do A-B + movw ax, A_FRAC_L + subw ax, B_FRAC_L + movw A_FRAC_L, ax + mov a, A_FRAC_H + subc a, B_FRAC_H + mov A_FRAC_H, a + mov a, A_FRAC_HH + subc a, B_FRAC_HH + mov A_FRAC_HH, a + +.L698: + ;; A is still A_FRAC_HH + bt a.7, $.L706 + + ;; subtraction was positive + mov a, #0 + mov A_SIGN, a + br $.L712 + +.L706: + ;; subtraction was negative + mov a, #0xff + mov A_SIGN, a + + ;; This negates A_FRAC + mov a, A_FRAC_L + xor a, #0xff ; XOR doesn't mess with carry + add a, #1 ; INC doesn't set the carry + mov A_FRAC_L, a + mov a, A_FRAC_LH + xor a, #0xff + addc a, #0 + mov A_FRAC_LH, a + mov a, A_FRAC_H + xor a, #0xff + addc a, #0 + mov A_FRAC_H, a + mov a, A_FRAC_HH + xor a, #0xff + addc a, #0 + mov A_FRAC_HH, a + +.L712: + ;; Renormalize the subtraction + + mov a, A_FRAC_L + or a, A_FRAC_LH + or a, A_FRAC_H + or a, A_FRAC_HH + bz $.L728 + + ;; Mantissa is not zero, left shift until the MSB is in the + ;; right place +1: + movw ax, A_FRAC_H + cmpw ax, #0x0200 + bnc $.L728 + + decw A_EXP + + movw ax, A_FRAC_L + shlw ax, 1 + movw A_FRAC_L, ax + movw ax, A_FRAC_H + rolwc ax, 1 + movw A_FRAC_H, ax + br $1b + +.L728: + ;; normalize A and pack it + + movw ax, A_FRAC_H + cmpw ax, #0x01ff + bnh $1f + ;; overflow in the mantissa; adjust + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + incw A_EXP +1: + + call $!__rl78_int_pack_a_r8 + addw sp, #16 + ret + +END_FUNC ___addsf3 + +START_FUNC __rl78_int_pack_a_r8 + ;; pack A to R8 + movw ax, A_EXP + addw ax, #126 ; not 127, we want the "bt/bf" test to check for denormals + + bf a.7, $1f + ;; make a denormal +2: + movw bc, ax + movw ax, A_FRAC_H + shrw ax, 1 + movw A_FRAC_H, ax + mov a, A_FRAC_LH + rorc a, 1 + mov A_FRAC_LH, a + mov a, A_FRAC_L + rorc a, 1 + mov A_FRAC_L, a + movw ax, bc + incw ax + bt a.7, $2b + decw ax +1: + incw ax ; now it's as if we added 127 + movw A_EXP, ax + + cmpw ax, #0xfe + bnh $1f + ;; store #Inf instead + mov a, A_SIGN + or a, #0x7f + mov x, #0x80 + movw r10, ax + movw r8, #0 + ret + +1: + bf a.7, $1f ; note AX has EXP at top of loop + ;; underflow, denormal? 
+        movw    ax, A_FRAC_H
+        shrw    ax, 1
+        movw    A_FRAC_H, ax
+        mov     a, A_FRAC_LH
+        rorc    a, 1
+        mov     A_FRAC_LH, a
+        mov     a, A_FRAC_L
+        rorc    a, 1
+        mov     A_FRAC_L, a
+        incw    A_EXP
+        movw    ax, A_EXP
+        br      $1b
+
+1:
+        ;; undo the rounding-bit-shift
+        mov     a, A_FRAC_L
+        bf      a.0, $1f
+        ;; round up
+        movw    ax, A_FRAC_L
+        addw    ax, #1
+        movw    A_FRAC_L, ax
+        sknc
+        incw    A_FRAC_H
+1:
+        movw    ax, A_FRAC_H
+        shrw    ax, 1
+        movw    A_FRAC_H, ax
+        mov     a, A_FRAC_LH
+        rorc    a, 1
+        mov     A_FRAC_LH, a
+        mov     a, A_FRAC_L
+        rorc    a, 1
+        mov     A_FRAC_L, a
+
+        movw    ax, A_FRAC_L
+        movw    r8, ax
+
+        or      a, x
+        or      a, A_FRAC_H
+        or      a, A_FRAC_HH
+        bnz     $1f
+        movw    ax, #0
+        movw    A_EXP, ax
+1:
+        mov     a, A_FRAC_H
+        and     a, #0x7f
+        mov     b, a
+        mov     a, A_EXP
+        shl     a, 7
+        or      a, b
+        mov     r10, a
+
+        mov     a, A_SIGN
+        and     a, #0x80
+        mov     b, a
+        mov     a, A_EXP
+        shr     a, 1
+        or      a, b
+        mov     r11, a
+
+        ret
+END_FUNC __rl78_int_pack_a_r8
+
+START_FUNC ___mulsf3
+
+        ;; if (isnan(a)) return a
+        movw    ax, sp
+        addw    ax, #4
+        movw    hl, ax
+        call    !!__int_isnan
+        bnz     $1f
+mret_a:
+        movw    ax, [sp+4]
+        movw    r8, ax
+        mov     a, [sp+11]
+        and     a, #0x80
+        mov     b, a
+        movw    ax, [sp+6]
+        xor     a, b            ; sign is always a ^ b
+        movw    r10, ax
+        ret
+1:
+        ;; if (isnan (b)) return b;
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    !!__int_isnan
+        bnz     $1f
+mret_b:
+        movw    ax, [sp+8]
+        movw    r8, ax
+        mov     a, [sp+7]
+        and     a, #0x80
+        mov     b, a
+        movw    ax, [sp+10]
+        xor     a, b            ; sign is always a ^ b
+        movw    r10, ax
+        ret
+1:
+        ;; if (isinf (a)) return (b==0) ? nan : a
+        movw    ax, sp
+        addw    ax, #4
+        movw    hl, ax
+        call    $!__int_isinf
+        bnz     $.L805
+
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    !!__int_iszero
+        bnz     $mret_a
+
+        movw    r8, #0x0001     ; return NaN
+        movw    r10, #0x7f80
+        ret
+
+.L805:
+        ;; if (isinf (b)) return (a==0) ? nan : b
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    $!__int_isinf
+        bnz     $.L814
+
+        movw    ax, sp
+        addw    ax, #4
+        movw    hl, ax
+        call    !!__int_iszero
+        bnz     $mret_b
+
+        movw    r8, #0x0001     ; return NaN
+        movw    r10, #0x7f80
+        ret
+
+.L814:
+        movw    ax, sp
+        addw    ax, #4
+        movw    hl, ax
+        call    !!__int_iszero
+        bz      $mret_a
+
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        call    !!__int_iszero
+        bz      $mret_b
+
+        ;; at this point, we're doing the multiplication.
+
+        subw    sp, #16         ; save room for two unpacked values
+
+        movw    ax, sp
+        movw    hl, ax
+        addw    ax, #16+4
+        movw    de, ax
+        call    $!_int_unpack_sf
+
+        movw    ax, sp
+        addw    ax, #8
+        movw    hl, ax
+        addw    ax, #16+8-8
+        movw    de, ax
+        call    $!_int_unpack_sf
+
+        movw    ax, sp
+        movw    hl, ax
+
+        ;; multiply SI a.FRAC * SI b.FRAC to DI r8
+
+        subw    sp, #16
+        movw    ax, A_FRAC_L
+        movw    [sp+0], ax
+        movw    ax, A_FRAC_H
+        movw    [sp+2], ax
+
+        movw    ax, B_FRAC_L
+        movw    [sp+8], ax
+        movw    ax, B_FRAC_H
+        movw    [sp+10], ax
+
+        movw    ax, #0
+        movw    [sp+4], ax
+        movw    [sp+6], ax
+        movw    [sp+12], ax
+        movw    [sp+14], ax
+
+        call    !!___muldi3     ; MTMPa * MTMPb -> R8..R15
+        addw    sp, #16
+
+        movw    ax, sp
+        movw    hl, ax
+
+        ;; add the exponents together
+        movw    ax, A_EXP
+        addw    ax, B_EXP
+        movw    bc, ax          ; exponent in BC
+
+        ;; now, re-normalize the DI value in R8..R15 to have the
+        ;; MSB in the "right" place, adjusting BC as we shift it.
+ + ;; The value will normally be in this range: + ;; R15 R8 + ;; 0001_0000_0000_0000 + ;; 0003_ffff_fc00_0001 + + ;; so to speed it up, we normalize to: + ;; 0001_xxxx_xxxx_xxxx + ;; then extract the bytes we want (r11-r14) + +1: + mov a, r15 + cmp0 a + bnz $2f + mov a, r14 + and a, #0xfe + bz $1f +2: + ;; shift right, inc exponent + movw ax, r14 + shrw ax, 1 + movw r14, ax + mov a, r13 + rorc a, 1 + mov r13, a + mov a, r12 + rorc a, 1 + mov r12, a + mov a, r11 + rorc a, 1 + mov r11, a + ;; we don't care about r8/r9/r10 if we're shifting this way + incw bc + br $1b +1: + mov a, r15 + or a, r14 + bnz $1f + ;; shift left, dec exponent + movw ax, r8 + shlw ax, 1 + movw r8, ax + movw ax, r10 + rolwc ax, 1 + movw r10, ax + movw ax, r12 + rolwc ax, 1 + movw r12, ax + movw ax, r14 + rolwc ax, 1 + movw r14, ax + decw bc + br $1b +1: + ;; at this point, FRAC is in R11..R14 and EXP is in BC + movw ax, bc + movw A_EXP, ax + + mov a, r11 + mov A_FRAC_L, a + mov a, r12 + mov A_FRAC_LH, a + mov a, r13 + mov A_FRAC_H, a + mov a, r14 + mov A_FRAC_HH, a + + mov a, A_SIGN + xor a, B_SIGN + mov A_SIGN, a + + call $!__rl78_int_pack_a_r8 + + addw sp, #16 + ret + +END_FUNC ___mulsf3 + +START_FUNC ___divsf3 + + ;; if (isnan(a)) return a + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_isnan + bnz $1f +dret_a: + movw ax, [sp+4] + movw r8, ax + mov a, [sp+11] + and a, #0x80 + mov b, a + movw ax, [sp+6] + xor a, b ; sign is always a ^ b + movw r10, ax + ret +1: + ;; if (isnan (b)) return b; + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_isnan + bnz $1f +dret_b: + movw ax, [sp+8] + movw r8, ax + mov a, [sp+7] + and a, #0x80 + mov b, a + movw ax, [sp+10] + xor a, b ; sign is always a ^ b + movw r10, ax + ret +1: + + ;; if (isinf (a)) return isinf(b) ? nan : a + + movw ax, sp + addw ax, #4 + movw hl, ax + call $!__int_isinf + bnz $1f + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $dret_a +dret_nan: + movw r8, #0x0001 ; return NaN + movw r10, #0x7f80 + ret + +1: + + ;; if (iszero (a)) return iszero(b) ? nan : a + + movw ax, sp + addw ax, #4 + movw hl, ax + call !!__int_iszero + bnz $1f + + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $dret_a + br $dret_nan + +1: + ;; if (isinf (b)) return 0 + + movw ax, sp + addw ax, #8 + movw hl, ax + call $!__int_isinf + bnz $1f + + mov a, [sp+7] + mov b, a + mov a, [sp+11] + xor a, b + and a, #0x80 + mov r11, a + movw r8, #0 + mov r10, #0 + ret + +1: + ;; if (iszero (b)) return Inf + + movw ax, sp + addw ax, #8 + movw hl, ax + call !!__int_iszero + bnz $1f + + mov a, [sp+7] + mov b, a + mov a, [sp+11] + xor a, b + or a, #0x7f + mov r11, a + movw r8, #0 + mov r10, #0x80 + ret +1: + + ;; at this point, we're doing the division. 
Normalized + ;; mantissas look like: + ;; 01.xx.xx.xx + ;; so we divide: + ;; 01.xx.xx.xx.00.00.00.00 + ;; by 01.xx.xx.xx + ;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00 + + + subw sp, #16 ; save room for two unpacked values + + movw ax, sp + movw hl, ax + addw ax, #16+4 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + addw ax, #8 + movw hl, ax + addw ax, #16+8-8 + movw de, ax + call $!_int_unpack_sf + + movw ax, sp + movw hl, ax + + ;; divide DI a.FRAC / SI b.FRAC to DI r8 + + subw sp, #16 + movw ax, A_FRAC_L + movw [sp+4], ax + movw ax, A_FRAC_H + movw [sp+6], ax + + movw ax, B_FRAC_L + movw [sp+8], ax + movw ax, B_FRAC_H + movw [sp+10], ax + + movw ax, #0 + movw [sp+0], ax + movw [sp+2], ax + movw [sp+12], ax + movw [sp+14], ax + + call !!___divdi3 ; MTMPa / MTMPb -> R8..R15 + addw sp, #16 + + movw ax, sp + movw hl, ax + + ;; subtract the exponents A - B + movw ax, A_EXP + subw ax, B_EXP + movw bc, ax ; exponent in BC + + ;; now, re-normalize the DI value in R8..R15 to have the + ;; MSB in the "right" place, adjusting BC as we shift it. + + ;; The value will normally be in this range: + ;; R15 R8 + ;; 0000_0000_8000_0000 + ;; 0000_0001_ffff_ff00 + + ;; so to speed it up, we normalize to: + ;; 0000_0001_xxxx_xxxx + ;; then extract the bytes we want (r9-r12) + +1: + movw ax, r14 + cmpw ax, #0 + bnz $2f + movw ax, r12 + cmpw ax, #1 + bnh $1f +2: + ;; shift right, inc exponent + movw ax, r14 + shrw ax, 1 + movw r14, ax + mov a, r13 + rorc a, 1 + mov r13, a + mov a, r12 + rorc a, 1 + mov r12, a + mov a, r11 + rorc a, 1 + mov r11, a + mov a, r10 + rorc a, 1 + mov r10, a + mov a, r9 + rorc a, 1 + mov r9, a + mov a, r8 + rorc a, 1 + mov r8, a + + incw bc + br $1b +1: + ;; the previous loop leaves r15.r13 zero + mov a, r12 + cmp0 a + bnz $1f + ;; shift left, dec exponent + movw ax, r8 + shlw ax, 1 + movw r8, ax + movw ax, r10 + rolwc ax, 1 + movw r10, ax + movw ax, r12 + rolwc ax, 1 + movw r12, ax + ;; don't need to do r14 + decw bc + br $1b +1: + ;; at this point, FRAC is in R8..R11 and EXP is in BC + movw ax, bc + movw A_EXP, ax + + mov a, r9 + mov A_FRAC_L, a + mov a, r10 + mov A_FRAC_LH, a + mov a, r11 + mov A_FRAC_H, a + mov a, r12 + mov A_FRAC_HH, a + + mov a, A_SIGN + xor a, B_SIGN + mov A_SIGN, a + + call $!__rl78_int_pack_a_r8 + + addw sp, #16 + ret + +END_FUNC ___divsf3 diff --git a/libgcc/config/rl78/lshrsi3.S b/libgcc/config/rl78/lshrsi3.S index 176e6deff45..164917932fe 100644 --- a/libgcc/config/rl78/lshrsi3.S +++ b/libgcc/config/rl78/lshrsi3.S @@ -22,11 +22,7 @@ #include "vregs.h" - .text - .global ___lshrsi3 - .type ___lshrsi3, @function -___lshrsi3: - +START_FUNC ___lshrsi3 ;; input: ;; ;; [zero] @@ -46,7 +42,6 @@ ___lshrsi3: ;; B - count mov a, [sp+8] ; A now contains the count - cmp a, #0x20 bc $.Lcount_is_normal @@ -113,4 +108,4 @@ ___lshrsi3: br $.Lloop_top - .size ___lshrsi3, .-___lshrsi3 +END_FUNC ___lshrsi3 diff --git a/libgcc/config/rl78/mulsi3.S b/libgcc/config/rl78/mulsi3.S index 5d04ac23afb..c19865f78e5 100644 --- a/libgcc/config/rl78/mulsi3.S +++ b/libgcc/config/rl78/mulsi3.S @@ -33,6 +33,18 @@ ; DE count (resL-tmp) ; HL [sp+4] +; Register use (G10): +; +; AX op2L +; BC op2H +; DE count +; HL [sp+4] +; r8/r9 res32L +; r10/r11 (resH) +; r12/r13 (resL-tmp) +; r16/r17 res32H +; r18/r19 op1 + START_FUNC ___mulsi3 ;; A is at [sp+4] ;; B is at [sp+8] @@ -159,7 +171,7 @@ START_FUNC ___mulsi3 sknc incw ax addw ax, r_2 -.Lmul_hisi_no_add: +.Lmul_hisi_no_add: sel rb1 shlw bc, 1 sel rb0 @@ -267,3 +279,45 @@ START_FUNC ___mulhi3 .Lmul_hi_done: ret END_FUNC ___mulhi3 + 
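+
+;; The G10 version of ___mulqi3 below is a plain shift-and-add
+;; multiply.  As a rough C model of the same algorithm (illustrative
+;; only, not part of the library):
+;;
+;;   unsigned char res = 0;
+;;   while (op2 != 0)        /* at most 8 useful iterations */
+;;     {
+;;       if (op2 & 1)        /* low bit set: add shifted multiplicand */
+;;         res += op1;
+;;       op1 <<= 1;
+;;       op2 >>= 1;
+;;     }
+;;   return res;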
+;;; -------------------------------------- +#ifdef __RL78_G10__ + START_FUNC ___mulqi3 + + mov a, [sp+4] + mov r9, a + mov a, [sp+6] + mov r10, a + mov a, #9 + mov r11, a + clrb a + mov r8, a +.L2: + cmp0 r10 + skz + dec r11 + sknz + ret + mov a, r10 + and a, #1 + mov r12, a + cmp0 r12 + sknz + br !!.L3 + mov a, r9 + mov l, a + mov a, r8 + add a, l + mov r8, a +.L3: + mov a, r9 + add a, a + mov r9, a + mov a, r10 + shr a, 1 + mov r10, a + br !!.L2 + + END_FUNC ___mulqi3 +#endif + diff --git a/libgcc/config/rl78/signbit.S b/libgcc/config/rl78/signbit.S index cb1105fec5b..d315e1c83ca 100644 --- a/libgcc/config/rl78/signbit.S +++ b/libgcc/config/rl78/signbit.S @@ -37,11 +37,9 @@ .text - .global _signbit -_signbit: - .global _signbitf -_signbitf: - ;; X is at [sp+4] +START_FUNC _signbit +START_ANOTHER_FUNC _signbitf + ;; X is at [sp+4]..[SP+7] ;; result is in R8..R9 movw r8, #0 @@ -50,12 +48,12 @@ _signbitf: sknc movw r8, #1 ret - .size _signbit, . - _signbit - .size _signbitf, . - _signbitf +END_ANOTHER_FUNC _signbitf +END_FUNC _signbit - .global _signbitl -_signbitl: - ;; X is at [sp+4] + +START_FUNC _signbitl + ;; X is at [sp+4]..[SP+7] ;; result is in R8..R9 movw r8, #0 @@ -64,4 +62,4 @@ _signbitl: sknc movw r8, #1 ret - .size _signbitl, . - _signbitl +END_FUNC _signbitl diff --git a/libgcc/config/rl78/t-rl78 b/libgcc/config/rl78/t-rl78 index 59b1f75920e..e030c99c1bd 100644 --- a/libgcc/config/rl78/t-rl78 +++ b/libgcc/config/rl78/t-rl78 @@ -20,8 +20,6 @@ LIB2ADD = \ $(srcdir)/config/rl78/trampoline.S \ - $(srcdir)/config/rl78/lib2div.c \ - $(srcdir)/config/rl78/lib2mul.c \ $(srcdir)/config/rl78/lib2shift.c \ $(srcdir)/config/rl78/lshrsi3.S \ $(srcdir)/config/rl78/mulsi3.S \ @@ -29,6 +27,22 @@ LIB2ADD = \ $(srcdir)/config/rl78/divmodhi.S \ $(srcdir)/config/rl78/divmodqi.S \ $(srcdir)/config/rl78/signbit.S \ + $(srcdir)/config/rl78/bit-count.S \ + $(srcdir)/config/rl78/fpbit-sf.S \ + $(srcdir)/config/rl78/fpmath-sf.S \ $(srcdir)/config/rl78/cmpsi2.S +LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \ + _popcounthi2 _popcountsi2 \ + _parityhi2 _paritysi2 _ffssi2 _ffshi2 \ + _negate_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \ + _lt_sf _le_sf _unord_sf \ + _si_to_sf _usi_to_sf \ + _sf_to_si _sf_to_usi \ + _fixunssfsi _fixsfsi \ + _addsub_sf _mul_sf _div_sf + +# Remove __gcc_bcmp from LIB2FUNCS_ST +LIB2FUNCS_ST = _eprintf + HOST_LIBGCC2_CFLAGS += -Os -ffunction-sections -fdata-sections diff --git a/libgcc/config/rl78/trampoline.S b/libgcc/config/rl78/trampoline.S index 357e88976d4..9ea8fc4a82b 100644 --- a/libgcc/config/rl78/trampoline.S +++ b/libgcc/config/rl78/trampoline.S @@ -80,14 +80,10 @@ trampoline_array_end: pointer in R10, allocate a trampoline and return its address in R8. */ - .text - .global ___trampoline_init - .type ___trampoline_init, @function -___trampoline_init: - +START_FUNC ___trampoline_init movw hl, #trampoline_array -1: - movw ax, [hl + TO_ADDR] + +1: movw ax, [hl + TO_ADDR] cmpw ax, #0 bz $2f @@ -107,30 +103,27 @@ ___trampoline_init: movw ax, [hl + TO_STUB] movw r8, ax - ret - .size ___trampoline_init, . 
- ___trampoline_init +END_FUNC ___trampoline_init - .global ___trampoline_uninit - .type ___trampoline_uninit, @function -___trampoline_uninit: + +START_FUNC ___trampoline_uninit movw hl, #trampoline_array movw ax, sp movw bc, ax -1: - movw ax, [hl + TO_FRAME] + +1: movw ax, [hl + TO_FRAME] cmpw ax, bc bc $2f clrw ax movw [hl + TO_ADDR], ax -2: - movw ax, hl +2: movw ax, hl addw ax, #TO_SIZE movw hl, ax cmpw ax, #trampoline_array_end bnz $1b ret - .size ___trampoline_uninit, . - ___trampoline_uninit +END_FUNC ___trampoline_uninit diff --git a/libgcc/config/rl78/vregs.h b/libgcc/config/rl78/vregs.h index d5209e20fa5..f1bc5c24761 100644 --- a/libgcc/config/rl78/vregs.h +++ b/libgcc/config/rl78/vregs.h @@ -55,17 +55,25 @@ r23 = 0xffeef #endif +.macro START_ANOTHER_FUNC name + .global \name + .type \name , @function +\name: +.endm + /* Start a function in its own section, so that it can be subject to linker garbage collection. */ .macro START_FUNC name .pushsection .text.\name,"ax",@progbits - .global \name - .type \name , @function -\name: + START_ANOTHER_FUNC \name +.endm + +.macro END_ANOTHER_FUNC name + .size \name , . - \name .endm /* End the function. Set the size. */ .macro END_FUNC name - .size \name , . - \name + END_ANOTHER_FUNC \name .popsection .endm
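The START_ANOTHER_FUNC/END_ANOTHER_FUNC macros exist so that several
entry points can share a single body and section, as signbit.S does
with _signbit and _signbitf above.  A minimal sketch of the intended
pairing (hypothetical function names, not from the patch):

    START_FUNC ___foo            ; opens .text.___foo, emits .global/.type and the label
    START_ANOTHER_FUNC ___foo2   ; second entry point in the same section
        ret
    END_ANOTHER_FUNC ___foo2     ; emits .size for ___foo2
    END_FUNC ___foo              ; emits .size for ___foo and pops the section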